<a href="https://colab.research.google.com/github/imammarzuki/Eksperimen/blob/main/HybridCF_CBF_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
!pip install scikit-surprise



In [8]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import mean_absolute_error, mean_squared_error, confusion_matrix
from surprise import Dataset, Reader, KNNBasic
from surprise.model_selection import train_test_split
from surprise import accuracy

In [9]:
data = {
    'user_id': [1, 1, 2, 2, 3, 3, 4, 4],
    'place_id': [101, 102, 101, 103, 102, 104, 101, 104],
    'place_name': ['Place A', 'Place B', 'Place C', 'Place D', 'Place E', 'Place F', 'Place G', 'Place H'],
    'description': [
        'beautiful garden view', 'lake view near mountains', 'urban center shopping district',
        'quiet woodland area', 'historic town center', 'lively night district', 'peaceful seaside area',
        'scenic rural landscape'
    ],
    'price': [120, 150, 200, 90, 80, 220, 130, 110],
    'rating1': [5, 4, 5, 2, 3, 4, 1, 2],
    'rating2': [3, 3, 4, 1, 2, 5, 1, 2]
}

df = pd.DataFrame(data)
df['average_rating'] = df[['rating1', 'rating2']].mean(axis=1)

In [10]:
tfidf_vectorizer = TfidfVectorizer()
tfidf_matrix = tfidf_vectorizer.fit_transform(df['description'])

# Menghitung cosine similarity matrix
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

In [11]:
# Data untuk CF
reader = Reader(rating_scale=(1, 10))
data = Dataset.load_from_df(df[['user_id', 'place_id', 'average_rating']], reader)
trainset, testset = train_test_split(data, test_size=0.25)

# Menggunakan KNN
algo = KNNBasic(sim_options={'name': 'cosine', 'user_based': True})
algo.fit(trainset)
predictions = algo.test(testset)

Computing the cosine similarity matrix...
Done computing similarity matrix.


In [12]:
# Prediksi CF
cf_predictions = np.array([pred.est for pred in predictions])

# Gabungkan dengan CBF
final_predictions = []
for pred in predictions:
    place_idx = df[df['place_id'] == pred.iid].index[0]
    cbf_score = np.mean(cosine_sim[place_idx])
    final_score = (cbf_score + pred.est) / 2
    final_predictions.append(final_score)

# Actual ratings
actual_ratings = np.array([pred.r_ui for pred in predictions])

# Hitung metrik
mae = mean_absolute_error(actual_ratings, final_predictions)
rmse = np.sqrt(mean_squared_error(actual_ratings, final_predictions))
cm = confusion_matrix(actual_ratings.round(), np.array(final_predictions).round())

print(f"MAE: {mae}")
print(f"RMSE: {rmse}")
print("Confusion Matrix:\n", cm)

MAE: 0.5011830490710324
RMSE: 0.5827985978300931
Confusion Matrix:
 [[2]]
