import dataset dan library

In [1]:
!pip install scikit-surprise

Collecting scikit-surprise
  Downloading scikit-surprise-1.1.3.tar.gz (771 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m772.0/772.0 kB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.3-cp310-cp310-linux_x86_64.whl size=3163758 sha256=e7ba74c0f76e58519b1d1b4939f8a95cd8e76f9131b3e19c81c342ee8c054694
  Stored in directory: /root/.cache/pip/wheels/a5/ca/a8/4e28def53797fdc4363ca4af740db15a9c2f1595ebc51fb445
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.3


In [2]:
import pandas as pd
import numpy as np
from surprise import Dataset, Reader, SVD
from surprise import accuracy
from surprise import BaselineOnly
from surprise.model_selection import cross_validate, train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer

In [3]:
df = pd.read_excel('.xlsx')
print(df.head())

   user_id  recipe_id  rating           dateLastModified  \
0        1     229860       5  2012-10-31T17:54:41.867\n   
1        2     229921       5   2012-11-17T16:49:10.54\n   
2        2     229940       5  2012-11-18T16:15:56.177\n   
3        2     229940       5   2012-11-19T19:12:26.82\n   
4        2     229906       5   2012-11-22T19:06:07.08\n   

                          recipe_name  
0  Chef John's Pumpkin Cinnamon Rolls  
1  Apple Chicken Sausage Brunch Braid  
2                 Chicken Ranch Dijon  
3                 Chicken Ranch Dijon  
4     Mom's Candied Yams with Caramel  


In [4]:
content_df = df[['recipe_id', 'recipe_name']]
content_df['Content'] = content_df.apply(lambda row: ' '.join(row.dropna().astype(str)), axis=1)


tfidf_vectorizer = TfidfVectorizer()
content_matrix = tfidf_vectorizer.fit_transform(content_df['Content'])
from sklearn.metrics.pairwise import euclidean_distances
content_distance = euclidean_distances(content_matrix)
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(df[['user_id', 'recipe_id', 'rating']], reader)

def get_content_based_recommendations(product_id, top_n):
    index = content_df[content_df['recipe_id'] == product_id].index[0]
    distance_scores = content_distance[index]
    similar_indices = distance_scores.argsort()[:top_n + 1]
    recommendations = content_df.loc[similar_indices, 'recipe_id'].values
    return recommendations


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  content_df['Content'] = content_df.apply(lambda row: ' '.join(row.dropna().astype(str)), axis=1)


In [16]:
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(df[['user_id', 'recipe_id', 'rating']], reader)

trainset, testset = train_test_split(data, test_size=.25)
all_predictions = []


for user_id, product_id, true_rating in testset:
    content_based_recommendations = get_content_based_recommendations(product_id, top_n=10)
    if product_id in content_based_recommendations:
        predicted_rating = 5.0
    else:
        predicted_rating = 1.0
    all_predictions.append((user_id, product_id, true_rating, predicted_rating,None))
accuracy.rmse(all_predictions)
accuracy.mae(all_predictions)

RMSE: 0.8764
MAE:  0.4320


0.432

In [7]:
algo = SVD()
trainset = data.build_full_trainset()
algo.fit(trainset)

def get_collaborative_filtering_recommendations(user_id, top_n):
    testset = trainset.build_anti_testset()
    testset = filter(lambda x: x[0] == user_id, testset)
    predictions = algo.test(testset)
    predictions.sort(key=lambda x: x.est, reverse=True)
    recommendations = [prediction.iid for prediction in predictions[:top_n]]
    return recommendations

In [8]:
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(df[['user_id', 'recipe_id', 'rating']], reader)
trainset, testset = train_test_split(data, test_size=.25)

bsl_options = {'method': 'sgd', 'learning_rate': .00005,}
algo = BaselineOnly(bsl_options=bsl_options)


algo.fit(trainset)
predictions = algo.test(testset)
accuracy.rmse(predictions)
accuracy.mae(predictions)

Estimating biases using sgd...
RMSE: 0.8170
MAE:  0.6265


0.6264848219085672

In [9]:
from collections import Counter

def get_hybrid_recommendations(user_id, product_id, top_n):
    content_based_recommendations = get_content_based_recommendations(product_id, top_n)
    collaborative_filtering_recommendations = get_collaborative_filtering_recommendations(user_id, top_n)

    all_recommendations = list(content_based_recommendations) + list(collaborative_filtering_recommendations)

    recommendation_counts = Counter(all_recommendations)
    hybrid_recommendations = sorted(recommendation_counts, key=lambda x: (-recommendation_counts[x], all_recommendations.index(x)))
    hybrid_recommendations = hybrid_recommendations[:top_n]

    hybrid_recommendations = content_df[content_df['recipe_id'].isin(hybrid_recommendations)].drop_duplicates(subset=['recipe_id'])
    hybrid_recommendations = hybrid_recommendations[['recipe_id', 'recipe_name']]

    return hybrid_recommendations

In [10]:
user_id = 1
product_id = 229875
top_n = 10
recommendations = get_hybrid_recommendations(user_id, product_id, top_n)

print(f"Hybrid Recommendations for User {user_id} based on Product {product_id}:")
for i, row in recommendations.iterrows():
    print(f"recipe_id: {row['recipe_id']}, Product Name: {row['recipe_name']}")

Hybrid Recommendations for User 1 based on Product 229875:
recipe_id: 229878, Product Name: Wonderful Gluten Free White Bread
recipe_id: 229875, Product Name: Gluten-Free European Apple Cake
recipe_id: 229949, Product Name: Creamy White Chili
recipe_id: 229957, Product Name: Slow Cooker Au Jus Pot Roast
recipe_id: 230107, Product Name: Apple Honey Glazed Chicken
recipe_id: 230132, Product Name: Chef John's Pumpkin Pie
recipe_id: 230118, Product Name: Gluten Free Rice Chicken Stuffing
recipe_id: 230169, Product Name: Banana-Nog Cake
recipe_id: 230303, Product Name: Apple Cinnamon Breakfast Quinoa
recipe_id: 230558, Product Name: (Gluten Free) Magic Cookie Bars
