In [7]:
#!pip install pandas scikit-surprise
import pandas as pd
from surprise import Dataset, Reader, KNNBasic, NMF

In [8]:
# Source file and row cap for this experiment. Only the first N_RATINGS rows
# are used (presumably to keep the dense user x movie pivot small -- raise
# the cap to work with more of the data).
RATINGS_PATH = "ratings.csv"
N_RATINGS = 10_000

# `nrows` stops parsing after N_RATINGS rows instead of loading the entire
# file and then discarding everything past the head.
movie_ratings = pd.read_csv(RATINGS_PATH, nrows=N_RATINGS)

# Wide user x movie matrix: one row per userId, one column per movieId.
# NaN marks a movie the user has not rated. pivot_table aggregates with the
# mean by default, so duplicate (userId, movieId) rows would be averaged.
ratings_matrix = movie_ratings.pivot_table(index='userId', columns='movieId', values='rating')
ratings_matrix.head(100)

movieId,1,2,3,4,5,6,7,8,10,11,...,91529,91658,99114,106782,109487,112552,114060,115713,122882,131724
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,,4.0,,,4.0,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,3.5,2.5,3.5,5.0,3.0,4.0,2.0,3.5,5.0,5.0
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,4.0,,,,,,,,,,...,,,,,,,,,,
6,,4.0,5.0,3.0,5.0,4.0,4.0,3.0,3.0,4.0,...,,,,,,,,,,
7,4.5,,,,,,,,,,...,,,,,,,,,,


In [9]:
# NMF initialises its factor matrices randomly; fixing the seed makes the
# filled-in matrix identical on every Restart & Run All.
SEED = 42


def _predictions_to_frame(predictions, est_column):
    """Convert a list of Surprise predictions into a tidy estimate frame.

    Each prediction unpacks into five fields (user id, item id, actual
    rating, estimate, details). Only the two ids and the estimate are kept;
    the estimate column is named `est_column`.
    """
    frame = pd.DataFrame(
        predictions,
        columns=['userId', 'movieId', 'actual', est_column, 'details'],
    )
    return frame[['userId', 'movieId', est_column]]


# Ratings are declared to lie on a 0.5 - 5 scale.
rating_reader = Reader(rating_scale=(0.5, 5))

ratings_data = Dataset.load_from_df(movie_ratings[['userId', 'movieId', 'rating']], rating_reader)

# Train on every available rating; nothing is held out because the goal is
# to fill in the missing cells, not to evaluate the models.
full_trainset = ratings_data.build_full_trainset()

knn_basic_model = KNNBasic()
nmf_model = NMF(random_state=SEED)

knn_basic_model.fit(full_trainset)
nmf_model.fit(full_trainset)

# The anti-testset is every (user, movie) pair with a known user and a known
# movie but no rating in the trainset -- i.e. exactly the NaN cells of
# ratings_matrix.
testset = full_trainset.build_anti_testset()

knn_predictions = knn_basic_model.test(testset)
nmf_predictions = nmf_model.test(testset)

knn_df = _predictions_to_frame(knn_predictions, 'knn_est')
nmf_df = _predictions_to_frame(nmf_predictions, 'nmf_est')

# Line the two models' estimates up per (user, movie) pair and blend them
# with a plain unweighted mean.
merged = pd.merge(knn_df, nmf_df, on=['userId', 'movieId'])

merged['average_pred'] = merged[['knn_est', 'nmf_est']].mean(axis=1)

# Back to wide form, aligned to the observed matrix's rows and columns.
averaged = merged.pivot_table(index='userId', columns='movieId', values='average_pred').reindex_like(ratings_matrix)

# Observed ratings take priority; blended predictions only fill the gaps.
full_combined_ratings = ratings_matrix.combine_first(averaged)

# Guard: keep the column order identical to the observed matrix.
result = full_combined_ratings.reindex(columns=ratings_matrix.columns)
result.head(100)

Computing the msd similarity matrix...
Done computing similarity matrix.


movieId,1,2,3,4,5,6,7,8,10,11,...,91529,91658,99114,106782,109487,112552,114060,115713,122882,131724
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,3.754252,4.0,2.930091,5.0,4.0,4.018602,2.843134,2.888952,4.038777,...,3.289954,2.353946,3.246897,4.835828,2.862609,3.612017,1.883922,3.405809,4.615622,4.654475
2,3.970672,3.741111,4.368326,2.825951,4.692703,4.091338,3.813058,2.82212,2.810383,3.729914,...,3.5,2.5,3.5,5.0,3.0,4.0,2.0,3.5,5.0,5.0
3,3.397774,3.108185,3.680159,2.4416,4.231505,3.290312,3.284584,2.397673,2.318362,3.196757,...,3.055889,2.562142,2.946287,3.600781,2.831934,2.915942,2.500885,2.957235,3.392225,3.196259
4,4.082511,3.503135,4.199376,2.722778,4.617394,4.137334,3.766842,2.814097,2.809476,3.810091,...,3.342969,2.273371,3.113115,4.490274,2.711994,3.54808,1.898801,3.416642,4.76029,4.428554
5,4.0,3.729392,4.257494,2.703567,4.642691,3.493204,3.852407,2.62733,2.749327,3.427357,...,3.34199,2.22018,3.122977,4.520035,2.769013,3.604292,1.835372,3.244864,4.607188,4.505601
6,3.80439,4.0,5.0,3.0,5.0,4.0,4.0,3.0,3.0,4.0,...,3.331276,2.277878,3.200584,4.553339,2.844145,3.708287,1.868945,3.304174,4.811049,4.581878
7,4.5,3.3018,4.001788,2.600524,4.428165,4.057849,3.74545,2.575844,2.579009,3.597207,...,3.448229,2.30771,3.124047,4.622819,2.735356,3.422884,1.924064,3.348997,4.684126,4.450489


In [10]:
# Persist the filled-in user x movie matrix; the row index is written too
# (pandas' default), so the file can be reloaded with index_col=0.
output_path = "result.csv"
result.to_csv(output_path)