In [421]:
import os
os.chdir('C:\\Users\\user\\Documents\\GitHub\\svd-ivelmakina')

import numpy as np
import pandas as pd
from scipy.sparse.linalg import svds


from IPython.display import display

from objects import file_path_ratings as ratings, file_path_movies as movies

In [422]:
df_ratings = pd.read_csv(ratings)
df_movies = pd.read_csv(movies)
ratings_matrix = df_ratings.pivot(index='userId', columns='movieId', values='rating')
ratings_matrix.shape

(610, 9724)

In [423]:
ratings_matrix = ratings_matrix.dropna(thresh=30, axis=0)
ratings_matrix = ratings_matrix.dropna(thresh=30, axis=1)
ratings_matrix.shape

(501, 860)

In [424]:
ratings_matrix_filled = ratings_matrix.apply(lambda x: x.fillna(x.mean()), axis=0)  # mean value for each movie
ratings_array = ratings_matrix_filled.values
user_ratings_mean = np.mean(ratings_array, axis=1)
ratings_array_demeaned = ratings_array - user_ratings_mean.reshape(-1, 1)

In [425]:
u, sigma, vt = svds(ratings_array_demeaned, k=10) # the bigger is k, the smaller is error, however calculations are more complicated and long
sigma = np.diag(sigma)
all_user_predicted_ratings = np.dot(np.dot(u, sigma), vt) + user_ratings_mean.reshape(-1, 1)
predictions_df = pd.DataFrame(all_user_predicted_ratings, columns=ratings_matrix.columns, index=ratings_matrix.index)

In [426]:
only_predictions_df = predictions_df.where(ratings_matrix.isna())

In [427]:
print("Data before prediction:")
display(ratings_matrix)

print("Data after prediction:")
display(predictions_df)

print("Only predicted data:")
display(only_predictions_df)

Data before prediction:


movieId,1,2,3,5,6,7,10,11,16,17,...,115617,116797,119145,122882,122886,122904,134130,134853,139385,152081
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,,4.0,,4.0,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,4.0,,,,,,,,,,...,,,,,,,,,,
6,,4.0,5.0,5.0,4.0,4.0,3.0,4.0,4.0,4.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,2.5,,,,,2.5,,2.5,,4.0,...,,,,,,,,,,
607,4.0,,,,,,,3.0,,,...,,,,,,,,,,
608,2.5,2.0,2.0,,,,4.0,,4.5,,...,,,,,,,,,,
609,3.0,,,,,,4.0,,,,...,,,,,,,,,,


Data after prediction:


movieId,1,2,3,5,6,7,10,11,16,17,...,115617,116797,119145,122882,122886,122904,134130,134853,139385,152081
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.195784,3.626511,3.407957,3.107135,4.050575,3.374519,3.707330,3.847921,3.984585,3.766584,...,3.841666,4.093569,4.089366,3.871815,3.979231,3.823717,4.023708,3.921437,4.001508,4.013699
3,3.932769,3.495318,3.222784,2.995556,3.972470,3.225234,3.508814,3.660832,3.855783,3.528047,...,3.814421,3.947659,3.964042,3.772776,3.902159,3.831028,3.925509,3.793553,3.829103,3.899557
4,3.876734,3.413726,3.137629,3.014071,3.629890,3.223909,3.512055,3.841191,3.851864,3.674902,...,3.840094,3.922281,3.919199,3.687062,3.967388,3.762097,4.034591,3.722944,3.766019,3.796932
5,3.884989,3.410584,3.232828,3.005900,3.877167,3.236270,3.480410,3.660302,3.933858,3.774737,...,3.820636,3.988587,3.978965,3.789539,3.931141,3.801009,4.012094,3.805802,3.875303,3.876413
6,4.297014,3.853933,3.565250,3.268872,4.042952,3.341156,3.778440,3.858538,3.885351,3.722365,...,3.928040,3.914941,3.970126,3.701603,4.030471,3.876709,4.014755,3.779504,3.794069,3.891055
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,3.863543,3.204941,3.276708,2.912293,4.022627,3.246874,3.194165,3.411309,4.042755,3.891897,...,3.760506,4.089580,4.047310,3.873157,3.870181,3.788480,4.037692,3.852218,3.955062,3.847356
607,3.912716,3.563611,3.530255,3.192687,4.060468,3.275294,3.560985,3.686753,4.023636,3.784929,...,3.810452,3.921673,3.925505,3.672614,3.925369,3.725645,3.977758,3.737312,3.829062,3.866060
608,1.950594,2.438343,2.376593,2.691255,4.300240,3.013021,3.932909,3.462225,4.314108,4.031803,...,3.972899,4.017542,4.023006,3.986621,4.111832,3.771696,4.223238,3.919794,3.978762,4.011957
609,3.817070,3.373585,3.198503,2.997310,3.916818,3.245510,3.501019,3.651315,3.947044,3.771464,...,3.838338,3.995797,3.992610,3.807771,3.945526,3.814395,4.025659,3.819393,3.880602,3.881207


Only predicted data:


movieId,1,2,3,5,6,7,10,11,16,17,...,115617,116797,119145,122882,122886,122904,134130,134853,139385,152081
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,3.626511,,3.107135,,3.374519,3.707330,3.847921,3.984585,3.766584,...,3.841666,4.093569,4.089366,3.871815,3.979231,3.823717,4.023708,3.921437,4.001508,4.013699
3,3.932769,3.495318,3.222784,2.995556,3.972470,3.225234,3.508814,3.660832,3.855783,3.528047,...,3.814421,3.947659,3.964042,3.772776,3.902159,3.831028,3.925509,3.793553,3.829103,3.899557
4,3.876734,3.413726,3.137629,3.014071,3.629890,3.223909,3.512055,3.841191,3.851864,3.674902,...,3.840094,3.922281,3.919199,3.687062,3.967388,3.762097,4.034591,3.722944,3.766019,3.796932
5,,3.410584,3.232828,3.005900,3.877167,3.236270,3.480410,3.660302,3.933858,3.774737,...,3.820636,3.988587,3.978965,3.789539,3.931141,3.801009,4.012094,3.805802,3.875303,3.876413
6,4.297014,,,,,,,,,,...,3.928040,3.914941,3.970126,3.701603,4.030471,3.876709,4.014755,3.779504,3.794069,3.891055
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,,3.204941,3.276708,2.912293,4.022627,,3.194165,,4.042755,,...,3.760506,4.089580,4.047310,3.873157,3.870181,3.788480,4.037692,3.852218,3.955062,3.847356
607,,3.563611,3.530255,3.192687,4.060468,3.275294,3.560985,,4.023636,3.784929,...,3.810452,3.921673,3.925505,3.672614,3.925369,3.725645,3.977758,3.737312,3.829062,3.866060
608,,,,2.691255,4.300240,3.013021,,3.462225,,4.031803,...,3.972899,4.017542,4.023006,3.986621,4.111832,3.771696,4.223238,3.919794,3.978762,4.011957
609,,3.373585,3.198503,2.997310,3.916818,3.245510,,3.651315,3.947044,3.771464,...,3.838338,3.995797,3.992610,3.807771,3.945526,3.814395,4.025659,3.819393,3.880602,3.881207


In [428]:
def recommend_movies(user_id):
    recommended_movies_id = only_predictions_df.loc[user_id].sort_values(ascending=False)[:10].index.tolist()
    recommendations = df_movies[df_movies['movieId'].isin(recommended_movies_id)].reset_index()[["title", "genres"]]
    
    return recommendations

In [433]:
predictions = recommend_movies(11)

print("Top 10 recommended movies:")
display(predictions)

Top 10 recommended movies:


Unnamed: 0,title,genres
0,Star Wars: Episode IV - A New Hope (1977),Action|Adventure|Sci-Fi
1,Schindler's List (1993),Drama|War
2,Rear Window (1954),Mystery|Thriller
3,Casablanca (1942),Drama|Romance
4,"Princess Bride, The (1987)",Action|Adventure|Comedy|Fantasy|Romance
5,Lawrence of Arabia (1962),Adventure|Drama|War
6,Goodfellas (1990),Crime|Drama
7,Patton (1970),Drama|War
8,American History X (1998),Crime|Drama
9,"Matrix, The (1999)",Action|Sci-Fi|Thriller


In [430]:
predictions = recommend_movies(109)

print("Top 10 recommended movies:")
display(predictions)

Top 10 recommended movies:


Unnamed: 0,title,genres
0,Dr. Strangelove or: How I Learned to Stop Worr...,Comedy|War
1,"Godfather, The (1972)",Crime|Drama
2,Casablanca (1942),Drama|Romance
3,"Princess Bride, The (1987)",Action|Adventure|Comedy|Fantasy|Romance
4,Lawrence of Arabia (1962),Adventure|Drama|War
5,"Godfather: Part II, The (1974)",Crime|Drama
6,Patton (1970),Drama|War
7,Cool Hand Luke (1967),Drama
8,Fight Club (1999),Action|Crime|Drama|Thriller
9,"Amelie (Fabuleux destin d'Amélie Poulain, Le) ...",Comedy|Romance


In [431]:
predictions = recommend_movies(607)

print("Top 10 recommended movies:")
display(predictions)

Top 10 recommended movies:


Unnamed: 0,title,genres
0,Forrest Gump (1994),Comedy|Drama|Romance|War
1,Rear Window (1954),Mystery|Thriller
2,Casablanca (1942),Drama|Romance
3,Reservoir Dogs (1992),Crime|Mystery|Thriller
4,12 Angry Men (1957),Drama
5,Patton (1970),Drama|War
6,Life Is Beautiful (La Vita è bella) (1997),Comedy|Drama|Romance|War
7,American History X (1998),Crime|Drama
8,Fight Club (1999),Action|Crime|Drama|Thriller
9,City of God (Cidade de Deus) (2002),Action|Adventure|Crime|Drama|Thriller
