In [1]:
import numpy as np
import pandas as pd
import pickle
import matrix_factorization_utilities

## Training the model

In [2]:
# Load the running list of user ratings
raw_dataset_df = pd.read_csv('movie_ratings_data_set.csv')

# Convert the running list of user ratings into a matrix (one row per user_id,
# one column per movie_id). Duplicate (user, movie) entries collapse to their max.
# The string alias 'max' is used instead of np.max: newer pandas versions warn
# that passing the NumPy callable is deprecated, and 'max' keeps the same result.
ratings_df = pd.pivot_table(raw_dataset_df, index='user_id', columns='movie_id', aggfunc='max')

# Normalize the ratings (center them around their mean)
normalized_ratings, means = matrix_factorization_utilities.normalize_ratings(ratings_df.to_numpy())

# Apply matrix factorization to find the latent features
U, M = matrix_factorization_utilities.low_rank_matrix_factorization(normalized_ratings,
                                                                    num_features=11,
                                                                    regularization_amount=1.1)

# Find all predicted ratings by multiplying U and M
predicted_ratings = np.matmul(U, M)

# Add back in the mean ratings for each product to de-normalize the predicted results
predicted_ratings = predicted_ratings + means

# Save features and predicted ratings to files for later use.
# Each file is opened in a `with` block so the handle is flushed and closed
# before any later cell tries to read the file back.
with open("user_features.dat", "wb") as user_features_file:
    pickle.dump(U, user_features_file)
with open("product_features.dat", "wb") as product_features_file:
    pickle.dump(M, product_features_file)
with open("predicted_ratings.dat", "wb") as predicted_ratings_file:
    pickle.dump(predicted_ratings, predicted_ratings_file)
with open("means.dat", "wb") as means_file:
    pickle.dump(means, means_file)

Optimization terminated successfully.
         Current function value: 105.620378
         Iterations: 455
         Function evaluations: 660
         Gradient evaluations: 660


## Cold Start Recommendations

In [4]:
# Load the mean ratings saved by the training step.
# NOTE(review): pickle.load can execute arbitrary code, so only load files
# that this notebook (or another trusted source) produced.
with open("means.dat", "rb") as means_file:
    means = pickle.load(means_file)

# Load movie titles
movies_df = pd.read_csv('movies.csv', index_col='movie_id')

# Cold start: there is no history for a new user, so just use the average
# movie ratings directly as the user's predicted ratings
user_ratings = means

print("Movies we will recommend:")

# NOTE(review): this assignment is positional, so it presumably relies on
# movies.csv listing movies in the same order as the rating matrix columns -
# confirm against the data files.
movies_df['rating'] = user_ratings
movies_df = movies_df.sort_values(by=['rating'], ascending=False)

# Show the five highest-rated movies
print(movies_df[['title', 'genre', 'rating']].head(5))

Movies we will recommend:
                            title                   genre    rating
movie_id                                                           
6               Attack on Earth 1          sci-fi, action  4.900000
10        Surrounded by Zombies 1  horror, zombie fiction  4.882353
3                   The Sheriff 2    crime drama, western  4.818182
12                     Horrorfest                  horror  4.800000
5            The Big City Judge 2             legal drama  4.785714


## Make Recommendations from Data Files

In [6]:
# Load prediction rules from data files.
# Each file is opened in a `with` block so its handle is closed right after reading.
# NOTE(review): pickle.load can execute arbitrary code, so only load files
# that this notebook (or another trusted source) produced.
with open("user_features.dat", "rb") as user_features_file:
    U = pickle.load(user_features_file)
with open("product_features.dat", "rb") as product_features_file:
    M = pickle.load(product_features_file)
with open("predicted_ratings.dat", "rb") as predicted_ratings_file:
    predicted_ratings = pickle.load(predicted_ratings_file)

# Load movie titles
movies_df = pd.read_csv('movies.csv', index_col='movie_id')

print("Enter a user_id to get recommendations (Between 1 and 100):")
user_id_to_search = int(input())

# The lookup below uses row (user_id - 1). Without this check an id of 0 or a
# negative id would silently wrap around to another user's row, and an id past
# the end would fail with an unhelpful IndexError.
num_users = len(predicted_ratings)
if not 1 <= user_id_to_search <= num_users:
    raise ValueError(
        "user_id must be between 1 and {}, got {}".format(num_users, user_id_to_search)
    )

print("Movies we will recommend:")

# NOTE(review): assumes user ids are contiguous and start at 1, so that
# user_id - 1 is the matching row of predicted_ratings - confirm against the data set.
user_ratings = predicted_ratings[user_id_to_search - 1]
movies_df['rating'] = user_ratings
movies_df = movies_df.sort_values(by=['rating'], ascending=False)

# Show the five movies with the highest predicted rating for this user
print(movies_df[['title', 'genre', 'rating']].head(5))

Enter a user_id to get recommendations (Between 1 and 100):
9
Movies we will recommend:
                            title                   genre    rating
movie_id                                                           
6               Attack on Earth 1          sci-fi, action  4.980102
3                   The Sheriff 2    crime drama, western  4.974564
10        Surrounded by Zombies 1  horror, zombie fiction  4.949720
13                  The Sheriff 3    crime drama, western  4.936344
5            The Big City Judge 2             legal drama  4.855532
