### ECE475 Freq Machine Learning Recommendation System Assignment
Jing Jiang

I used the MovieLens 100K dataset (MovieLens Small) in this assignment. <br>
See https://grouplens.org/datasets/movielens/ for details.

In [4]:
import numpy as np
import pandas as pd

import os

# Load the two MovieLens "latest-small" CSV files. The paths are relative, so
# the notebook must be run from the directory that contains ./ml-latest-small/.
# r: ratings table (userId, movieId, rating, timestamp).
# m: movie metadata table (movieId, title, genres).
r = pd.read_csv("./ml-latest-small/ratings.csv")
m = pd.read_csv("./ml-latest-small/movies.csv")

In [5]:
# Sanity check: (number of rows, number of columns) of the ratings table.
r.shape

(100836, 4)

In [6]:
# Sanity check: (number of rows, number of columns) of the movies table.
m.shape

(9742, 3)

#### Matrix Factorization

In [7]:
# Reshape the long ratings table into a wide user-by-movie table:
# one row per userId, one column per movieId, each cell holding the rating.
# (user, movie) pairs with no rating come out of pivot() as NaN and are
# replaced by 0 here, so from this point on an unrated movie looks exactly
# like a movie that was rated 0.
x = r.pivot(index = 'userId', columns ='movieId', values = 'rating').fillna(0)
x.head()

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
# Convert the user-by-movie table into a plain 2-D NumPy array.
# `.values` replaces `DataFrame.as_matrix()`, which is deprecated (it emits a
# FutureWarning) and no longer exists in pandas >= 1.0.
X = x.values

# Per-user mean taken across every movie column (axis=1). Because unrated
# cells of `x` were filled with 0, those zeros are included in the average.
rating_mean = np.mean(X, axis = 1)

# Subtract each user's mean from that user's row; reshape(-1, 1) turns the
# 1-D vector of means into a column so it broadcasts across the movie axis.
X_demean = X - rating_mean.reshape(-1, 1)

  """Entry point for launching an IPython kernel.


#### Using SVD to predict

In [9]:
# Truncated singular value decomposition of the mean-centered rating matrix.
from scipy.sparse.linalg import svds

# Keep only the 50 largest singular triplets (k = 50).
u, singular_values, vt = svds(X_demean, k = 50)

# svds returns the singular values as a 1-D array; place them on the diagonal
# of a square matrix so the factors can be multiplied back together.
sigma = np.diag(singular_values)

In [10]:
# Rebuild the low-rank approximation (u * sigma * vt) and add each user's mean
# back so the values are on the original rating scale again.
low_rank = u @ sigma @ vt
pred_rating = low_rank + rating_mean.reshape(-1, 1)

# Label the columns with the movie ids from `x`. No row index is passed, so
# the rows get a default 0..n-1 index in the same order as the rows of `x`.
preds = pd.DataFrame(pred_rating, columns = x.columns)

#### Recommendation Function

In [20]:
# Collect one user's ratings together with the matching movie metadata.
def getRated(user, movies, ratings):
    """Return every rating made by `user`, joined with the movie details.

    Parameters
    ----------
    user : value compared against the `userId` column of `ratings`.
    movies : DataFrame with a `movieId` column plus descriptive columns.
    ratings : DataFrame with `userId`, `movieId` and `rating` columns.

    Returns
    -------
    DataFrame with one row per rating by `user`, carrying the columns of
    `ratings` followed by the columns of `movies`, ordered from the highest
    rating to the lowest. Empty when the user has no ratings.
    """
    belongs_to_user = ratings.userId == user
    own_ratings = ratings[belongs_to_user]

    # Left join: keep every rating row even when the movie has no metadata.
    with_titles = own_ratings.merge(
        movies,
        how = 'left',
        left_on = 'movieId',
        right_on = 'movieId',
    )
    return with_titles.sort_values(by = ['rating'], ascending = False)

In [21]:
# Predict the movies that the user may like among the movies he hasn't rated.
def recommend(pred, user_info, movies, num_recommendations=5):
    """Return the top predicted movies that the user has not rated yet.

    Parameters
    ----------
    pred : DataFrame of predicted ratings, one row per user and one column
        per movieId. Rows are addressed by position: the user with id `u` is
        expected at row position `u - 1` (i.e. user ids are assumed to be
        1, 2, 3, ... in row order -- verify this holds for your data).
    user_info : DataFrame of one user's ratings, containing at least the
        `userId` and `movieId` columns. Its index labels are irrelevant.
    movies : DataFrame with a `movieId` column plus descriptive columns.
    num_recommendations : how many movies to return (default 5).

    Returns
    -------
    DataFrame holding the rows of `movies` for the highest predicted,
    not-yet-rated movies, best first. The prediction score itself is not
    included in the result.

    Raises
    ------
    ValueError
        If `user_info` is empty, since the user cannot be identified.
    """
    if user_info.empty:
        raise ValueError("user_info is empty; cannot determine which user to recommend for")

    # Take the first row by position. A label lookup (`[...][0]`) breaks as
    # soon as the frame has been sorted/filtered so that label 0 is gone.
    user_id = user_info['userId'].iloc[0]
    urow = int(user_id) - 1

    # Name the prediction column explicitly instead of relying on the row's
    # index label happening to equal `urow`.
    user_pred = (pred.iloc[urow]
                 .rename('predictions')
                 .rename_axis('movieId')
                 .reset_index())

    # Candidate movies are those the user has no rating for.
    unseen = movies[~movies['movieId'].isin(user_info['movieId'])]

    # Movies with no prediction get NaN after the left join and sort last.
    ranked = (unseen.merge(user_pred, how = 'left', on = 'movieId')
              .sort_values('predictions', ascending = False))

    # Drop the trailing `predictions` column so only movie columns remain.
    recommendations = ranked.iloc[:num_recommendations, :-1]

    return recommendations

#### Recommendation Example
Let's say we want to make recommendations for the user with ID 2.

In [24]:
# Look up everything user 2 has rated, highest rating first.
user2 = getRated(2, m, r)
user2
# This is the list of movies user 2 has rated (29 movies in total).
# This user has good taste in movies -- Mad Max: Fury Road was awesome!

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
28,2,131724,5.0,1445714851,The Jinx: The Life and Deaths of Robert Durst ...,Documentary
27,2,122882,5.0,1445715272,Mad Max: Fury Road (2015),Action|Adventure|Sci-Fi|Thriller
22,2,106782,5.0,1445714966,"Wolf of Wall Street, The (2013)",Comedy|Crime|Drama
18,2,89774,5.0,1445715189,Warrior (2011),Drama
9,2,60756,5.0,1445714980,Step Brothers (2008),Comedy
16,2,80906,5.0,1445715172,Inside Job (2010),Documentary
2,2,1704,4.5,1445715228,Good Will Hunting (1997),Drama|Romance
8,2,58559,4.5,1445715141,"Dark Knight, The (2008)",Action|Crime|Drama|IMAX
10,2,68157,4.5,1445715154,Inglourious Basterds (2009),Action|Drama|War
15,2,80489,4.5,1445715340,"Town, The (2010)",Crime|Drama|Thriller


In [25]:
# Ask for the 15 highest predicted movies among those user 2 has not rated.
user2_pred = recommend(preds, user2, m, 15)
user2_pred
# These are the 15 recommendations for user 2; none of them is a movie he has already rated.

Unnamed: 0,movieId,title,genres
2223,2959,Fight Club (1999),Action|Crime|Drama|Thriller
1936,2571,"Matrix, The (1999)",Action|Sci-Fi|Thriller
7398,80463,"Social Network, The (2010)",Drama
312,356,Forrest Gump (1994),Comedy|Drama|Romance|War
8850,134130,The Martian (2015),Adventure|Drama|Sci-Fi
508,593,"Silence of the Lambs, The (1991)",Crime|Horror|Thriller
6323,48780,"Prestige, The (2006)",Drama|Mystery|Sci-Fi|Thriller
3634,4993,"Lord of the Rings: The Fellowship of the Ring,...",Adventure|Fantasy
7032,69122,"Hangover, The (2009)",Comedy|Crime
4795,7153,"Lord of the Rings: The Return of the King, The...",Action|Adventure|Drama|Fantasy
