## Movie Recommendation System Models

In [62]:
#Import necessary libraries
import numpy as np
import pandas as pd
from surprise import SVD
from surprise.prediction_algorithms import KNNWithMeans, KNNBasic, KNNBaseline
from surprise import Dataset, Reader
from surprise import accuracy
from surprise.model_selection import cross_validate, train_test_split, GridSearchCV


In [63]:
#Import data into a DataFrame and drop unnecessary columns 
df = pd.read_csv('cleaneddata', index_col=False)
df2 = df[['userId', 'movieId', 'rating']]

  interactivity=interactivity, compiler=compiler, result=result)


In [64]:
df2.head()

Unnamed: 0,userId,movieId,rating
0,1,1,4.0
1,1,3,4.0
2,1,6,4.0
3,1,47,5.0
4,1,50,5.0


In [65]:
#Check sparsity of matrix
numratings = len(df2['rating'])
numusers = len(df2['userId'].unique())
numitems = len(df2['movieId'].unique())

sparse = 1 - (numratings / (numusers*numitems))
sparse

0.9829821819213007

Our matrix is very sparse, which could negatively impact our model results. In order to improve this issue, we will remove any users that have rated less than 200 movies. 

In [66]:
#Remove users who have rated less than 200 movies
df3 = df2.groupby('userId').filter(lambda x : len(x)>200)
df4 = df3.groupby('movieId').filter(lambda x : len(x)>10)
df4.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 47232 entries, 0 to 100817
Data columns (total 3 columns):
userId     47232 non-null int64
movieId    47232 non-null int64
rating     47232 non-null float64
dtypes: float64(1), int64(2)
memory usage: 1.4 MB


In [67]:
#Check sparsity of new matrix
numratings = len(df4['rating'])
numusers = len(df4['userId'].unique())
numitems = len(df4['movieId'].unique())

sparse = 1 - (numratings / (numusers*numitems))
sparse

0.7931695867508024

This result looks pretty good; our resulting matrix is much less sparse, and is less than 95%, so we hope to see improvements in our SVD models. Let's begin modeling and investigate our results.

In [68]:
#Look at the distribution of ratings again 
df4.rating.value_counts()

4.0    12991
3.0     9065
3.5     6970
5.0     5598
4.5     4265
2.0     3239
2.5     2911
1.0     1030
1.5      716
0.5      447
Name: rating, dtype: int64

In [69]:
#Instansiate reader and data 
reader = Reader(rating_scale=(0, 5))
data = Dataset.load_from_df(df4, reader) 

In [11]:
#Train test split 
trainset, testset = train_test_split(data, test_size=.2)

In [12]:
# Print number of uses and items for the trainset 
print('Number of users in train set : ', trainset.n_users, '\n')
print('Number of items in train set : ', trainset.n_items, '\n')


Number of users in train set :  133 

Number of items in train set :  1717 



### Baseline Model

In [13]:
#Instansiate a baseline model using KNNBaseline 
baseline = KNNBaseline(random_state=42)

In [14]:
#Fit model on the trainset 
baseline.fit(trainset)

Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBaseline at 0x7fa8b91b4710>

In [15]:
#Predict on the test set 
baselinepreds = baseline.test(testset)

In [16]:
#Check RMSE and MAE results 
accuracy.rmse(baselinepreds)
accuracy.mae(baselinepreds)

RMSE: 0.8051
MAE:  0.6194


0.6194205295150128

In [17]:
#Run 3-fold cross validation on the data and print results 
cv_baseline = cross_validate(baseline, data, measures=['RMSE', 'MAE'], cv=3, verbose=True)

Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNBaseline on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.8133  0.8190  0.8123  0.8148  0.0029  
MAE (testset)     0.6222  0.6280  0.6232  0.6245  0.0025  
Fit time          0.07    0.06    0.08    0.07    0.01    
Test time         1.16    1.07    1.06    1.10    0.05    


In [18]:
# Print out the RMSE score for each fold 
for i in cv_baseline.items():
    print(i)

('test_rmse', array([0.81329786, 0.81896769, 0.81226815]))
('test_mae', array([0.62221371, 0.62801596, 0.62323509]))
('fit_time', (0.06694602966308594, 0.05843925476074219, 0.07941699028015137))
('test_time', (1.1640589237213135, 1.0677199363708496, 1.0586040019989014))


In [19]:
#Find the average test RMSE from the 3-Fold cross-validation
np.mean(cv_baseline['test_rmse'])

0.8148445650245287

### Model 1

In [20]:
#Set parameters for GridSearch on SVD model 
parameters = {'n_factors': [50, 100, 150],
             'reg_all': [0.02, 0.05, 0.1],
             'n_epochs': [10, 20, 30, 40],
             'lr_all': [.005, .075, .01]}
gridsvd = GridSearchCV(SVD, param_grid=parameters, n_jobs=-1)

In [21]:
#Fit SVD model on data
gridsvd.fit(data)

In [22]:
#Print best score and best parameters from the GridSearch 
print(gridsvd.best_score)
print(gridsvd.best_params)

{'rmse': 0.7939562890292544, 'mae': 0.6086735292467842}
{'rmse': {'n_factors': 150, 'reg_all': 0.1, 'n_epochs': 40, 'lr_all': 0.01}, 'mae': {'n_factors': 150, 'reg_all': 0.1, 'n_epochs': 40, 'lr_all': 0.01}}


In [23]:
#Reinstantiate the model with the best parameters fromGridSearch 
svdtuned = SVD(n_factors=150,
               reg_all=0.1,
               n_epochs=40,
               lr_all=0.01)

In [24]:
#Fit and predict the model 
svdtuned.fit(trainset)
svdpreds = svdtuned.test(testset)

In [25]:
#Print RMSE and MAE results 
accuracy.rmse(svdpreds)
accuracy.mae(svdpreds)

RMSE: 0.7894
MAE:  0.6076


0.6076471235784057

In [26]:
#Perform 3-Fold cross validation for SVD tuned model
cv_svd_tuned = cross_validate(svdtuned, data, measures=['RMSE', 'MAE'], cv=3, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.8068  0.8093  0.7946  0.8036  0.0064  
MAE (testset)     0.6194  0.6207  0.6106  0.6169  0.0045  
Fit time          3.98    4.13    3.88    4.00    0.10    
Test time         0.23    0.12    0.10    0.15    0.06    


In [27]:
#Display the results for all 3-folds 
for i in cv_svd_tuned.items():
    print(i)

('test_rmse', array([0.80684602, 0.80933796, 0.79458173]))
('test_mae', array([0.6193717 , 0.62073186, 0.61057025]))
('fit_time', (3.9808249473571777, 4.128509998321533, 3.8788743019104004))
('test_time', (0.23160505294799805, 0.12190890312194824, 0.0969080924987793))


In [28]:
# Print out the average RMSE score for the test set
np.mean(cv_svd_tuned['test_rmse'])

0.803588570096028

In [38]:
#have to double loop through all the users for each item and loop through movieID for each item 
estimated = svdtuned.predict(1,1)[3]
estimated

4.577685369227332

In [40]:
svdtuned.predict(1,1)

Prediction(uid=1, iid=1, r_ui=None, est=4.577685369227332, details={'was_impossible': False})

In [None]:
#do post model EDA on userId, movieID and estimated rating 
#is there popularity bias 
#can do a histogram of all the errors - distribution of the errors (leave to end)

### Model 2

In [29]:
# Set parameters to be used in KNN models 
knn_params = {'name': ['cosine', 'pearson'],
              'user_based':[True, False], 
              'min_support':[True, False],
            'min_k' : [1, 2]}

In [30]:
# Apply GridSearch to the KNN Basic model to identify the best parameters
gsknnbasic = GridSearchCV(KNNBasic, knn_params, measures=['rmse', 'mae'], cv=3)
gsknnbasic.fit(data)

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computi

In [31]:
#Display the best scores and parameters from GridSearch
print(gsknnbasic.best_score)
print(gsknnbasic.best_params)

{'rmse': 0.8844436575119833, 'mae': 0.6856261763250499}
{'rmse': {'name': 'cosine', 'user_based': True, 'min_support': True, 'min_k': 1}, 'mae': {'name': 'cosine', 'user_based': True, 'min_support': True, 'min_k': 1}}


In [41]:
#Reinstantiate the model with the best parameters from GridSearch 
knnbasic_tuned = KNNBasic(sim_options={'name': 'cosine', 
                                       'user_based': True, 
                                       'min_support':True, 
                                       'min_k':2, })

In [42]:
#Fit on the train set and predict on the test set 
knnbasic_tuned.fit(trainset)
knnbpreds = knnbasic_tuned.test(testset)

Computing the cosine similarity matrix...
Done computing similarity matrix.


In [43]:
#Print RMSE and MAE results 
accuracy.rmse(knnbpreds)
accuracy.mae(knnbpreds)

RMSE: 0.8926
MAE:  0.6958


0.6958356375030111

Another way to evalute the model is to perform a cross validation and print the resulting scores. We will explore this below:

In [44]:
#Conduct cross validation for the KNNBasic tuned model 
cv_knn_basic = cross_validate(knnbasic_tuned, data, measures=['RMSE', 'MAE'], cv=3, verbose=True)

Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNBasic on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9030  0.9067  0.9077  0.9058  0.0020  
MAE (testset)     0.7003  0.7066  0.7055  0.7041  0.0027  
Fit time          0.03    0.03    0.03    0.03    0.00    
Test time         0.74    0.75    0.81    0.77    0.03    


In [45]:
# Print out results from the cross-valdiatoin 
for i in cv_knn_basic.items():
    print(i)

('test_rmse', array([0.90296961, 0.90670276, 0.90767707]))
('test_mae', array([0.70033263, 0.70655386, 0.70547007]))
('fit_time', (0.02528095245361328, 0.02921605110168457, 0.028800010681152344))
('test_time', (0.7427136898040771, 0.7487399578094482, 0.811363935470581))


In [46]:
# Print out the average RMSE score for the test set
np.mean(cv_knn_basic['test_rmse'])

0.9057831446845106

This average of test RMSE results in our cross validation is approximately 0.97, similar to the RMSE we found above. This is a significant improvement from our baseline model which had an RMSE of 0.873.

### Model 3

In [47]:
#Apply KNN GridSearch parameters on the KNNBaseline model 
gsknnbaseline = GridSearchCV(KNNBaseline, knn_params, measures=['rmse', 'mae'], cv=3)
gsknnbaseline.fit(data)

Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matr

In [48]:
#Display the best score and the best parameters 
print(gsknnbaseline.best_score)
print(gsknnbaseline.best_params)

{'rmse': 0.8156309474360843, 'mae': 0.6248167213951324}
{'rmse': {'name': 'cosine', 'user_based': True, 'min_support': True, 'min_k': 1}, 'mae': {'name': 'cosine', 'user_based': True, 'min_support': True, 'min_k': 1}}


In [49]:
#Reinstantiate the model with the best parameters from GridSearch 
knnbaseline_tuned = KNNBaseline(sim_options={'name': 'cosine', 
                                       'user_based': True, 
                                       'min_support':True, 
                                       'min_k':2, })

In [50]:
#Fit the trainset and predict on the test set 
knnbaseline_tuned.fit(trainset)
knnbaselinepreds = knnbaseline_tuned.test(testset)

Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.


In [51]:
#Print the RMSE and MAE scores 
accuracy.rmse(knnbaselinepreds)
accuracy.mae(knnbaselinepreds)

RMSE: 0.8077
MAE:  0.6217


0.6217226517719584

In [52]:
#Perform 3 fold cross validation 
cv_knn_baseline = cross_validate(knnbaseline_tuned, data, measures=['RMSE', 'MAE'], cv=3, verbose=True)

Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNBaseline on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.8203  0.8175  0.8161  0.8180  0.0018  
MAE (testset)     0.6316  0.6253  0.6242  0.6270  0.0033  
Fit time          0.08    0.07    0.07    0.07    0.01    
Test time         1.34    1.22    1.20    1.25    0.06    


In [53]:
#Show the mean RMSE score for the test set 
np.mean(cv_knn_baseline['test_rmse'])

0.817969008736477

### Model 4

In [54]:
#Apply GridSearch to the KNNWithMeans model 
gsknnWM = GridSearchCV(KNNWithMeans, knn_params, measures=['rmse', 'mae'], cv=3)
gsknnWM.fit(data)

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computi

In [55]:
#Display the best score and best parameters from GridSearch 
print(gsknnWM.best_score)
print(gsknnWM.best_params)

{'rmse': 0.8245677827387942, 'mae': 0.6315291639141705}
{'rmse': {'name': 'cosine', 'user_based': True, 'min_support': True, 'min_k': 1}, 'mae': {'name': 'cosine', 'user_based': True, 'min_support': True, 'min_k': 1}}


In [56]:
#Reinstansiate the model with the best parameters 
knnwm_tuned = KNNWithMeans(sim_options={'name': 'cosine', 
                                       'user_based': True, 
                                       'min_support':True, 
                                       'min_k':2, })

In [57]:
#Fit on the trainset, predict on the testset 
knnwm_tuned.fit(trainset)
knnwmpreds = knnwm_tuned.test(testset)

Computing the cosine similarity matrix...
Done computing similarity matrix.


In [58]:
#Print RMSE and MAE results
accuracy.rmse(knnwmpreds)
accuracy.mae(knnwmpreds)

RMSE: 0.8151
MAE:  0.6278


0.6278212573950473

In [59]:
#Perform 3-Fold cross validation on KNNWithMeans model 
cv_knn_wm = cross_validate(knnwm_tuned, data, measures=['RMSE', 'MAE'], cv=3, verbose=True)

Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNWithMeans on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.8367  0.8155  0.8266  0.8263  0.0087  
MAE (testset)     0.6385  0.6272  0.6336  0.6331  0.0046  
Fit time          0.04    0.04    0.04    0.04    0.00    
Test time         0.88    0.92    0.86    0.88    0.03    


In [60]:
#Print the average RMSE score for the test set 
np.mean(cv_knn_wm['test_rmse'])

0.8262884866129457

### All results

In [61]:
#Create a dictionary for each models' results 
baselineresult = {'model': 'baseline','RMSE': accuracy.rmse(baselinepreds), 'MAE': accuracy.mae(baselinepreds), 'CV': np.mean(cv_baseline['test_rmse'])}
svdresult = {'model':'svd', 'RMSE': accuracy.rmse(svdpreds), 'MAE': accuracy.mae(svdpreds), 'CV': np.mean(cv_svd_tuned['test_rmse'])}
knnbasicresult = {'model':'knnbasic','RMSE': accuracy.rmse(knnbpreds), 'MAE': accuracy.mae(knnbpreds), 'CV': np.mean(cv_knn_basic['test_rmse'])}
knnbaselineresult = {'model':'knnbaseline','RMSE': accuracy.rmse(knnbaselinepreds), 'MAE': accuracy.mae(knnbaselinepreds), 'CV': np.mean(cv_knn_baseline['test_rmse'])}
knnwmresult = {'model':'knnwm','RMSE': accuracy.rmse(knnwmpreds), 'MAE': accuracy.mae(knnwmpreds), 'CV': np.mean(cv_knn_wm['test_rmse'])}

RMSE: 0.8051
MAE:  0.6194
RMSE: 0.7894
MAE:  0.6076
RMSE: 0.8926
MAE:  0.6958
RMSE: 0.8077
MAE:  0.6217
RMSE: 0.8151
MAE:  0.6278


In [63]:
#Combine all the results into a list 
result_list = [baselineresult, svdresult, knnbasicresult, knnbaselineresult, knnwmresult]

In [64]:
#Transform the results lists into a DataFrame 
df_results_updated = pd.DataFrame.from_dict(result_list, orient='columns')
df_results_updated = df_results_updated.set_index('model')

In [65]:
#Display the results for all of the models 
df_results_updated

Unnamed: 0_level_0,RMSE,MAE,CV
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
baseline,0.805064,0.619421,0.814845
svd,0.789376,0.607647,0.803589
knnbasic,0.89259,0.695836,0.905783
knnbaseline,0.807736,0.621723,0.817969
knnwm,0.81511,0.627821,0.826288


## Generating New Ratings

In [33]:
#Create function that generates new ratings for moves in our dataset 

def movie_rater(movie_df,num, genre=None):
    
    #Generate a new userID = 1000
    userID = 1000
    
    #Create empty list of ratings 
    rating_list = []
    
    #For the number of ratings still left to generate, choose a random movie that contains the given genre
    while num > 0:
        if genre:
            movie = movie_df[movie_df['genres'].str.contains(genre)].sample(1)
        else:
            movie = movie_df.sample(1)
        print(movie)
        
    #Ask user to provide rating for the random movie that was provided     
        rating = input('How would you rate this movie on a scale of 0-5, press n if you have not seen :\n')
        if rating == 'n':
            continue
    
    #Print userId, movieId, title and rating for new user and their new ratings 
        else:
            rating_one_movie = {'userId':userID,'movieId':movie['movieId'].values[0],'title':movie['title'].values[0],'rating':rating}
            rating_list.append(rating_one_movie) 
            num -= 1
    return rating_list   
        

In [85]:
#Take original dataframe and include necessary columns 
dfnew = df[['userId', 'movieId', 'rating', 'title', 'genres', 'year']]

In [86]:
#Re-investigate the original dataframe that included titles and genres for films 
dfnew.head()

Unnamed: 0,userId,movieId,rating,title,genres,year
0,1,1,4.0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,1995
1,1,3,4.0,Grumpier Old Men (1995),Comedy|Romance,1995
2,1,6,4.0,Heat (1995),Action|Crime|Thriller,1995
3,1,47,5.0,Seven (a.k.a. Se7en) (1995),Mystery|Thriller,1995
4,1,50,5.0,"Usual Suspects, The (1995)",Crime|Mystery|Thriller,1995


In [87]:
#Apply the function to obtain new ratings for Action films 
userrating = movie_rater(dfnew, 3, 'Action')

       userId  movieId  rating         title                           genres  \
62353     414      160     1.0  Congo (1995)  Action|Adventure|Mystery|Sci-Fi   

       year  
62353  1995  
How would you rate this movie on a scale of 0-5, press n if you have not seen :
3
       userId  movieId  rating              title  \
57744     380    52281     4.0  Grindhouse (2007)   

                                    genres  year  
57744  Action|Crime|Horror|Sci-Fi|Thriller  2007  
How would you rate this movie on a scale of 0-5, press n if you have not seen :
4
       userId  movieId  rating            title              genres  year
55951     369     4262     3.0  Scarface (1983)  Action|Crime|Drama  1983
How would you rate this movie on a scale of 0-5, press n if you have not seen :
5


In [88]:
#Display the new user ratings 
userrating

[{'userId': 1000, 'movieId': 160, 'title': 'Congo (1995)', 'rating': '3'},
 {'userId': 1000,
  'movieId': 52281,
  'title': 'Grindhouse (2007)',
  'rating': '4'},
 {'userId': 1000, 'movieId': 4262, 'title': 'Scarface (1983)', 'rating': '5'}]

The new user has rated three new movies: Congo, Grindhouse and Scarface. Our model can now provide predictions for these ratings by adding them to our model.

In [103]:
#Add new ratings to our DataFrame
new_ratings_df = df4.append(userrating,ignore_index=True, sort=False)

In [104]:
#Drop the 'title' column so that our dataframe is ready to be put into surprise
new_ratings_df.drop(['title'], axis=1, inplace=True)

In [106]:
#Investigate new DataFrame
new_ratings_df.head()

Unnamed: 0,userId,movieId,rating
0,1,1,4
1,1,3,4
2,1,6,4
3,1,47,5
4,1,50,5


Now we will redo the same modeling process as above in order to find predictions for the above movies.

In [108]:
#Instansiate reader and data 
reader = Reader(rating_scale=(0, 5))
data = Dataset.load_from_df(new_ratings_df, reader)

In [109]:
#Train test split 
trainset, testset = train_test_split(data, test_size=.2)

In [113]:
#Reinstantiate the model with the best parameters from GridSearch and fit on the trainset 
svdtuned2 = SVD(n_factors=150,
               reg_all=0.1,
               n_epochs=40,
               lr_all=0.01)
svdtuned2.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7f9c13ac1a58>

In [116]:
#Find predictions for the three movies that user with userId=1000 just rated
print(svdtuned2.predict(1000,160))
print(svdtuned2.predict(1000,52281))
print(svdtuned2.predict(1000,4262))


user: 1000       item: 160        r_ui = None   est = 2.97   {'was_impossible': False}
user: 1000       item: 52281      r_ui = None   est = 4.02   {'was_impossible': False}
user: 1000       item: 4262       r_ui = None   est = 4.59   {'was_impossible': False}


Now we have predictions.