# Capstone (2/3) - Netflix Recommender (Surprise)

## 1. Import Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from surprise import Dataset, Reader 
from surprise import NormalPredictor, BaselineOnly, SVD, NMF, KNNBasic, KNNWithMeans, KNNBaseline
from surprise import accuracy
from surprise.model_selection import cross_validate

## 2. Load File

In [2]:
# load file
nf =  pd.read_csv('2005-d4.csv')
movie_df = pd.read_csv('2005-d41kMT.csv')
movie_titles = pd.read_csv('movie_titles.csv', names=['movie_id','release_year','movie_title'], encoding = 'ISO-8859-1')

In [3]:
# read the dataframe information
nf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5116704 entries, 0 to 5116703
Data columns (total 6 columns):
 #   Column        Dtype 
---  ------        ----- 
 0   movie_id      int64 
 1   user_id       int64 
 2   rating        int64 
 3   date          object
 4   release_year  int64 
 5   movie_title   object
dtypes: int64(4), object(2)
memory usage: 234.2+ MB


In [4]:
# convert date to datetime format
nf['date'] = pd.to_datetime(nf['date'], format='%Y-%m-%d')

In [5]:
# Check for any null values
nf.isnull().sum()

movie_id        0
user_id         0
rating          0
date            0
release_year    0
movie_title     0
dtype: int64

In [6]:
# see the dataset details 
nf.describe(include = 'all', datetime_is_numeric=True)

Unnamed: 0,movie_id,user_id,rating,date,release_year,movie_title
count,5116704.0,5116704.0,5116704.0,5116704,5116704.0,5116704
unique,,,,,,17050
top,,,,,,National Treasure
freq,,,,,,20658
mean,9181.094,1326255.0,3.690217,2005-09-04 03:07:01.133603456,1994.299,
min,1.0,6.0,1.0,2005-06-01 00:00:00,1896.0,
25%,4884.0,662746.0,3.0,2005-07-17 00:00:00,1990.0,
50%,9340.0,1327787.0,4.0,2005-09-03 00:00:00,1999.0,
75%,13675.0,1989631.0,5.0,2005-10-20 00:00:00,2003.0,
max,17770.0,2649429.0,5.0,2005-12-31 00:00:00,2005.0,


## 3. Assumptions

Moving forward, to use cross validation because it is not bias comparing with 1 prediction (of 1 fold). 
\
Thus, there is no need to split data into train and test anymore. 

* For the Netflix Prize Data, RMSE measurement is used for comparison. Hence, we will use this too. 

* kfolds of 5 to reduce biasness and time spent (instead of 3 or 10). 

## 4. Prepare Dataset - Surprise

In [7]:
# Using the Surprise Library to read the dataset

reader = Reader(rating_scale=(1,5))
nf1 = Dataset.load_from_df(nf[['movie_id', 'user_id', 'rating']], reader)

In [8]:
# Set the similarity options for KNN algorithms in advance for pearson r 

sim_pr = {'name': 'pearson','shrinkage': 0}

In [9]:
# calling all algorithms 

np = NormalPredictor()
bo = BaselineOnly()

svd = SVD()
nmf = NMF()

knnb = KNNBasic()
knnb_pr = KNNBasic(sim_options=sim_pr)
knnbo = KNNBaseline()
knnbo_pr = KNNBaseline(sim_options=sim_pr)

## 5. Algorithms (Collaborative Filter -- Model-Based)

### (I) Normal Predictor

In [10]:
# Run 5-fold cross-validation and print results.
cross_validate(np, nf1, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm NormalPredictor on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.4632  1.4628  1.4610  1.4623  1.4628  1.4624  0.0008  
MAE (testset)     1.1700  1.1699  1.1680  1.1691  1.1699  1.1694  0.0008  
Fit time          6.04    8.26    8.21    8.32    8.21    7.81    0.89    
Test time         9.72    9.32    8.61    9.25    8.37    9.05    0.50    


{'test_rmse': array([1.4632444 , 1.46280941, 1.46095104, 1.4623228 , 1.46276784]),
 'test_mae': array([1.17003581, 1.16992045, 1.16797589, 1.16908071, 1.16992386]),
 'fit_time': (6.037235975265503,
  8.263891458511353,
  8.20615553855896,
  8.322164297103882,
  8.211103916168213),
 'test_time': (9.724432229995728,
  9.32488226890564,
  8.606228351593018,
  9.248372316360474,
  8.367465496063232)}

To reject Normal Predictor algorithm, as the RMSE is off the charts. 

### (II) BaselineOnly

In [11]:
# Run 5-fold cross-validation and print results.
cross_validate(bo, nf1, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Evaluating RMSE, MAE of algorithm BaselineOnly on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9522  0.9520  0.9505  0.9514  0.9532  0.9519  0.0009  
MAE (testset)     0.7511  0.7514  0.7509  0.7508  0.7519  0.7512  0.0004  
Fit time          20.48   22.48   22.65   22.44   22.71   22.15   0.84    
Test time         7.87    7.77    7.85    7.90    7.98    7.88    0.07    


{'test_rmse': array([0.95222147, 0.95200974, 0.95051284, 0.95135682, 0.95315557]),
 'test_mae': array([0.7511174 , 0.75138459, 0.75089839, 0.75079228, 0.75188359]),
 'fit_time': (20.478379011154175,
  22.481547832489014,
  22.64858078956604,
  22.43720269203186,
  22.70942258834839),
 'test_time': (7.873626232147217,
  7.768875598907471,
  7.850117921829224,
  7.9025092124938965,
  7.984791994094849)}

We can consider the BaselineOnly algorithm, although the RMSE is high. But there is a 1.1% improvement as compared to Cinematch score of 95.25%.

### (III) Matrix Decomposition

#### (A) SVD

In [12]:
# Run 5-fold cross-validation and print results.
cross_validate(svd, nf1, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9641  0.9634  0.9671  0.9656  0.9654  0.9651  0.0013  
MAE (testset)     0.7558  0.7553  0.7579  0.7570  0.7565  0.7565  0.0009  
Fit time          201.04  202.72  202.69  202.86  203.26  202.51  0.77    
Test time         9.76    9.46    11.27   11.33   12.02   10.77   0.99    


{'test_rmse': array([0.96410411, 0.963449  , 0.96707909, 0.96559049, 0.96539016]),
 'test_mae': array([0.75575637, 0.75534729, 0.75793194, 0.75697514, 0.75654472]),
 'fit_time': (201.03636717796326,
  202.7239968776703,
  202.6918168067932,
  202.85834503173828,
  203.25560426712036),
 'test_time': (9.764836072921753,
  9.461615562438965,
  11.271177768707275,
  11.333149909973145,
  12.019845962524414)}

#### (B) NMF

In [13]:
# Run 5-fold cross-validation and print results.
cross_validate(nmf, nf1, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm NMF on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.0174  1.0166  1.0153  1.0160  1.0167  1.0164  0.0007  
MAE (testset)     0.7890  0.7883  0.7876  0.7881  0.7890  0.7884  0.0006  
Fit time          289.37  291.29  291.49  291.46  291.18  290.96  0.80    
Test time         12.10   9.47    9.42    9.41    9.58    10.00   1.05    


{'test_rmse': array([1.01739092, 1.01662117, 1.01528338, 1.01597694, 1.01668263]),
 'test_mae': array([0.78902326, 0.78833205, 0.78760064, 0.78811515, 0.78904727]),
 'fit_time': (289.37050223350525,
  291.2917425632477,
  291.48831129074097,
  291.4604244232178,
  291.1844599246979),
 'test_time': (12.096824407577515,
  9.474971294403076,
  9.41965365409851,
  9.413270473480225,
  9.575867652893066)}

### (IV) KNN

With KNN, we will be using Pearson R and MSD Similarity for comparison.

#### (A) KNN - Basic

In [14]:
# Run 5-fold cross-validation on msd similarity.
cross_validate(knnb, nf1, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNBasic on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.0221  1.0232  1.0238  1.0222  1.0248  1.0232  0.0010  
MAE (testset)     0.7985  0.7991  0.7999  0.7980  0.8007  0.7993  0.0010  
Fit time          18.78   24.18   23.61   21.72   21.53   21.96   1.90    
Test time         58.78   63.67   59.89   61.76   61.49   61.12   1.67    


{'test_rmse': array([1.02213776, 1.02317002, 1.02379045, 1.022179  , 1.02484034]),
 'test_mae': array([0.79853568, 0.79909133, 0.79992801, 0.79804522, 0.80074938]),
 'fit_time': (18.779494047164917,
  24.17823553085327,
  23.61124038696289,
  21.72212243080139,
  21.526944160461426),
 'test_time': (58.7792387008667,
  63.666033029556274,
  59.893397092819214,
  61.757367849349976,
  61.48782134056091)}

In [18]:
# Run 5-fold cross-validation on pearson r.
cross_validate(knnb_pr, nf1, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNBasic on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.0451  1.0466  1.0456  1.0447  1.0451  1.0454  0.0006  
MAE (testset)     0.8125  0.8142  0.8132  0.8124  0.8125  0.8129  0.0007  
Fit time          220.06  353.55  211.75  129.36  134.93  209.93  81.05   
Test time         195.26  284.57  256.63  136.49  161.25  206.84  56.01   


{'test_rmse': array([1.04514202, 1.04656892, 1.04557589, 1.04472423, 1.04511904]),
 'test_mae': array([0.81250823, 0.81417047, 0.81316282, 0.81236951, 0.81248667]),
 'fit_time': (220.05937457084656,
  353.5478539466858,
  211.74868369102478,
  129.36437106132507,
  134.93317413330078),
 'test_time': (195.26036167144775,
  284.57482290267944,
  256.6324374675751,
  136.48748779296875,
  161.25412368774414)}

#### (C) KNN - Baseline

In [19]:
# Run 5-fold cross-validation on msd similarity.
cross_validate(knnbo, nf1, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNBaseline on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9639  0.9652  0.9652  0.9664  0.9665  0.9654  0.0009  
MAE (testset)     0.7436  0.7444  0.7450  0.7459  0.7461  0.7450  0.0009  
Fit time          46.87   47.90   45.60   44.66   43.41   45.69   1.59    
Test time         79.70   91.90   81.91   87.96   76.16   83.53   5.68    


{'test_rmse': array([0.96394211, 0.96518892, 0.96523937, 0.96636147, 0.96646048]),
 'test_mae': array([0.74358268, 0.74443222, 0.74500407, 0.74593699, 0.74614077]),
 'fit_time': (46.86977195739746,
  47.90440320968628,
  45.595598220825195,
  44.659833908081055,
  43.41004276275635),
 'test_time': (79.70402312278748,
  91.89628767967224,
  81.90903210639954,
  87.96286749839783,
  76.1644880771637)}

In [20]:
# Run 5-fold cross-validation on pearson r.
cross_validate(knnbo_pr, nf1, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNBaseline on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9768  0.9787  0.9766  0.9772  0.9755  0.9770  0.0011  
MAE (testset)     0.7501  0.7518  0.7504  0.7501  0.7493  0.7503  0.0008  
Fit time          145.98  173.06  142.81  146.60  205.32  162.75  23.91   
Test time         228.39  240.44  205.35  178.71  198.50  210.28  21.90   


{'test_rmse': array([0.97678321, 0.97873048, 0.97661375, 0.97715418, 0.97546993]),
 'test_mae': array([0.75007629, 0.75178155, 0.75043514, 0.75012476, 0.74932525]),
 'fit_time': (145.9770212173462,
  173.06050515174866,
  142.80761623382568,
  146.60282635688782,
  205.32131338119507),
 'test_time': (228.38552141189575,
  240.44156861305237,
  205.35053777694702,
  178.70879364013672,
  198.50252437591553)}

### (V) TensorFlow

Please refer to Jupyter Notebook 3 of 3 for the codes, as we will be using tensorflow to run. 

## 6. Recommendations based on Best 3 RMSE Algorithms

In [21]:
# Select a random user
user_id = 1499245

In [22]:
# Check movie_df info 
movie_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Unnamed: 0    1000 non-null   int64  
 1   index         1000 non-null   int64  
 2   movie_id      1000 non-null   int64  
 3   count         1000 non-null   float64
 4   release_year  1000 non-null   int64  
 5   movie_title   1000 non-null   object 
dtypes: float64(1), int64(4), object(1)
memory usage: 47.0+ KB


In [23]:
# Based on above info, to take the necessary columns 
movie_c = movie_df[['count', 'movie_id', 'movie_title', 'release_year']]
movie_c

Unnamed: 0,count,movie_id,movie_title,release_year
0,20658.0,17169,National Treasure,2004
1,17521.0,17324,Hitch,2005
2,16426.0,11812,Million Dollar Baby,2004
3,15496.0,12966,The Aviator,2004
4,14850.0,5317,Miss Congeniality,2000
...,...,...,...,...
995,1242.0,1709,Clash of the Titans,1981
996,1240.0,15064,Dangerous Liaisons,1988
997,1238.0,8295,Holes,2003
998,1235.0,8413,Girl with a Pearl Earring,2003


In [24]:
# Using Surprise Library, to build the whole nf1 dataset as the trainset 
trainset = nf1.build_full_trainset()

## (I) BaselineOnly 

In [25]:
# fit the trainset to BaselineOnly Model
bo.fit(trainset)

Estimating biases using als...


<surprise.prediction_algorithms.baseline_only.BaselineOnly at 0x20c16d24400>

In [26]:
# create a copy of movie_c dataframe purely for baseline, so that the subsequent files will not be affected 
bo_t = movie_c.copy(deep=True)

# Additional column on the estimated score based on the model's prediction for this user
bo_t['est_score'] = bo_t['movie_id'].apply(lambda x: bo.predict(user_id, x).est)

# sort the estimated scores, to view the top 10 movies that we can recommend to user to watch 
bo_t = bo_t.sort_values(by=['est_score'], ascending=False)
bo_t.head(10)

Unnamed: 0,count,movie_id,movie_title,release_year,est_score
278,3862.0,6736,Robots,2005,4.450964
127,5866.0,8764,Happy Gilmore,1996,4.23266
839,1502.0,5727,The Last Starfighter,1984,4.22876
394,3007.0,4545,Dances With Wolves: Special Edition,1990,4.127354
981,1261.0,8,What the #$*! Do We Know!?,2004,4.057638
869,1434.0,14602,Sabrina,1995,4.029759
303,3719.0,15471,Phone Booth,2003,4.028324
484,2428.0,2743,The Pianist,2002,4.027078
337,3401.0,14185,Mary Poppins,1964,3.999137
998,1235.0,8413,Girl with a Pearl Earring,2003,3.991943


## (II) SVD

In [27]:
# fit the trainset to SVD Model
svd.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x20c16d24160>

In [28]:
# create a copy of movie_c dataframe purely for svd, so that the subsequent files will not be affected 
svd_t = movie_c.copy(deep=True)

# Additional column on the estimated score based on the model's prediction for this user
svd_t['est_score'] = svd_t['movie_id'].apply(lambda x: svd.predict(user_id, x).est)

# sort the estimated scores, to view the top 10 movies that we can recommend to user to watch 
svd_t = svd_t.sort_values(by=['est_score'], ascending=False)
svd_t.head(10)

Unnamed: 0,count,movie_id,movie_title,release_year,est_score
278,3862.0,6736,Robots,2005,4.572724
839,1502.0,5727,The Last Starfighter,1984,4.328896
127,5866.0,8764,Happy Gilmore,1996,4.265021
394,3007.0,4545,Dances With Wolves: Special Edition,1990,4.219067
869,1434.0,14602,Sabrina,1995,4.107465
337,3401.0,14185,Mary Poppins,1964,4.08574
981,1261.0,8,What the #$*! Do We Know!?,2004,4.060704
300,3729.0,14670,Batman,1989,4.056289
484,2428.0,2743,The Pianist,2002,4.048302
303,3719.0,15471,Phone Booth,2003,4.046744


## (III) k-Nearest Neighbor (Baseline)

In [29]:
# fit the trainset to k-nearest neighbor baseline Model
knnbo.fit(trainset)

Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBaseline at 0x20c16d24af0>

In [30]:
# create a copy of movie_c dataframe purely for knn baseline, so that the subsequent files will not be affected 
knnbo_t = movie_c.copy(deep=True)

# Additional column on the estimated score based on the model's prediction for this user
knnbo_t['est_score'] = knnbo_t['movie_id'].apply(lambda x: svd.predict(user_id, x).est)

# sort the estimated scores, to view the top 10 movies that we can recommend to user to watch 
knnbo_t = knnbo_t.sort_values(by=['est_score'], ascending=False)
knnbo_t.head(10)

Unnamed: 0,count,movie_id,movie_title,release_year,est_score
278,3862.0,6736,Robots,2005,4.572724
839,1502.0,5727,The Last Starfighter,1984,4.328896
127,5866.0,8764,Happy Gilmore,1996,4.265021
394,3007.0,4545,Dances With Wolves: Special Edition,1990,4.219067
869,1434.0,14602,Sabrina,1995,4.107465
337,3401.0,14185,Mary Poppins,1964,4.08574
981,1261.0,8,What the #$*! Do We Know!?,2004,4.060704
300,3729.0,14670,Batman,1989,4.056289
484,2428.0,2743,The Pianist,2002,4.048302
303,3719.0,15471,Phone Booth,2003,4.046744


## Additional RMSE Run for Different Data Size (only for best 3 algo)

### (I) Random 10 Million data from Jun to Dec 2005

In [31]:
# load the dataset file with 10 million data
nf_10 =  pd.read_csv('2005-d3.csv')

In [32]:
# Using the Surprise Library to read the dataset

reader = Reader(rating_scale=(1,5))
nf_10 = Dataset.load_from_df(nf_10[['movie_id', 'user_id', 'rating']], reader)

In [33]:
# Run 5-fold cross-validation and print results.
cross_validate(bo, nf_10, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Evaluating RMSE, MAE of algorithm BaselineOnly on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9429  0.9404  0.9415  0.9414  0.9414  0.9415  0.0008  
MAE (testset)     0.7413  0.7395  0.7402  0.7405  0.7402  0.7403  0.0006  
Fit time          39.73   44.32   44.66   44.95   44.94   43.72   2.01    
Test time         19.00   19.11   17.28   18.79   17.13   18.26   0.87    


{'test_rmse': array([0.94285681, 0.94036919, 0.9414918 , 0.9413956 , 0.94141412]),
 'test_mae': array([0.741325  , 0.73948938, 0.7401915 , 0.74050056, 0.74020267]),
 'fit_time': (39.72959303855896,
  44.31876587867737,
  44.660983085632324,
  44.94636130332947,
  44.940850496292114),
 'test_time': (19.001505851745605,
  19.107502460479736,
  17.281549215316772,
  18.78510808944702,
  17.13228678703308)}

In [34]:
# Run 5-fold cross-validation and print results.
cross_validate(svd, nf_10, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9322  0.9329  0.9328  0.9330  0.9334  0.9329  0.0004  
MAE (testset)     0.7266  0.7266  0.7267  0.7265  0.7278  0.7268  0.0005  
Fit time          372.99  377.52  377.98  377.35  378.02  376.77  1.91    
Test time         23.68   21.11   23.57   23.97   23.77   23.22   1.06    


{'test_rmse': array([0.93220402, 0.93292919, 0.93284324, 0.93296853, 0.93343069]),
 'test_mae': array([0.72655332, 0.72664649, 0.72671354, 0.72650715, 0.72776392]),
 'fit_time': (372.98564434051514,
  377.5158200263977,
  377.9786660671234,
  377.34541058540344,
  378.01528310775757),
 'test_time': (23.682081937789917,
  21.10678482055664,
  23.565629959106445,
  23.970784425735474,
  23.768656730651855)}

In [35]:
# Run 5-fold cross-validation on msd similarity.
cross_validate(knnbo, nf_10, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNBaseline on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9365  0.9376  0.9370  0.9378  0.9373  0.9372  0.0004  
MAE (testset)     0.7240  0.7247  0.7244  0.7248  0.7245  0.7245  0.0003  
Fit time          86.60   93.95   94.36   93.19   93.26   92.27   2.87    
Test time         215.78  220.59  221.03  218.15  218.24  218.76  1.90    


{'test_rmse': array([0.93649304, 0.93757162, 0.93702524, 0.93776381, 0.93730443]),
 'test_mae': array([0.72398942, 0.72467962, 0.72439653, 0.7248102 , 0.72450949]),
 'fit_time': (86.59847044944763,
  93.94507384300232,
  94.36499238014221,
  93.1896824836731,
  93.25578022003174),
 'test_time': (215.78409957885742,
  220.59057450294495,
  221.03189492225647,
  218.14552474021912,
  218.2447862625122)}

## == END ==