<a href="https://colab.research.google.com/github/dhruvmun/MDM-Recommendation-System/blob/master/surprise_methods.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install surprise



In [0]:
from surprise import SVD, BaselineOnly, SVDpp, NMF, SlopeOne, CoClustering
from surprise import Dataset
from surprise.model_selection import cross_validate
from surprise.prediction_algorithms import KNNBaseline, KNNBasic, KNNWithMeans, KNNWithZScore
from surprise import accuracy
from surprise.model_selection import train_test_split

In [0]:
# Load the movielens-1M dataset
data = Dataset.load_builtin('ml-1m')

In [0]:
# sample random trainset and testset
# test set is made of 25% of the ratings.
trainset, testset = train_test_split(data, test_size=.20)

In [0]:
def recommendation(algo, trainset, testset):
  # Train the algorithm on the trainset, and predict ratings for the testset
  algo.fit(trainset)
  predictions = algo.test(testset)

  # Then compute RMSE
  accuracy.rmse(predictions)
  # Then compute MAE
  accuracy.mae(predictions)
  return

#### Basic algorithm (Baseline approach):

In [6]:
algo = BaselineOnly()

# Train the algorithm on the trainset, and predict ratings for the testset
algo.fit(trainset)
predictions = algo.test(testset)

# Then compute RMSE
accuracy.rmse(predictions)
# Then compute MAE
accuracy.mae(predictions)
print("Done!")

Estimating biases using als...
RMSE: 0.9097
MAE:  0.7196
Done!


In [7]:
results = cross_validate(BaselineOnly(), data, measures=['RMSE', 'MAE'], cv=5, verbose=False)

Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...


In [8]:
results

{'fit_time': (4.990859270095825,
  5.814388751983643,
  5.663867712020874,
  5.770900726318359,
  5.692715406417847),
 'test_mae': array([0.71936599, 0.71997615, 0.71911252, 0.72005384, 0.71927567]),
 'test_rmse': array([0.90917297, 0.90877064, 0.90862051, 0.90964181, 0.90780963]),
 'test_time': (2.0957388877868652,
  2.1422946453094482,
  2.1305301189422607,
  2.2126646041870117,
  1.6206450462341309)}

In [0]:
# predictions[0].uid = 5

# predictions[0]

AttributeError: ignored

In [0]:
# a = list(predictions[0])
# a[3] = 
# predictions[0].__setattr__(est,1)

NameError: ignored

#### Matrix Factorization-based algorithms:

In [8]:
# algorithm is equivalent to Probabilistic Matrix Factorization [check]

algo = SVD()

# Train the algorithm on the trainset, and predict ratings for the testset
algo.fit(trainset)
predictions = algo.test(testset)

accuracy.rmse(predictions)
accuracy.mae(predictions)
print("Done!")

RMSE: 0.8731
MAE:  0.6851
Done!


In [15]:
data.split(n_folds=5)

AttributeError: ignored

In [0]:
results = cross_validate(SVD(), data, measures=['RMSE', 'MAE'], cv=5, verbose=False)

In [0]:
# algorithm is an extension of SVD that takes into account implicit ratings.

algo = SVDpp()

# Train the algorithm on the trainset, and predict ratings for the testset
algo.fit(trainset)
predictions = algo.test(testset)

accuracy.rmse(predictions)
accuracy.mae(predictions)
print("Done!")

RMSE: 0.8644
MAE:  0.6739
Done!


In [0]:
# collaborative filtering algorithm based on Non-negative Matrix Factorization. It is very similar with SVD.

algo = NMF()

# Train the algorithm on the trainset, and predict ratings for the testset
algo.fit(trainset)
predictions = algo.test(testset)

accuracy.rmse(predictions)
accuracy.mae(predictions)
print("Done!")  

RMSE: 0.9160
MAE:  0.7232
Done!


In [0]:
# Implementation of Slope One algorithm 

algo = SlopeOne()

# Train the algorithm on the trainset, and predict ratings for the testset
algo.fit(trainset)
predictions = algo.test(testset)

accuracy.rmse(predictions)
accuracy.mae(predictions)
print("Done!")

NameError: ignored

In [0]:
# collaborative filtering algorithm based on co-clustering. 

algo = Coclustering()

# Train the algorithm on the trainset, and predict ratings for the testset
algo.fit(trainset)
predictions = algo.test(testset)

accuracy.rmse(predictions)
accuracy.mae(predictions)
print("Done!")

#### k-NN algorithms (Collabrative filtering) :

Note: KNNBaseline() seems to be giving the best result in KNN approaches

In [0]:
# Basic collaborative filtering algo

algo = KNNBasic()

algo.fit(trainset)
predictions = algo.test(testset)

accuracy.rmse(predictions)
accuracy.mae(predictions)
print("Done!")

Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.9272
MAE:  0.7310
Done!


In [0]:
# basic collaborative filtering algorithm taking into account a baseline rating.

algo = KNNBaseline()

algo.fit(trainset)
predictions = algo.test(testset)

accuracy.rmse(predictions)
accuracy.mae(predictions)
print("Done!")

Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.8978
MAE:  0.7081
Done!


In [0]:
# Basic collaborative filtering algorithm, taking into account the mean ratings of each user

algo = KNNWithMeans()

algo.fit(trainset)
predictions = algo.test(testset)

accuracy.rmse(predictions)
accuracy.mae(predictions)
print("Done!")

Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.9316
MAE:  0.7398
Done!


In [0]:
# basic collaborative filtering algorithm, taking into account the z-score normalization of each user

algo = KNNWithZScore()

algo.fit(trainset)
predictions = algo.test(testset)

accuracy.rmse(predictions)
accuracy.mae(predictions)
print("Done!")

Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.9324
MAE:  0.7375
Done!


In [0]:
import pandas as pd

In [11]:
benchmark = []
# Iterate over all algorithms
for algorithm in [SVD(), SVDpp(), SlopeOne(), NMF(), KNNBaseline(), KNNBasic(), KNNWithMeans(), KNNWithZScore(), BaselineOnly(), CoClustering()]:
    # Perform cross validation
    results = cross_validate(algorithm, data, measures=['RMSE', 'MAE'], cv=5, verbose=False)
    
    # Get results & append algorithm name
    tmp = pd.DataFrame.from_dict(results).mean(axis=0)
    tmp = tmp.append(pd.Series([str(algorithm).split(' ')[0].split('.')[-1]], index=['Algorithm']))
    benchmark.append(tmp)
    
pd.DataFrame(benchmark).set_index('Algorithm').sort_values('test_rmse') 

Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Don

Unnamed: 0_level_0,test_rmse,test_mae,fit_time,test_time
Algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
SVDpp,0.862614,0.673489,3256.132195,73.504739
SVD,0.873984,0.685826,55.877482,2.63271
KNNBaseline,0.895069,0.706415,62.627911,174.428705
SlopeOne,0.906634,0.71451,16.042697,64.904856
BaselineOnly,0.908617,0.719422,4.586215,2.908219
CoClustering,0.915561,0.717795,19.347667,2.986175
NMF,0.915935,0.723579,55.372593,2.918873
KNNBasic,0.923024,0.727552,56.279947,150.852565
KNNWithMeans,0.929287,0.738663,57.593617,163.850462
KNNWithZScore,0.93072,0.736607,54.722606,161.066698
