# Model-Based Collaborative Filtering

https://towardsdatascience.com/how-to-build-a-model-based-recommendation-system-using-python-surprise-2df3b77ab3e5

> [How to Build a Model-Based Recommendation System using Python Surprise](https://towardsdatascience.com/how-to-build-a-model-based-recommendation-system-using-python-surprise-2df3b77ab3e5)
> - Model-based models (admittedly, a weird name) use some sort of machine learning algorithm to estimate the ratings. A typical example is singular value decomposition of the user-item ratings matrix.

In [1]:
from surprise import Dataset, Reader
from time import time
from surprise.model_selection import train_test_split
from surprise import accuracy
from surprise import SVD
import pandas as pd
import numpy as np

In [2]:
# Path to the training ratings CSV; it is read below by both Surprise
# (Dataset.load_from_file) and pandas (pd.read_csv).
file_path = '../data-processed/full-data/pid-track-binary-rating-train-data.csv'

In [3]:
# Tell Surprise how to parse the ratings file: comma-separated
# "user,item,rating" columns, ratings on a 0-1 scale, first line skipped.
reader = Reader(
    line_format='user item rating',
    sep=',',
    rating_scale=(0, 1),
    skip_lines=1,
)

In [4]:
# Load the ratings file into a Surprise Dataset using the reader configured above.
data = Dataset.load_from_file(file_path, reader=reader)

In [5]:
# Build a trainset from the whole dataset (no hold-out split); it is only
# used below to report the user and item counts.
trainsetfull = data.build_full_trainset()

In [6]:
# Report how many users and items the full trainset contains.
for label, count in (
    ('Number of users: ', trainsetfull.n_users),
    ('Number of items: ', trainsetfull.n_items),
):
    print(label, count, '\n')

Number of users:  1000000 

Number of items:  1996586 



In [None]:
# Hold out 20% of the ratings for evaluation. The split is seeded so the
# shuffle is reproducible and the RMSE computed from it is comparable
# between notebook runs.
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

In [None]:
# Report how many users and items ended up in the training split.
for label, count in (
    ('Number of users: ', trainset.n_users),
    ('Number of items: ', trainset.n_items),
):
    print(label, count, '\n')

In [None]:
# Fit an SVD model with default hyper-parameters on the training split,
# predict the held-out ratings, and report the RMSE of those predictions.
# SVD_model is reused further down to score the evaluation set.
SVD_model = SVD()
SVD_model.fit(trainset)
predictions = SVD_model.test(testset)
accuracy.rmse(predictions)

In [None]:
# Read the same training ratings CSV into a pandas DataFrame
# (used below to list the distinct track URIs).
train_set=pd.read_csv(file_path)

In [None]:
# Load the evaluation ground truth; the cells below read its `pid` and
# `track_uri` columns.
evaluation_set = pd.read_csv('../data-processed/full-data/evaluation-pids-ground-truth.csv')

In [None]:
# Score every (pid, track_uri) pair of the evaluation set with the fitted SVD.
#
# Surprise keeps raw ids as *strings* when ratings are loaded from a file, so
# the pid has to be passed as a string. An int uid never matches a training
# user, and the prediction would silently fall back to the unknown-user case.
#
# Each pair is predicted once and both columns are derived from that single
# Prediction instead of calling predict() twice per row.
svd_predictions = [
    SVD_model.predict(uid=str(int(pid)), iid=track_uri)
    for pid, track_uri in zip(evaluation_set['pid'], evaluation_set['track_uri'])
]
# 'pred_svd' holds the prediction's details dict, 'pred_rating' the estimated rating.
evaluation_set['pred_svd'] = [prediction.details for prediction in svd_predictions]
evaluation_set['pred_rating'] = [prediction.est for prediction in svd_predictions]

In [None]:
# Distinct track URIs from the training data, in order of first appearance;
# preview the first ten.
tracks = list(pd.unique(train_set['track_uri']))
tracks[:10]

In [None]:
# Preview the first 50 evaluation rows, including the prediction columns added above.
evaluation_set.head(50)

In [None]:
# NOTE(review): `ddd` is not defined anywhere in the visible notebook, so
# running this cell raises NameError. Presumably a deliberate stop marker that
# keeps "Run All" from reaching the expensive cell below — TODO confirm or remove.
ddd

In [None]:
from collections import defaultdict

from surprise import SVD
from surprise import Dataset


def get_top_n(predictions, n=20):
    """Return the top-N recommendations for each user from a set of predictions.

    Args:
        predictions (iterable of Prediction): Predictions as returned by the
            ``test`` method of an algorithm. Each one must unpack into
            ``(uid, iid, true_r, est, details)``.
        n (int): Maximum number of recommendations to keep for each user.
            Default is 20. A value of 0 or less yields empty lists.

    Returns:
        A ``defaultdict(list)`` whose keys are user (raw) ids and whose values
        are lists of tuples ``[(raw item id, rating estimation), ...]`` holding
        at most ``n`` entries, ordered from highest to lowest estimation.
        Items with equal estimations keep the order they had in
        ``predictions``.
    """
    # Imported here so the function only relies on `defaultdict` from the
    # surrounding cell.
    from heapq import nlargest

    # First map the predictions to each user.
    top_n = defaultdict(list)
    for uid, iid, _true_r, est, _details in predictions:
        top_n[uid].append((iid, est))

    # Then keep the n highest estimations per user. nlargest gives the same
    # result as a full descending sort followed by [:n], without sorting the
    # user's whole candidate list when n is small.
    for uid, user_ratings in top_n.items():
        top_n[uid] = nlargest(n, user_ratings, key=lambda pair: pair[1])

    return top_n

# Refit SVD on every available rating before generating recommendations.
# This rebinds `trainset` (and, below, `testset` and `predictions`) that the
# earlier evaluation cells also assigned.
trainset = data.build_full_trainset()
algo = SVD()
algo.fit(trainset)

# Then predict ratings for all pairs (u, i) that are NOT in the training set.
# NOTE(review): the full trainset printed earlier in this notebook has
# 1,000,000 users and 1,996,586 items, so "all pairs" is on the order of 2e12
# candidates — likely far too many to hold in memory. Consider scoring only a
# subset of users — TODO confirm.
testset = trainset.build_anti_testset()
predictions = algo.test(testset)

# Keep only the 10 highest-estimated items for each user.
top_n = get_top_n(predictions, n=10)

# Print the recommended items for each user
for uid, user_ratings in top_n.items():
    print(uid, [iid for (iid, _) in user_ratings])