# Memory based Collaborative Filtering 

https://towardsdatascience.com/how-to-build-a-memory-based-recommendation-system-using-python-surprise-55f3257b2cf4

> [How to Build a Memory-Based RecSys using Python Surprise](https://towardsdatascience.com/how-to-build-a-memory-based-recommendation-system-using-python-surprise-55f3257b2cf4)
> - Memory-based models calculate the similarities between users / items based on user-item rating pairs.

In [1]:
from surprise import Dataset, Reader
from time import time
from surprise.model_selection import train_test_split
from surprise import accuracy
from surprise import KNNWithMeans
import pandas as pd
import numpy as np

In [2]:
file_path = '../data-processed/scale-down/pid-track-binary-rating-train-data-scaled.csv'

In [3]:
reader = Reader(
    line_format='user item rating', sep=',', rating_scale = (0,1), skip_lines=1
    )

In [4]:
data = Dataset.load_from_file(file_path, reader=reader)

In [5]:
trainsetfull = data.build_full_trainset()

In [6]:
print('Number of users: ', trainsetfull.n_users, '\n')
print('Number of items: ', trainsetfull.n_items, '\n')

Number of users:  40000 

Number of items:  346295 



In [7]:
trainset, testset = train_test_split(data, test_size=0.2)

In [8]:
print('Number of users: ', trainset.n_users, '\n')
print('Number of items: ', trainset.n_items, '\n')

Number of users:  39855 

Number of items:  303133 



In [9]:
trainset_iids = list(trainset.all_items())
iid_converter = lambda x: trainset.to_raw_iid(x)
trainset_raw_iids = list(map(iid_converter, trainset_iids))

In [10]:
trainsetfull_iids = list(trainsetfull.all_items())
iid_converter = lambda x: trainsetfull.to_raw_iid(x)
trainsetfull_raw_iids = list(map(iid_converter, trainsetfull_iids))

In [None]:
t0=time()
k = 10
min_k = 2
sim_option = {'name':'pearson', 'user_based':True}
algo = KNNWithMeans(k = k, min_k = min_k, sim_option = sim_option)
algo.fit(trainset)
print(f'elapsed{(1/60*(time()-t0))}')

In [None]:
predictions = algo.test(testset)
accuracy.rmse(predictions)

In [None]:
algo.sim()

In [13]:
from surprise.model_selection import cross_validate
results = cross_validate(
    algo = algo, data = data, measures=['RMSE'], 
    cv=5, return_train_measures=True
    )

Computing the msd similarity matrix...
Done computing similarity matrix.


KeyboardInterrupt: 

In [None]:
results['test_rmse'].mean()

In [None]:
algo.fit(trainsetfull)

In [None]:
algo.predict(uid = '677004', iid = 'spotify:track:4Y3gLYlV5rD3fhirhWKtei')