# import Library

In [1]:
import pandas as pd
from surprise import Dataset
from surprise import Reader
from surprise import accuracy
from datetime import datetime

# Load File

In [2]:
data_cols = ['user id','movie id','rating','timestamp']
data_train = pd.read_csv('.\\ml-100k\\ua.base', sep='\t',names=data_cols, encoding='latin-1')
data_test = pd.read_csv('.\\ml-100k\\ua.test', sep='\t',names=data_cols, encoding='latin-1')

# Handle Timestamp 

In [3]:
data_train['timestamp'] = data_train['timestamp'].apply(datetime.fromtimestamp)
data_test['timestamp'] = data_test['timestamp'].apply(datetime.fromtimestamp)

In [4]:
data_train.head()

Unnamed: 0,user id,movie id,rating,timestamp
0,1,1,5,1997-09-23 06:02:38
1,1,2,3,1997-10-15 13:26:11
2,1,3,4,1997-11-03 15:42:40
3,1,4,3,1997-10-15 13:25:19
4,1,5,3,1998-03-13 09:15:12


In [5]:
# pandas dataframe to Surprise dataframe
reader = Reader(rating_scale=(1, 5))
data_train = Dataset.load_from_df(data_train[["user id", "movie id", "rating"]], reader)
data_test = Dataset.load_from_df(data_test[["user id", "movie id", "rating"]], reader)

In [6]:
# build the training and testing set
trainingSet = data_train.build_full_trainset()

testingSet_preset = data_test.build_full_trainset()
testingSet = testingSet_preset.build_testset()

# Setting Algo which we want

In [7]:
from surprise import KNNWithMeans

# To use item-based cosine similarity
sim_options = {
    "name": "cosine", # contains the similarity metric to use. Options are cosine, msd, pearson, or pearson_baseline. The default is msd.
    "user_based": False,  # Compute  similarities between items. A boolean that tells whether the approach will be user-based or item-based. The default is True, which means the user-based approach will be used.
}

algo = KNNWithMeans(sim_options=sim_options)

In [8]:
algo.fit(trainingSet)

Computing the cosine similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNWithMeans at 0x22ce2e651f0>

In [9]:
accuracy.rmse(algo.test(testingSet))

RMSE: 0.9605


0.9605088665222314

In [10]:
accuracy.mae(algo.test(testingSet))

MAE:  0.7539


0.753930247235347