In [12]:
import pandas as pd
from surprise import Dataset
from surprise import Reader
from scipy import io
from surprise.prediction_algorithms.knns import KNNBasic
from surprise.prediction_algorithms.knns import KNNWithMeans
import numpy as np

In [13]:
data = io.loadmat("each_movie_data.mat")

movie_by_id = {}
with open("movie_ids.txt", "r") as file:
    for line in file:
        line = line.split(" ", 1)
        movie_by_id[int(line[0]) - 1] = line[1]

In [14]:
my_ratings = np.zeros(1682)
my_ratings[205] = 3.0
my_ratings[270] = 5.0
my_ratings[264] = 3.5
my_ratings[323] = 4.25
my_ratings[635] = 4.0
my_ratings[0] = 3.5
my_ratings[21] = 3.0
my_ratings[28] = 3.0
my_ratings[55] = 4.0
my_ratings[63] = 4.5
my_ratings[66] = 4.0
my_ratings[88] = 4.5
my_ratings[93] = 3.5
my_ratings[167] = 4.5
my_ratings[153] = 4.5
my_ratings[540] = 1.0
my_ratings[626] = 2.5
my_ratings[754] = 3.0
my_ratings[779] = 2.0
my_ratings[889] = 1.5
my_ratings[169] = 5.0

my_rated = np.array([1 if my_ratings[i] > 0 else 0 for i in range(0, my_ratings.shape[0])])

my_user_id = data["Y"].shape[1]

data["Y"] = np.append(data["Y"], my_ratings.reshape(-1, 1), axis=1)
data["R"] = np.append(data["R"], my_rated.reshape(-1, 1), axis=1)

In [15]:
user_count = len(data['Y'][0])
movie_count = len(data['Y'])
size = user_count * movie_count
data_arr = []

for user_index in range(user_count):
    for movie_index in range(movie_count):
        data_arr.append([user_index, movie_index, data['Y'][movie_index][user_index]])

dataset = pd.DataFrame(data_arr, columns=["userID", "itemID", "rating"])

In [16]:
surprise_data = Dataset.load_from_df(dataset[['userID', 'itemID', 'rating']], Reader(rating_scale=(0, 5)))

my_rating_ids = dataset[(dataset["userID"] == my_user_id) & (dataset['rating'] > 0.0)]['itemID'].to_numpy()
all_rating_ids = dataset['itemID'].unique()
ids_to_predict = np.setdiff1d(all_rating_ids, my_rating_ids)

In [17]:
trainset = surprise_data.build_full_trainset()

In [18]:
algo = KNNBasic(k=10, sim_options={'user_based': True})

algo.fit(trainset)

Computing the msd similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x1c3ad785640>

In [19]:
recommends = []
for movie in ids_to_predict:
    recommends.append([movie, algo.predict(uid=my_user_id, iid=movie).est])

recommends = pd.DataFrame(recommends, columns=['Movie id', 'Score']).sort_values('Score', ascending=False)

In [20]:
top_20 = recommends.head(20)

print('Recommendations for KKNBasic')
for idx, row in top_20.iterrows():
    print("Movie: %s with score %lf"%(movie_by_id[idx].replace('\n', ''), row['Score']))

Recommendations for KKNBasic
Movie: Chasing Amy (1997) with score 1.466420
Movie: Good Will Hunting (1997) with score 1.361157
Movie: FairyTale: A True Story (1997) with score 1.266515
Movie: Devil's Advocate, The (1997) with score 1.261284
Movie: Queen Margot (Reine Margot, La) (1994) with score 1.164451
Movie: Rainmaker, The (1997) with score 1.079269
Movie: Evita (1996) with score 1.071015
Movie: Ice Storm, The (1997) with score 1.070345
Movie: Sense and Sensibility (1995) with score 0.983317
Movie: Sabrina (1995) with score 0.977554
Movie: Maverick (1994) with score 0.971617
Movie: Scream (1996) with score 0.884735
Movie: Up Close and Personal (1996) with score 0.876407
Movie: Rosewood (1997) with score 0.868968
Movie: Ed Wood (1994) with score 0.868712
Movie: Return of the Pink Panther, The (1974) with score 0.868712
Movie: Midnight in the Garden of Good and Evil (1997) with score 0.785364
Movie: Deceiver (1997) with score 0.782037
Movie: Fried Green Tomatoes (1991) with score 0.7

In [21]:
algo = KNNWithMeans(k=10, sim_options={'user_based': True})
algo.fit(trainset)

Computing the msd similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNWithMeans at 0x1c3d1ce7c40>

In [22]:
recommends = []
for movie in ids_to_predict:
    recommends.append([movie, algo.predict(uid=my_user_id, iid=movie).est])

recommends = pd.DataFrame(recommends, columns=['Movie id', 'Score']).sort_values('Score', ascending=False)
top_20 = recommends.head(20)

print('Recommendations for KKNWithMeans')
for idx, row in top_20.iterrows():
    print("Movie: %s with score %lf"%(movie_by_id[idx].replace('\n', ''), row['Score']))

Recommendations for KKNWithMeans
Movie: Chasing Amy (1997) with score 1.474575
Movie: Good Will Hunting (1997) with score 1.369312
Movie: FairyTale: A True Story (1997) with score 1.274670
Movie: Devil's Advocate, The (1997) with score 1.269439
Movie: Queen Margot (Reine Margot, La) (1994) with score 1.172606
Movie: Rainmaker, The (1997) with score 1.087425
Movie: Evita (1996) with score 1.079170
Movie: Ice Storm, The (1997) with score 1.078501
Movie: Sense and Sensibility (1995) with score 0.991472
Movie: Sabrina (1995) with score 0.985709
Movie: Maverick (1994) with score 0.979772
Movie: Scream (1996) with score 0.892890
Movie: Up Close and Personal (1996) with score 0.884562
Movie: Rosewood (1997) with score 0.877124
Movie: Ed Wood (1994) with score 0.876868
Movie: Return of the Pink Panther, The (1974) with score 0.876868
Movie: Midnight in the Garden of Good and Evil (1997) with score 0.793519
Movie: Deceiver (1997) with score 0.790193
Movie: Fried Green Tomatoes (1991) with score