In [1]:
#import all the necessary libraries
from surprise import Dataset
from surprise import Reader
import pandas as pd
import numpy as np

In [2]:
#Load the movie data 
data = Dataset.load_builtin('ml-100k')

In [3]:
#Fetch the raw ratings
raw_ratings = data.raw_ratings

#Display a sample of rating (user_id, item_id, rating, timestamp)
print(raw_ratings[:20])

[('196', '242', 3.0, '881250949'), ('186', '302', 3.0, '891717742'), ('22', '377', 1.0, '878887116'), ('244', '51', 2.0, '880606923'), ('166', '346', 1.0, '886397596'), ('298', '474', 4.0, '884182806'), ('115', '265', 2.0, '881171488'), ('253', '465', 5.0, '891628467'), ('305', '451', 3.0, '886324817'), ('6', '86', 3.0, '883603013'), ('62', '257', 2.0, '879372434'), ('286', '1014', 5.0, '879781125'), ('200', '222', 5.0, '876042340'), ('210', '40', 3.0, '891035994'), ('224', '29', 3.0, '888104457'), ('303', '785', 3.0, '879485318'), ('122', '387', 5.0, '879270459'), ('194', '274', 2.0, '879539794'), ('291', '1042', 4.0, '874834944'), ('234', '1184', 2.0, '892079237')]


In [5]:
#Build the trainset
trainset = data.build_full_trainset()

In [6]:
type(trainset)

surprise.trainset.Trainset

In [8]:
#Crate a dictionary mapping user and item indices to ratings
ratings_dict = {'user':[], 'item': [], 'rating':[]}
for u,i, r in trainset.all_ratings():
    ratings_dict['user'].append(u)
    ratings_dict['item'].append(i)
    ratings_dict['rating'].append(r)

In [9]:
ratings_dict

{'user': [0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  2,
  

In [10]:
#Determined the number of users and items
num_users = trainset.n_users
num_items = trainset.n_items

In [11]:
#create the rating matrix
ratings = np.zeros((num_users, num_items))
for user, item, rating in zip(ratings_dict['user'], ratings_dict['item'], ratings_dict['rating']):
    ratings[user, item] = rating

In [12]:
ratings

array([[3., 0., 0., ..., 0., 0., 0.],
       [0., 3., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 4., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [13]:
#Calculate the user- user similarity (cosine similarity)
def cosine_similarity(u, v):
    return np.dot(u, v)/(np.linalg.norm(u) * np.linalg.norm(v))

In [15]:
def user_similarity_matrix(ratings):
    num_users = ratings.shape[0]
    similarity_matrix = np.zeros((num_users, num_users))
    for i in range(num_users):
        for j in range(num_users):
            if i != j:
                similarity_matrix[i][j] = cosine_similarity(ratings[i], ratings[j])
    return similarity_matrix

In [16]:
user_sim_matrix = user_similarity_matrix(ratings)

In [18]:
#Predict rating for a user based on user similarity
def predict_user_rating(user_id, item_id, ratings, user_sim_matrix):
    num_users = ratings.shape[0]
    num_items = ratings.shape[1]
    pred_rating = 0.0
    sim_sum = 0.0
    for i in range(num_users):
        if i!= user_id and ratings[i][item_id] != 0:
            pred_rating += user_sim_matrix[user_id][i] * ratings[i][item_id]
            sim_sum += np.abs(user_sim_matrix[user_id][i])
    if sim_sum == 0:
        return 0
    return pred_rating/sim_sum

In [19]:
#Generate recommendation for a user
def recommend_items(user_id, ratings, user_sim_matrix, num_recommendations=3):
    user_rating = ratings[user_id]
    unrated_items = np.where(user_rating == 0)[0]
    predictions = []
    for item_id in unrated_items:
        pred = predict_user_rating(user_id, item_id, ratings, user_sim_matrix)
        predictions.append((item_id, pred))
    predictions.sort(key=lambda x: x[1], reverse=True) #Sorting prediction by rating order
    return [item_id for item_id, _ in predictions[:num_recommendations]]

In [20]:
#Example: Generate recommendation
recommended_items = recommend_items(0, ratings, user_sim_matrix)
print('Recommended item for user 0:', recommended_items)

Recommended item for user 0: [1038, 1130, 1150]


In [21]:
#Example: Generate recommendation
recommended_items = recommend_items(1, ratings, user_sim_matrix)
print('Recommended item for user 0:', recommended_items)

Recommended item for user 0: [1130, 1436, 1038]


In [22]:
#Example: Generate recommendation
recommended_items = recommend_items(2, ratings, user_sim_matrix)
print('Recommended item for user 0:', recommended_items)

Recommended item for user 0: [1038, 1436, 1150]


In [23]:
#Example: Generate recommendation
recommended_items = recommend_items(4, ratings, user_sim_matrix)
print('Recommended item for user 0:', recommended_items)

Recommended item for user 0: [1130, 1436, 1038]
