<a href="https://colab.research.google.com/github/chandan9t8/UnivProjects/blob/main/Anime%20Recommendation%20System/SVD.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive (Colab only); the CSVs loaded below live under this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import itertools
import math
import random
from collections import defaultdict

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn
from IPython.display import display
from sklearn.model_selection import GridSearchCV
from surprise import Dataset
from surprise import Reader
from surprise import SVD
from surprise import accuracy
from surprise.model_selection import train_test_split

In [None]:
# Locations of the two input CSVs on the mounted Drive.
RATINGS_CSV = '/content/drive/MyDrive/Data Mining Project/cleaned_ratings.csv'
ANIME_CSV = '/content/drive/MyDrive/Data Mining Project/cleaned_anime.csv'

# Ratings table; the CSV's first column is consumed as the DataFrame index.
rating = pd.read_csv(RATINGS_CSV, index_col=0)

# Keep only the rows whose rating is not the -1 placeholder
# (presumably "watched but not rated" -- confirm against the dataset description).
has_score = rating['rating'] != -1
real_rating = rating[has_score]

# Anime metadata table.
anime = pd.read_csv(ANIME_CSV)

# Lookup of the form {anime_id: [name]}: after the transpose every anime_id is a
# column, and to_dict('list') turns each column into a one-element list.
id_and_name = anime[['anime_id', 'name']].set_index('anime_id')
anime_by_id = id_and_name.T.to_dict('list')

In [None]:
# Build the Surprise dataset from the explicitly rated rows.
# load_from_df interprets the columns positionally as (user, item, rating), so
# select them by name rather than relying on the column order of the CSV.
reader = Reader(rating_scale=(1,10))
dataset = Dataset.load_from_df(real_rating[['user_id', 'anime_id', 'rating']], reader=reader)

# Hold out 30% of the ratings for evaluation; the fixed seed keeps the split reproducible.
train, test = train_test_split(dataset,test_size=0.3,random_state=42)

In [None]:
# Exhaustive search over SVD hyper-parameters (3 * 2 * 2 * 2 = 24 fits).
n_factors = [50,100,150]
n_epochs = [20,30]
lr =[0.005,0.01]
reg = [0.02,0.1]

# Start from +inf / None so the first evaluated combination always becomes the
# incumbent and `params` is defined even before the loop runs.
min_error = math.inf
params = None

# NOTE(review): combinations are ranked on `test`, the same split that is later
# used to report the final RMSE/MAE, so those final numbers are optimistic.
# A separate validation split (or cross-validation on `train`) would avoid this.
for factors, epochs, learning_rate, regularization in itertools.product(n_factors, n_epochs, lr, reg):
    print("Training with %s factors, %s epochs, %s learning rate, %s regularizer: " %(factors, epochs, learning_rate, regularization))
    # Same seed for every candidate so differences in RMSE come from the
    # hyper-parameters rather than from the random initialisation.
    model = SVD(n_factors=factors, n_epochs=epochs, lr_all=learning_rate,
                reg_all=regularization, random_state=42)
    model.fit(train)
    predictions = model.test(test)
    rmse = accuracy.rmse(predictions)
    if rmse < min_error:
        min_error = rmse
        params = [factors, epochs, learning_rate, regularization]
    print("Current min_error is %s" % min_error)
print(min_error, params)

Training with 50 factors, 20 epochs, 0.005 learning rate, 0.02 regularizer: 
RMSE: 1.1446
Current min_error is 1.144646411232415
Training with 50 factors, 20 epochs, 0.005 learning rate, 0.1 regularizer: 
RMSE: 1.1734
Current min_error is 1.144646411232415
Training with 50 factors, 20 epochs, 0.01 learning rate, 0.02 regularizer: 
RMSE: 1.1793
Current min_error is 1.144646411232415
Training with 50 factors, 20 epochs, 0.01 learning rate, 0.1 regularizer: 
RMSE: 1.1527
Current min_error is 1.144646411232415
Training with 50 factors, 30 epochs, 0.005 learning rate, 0.02 regularizer: 
RMSE: 1.1585
Current min_error is 1.144646411232415
Training with 50 factors, 30 epochs, 0.005 learning rate, 0.1 regularizer: 
RMSE: 1.1541
Current min_error is 1.144646411232415
Training with 50 factors, 30 epochs, 0.01 learning rate, 0.02 regularizer: 
RMSE: 1.1953
Current min_error is 1.144646411232415
Training with 50 factors, 30 epochs, 0.01 learning rate, 0.1 regularizer: 
RMSE: 1.1388
Current min_err

In [None]:
# Refit SVD on the training split with the best combination found by the search.
# `params` holds [n_factors, n_epochs, lr_all, reg_all]; the fixed seed makes the
# metrics reported below reproducible across runs.
best_factors, best_epochs, best_lr, best_reg = params
# NOTE(review): an earlier revision hard-coded n_factors=150, n_epochs=30,
# lr_all=0.01, reg_all=0.1 here -- presumably the winner of a previous search run.
tuned_model = SVD(n_factors=best_factors, n_epochs=best_epochs,
                  lr_all=best_lr, reg_all=best_reg, random_state=42)
tuned_model.fit(train)

# Predict every (user, item) pair of the held-out split for evaluation.
predict = tuned_model.test(test)

In [None]:
# Report both RMSE and MAE for the tuned model's predictions on the test split.
# The MAE call is the cell's last expression, so its return value is what the
# notebook shows as the cell result.
accuracy.rmse(predict)
accuracy.mae(predict)

In [None]:
def create_predict_set(user_id, rating):
    """Return the anime ids that `user_id` has no row for in `rating`.

    Parameters
    ----------
    user_id : value compared against the 'user_id' column.
    rating : DataFrame with 'user_id' and 'anime_id' columns.

    Returns
    -------
    numpy.ndarray
        Sorted, de-duplicated anime ids present in `rating` but absent from
        the rows of `user_id`. A user with no rows gets every anime id.
    """
    rows_of_user = rating['user_id'] == user_id
    watched_ids = rating.loc[rows_of_user, 'anime_id']
    all_ids = rating['anime_id'].unique()

    # setdiff1d returns the sorted unique values of all_ids not in watched_ids.
    return np.setdiff1d(all_ids, watched_ids)

In [None]:
def top_recs(model, test, user_id, anime, n_recs):
    """Print the names of the `n_recs` highest-predicted anime for one user.

    Parameters
    ----------
    model : object whose ``test(rows)`` method returns, for each
        ``[user, item, rating]`` row, an object with an ``est`` attribute
        (e.g. a fitted Surprise algorithm).
    test : indexable sequence of candidate anime ids to score.
    user_id : id of the user to recommend for.
    anime : DataFrame with 'anime_id' and 'name' columns, used for the names.
    n_recs : maximum number of recommendations to print.

    Returns
    -------
    None
        The header line and one line per recommendation are printed. A
        candidate id with no row in `anime` is printed as a placeholder
        instead of raising IndexError.
    """
    # The true rating of an unseen title is unknown, hence None in the third slot.
    predict_set = [[user_id, anime_id, None] for anime_id in test]
    predictions = model.test(predict_set)
    ratings = np.array([prediction.est for prediction in predictions], dtype=float)

    print("The top %s anime recommended for user_id %s:" % (n_recs, user_id))
    # Stable sort: tied estimates keep candidate order, so the output is
    # deterministic even when several titles share the same estimate.
    ranked = np.argsort(-ratings, kind='stable')[:n_recs]
    for position in ranked:
        rec_id = test[position]
        names = anime.loc[anime['anime_id'] == rec_id, 'name']
        if names.empty:
            # The ratings data can reference ids that the anime table lacks.
            print("<unknown title for anime_id %s>" % rec_id)
        else:
            print(names.iloc[0])

In [None]:
# Pick the user of one uniformly random ratings row (users with more rows are
# proportionally more likely). The row is selected by position: random.choice on
# a Series indexes by label, which raises KeyError or returns the wrong row when
# the index labels are not exactly 0..n-1 (the ratings are loaded with index_col=0).
random_user = rating['user_id'].iloc[random.randrange(len(rating))]

# Candidates are all anime ids in the ratings that this user has no row for.
# Stored under its own name so the Surprise testset in `test` is not overwritten.
candidate_ids = create_predict_set(random_user, rating)
num_recs = 5

top_recs(tuned_model, candidate_ids, random_user, anime, num_recs)

The top 5 anime recommended for user_id 53423:
Clannad: After Story
Fullmetal Alchemist: Brotherhood
Steins;Gate
Ginga Eiyuu Densetsu
Hunter x Hunter (2011)
