# **Collaborative Filtering**

In [None]:
# Import libraries
import pandas as pd
import numpy as np

## **Preprocessing the Data**

### Anime Dataset

In [None]:
# Read the cleaned anime table from disk
anime_csv_path = "cleaned_anime.csv"
anime_df = pd.read_csv(anime_csv_path)

# Preview the first rows
anime_df.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64.0,9.26,793665
1,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51.0,9.25,114262
2,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24.0,9.17,673572
3,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51.0,9.16,151266
4,32935,Haikyuu!!: Karasuno Koukou VS Shiratorizawa Ga...,"Comedy, Drama, School, Shounen, Sports",TV,10.0,9.15,93351


In [None]:
# Keep only the columns used later on; everything else is dropped
columns_to_keep = ["anime_id", "name", "rating"]
anime_df = anime_df.loc[:, columns_to_keep]

# Preview the trimmed table
anime_df.head()

Unnamed: 0,anime_id,name,rating
0,5114,Fullmetal Alchemist: Brotherhood,9.26
1,28977,Gintama°,9.25
2,9253,Steins;Gate,9.17
3,9969,Gintama&#039;,9.16
4,32935,Haikyuu!!: Karasuno Koukou VS Shiratorizawa Ga...,9.15


### Ratings Dataset

In [None]:
# Read the cleaned user ratings table from disk
rating_csv_path = "cleaned_rating.csv"
rating_df = pd.read_csv(rating_csv_path)

# Preview the first rows
rating_df.head()

Unnamed: 0,user_id,anime_id,rating
0,1,20,
1,1,24,
2,1,79,
3,1,226,
4,1,241,


In [None]:
# Drop every row that contains a missing value.
# The result is rebound to the name instead of using inplace=True, so the
# step produces a new frame rather than mutating the one loaded above.
rating_df = rating_df.dropna()

# Sanity check: count the missing values left in each column after the drop
# (every count should be 0).
rating_df.isnull().sum()

user_id     0
anime_id    0
rating      0
dtype: int64

# **Building the Recommendation System**

In [None]:
!pip install scikit-surprise


Collecting scikit-surprise
  Downloading scikit-surprise-1.1.3.tar.gz (771 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m772.0/772.0 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.3-cp310-cp310-linux_x86_64.whl size=3162999 sha256=b675d397d70d2c4ee0e333621f5d7a8617c204c47a9cf015cf905c146f92d807
  Stored in directory: /root/.cache/pip/wheels/a5/ca/a8/4e28def53797fdc4363ca4af740db15a9c2f1595ebc51fb445
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.3


In [None]:
%%time
# Import the surprise library
from surprise import Reader
from surprise import Dataset

# reader parses the file containing the ratings
# Our rating scale is from 1 to 10 inclusive
reader = Reader(rating_scale=(1, 10))

# Load the dataframe into the model's dataset
data = Dataset.load_from_df(rating_df[["user_id", "anime_id", "rating"]], reader)
data

CPU times: user 850 ms, sys: 528 ms, total: 1.38 s
Wall time: 2.53 s


<surprise.dataset.DatasetAutoFolds at 0x7b8ac8ad7160>

In [None]:
# Import the SVD algorithm
from surprise import SVD

In [None]:
# Import GridSearchCV to tune parameters
from surprise.model_selection import GridSearchCV

In [None]:
# This cell took a LONG time to run on my computer
# Don't suggest running it unless you have time to waste

%%time

# Create parameters combinations
params = {
    "n_epochs": [10, 15], "lr_all": [0.003, 0.005, 0.007], "reg_all": [0.01, 0.02, 0.03]
}

# Run the grid search using SVD and the parameters to find the best parameters for Root Mean Square Error and Mean Absolute Error
gs = GridSearchCV(SVD, params, measures=['rmse', 'mae'], cv=3, joblib_verbose=2, n_jobs=-2)

gs.fit(data)

[Parallel(n_jobs=-2)]: Done  40 tasks      | elapsed:  3.3min


CPU times: user 4min 40s, sys: 4.2 s, total: 4min 44s
Wall time: 4min 45s


In [None]:
# Best RMSE reached during the grid search
best_rmse = gs.best_score["rmse"]
print(best_rmse)

# Parameter combination that achieved that RMSE
best_rmse_params = gs.best_params["rmse"]
print(best_rmse_params)

1.182886645389906
{'n_epochs': 15, 'lr_all': 0.007, 'reg_all': 0.03}


In [None]:
%%time
algo = gs.best_estimator["rmse"]
algo.fit(data.build_full_trainset())

CPU times: user 5.21 s, sys: 20.7 ms, total: 5.23 s
Wall time: 5.31 s


<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7b8af3d4eef0>

In [None]:
# Quick check of the prediction method:
# estimated rating of user 4271 for anime id 7088
sample_user_id = 4271
sample_anime_id = 7088

prediction = algo.predict(sample_user_id, sample_anime_id)
pred = prediction.est
pred

7.6639416590566825

In [None]:
# Import the dump function
from surprise.dump import dump

# Create the Pickle file for the SVD algorithm.
# dump's signature is dump(file_name, predictions=None, algo=None, verbose=0),
# so the model must be passed by keyword: a second positional argument would be
# stored as `predictions` and the saved `algo` entry would be None.
# When reading the file back with surprise.dump.load, the model is the second
# element of the returned (predictions, algo) tuple.
dump("recommender.pkl", algo=algo)