In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from surprise import KNNBasic
from surprise import accuracy
from surprise.model_selection import cross_validate
from surprise.model_selection import train_test_split
from surprise.model_selection import KFold

from read_and_split_data import split_data, filter_animes_without_grade

In [2]:
def get_dataset():
    anime = pd.read_parquet("../datasets/anime.parquet")
    anime = anime[["anime_id", "type"]]
    users = pd.read_parquet("../datasets/users.parquet")
    base_df = users.merge(anime, on="anime_id", how="left")
    return base_df

base_df = get_dataset()
data = split_data(base_df)

In [3]:
kf = KFold(n_splits=5)

In [4]:
def predict_and_error(data, algo, path):
    fold = 0
    predictions_list = []
    for trainset, testset in kf.split(data):
        algo.fit(trainset)
        predictions = algo.test(testset)
        predictions_list.append(predictions)
        # Mean Squared Error
        accuracy.rmse(predictions, verbose=True)
        # Mean Absolute Erro
        accuracy.mse(predictions, verbose=True)
        # Mean Absolute Erro
        accuracy.mae(predictions, verbose=True)
        # Fraction of Concordant Pairs
        accuracy.fcp(predictions, verbose=True)
        base_df = pd.DataFrame(columns=["user", "anime", "actual", "est", "details"], data=predictions)
        base_df.to_csv(f"{path}_{fold}.csv")
        base_df.to_parquet(f"{path}_{fold}.parquet")
        fold += 1
    return predictions

In [5]:
def set_axis_style(ax, labels):
    ax.xaxis.set_tick_params(direction='out')
    ax.xaxis.set_ticks_position('bottom')
    ax.set_xticks(np.arange(1, len(labels) + 1))
    ax.set_xticklabels(labels)
    ax.set_xlim(0.25, len(labels) + 0.75)
    
def plot_violin_graph(list_of_data, title):
    # Create a figure instance
    label = ["fold 0", "fold 1", "fold 2", "fold 3"]
    fig = plt.figure()
  
    # Create an axes instance
    ax = fig.gca()
    ax.set_title(title)
    set_axis_style(ax, label)
    # Create the violinplot
    violinplot = ax.violinplot(list_of_data)
    plt.show()

### Cosine Similarity

In [6]:
algo_cossine = KNNBasic(k=7, sim_options={"name": "cosine", "user_based": False})

In [7]:
predictions_cosine = predict_and_error(data, algo_cossine, path="../predictions/knn_basic/anime_type_tv/no_filter/knn_basic_cosine")

Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 2.2076
MSE: 4.8737
MAE:  1.5290
FCP:  0.6216
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 2.2064
MSE: 4.8682
MAE:  1.5260
FCP:  0.6211
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 2.2053
MSE: 4.8634
MAE:  1.5259
FCP:  0.6200
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 2.2037
MSE: 4.8562
MAE:  1.5252
FCP:  0.6207
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 2.2078
MSE: 4.8744
MAE:  1.5273
FCP:  0.6208


### Mean Squared Difference

In [8]:
algo_mds = KNNBasic(k=7, sim_options={"name": "msd", "user_based": False})

In [9]:
predictions_msd = predict_and_error(data, algo_mds, path="../predictions/knn_basic/anime_type_tv/no_filter/knn_basic_msd")

Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.1891
MSE: 4.7920
MAE:  1.5156
FCP:  0.6400
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.1911
MSE: 4.8009
MAE:  1.5176
FCP:  0.6372
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.1918
MSE: 4.8039
MAE:  1.5194
FCP:  0.6380
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.1864
MSE: 4.7805
MAE:  1.5155
FCP:  0.6397
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.1883
MSE: 4.7885
MAE:  1.5164
FCP:  0.6374


### Pearson

In [10]:
algo_pearson = KNNBasic(k=7, sim_options={"name": "pearson", "user_based": False})

In [11]:
predictions_pearson = predict_and_error(data, algo_pearson, path="../predictions/knn_basic/anime_type_tv/no_filter/knn_basic_pearson")

Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 2.2488
MSE: 5.0570
MAE:  1.5960
FCP:  0.5805
Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 2.2443
MSE: 5.0371
MAE:  1.5933
FCP:  0.5819
Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 2.2463
MSE: 5.0458
MAE:  1.5967
FCP:  0.5796
Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 2.2444
MSE: 5.0373
MAE:  1.5953
FCP:  0.5796
Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 2.2493
MSE: 5.0593
MAE:  1.5965
FCP:  0.5795


### Pearson Baseline

In [12]:
algo_pearson_baseline = KNNBasic(k=7, sim_options={"name": "pearson_baseline", "user_based": False})

In [13]:
predictions_pearson_baseline = predict_and_error(data, algo_pearson_baseline, path="../predictions/knn_basic/anime_type_tv/no_filter/knn_basic_pearson_baseline")

Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 2.0871
MSE: 4.3559
MAE:  1.4220
FCP:  0.6861
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 2.0885
MSE: 4.3617
MAE:  1.4218
FCP:  0.6865
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 2.0830
MSE: 4.3390
MAE:  1.4199
FCP:  0.6867
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 2.0851
MSE: 4.3475
MAE:  1.4208
FCP:  0.6861
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 2.0857
MSE: 4.3501
MAE:  1.4209
FCP:  0.6868


# Filter grade -1

### Cossine

In [14]:
base_df_without_negative = filter_animes_without_grade(base_df)

In [15]:
predictions_cosine = predict_and_error(data, algo_cossine, path="../predictions/knn_basic/anime_type_tv/with_filter_remove_negative/knn_basic_cosine")

Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 2.2063
MSE: 4.8677
MAE:  1.5272
FCP:  0.6211
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 2.2080
MSE: 4.8753
MAE:  1.5287
FCP:  0.6226
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 2.2014
MSE: 4.8462
MAE:  1.5246
FCP:  0.6222
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 2.2072
MSE: 4.8718
MAE:  1.5276
FCP:  0.6198
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 2.2052
MSE: 4.8628
MAE:  1.5258
FCP:  0.6228


### Msd

In [16]:
predictions_msd = predict_and_error(data, algo_mds, path="../predictions/knn_basic/anime_type_tv/with_filter_remove_negative/knn_basic_msd")

Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.1880
MSE: 4.7873
MAE:  1.5167
FCP:  0.6386
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.1890
MSE: 4.7919
MAE:  1.5178
FCP:  0.6379
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.1863
MSE: 4.7799
MAE:  1.5147
FCP:  0.6390
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.1865
MSE: 4.7808
MAE:  1.5155
FCP:  0.6394
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.1916
MSE: 4.8030
MAE:  1.5192
FCP:  0.6378


### Pearson

In [17]:
predictions_pearson = predict_and_error(data, algo_pearson, path="../predictions/knn_basic/anime_type_tv/with_filter_remove_negative/knn_basic_pearson")

Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 2.2493
MSE: 5.0595
MAE:  1.5957
FCP:  0.5816
Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 2.2449
MSE: 5.0396
MAE:  1.5947
FCP:  0.5805
Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 2.2465
MSE: 5.0469
MAE:  1.5966
FCP:  0.5793
Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 2.2456
MSE: 5.0426
MAE:  1.5935
FCP:  0.5803
Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 2.2456
MSE: 5.0426
MAE:  1.5955
FCP:  0.5792


### Pearson Baseline

In [18]:
predictions_pearson_baseline = predict_and_error(data, algo_pearson_baseline, path="../predictions/knn_basic/anime_type_tv/with_filter_remove_negative/knn_basic_pearson_baseline")

Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 2.0810
MSE: 4.3306
MAE:  1.4185
FCP:  0.6867
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 2.0897
MSE: 4.3670
MAE:  1.4230
FCP:  0.6852
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 2.0840
MSE: 4.3432
MAE:  1.4200
FCP:  0.6867
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 2.0923
MSE: 4.3777
MAE:  1.4248
FCP:  0.6846
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 2.0871
MSE: 4.3559
MAE:  1.4225
FCP:  0.6859
