In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from surprise import KNNBaseline
from surprise import accuracy
from surprise.model_selection import cross_validate
from surprise.model_selection import train_test_split
from surprise.model_selection import KFold

from read_and_split_data import split_data, filter_animes_without_grade

In [2]:
def get_dataset():
    anime = pd.read_parquet("../datasets/anime.parquet")
    anime = anime[["anime_id", "type"]]
    users = pd.read_parquet("../datasets/users.parquet")
    base_df = users.merge(anime, on="anime_id", how="left")
    return base_df

base_df = get_dataset()
data = split_data(base_df)

In [3]:
kf = KFold(n_splits=5)

In [4]:
def predict_and_error(data, algo, path):
    fold = 0
    predictions_list = []
    for trainset, testset in kf.split(data):
        algo.fit(trainset)
        predictions = algo.test(testset)
        predictions_list.append(predictions)
        # Mean Squared Error
        accuracy.rmse(predictions, verbose=True)
        # Mean Absolute Erro
        accuracy.mse(predictions, verbose=True)
        # Mean Absolute Erro
        accuracy.mae(predictions, verbose=True)
        # Fraction of Concordant Pairs
        accuracy.fcp(predictions, verbose=True)
        base_df = pd.DataFrame(columns=["user", "anime", "actual", "est", "details"], data=predictions)
        base_df.to_csv(f"{path}_{fold}.csv")
        base_df.to_parquet(f"{path}_{fold}.parquet")
        fold += 1
    return predictions

In [5]:
def set_axis_style(ax, labels):
    ax.xaxis.set_tick_params(direction='out')
    ax.xaxis.set_ticks_position('bottom')
    ax.set_xticks(np.arange(1, len(labels) + 1))
    ax.set_xticklabels(labels)
    ax.set_xlim(0.25, len(labels) + 0.75)
    
def plot_violin_graph(list_of_data, title):
    # Create a figure instance
    label = ["fold 0", "fold 1", "fold 2", "fold 3"]
    fig = plt.figure()
  
    # Create an axes instance
    ax = fig.gca()
    ax.set_title(title)
    set_axis_style(ax, label)
    # Create the violinplot
    violinplot = ax.violinplot(list_of_data)
    plt.show()

### Cosine Similarity

In [6]:
algo_cossine = KNNBaseline(k=7, sim_options={"name": "cosine", "user_based": False})

In [7]:
predictions_cosine = predict_and_error(data, algo_cossine, path="../predictions/knn_baseline/anime_type_tv/no_filter/knn_baseline_cosine")

Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 2.1522
MSE: 4.6319
MAE:  1.4710
FCP:  0.6754
Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 2.1540
MSE: 4.6399
MAE:  1.4730
FCP:  0.6761
Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 2.1551
MSE: 4.6446
MAE:  1.4728
FCP:  0.6753
Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 2.1556
MSE: 4.6465
MAE:  1.4729
FCP:  0.6752
Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 2.1575
MSE: 4.6548
MAE:  1.4743
FCP:  0.6748


### Mean Squared Difference

In [8]:
algo_mds = KNNBaseline(k=7, sim_options={"name": "msd", "user_based": False})

In [9]:
predictions_msd = predict_and_error(data, algo_mds, path="../predictions/knn_baseline/anime_type_tv/no_filter/knn_baseline_msd")

Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.1568
MSE: 4.6520
MAE:  1.4708
FCP:  0.6742
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.1552
MSE: 4.6447
MAE:  1.4698
FCP:  0.6728
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.1557
MSE: 4.6471
MAE:  1.4719
FCP:  0.6737
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.1554
MSE: 4.6457
MAE:  1.4718
FCP:  0.6737
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.1591
MSE: 4.6618
MAE:  1.4733
FCP:  0.6720


### Pearson

In [10]:
algo_pearson = KNNBaseline(k=7, sim_options={"name": "pearson", "user_based": False})

In [11]:
predictions_pearson = predict_and_error(data, algo_pearson, path="../predictions/knn_baseline/anime_type_tv/no_filter/knn_baseline_pearson")

Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 2.1683
MSE: 4.7017
MAE:  1.4854
FCP:  0.6689
Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 2.1724
MSE: 4.7194
MAE:  1.4889
FCP:  0.6684
Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 2.1648
MSE: 4.6862
MAE:  1.4845
FCP:  0.6697
Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 2.1651
MSE: 4.6877
MAE:  1.4832
FCP:  0.6687
Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 2.1646
MSE: 4.6856
MAE:  1.4843
FCP:  0.6702


### Pearson Baseline

In [12]:
algo_pearson_baseline = KNNBaseline(k=7, sim_options={"name": "pearson_baseline", "user_based": False})

In [13]:
predictions_pearson_baseline = predict_and_error(data, algo_pearson_baseline, path="../predictions/knn_baseline/anime_type_tv/no_filter/knn_baseline_pearson_baseline")

Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 2.0542
MSE: 4.2195
MAE:  1.3826
FCP:  0.7171
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 2.0549
MSE: 4.2225
MAE:  1.3835
FCP:  0.7159
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 2.0381
MSE: 4.1540
MAE:  1.3756
FCP:  0.7163
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 2.0489
MSE: 4.1979
MAE:  1.3803
FCP:  0.7178
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 2.0475
MSE: 4.1921
MAE:  1.3778
FCP:  0.7176


# Filter grade -1

### Cossine

In [14]:
base_df_without_negative = filter_animes_without_grade(base_df)

In [15]:
predictions_cosine = predict_and_error(data, algo_cossine, path="../predictions/knn_baseline/anime_type_tv/with_filter_remove_negative/knn_baseline_cosine")

Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 2.1528
MSE: 4.6345
MAE:  1.4717
FCP:  0.6751
Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 2.1548
MSE: 4.6434
MAE:  1.4725
FCP:  0.6738
Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 2.1534
MSE: 4.6370
MAE:  1.4718
FCP:  0.6756
Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 2.1524
MSE: 4.6329
MAE:  1.4719
FCP:  0.6756
Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 2.1586
MSE: 4.6596
MAE:  1.4755
FCP:  0.6750


### Msd

In [16]:
predictions_msd = predict_and_error(data, algo_mds, path="../predictions/knn_baseline/anime_type_tv/with_filter_remove_negative/knn_baseline_msd")

Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.1567
MSE: 4.6514
MAE:  1.4722
FCP:  0.6734
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.1572
MSE: 4.6535
MAE:  1.4720
FCP:  0.6733
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.1575
MSE: 4.6549
MAE:  1.4722
FCP:  0.6737
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.1563
MSE: 4.6495
MAE:  1.4717
FCP:  0.6732
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.1569
MSE: 4.6522
MAE:  1.4719
FCP:  0.6723


### Pearson

In [17]:
predictions_pearson = predict_and_error(data, algo_pearson, path="../predictions/knn_baseline/anime_type_tv/with_filter_remove_negative/knn_baseline_pearson")

Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 2.1666
MSE: 4.6940
MAE:  1.4846
FCP:  0.6699
Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 2.1671
MSE: 4.6964
MAE:  1.4858
FCP:  0.6691
Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 2.1636
MSE: 4.6810
MAE:  1.4831
FCP:  0.6693
Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 2.1655
MSE: 4.6895
MAE:  1.4843
FCP:  0.6686
Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 2.1728
MSE: 4.7210
MAE:  1.4886
FCP:  0.6694


### Pearson Baseline

In [18]:
predictions_pearson_baseline = predict_and_error(data, algo_pearson_baseline, path="../predictions/knn_baseline/anime_type_tv/with_filter_remove_negative/knn_baseline_pearson_baseline")

Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 2.0514
MSE: 4.2084
MAE:  1.3805
FCP:  0.7178
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 2.0485
MSE: 4.1965
MAE:  1.3805
FCP:  0.7177
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 2.0497
MSE: 4.2014
MAE:  1.3795
FCP:  0.7163
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 2.0492
MSE: 4.1991
MAE:  1.3801
FCP:  0.7175
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 2.0469
MSE: 4.1897
MAE:  1.3797
FCP:  0.7165
