In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from surprise import KNNWithZScore
from surprise import accuracy
from surprise.model_selection import cross_validate
from surprise.model_selection import train_test_split
from surprise.model_selection import KFold

from read_and_split_data import split_data, filter_animes_without_grade

In [2]:
def get_dataset():
    anime = pd.read_parquet("../datasets/anime.parquet")
    anime = anime[["anime_id", "type"]]
    users = pd.read_parquet("../datasets/users.parquet")
    base_df = users.merge(anime, on="anime_id", how="left")
    return base_df

base_df = get_dataset()
data = split_data(base_df)

In [3]:
kf = KFold(n_splits=5)

In [4]:
def predict_and_error(data, algo, path):
    fold = 0
    predictions_list = []
    for trainset, testset in kf.split(data):
        algo.fit(trainset)
        predictions = algo.test(testset)
        predictions_list.append(predictions)
        # Mean Squared Error
        accuracy.rmse(predictions, verbose=True)
        # Mean Absolute Erro
        accuracy.mse(predictions, verbose=True)
        # Mean Absolute Erro
        accuracy.mae(predictions, verbose=True)
        # Fraction of Concordant Pairs
        accuracy.fcp(predictions, verbose=True)
        base_df = pd.DataFrame(columns=["user", "anime", "actual", "est", "details"], data=predictions)
        base_df.to_csv(f"{path}_{fold}.csv")
        base_df.to_parquet(f"{path}_{fold}.parquet")
        fold += 1
    return predictions

In [5]:
def set_axis_style(ax, labels):
    ax.xaxis.set_tick_params(direction='out')
    ax.xaxis.set_ticks_position('bottom')
    ax.set_xticks(np.arange(1, len(labels) + 1))
    ax.set_xticklabels(labels)
    ax.set_xlim(0.25, len(labels) + 0.75)
    
def plot_violin_graph(list_of_data, title):
    # Create a figure instance
    label = ["fold 0", "fold 1", "fold 2", "fold 3"]
    fig = plt.figure()
  
    # Create an axes instance
    ax = fig.gca()
    ax.set_title(title)
    set_axis_style(ax, label)
    # Create the violinplot
    violinplot = ax.violinplot(list_of_data)
    plt.show()

### Cosine Similarity

In [6]:
algo_cossine = KNNWithZScore(k=7, sim_options={"name": "cosine", "user_based": False})

In [7]:
predictions_cosine = predict_and_error(data, algo_cossine, path="../predictions/knn_with_z_score/anime_type_tv/no_filter/knn_with_z_score_cosine")

Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 2.1630
MSE: 4.6784
MAE:  1.4778
FCP:  0.6793
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 2.1620
MSE: 4.6742
MAE:  1.4767
FCP:  0.6805
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 2.1614
MSE: 4.6715
MAE:  1.4771
FCP:  0.6799
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 2.1585
MSE: 4.6593
MAE:  1.4747
FCP:  0.6791
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 2.1610
MSE: 4.6698
MAE:  1.4774
FCP:  0.6787


### Mean Squared Difference

In [8]:
algo_mds = KNNWithZScore(k=7, sim_options={"name": "msd", "user_based": False})

In [9]:
predictions_msd = predict_and_error(data, algo_mds, path="../predictions/knn_with_z_score/anime_type_tv/no_filter/knn_with_z_score_msd")

Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.1689
MSE: 4.7039
MAE:  1.4788
FCP:  0.6772
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.1685
MSE: 4.7024
MAE:  1.4776
FCP:  0.6781
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.1660
MSE: 4.6915
MAE:  1.4778
FCP:  0.6767
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.1634
MSE: 4.6802
MAE:  1.4758
FCP:  0.6769
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.1649
MSE: 4.6869
MAE:  1.4777
FCP:  0.6768


### Pearson

In [10]:
algo_pearson = KNNWithZScore(k=7, sim_options={"name": "pearson", "user_based": False})

In [11]:
predictions_pearson = predict_and_error(data, algo_pearson, path="../predictions/knn_with_z_score/anime_type_tv/no_filter/knn_with_z_score_pearson")

Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 2.1730
MSE: 4.7217
MAE:  1.4873
FCP:  0.6733
Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 2.1690
MSE: 4.7046
MAE:  1.4845
FCP:  0.6755
Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 2.1685
MSE: 4.7022
MAE:  1.4852
FCP:  0.6749
Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 2.1704
MSE: 4.7105
MAE:  1.4853
FCP:  0.6743
Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 2.1716
MSE: 4.7160
MAE:  1.4858
FCP:  0.6757


### Pearson Baseline

In [12]:
algo_pearson_baseline = KNNWithZScore(k=7, sim_options={"name": "pearson_baseline", "user_based": False})

In [13]:
predictions_pearson_baseline = predict_and_error(data, algo_pearson_baseline, path="../predictions/knn_with_z_score/anime_type_tv/no_filter/knn_with_z_score_pearson_baseline")

Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 2.0526
MSE: 4.2130
MAE:  1.3838
FCP:  0.7189
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 2.0499
MSE: 4.2020
MAE:  1.3822
FCP:  0.7188
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 2.0520
MSE: 4.2106
MAE:  1.3833
FCP:  0.7184
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 2.0493
MSE: 4.1998
MAE:  1.3831
FCP:  0.7188
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 2.0521
MSE: 4.2109
MAE:  1.3822
FCP:  0.7184


# Filter grade -1

### Cossine

In [14]:
base_df_without_negative = filter_animes_without_grade(base_df)

In [15]:
predictions_cosine = predict_and_error(data, algo_cossine, path="../predictions/knn_with_z_score/anime_type_tv/with_filter_remove_negative/knn_with_z_score_cosine")

Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 2.1642
MSE: 4.6836
MAE:  1.4786
FCP:  0.6787
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 2.1615
MSE: 4.6722
MAE:  1.4764
FCP:  0.6797
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 2.1607
MSE: 4.6687
MAE:  1.4752
FCP:  0.6794
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 2.1619
MSE: 4.6738
MAE:  1.4770
FCP:  0.6802
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 2.1610
MSE: 4.6701
MAE:  1.4767
FCP:  0.6799


### Msd

In [16]:
predictions_msd = predict_and_error(data, algo_mds, path="../predictions/knn_with_z_score/anime_type_tv/with_filter_remove_negative/knn_with_z_score_msd")

Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.1654
MSE: 4.6892
MAE:  1.4771
FCP:  0.6774
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.1653
MSE: 4.6886
MAE:  1.4764
FCP:  0.6775
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.1674
MSE: 4.6978
MAE:  1.4784
FCP:  0.6779
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.1645
MSE: 4.6853
MAE:  1.4776
FCP:  0.6753
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.1664
MSE: 4.6933
MAE:  1.4771
FCP:  0.6767


### Pearson

In [17]:
predictions_pearson = predict_and_error(data, algo_pearson, path="../predictions/knn_with_z_score/anime_type_tv/with_filter_remove_negative/knn_with_z_score_pearson")

Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 2.1713
MSE: 4.7144
MAE:  1.4857
FCP:  0.6739
Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 2.1709
MSE: 4.7127
MAE:  1.4864
FCP:  0.6749
Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 2.1686
MSE: 4.7030
MAE:  1.4848
FCP:  0.6759
Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 2.1733
MSE: 4.7234
MAE:  1.4866
FCP:  0.6750
Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 2.1735
MSE: 4.7240
MAE:  1.4864
FCP:  0.6736


### Pearson Baseline

In [18]:
predictions_pearson_baseline = predict_and_error(data, algo_pearson_baseline, path="../predictions/knn_with_z_score/anime_type_tv/with_filter_remove_negative/knn_with_z_score_pearson_baseline")

Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 2.0512
MSE: 4.2072
MAE:  1.3836
FCP:  0.7197
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 2.0520
MSE: 4.2109
MAE:  1.3824
FCP:  0.7180
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 2.0479
MSE: 4.1939
MAE:  1.3805
FCP:  0.7174
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 2.0524
MSE: 4.2124
MAE:  1.3836
FCP:  0.7193
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 2.0503
MSE: 4.2038
MAE:  1.3831
FCP:  0.7184
