In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from surprise import KNNWithMeans
from surprise import accuracy
from surprise.model_selection import cross_validate
from surprise.model_selection import train_test_split
from surprise.model_selection import KFold

from read_and_split_data import split_data, filter_animes_without_grade

In [2]:
def get_dataset():
    anime = pd.read_parquet("../datasets/anime.parquet")
    anime = anime[["anime_id", "type"]]
    users = pd.read_parquet("../datasets/users.parquet")
    base_df = users.merge(anime, on="anime_id", how="left")
    return base_df

base_df = get_dataset()
data = split_data(base_df)

In [3]:
kf = KFold(n_splits=5)

In [4]:
def predict_and_error(data, algo, path):
    fold = 0
    predictions_list = []
    for trainset, testset in kf.split(data):
        algo.fit(trainset)
        predictions = algo.test(testset)
        predictions_list.append(predictions)
        # Mean Squared Error
        accuracy.rmse(predictions, verbose=True)
        # Mean Absolute Erro
        accuracy.mse(predictions, verbose=True)
        # Mean Absolute Erro
        accuracy.mae(predictions, verbose=True)
        # Fraction of Concordant Pairs
        accuracy.fcp(predictions, verbose=True)
        base_df = pd.DataFrame(columns=["user", "anime", "actual", "est", "details"], data=predictions)
        base_df.to_csv(f"{path}_{fold}.csv")
        base_df.to_parquet(f"{path}_{fold}.parquet")
        fold += 1
    return predictions

In [5]:
def set_axis_style(ax, labels):
    ax.xaxis.set_tick_params(direction='out')
    ax.xaxis.set_ticks_position('bottom')
    ax.set_xticks(np.arange(1, len(labels) + 1))
    ax.set_xticklabels(labels)
    ax.set_xlim(0.25, len(labels) + 0.75)
    
def plot_violin_graph(list_of_data, title):
    # Create a figure instance
    label = ["fold 0", "fold 1", "fold 2", "fold 3"]
    fig = plt.figure()
  
    # Create an axes instance
    ax = fig.gca()
    ax.set_title(title)
    set_axis_style(ax, label)
    # Create the violinplot
    violinplot = ax.violinplot(list_of_data)
    plt.show()

### Cosine Similarity

In [6]:
algo_cossine = KNNWithMeans(k=7, sim_options={"name": "cosine", "user_based": False})

In [7]:
predictions_cosine = predict_and_error(data, algo_cossine, path="../predictions/knn_with_means/anime_type_tv/no_filter/knn_with_means_cosine")

Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 2.1615
MSE: 4.6720
MAE:  1.4794
FCP:  0.6774
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 2.1614
MSE: 4.6716
MAE:  1.4780
FCP:  0.6800
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 2.1628
MSE: 4.6777
MAE:  1.4792
FCP:  0.6774
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 2.1603
MSE: 4.6670
MAE:  1.4770
FCP:  0.6777
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 2.1580
MSE: 4.6568
MAE:  1.4771
FCP:  0.6776


### Mean Squared Difference

In [8]:
algo_mds = KNNWithMeans(k=7, sim_options={"name": "msd", "user_based": False})

In [9]:
predictions_msd = predict_and_error(data, algo_mds, path="../predictions/knn_with_means/anime_type_tv/no_filter/knn_with_means_msd")

Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.1625
MSE: 4.6763
MAE:  1.4766
FCP:  0.6764
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.1694
MSE: 4.7065
MAE:  1.4822
FCP:  0.6743
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.1661
MSE: 4.6919
MAE:  1.4776
FCP:  0.6744
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.1690
MSE: 4.7046
MAE:  1.4805
FCP:  0.6751
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.1651
MSE: 4.6877
MAE:  1.4786
FCP:  0.6755


### Pearson

In [10]:
algo_pearson = KNNWithMeans(k=7, sim_options={"name": "pearson", "user_based": False})

In [11]:
predictions_pearson = predict_and_error(data, algo_pearson, path="../predictions/knn_with_means/anime_type_tv/no_filter/knn_with_means_pearson")

Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 2.1732
MSE: 4.7227
MAE:  1.4864
FCP:  0.6736
Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 2.1696
MSE: 4.7074
MAE:  1.4865
FCP:  0.6726
Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 2.1718
MSE: 4.7167
MAE:  1.4865
FCP:  0.6725
Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 2.1738
MSE: 4.7254
MAE:  1.4885
FCP:  0.6734
Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 2.1742
MSE: 4.7271
MAE:  1.4896
FCP:  0.6720


### Pearson Baseline

In [12]:
algo_pearson_baseline = KNNWithMeans(k=7, sim_options={"name": "pearson_baseline", "user_based": False})

In [13]:
predictions_pearson_baseline = predict_and_error(data, algo_pearson_baseline, path="../predictions/knn_with_means/anime_type_tv/no_filter/knn_with_means_pearson_baseline")

Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 2.0524
MSE: 4.2123
MAE:  1.3851
FCP:  0.7186
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 2.0512
MSE: 4.2075
MAE:  1.3842
FCP:  0.7177
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 2.0544
MSE: 4.2207
MAE:  1.3846
FCP:  0.7183
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 2.0517
MSE: 4.2096
MAE:  1.3839
FCP:  0.7168
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 2.0492
MSE: 4.1991
MAE:  1.3817
FCP:  0.7185


# Filter grade -1

### Cossine

In [14]:
base_df_without_negative = filter_animes_without_grade(base_df)

In [15]:
predictions_cosine = predict_and_error(data, algo_cossine, path="../predictions/knn_with_means/anime_type_tv/with_filter_remove_negative/knn_with_means_cosine")

Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 2.1610
MSE: 4.6697
MAE:  1.4774
FCP:  0.6780
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 2.1616
MSE: 4.6727
MAE:  1.4790
FCP:  0.6772
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 2.1578
MSE: 4.6562
MAE:  1.4758
FCP:  0.6785
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 2.1610
MSE: 4.6697
MAE:  1.4775
FCP:  0.6784
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 2.1628
MSE: 4.6778
MAE:  1.4794
FCP:  0.6777


### Msd

In [16]:
predictions_msd = predict_and_error(data, algo_mds, path="../predictions/knn_with_means/anime_type_tv/with_filter_remove_negative/knn_with_means_msd")

Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.1694
MSE: 4.7063
MAE:  1.4821
FCP:  0.6744
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.1636
MSE: 4.6813
MAE:  1.4760
FCP:  0.6754
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.1662
MSE: 4.6923
MAE:  1.4791
FCP:  0.6756
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.1658
MSE: 4.6907
MAE:  1.4785
FCP:  0.6747
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 2.1645
MSE: 4.6853
MAE:  1.4779
FCP:  0.6760


### Pearson

In [17]:
predictions_pearson = predict_and_error(data, algo_pearson, path="../predictions/knn_with_means/anime_type_tv/with_filter_remove_negative/knn_with_means_pearson")

Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 2.1751
MSE: 4.7310
MAE:  1.4891
FCP:  0.6717
Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 2.1743
MSE: 4.7276
MAE:  1.4891
FCP:  0.6725
Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 2.1700
MSE: 4.7087
MAE:  1.4856
FCP:  0.6735
Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 2.1737
MSE: 4.7250
MAE:  1.4870
FCP:  0.6730
Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 2.1712
MSE: 4.7142
MAE:  1.4868
FCP:  0.6719


### Pearson Baseline

In [18]:
predictions_pearson_baseline = predict_and_error(data, algo_pearson_baseline, path="../predictions/knn_with_means/anime_type_tv/with_filter_remove_negative/knn_with_means_pearson_baseline")

Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 2.0527
MSE: 4.2136
MAE:  1.3849
FCP:  0.7175
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 2.0527
MSE: 4.2134
MAE:  1.3840
FCP:  0.7177
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 2.0513
MSE: 4.2080
MAE:  1.3835
FCP:  0.7177
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 2.0495
MSE: 4.2005
MAE:  1.3825
FCP:  0.7190
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 2.0498
MSE: 4.2017
MAE:  1.3828
FCP:  0.7184
