In [1]:
import pandas as pd
import numpy as np
from surprise import BaselineOnly
from surprise import accuracy
from surprise.model_selection import cross_validate
from surprise.model_selection import train_test_split
from surprise.model_selection import KFold

from read_and_split_data import split_data, filter_animes_without_grade

In [2]:
def get_dataset():
    anime = pd.read_parquet("../datasets/anime.parquet")
    anime = anime[["anime_id", "type"]]
    users = pd.read_parquet("../datasets/users.parquet")
    base_df = users.merge(anime, on="anime_id", how="left")
    return base_df

base_df = get_dataset()
data = split_data(base_df)

In [3]:
kf = KFold(n_splits=5)

In [4]:
def predict_and_error(data, algo, path):
    fold = 0
    predictions_list = []
    for trainset, testset in kf.split(data):
        algo.fit(trainset)
        predictions = algo.test(testset)
        predictions_list.append(predictions)
        # Mean Squared Error
        accuracy.rmse(predictions, verbose=True)
        # Mean Absolute Erro
        accuracy.mse(predictions, verbose=True)
        # Mean Absolute Erro
        accuracy.mae(predictions, verbose=True)
        # Fraction of Concordant Pairs
        accuracy.fcp(predictions, verbose=True)
        base_df = pd.DataFrame(columns=["user", "anime", "actual", "est", "details"], data=predictions)
        base_df.to_parquet(f"{path}_{fold}.parquet")
        fold += 1
    return predictions

In [5]:
algo_normal_predictor = BaselineOnly()

In [6]:
predictions = predict_and_error(data, algo_normal_predictor, path="../predictions/baseline_only/anime_type_tv/no_filter/baseline_only")

Estimating biases using als...
RMSE: 2.2240
MSE: 4.9461
MAE:  1.5715
FCP:  0.6835
Estimating biases using als...
RMSE: 2.2216
MSE: 4.9353
MAE:  1.5699
FCP:  0.6845
Estimating biases using als...
RMSE: 2.2214
MSE: 4.9348
MAE:  1.5702
FCP:  0.6842
Estimating biases using als...
RMSE: 2.2217
MSE: 4.9361
MAE:  1.5711
FCP:  0.6856
Estimating biases using als...
RMSE: 2.2242
MSE: 4.9470
MAE:  1.5700
FCP:  0.6846


In [7]:
base_df_without_negative = filter_animes_without_grade(base_df)

In [8]:
predictions_without_negative = predict_and_error(data, algo_normal_predictor, path="../predictions/baseline_only/anime_type_tv/with_filter_remove_negative/baseline_only")

Estimating biases using als...
RMSE: 2.2204
MSE: 4.9303
MAE:  1.5695
FCP:  0.6843
Estimating biases using als...
RMSE: 2.2220
MSE: 4.9371
MAE:  1.5696
FCP:  0.6837
Estimating biases using als...
RMSE: 2.2233
MSE: 4.9432
MAE:  1.5704
FCP:  0.6844
Estimating biases using als...
RMSE: 2.2249
MSE: 4.9503
MAE:  1.5719
FCP:  0.6845
Estimating biases using als...
RMSE: 2.2222
MSE: 4.9380
MAE:  1.5712
FCP:  0.6848
