In [11]:
import pandas as pd
import numpy as np
from collections import Counter

from river import optim
from river import reco
from river import metrics

import pickle

In [2]:
# Load the pickled training ratings into a DataFrame.
trainset_path = "../data/trainset.pkl"
df = pd.read_pickle(trainset_path)

In [3]:
# Convert the ratings into a list with one dict per row, keeping only the three
# columns that the training loop reads.
rating_columns = ["user_id", "anime_id", "rating"]
trainset = df[rating_columns].to_dict(orient="records")

In [4]:
# Release the DataFrame; the cells below work from `trainset` only.
del df

In [5]:
# Show the first record as a quick check of the keys each entry carries.
trainset[0]

{'user_id': 0, 'anime_id': 1004, 'rating': 5}

In [6]:
# NOTE(review): disabled scratch cell. `df` has already been deleted by this
# point, so the first two lines would raise NameError if uncommented as-is.
# test_id = df["user_id"].max() + 1
# print(test_id)

# test_anime = [
#     "Angel Beats!",
#     "Mahoutsukai Precure!",
#     "Ookami to Koushinryou",
#     "Shinsekai yori",
#     "Shugo Chara!",
#     "Uma Musume: Pretty Derby (TV)",
#     "Fate/Zero",
#     "Uchuu Senkan Yamato 2199",
# ]

In [7]:
# Track both error measures together; adding two river metrics yields a single
# composite object that is updated and displayed as one.
mae = metrics.MAE()
rmse = metrics.RMSE()
metric = mae + rmse

In [8]:
# Hyperparameters for the biased matrix-factorisation recommender. They are kept
# in a dict so the full configuration is visible in one place.
biased_mf_params = dict(
    n_factors=10,
    # Separate SGD optimizers for the bias terms and the latent factors.
    bias_optimizer=optim.SGD(0.025),
    latent_optimizer=optim.SGD(0.05),
    weight_initializer=optim.initializers.Zeros(),
    # Seeded normal initializer for the latent factors.
    latent_initializer=optim.initializers.Normal(mu=0.0, sigma=0.1, seed=73),
    l2_bias=0.0,
    l2_latent=0.0,
)

model = reco.BiasedMF(**biased_mf_params)

In [9]:
# Test-then-train pass over the ratings: each rating is predicted before the
# model has learned from it, the prediction is scored, and only then is the
# observation used to update the model.
for data in trainset:
    y_pred = model.predict_one(user=data["user_id"], item=data["anime_id"])
    # `update` and `learn_one` mutate their receiver in place, so they are called
    # for their side effect only. Their return value must not be assigned back:
    # recent river releases return None from these methods, which would replace
    # `metric` / `model` with None and break the next iteration.
    metric.update(data["rating"], y_pred)
    model.learn_one(
        user=data["user_id"], item=data["anime_id"], y=data["rating"]
    )

In [10]:
# Display the metrics accumulated over the training loop.
metric

MAE: 0.934269, RMSE: 1.235888

In [12]:
# Serialize the trained model to disk.
with open("../data/model.pkl", mode="wb") as model_file:
    pickle.dump(model, model_file)