In [1]:
# Turn off the Jedi-backed tab completer for this IPython session.
%config Completer.use_jedi = False

In [2]:
import os
import sys
import time
import pandas as pd
import numpy as np
import scipy
from scipy import sparse as sp
from matplotlib import pyplot as plt
import pickle
from tqdm import tqdm
import datetime

from scripts.multiple_logging import setup_logger
from scripts.utils import convert_ids_to_ordered, MovingAverage
from scripts.mnap import compute_mnap

In [3]:
# Training split: sparse feature matrix (.npz) and the matching targets (.csv).
X_train = sp.load_npz("sparse_data/X_train.npz")
y_train = pd.read_csv("sparse_data/y_train.csv")

# Held-out split, stored the same way.
X_test = sp.load_npz("sparse_data/X_test.npz")
y_test = pd.read_csv("sparse_data/y_test.csv")

In [4]:
# Reduce the loaded target frames to the `rating` column.
# The isinstance guard makes the cell idempotent: once a target has been
# narrowed to a Series, re-running the cell leaves it untouched instead of
# raising AttributeError on `Series.rating`.
if isinstance(y_train, pd.DataFrame):
    y_train = y_train["rating"]
if isinstance(y_test, pd.DataFrame):
    y_test = y_test["rating"]

## Train fastFM

In [5]:
from fastFM import mcmc, als, sgd
from sklearn.metrics import mean_squared_error

In [6]:
# Hyperparameters for the factorization-machine experiments.
# init_stdev is forwarded to mcmc.FMRegression; the other values are only
# referenced by the commented-out SGD experiment in the cells visible here.
init_stdev = 0.1
rank, seed = 4, 15
step_size = 0.3

# Same L2 penalty value for both regularisation arguments.
l2_reg_w = l2_reg_V = 0.1

In [7]:
# Baseline to outperform: test RMSE of a constant predictor that always
# outputs the mean training rating.
baseline_residuals = y_test - np.mean(y_train)
np.sqrt(np.mean(baseline_residuals**2))

1.2059134375722442

### SGD

In [8]:
# Disabled SGD experiment, kept for reference only (nothing in this cell runs).
# NOTE(review): before re-enabling, fix two problems in the code as written:
#   * `fm` is constructed with n_iter=i but never fitted before `predict`
#     is called on it;
#   * rmse_train_re starts with a placeholder 0 while rmse_test_re starts
#     empty, so the two lists end up with different lengths.
# rmse_test_re = []
# rmse_train_re = [0]
# iterations = range(1, 2000, 50)
# for i in iterations:
#     fm = sgd.FMRegression(n_iter=i, l2_reg_w=l2_reg_w,l2_reg_V=l2_reg_V, rank=rank, random_state=seed, step_size=step_size, init_stdev=init_stdev)
#     rmse_test_re.append(np.sqrt(mean_squared_error(fm.predict(X_test), y_test)))
#     rmse_train_re.append(np.sqrt(mean_squared_error(fm.predict(X_train), y_train)))
#     print(rmse_train_re[-1], rmse_test_re[-1])

### MCMC

In [9]:
# MCMC factorization machine. n_iter=0 here because the iterations are
# requested later, in small batches, through `n_more_iter`.
machine = mcmc.FMRegression(
    rank=8,
    init_stdev=init_stdev,
    n_iter=0,
)

In [10]:
# Track test RMSE while the MCMC model is trained incrementally.
rmse_test_re = []

# First call uses the constructor's n_iter=0; its return value is not needed.
machine.fit_predict(X_train, y_train, X_test)

# 20 rounds, each requesting 5 further iterations and scoring the returned
# test predictions.
for round_idx in range(20):
    y_pred = machine.fit_predict(X_train, y_train, X_test, n_more_iter=5)
    round_rmse = np.sqrt(mean_squared_error(y_pred, y_test))
    rmse_test_re.append(round_rmse)
    print(round_rmse)

1.177225323355309
1.1735614110654096
1.171150916193435
1.1691861613692267
1.1674081657716926
1.1658775328782305
1.1645793587001
1.1635162045052183
1.1625854835399216
1.1618379492018531
1.1612348323362867
1.1606946664195013
1.1602224973625421
1.1598668121641735
1.1595308071180699
1.159245143512729
1.1589831344015165
1.1587989776480796
1.158625977620565
1.1584256485851734


### ALS

In [11]:
from fastFM import als

In [15]:
# ALS factorization machine, created with n_iter=0 and fitted once so that the
# loop in the next cell can score it before requesting further iterations.
# Note that this rebinds `machine`, replacing the MCMC model from above.
machine = als.FMRegression(
    rank=4,
    n_iter=0,
    init_stdev=0.1,
    l2_reg_w=4,
    l2_reg_V=40,
)
machine.fit(X_train, y_train)

FMRegression(l2_reg_V=40, l2_reg_w=4, n_iter=0, rank=4)

In [16]:
# Train/test RMSE per ALS round. Each round scores the current model first and
# only then requests one more iteration, so the first printed row belongs to
# the model as it was left by the previous cell.
rmse_test_re, rmse_train_re = [], []
for round_idx in range(8):
    test_rmse = np.sqrt(mean_squared_error(machine.predict(X_test), y_test))
    train_rmse = np.sqrt(mean_squared_error(machine.predict(X_train), y_train))
    rmse_test_re.append(test_rmse)
    rmse_train_re.append(train_rmse)
    print(train_rmse, test_rmse)
    machine.fit(X_train, y_train, n_more_iter=1)

4.145479843069438 4.141492051192489
0.8782549788422414 1.159014859039651
0.8740560818768811 1.1571669479246152
0.863527809112829 1.1573504539292965
0.8558919040611088 1.1576292488873803
0.8512374718523829 1.1580290655968581
0.8471834931034479 1.1585127070689307
0.843084513507544 1.159143585536511
