## Online Factorization Machine
Online factorization models take a single data point as input, make a prediction for it, and then train on that data point. This notebook demonstrates fitting online models with an Adam optimizer, and online models with hedge backpropagation (HBP), to Criteo data.

In [1]:
import sys
sys.path.append('./../')

from utils import data_preprocess, plot
import os
import pickle
import numpy as np
import torch

from time import time

from models.models_online.FM_FTRL import FM_FTRL
from models.models_online.SFTRL_CCFM import SFTRL_CCFM
from models.models_online.SFTRL_Vanila import SFTRL_Vanila
from models.models_online.RRF_Online import RRF_Online

from utils.data_manager  import *

# Tensor constructor applied to the feature/target arrays before they are
# handed to the online models in the training cell.
Tensor_type = torch.DoubleTensor


In [10]:
# --- Experiment configuration -------------------------------------------------
# Task selector: the data-loading cell accepts 'reg' or 'cls'.
task = 'cls'

# Forwarded to the FM-style model constructors together with `m`.
# NOTE(review): presumably the learning rate -- confirm against the model code.
lr_FM = 0.005

# Forwarded as the last constructor argument of the FM-style models.
# NOTE(review): presumably the latent/feature dimension -- confirm.
m = 40

# Every value `model_option` is allowed to take.
model_option_list = [
    'FTRL',
    'SFTRL_C',
    'SFTRL_V',
    'Online_RFF',
]

In [22]:
# Build the training stream (`x_train_s`, `rate_train_s`) for the selected task:
#   'reg' -> files under ../dataset/ml-100k/   (loaded via load_dataset_movielens)
#   'cls' -> ../dataset/cod-rna2/cod-rna2.scale (loaded via load_svmlight_file)
# Any other value of `task` raises ValueError.
if task == 'reg':
    nbUsers = 943
    nbMovies = 1682
    nbFeatures = nbUsers + nbMovies
    nbRatingsTrain = 90570
    nbRatingsTest = 9430
    data_dir = os.getcwd() + '/../dataset/ml-100k/'

    # The 'ua' split is used here; 'ub.base' / 'ub.test' is the alternative split.
    filename1, filename2 = 'ua.base', 'ua.test'
    _, x_train, y_train, rate_train, timestamp_train = load_dataset_movielens(data_dir + filename1,
                                                                              nbRatingsTrain,
                                                                              nbFeatures,
                                                                              nbUsers)
    # sort dataset in time
    x_train_s, rate_train_s, _ = sort_dataset_movielens(x_train, rate_train, timestamp_train)

    # NOTE(review): with want_permute=True the random shuffle below discards the
    # ordering produced by the sort above -- confirm this is intended. The shuffle
    # is unseeded, so the sample order differs from run to run.
    want_permute = True
    if want_permute:
        idx = np.random.permutation(x_train_s.shape[0])
        x_train_s = x_train_s[idx]
        rate_train_s = rate_train_s[idx]

    # Densify, then convert to a plain ndarray so both task branches hand the
    # same array type to the models (`.todense()` alone yields a numpy.matrix).
    x_train_s = np.asarray(x_train_s.todense())

elif task == 'cls':
    data_dir = './../dataset/cod-rna2/'
    filename = "cod-rna2.scale"
    X, y = load_svmlight_file(data_dir + filename, n_features=8)
    X = X.toarray()

    # Samples are kept in file order unless want_permute is switched on.
    want_permute = False
    if want_permute:
        idx = np.random.permutation(X.shape[0])
        x_train_s = np.asarray(X[idx])
        rate_train_s = np.asarray(y[idx])
    else:
        x_train_s = np.asarray(X)
        rate_train_s = np.asarray(y)

else:
    # Fail with an actionable message instead of a bare ValueError.
    raise ValueError("Unsupported task %r; expected 'reg' or 'cls'." % (task,))

In [24]:
# Display the feature matrix produced by the data-loading cell as a sanity check.
x_train_s

array([[ 0.179872 ,  0.848485 , -0.810701 , ..., -0.810701 ,  0.260379 ,
         0.0858314],
       [ 0.222698 ,  0.939394 , -0.868804 , ..., -0.868804 ,  0.41109  ,
         0.0472483],
       [ 0.30621  ,  0.69697  , -0.780102 , ..., -0.780102 ,  0.44365  ,
        -0.0794641],
       ...,
       [ 0.446467 , -0.727273 , -0.436509 , ..., -0.506946 , -0.257884 ,
         0.198159 ],
       [ 0.478587 , -0.727273 , -0.436509 , ..., -0.366072 , -0.173699 ,
         0.103567 ],
       [ 0.480728 , -0.727273 , -0.520666 , ..., -0.506946 , -0.0895143,
         0.103567 ]])

In [28]:
# Model to train in the next cell; it is asserted there to be one of `model_option_list`.
model_option = 'SFTRL_C'

In [29]:
# Train the model selected by `model_option` on the (x_train_s, rate_train_s) stream.
# The three FM-style models share one constructor signature and their
# `online_learning()` result is unpacked into three values; the RRF model takes
# (inputs, outputs, task-string) and its result is stored as a single value.
assert model_option in model_option_list, "unknown model_option: %r" % (model_option,)

# Strings are compared with `==`. `is` tests object identity, which only happens
# to work when the interpreter interns both literals (and emits a SyntaxWarning
# on Python >= 3.8).
#
# NOTE(review): the third unpacked value is bound to `time`, which rebinds the
# `time` function imported at the top of the notebook. The name is kept because
# later cells may read it -- consider renaming it everywhere (e.g. `elapsed`).
if model_option == 'FTRL':
    Model_FM_FTRL = FM_FTRL(Tensor_type(x_train_s), Tensor_type(rate_train_s), task, lr_FM, m)
    pred_F, _, time = Model_FM_FTRL.online_learning()

elif model_option == 'SFTRL_C':
    Model_SFTRL_C = SFTRL_CCFM(Tensor_type(x_train_s), Tensor_type(rate_train_s), task, lr_FM, m)
    pred_F, _, time = Model_SFTRL_C.online_learning()

elif model_option == 'SFTRL_V':
    Model_SFTRL_V = SFTRL_Vanila(Tensor_type(x_train_s), Tensor_type(rate_train_s), task, lr_FM, m)
    pred_F, _, time = Model_SFTRL_V.online_learning()

# Was `model_option in 'Online_RFF'`, a substring test that also matches '',
# 'RFF', 'Online', ...; exact equality is what is meant.
elif model_option == 'Online_RFF':
    inputs_matrix, outputs = Tensor_type(x_train_s), Tensor_type(rate_train_s)
    # NOTE(review): the task is hard-coded to 'cls' here while the other models
    # receive `task` -- confirm whether RRF_Online should follow `task` as well.
    Online_RRF = RRF_Online(inputs_matrix,
                            outputs,
                            'cls')

    pred_RRF = Online_RRF.online_learning()

SFTRL_CCFM_0.005_40_start
 0 th : pred 1.000000 , real -1.000000 
 1000 th : pred -1.000000 , real -1.000000 
 2000 th : pred -1.000000 , real 1.000000 
 3000 th : pred -1.000000 , real -1.000000 
 4000 th : pred -1.000000 , real -1.000000 
 5000 th : pred -1.000000 , real -1.000000 
 6000 th : pred -1.000000 , real -1.000000 
 7000 th : pred -1.000000 , real -1.000000 
 8000 th : pred -1.000000 , real -1.000000 
 9000 th : pred -1.000000 , real -1.000000 
 10000 th : pred -1.000000 , real -1.000000 
 11000 th : pred -1.000000 , real -1.000000 
 12000 th : pred -1.000000 , real -1.000000 
 13000 th : pred -1.000000 , real -1.000000 
 14000 th : pred -1.000000 , real -1.000000 
 15000 th : pred -1.000000 , real -1.000000 
 16000 th : pred -1.000000 , real -1.000000 
 17000 th : pred -1.000000 , real -1.000000 
 18000 th : pred -1.000000 , real -1.000000 
 19000 th : pred -1.000000 , real -1.000000 
 20000 th : pred -1.000000 , real -1.000000 
 21000 th : pred -1.000000 , real -1.000000 