### Load preprocessed data

The cell below runs the script that downloads and preprocesses the MovieLens data.
It is commented out by default; uncomment it the first time you run this notebook (or whenever the data needs to be regenerated).

In [1]:
#!python ../src/download.py

In [2]:
import numpy as np
# Load the preprocessed arrays. An .npz archive stays open until it is closed,
# so read everything we need inside a context manager.
with np.load('data/dataset.npz') as fh:
    # Feature matrices hold integer ids, one column per categorical field;
    # the targets are stored in their own arrays.
    # `.astype` returns a copy, so the in-place offsets below never touch the
    # arrays cached in the archive and this cell is safe to re-run.
    train_x = fh['train_x'].astype(np.int64)
    train_y = fh['train_y']

    test_x = fh['test_x'].astype(np.int64)
    test_y = fh['test_y']

    n_user = int(fh['n_user'])
    n_item = int(fh['n_item'])
    n_occu = int(fh['n_occu'])
    n_rank = int(fh['n_ranks'])

# Shift columns 1..3 so every field gets its own contiguous id range inside a
# single shared embedding table: column 0 keeps its ids, column 1 starts at
# n_user, column 2 at n_user + n_item, column 3 at n_user + n_item + n_occu.
# NOTE(review): presumably the columns are (user, item, occupation, rank) --
# confirm against the preprocessing script.
col_offsets = np.cumsum([n_user, n_item, n_occu]).astype(np.int64)
train_x[:, 1:4] += col_offsets
test_x[:, 1:4] += col_offsets

# Total number of rows needed in the shared embedding table.
n_feat = n_user + n_item + n_occu + n_rank

In [3]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
from abstract_model import AbstractModel

In [5]:
import torch
from torch import nn
import torch.nn.functional as F
from torch.autograd import Variable


def l2_regularize(array):
    """Return the sum of the squared entries of ``array`` as a 0-d tensor."""
    squared = array ** 2.0
    return torch.sum(squared)


def index_into(arr, idx):
    """Gather rows of ``arr`` for every index in ``idx``.

    Parameters
    ----------
    arr : torch.Tensor
        Lookup table indexed along its first dimension, e.g. an embedding
        weight of shape (n_rows, dim).
    idx : torch.Tensor
        Integer row indices, e.g. shape (batchsize, n_features).

    Returns
    -------
    torch.Tensor
        Tensor of shape ``idx.shape + arr.shape[1:]``; for a 2-D ``idx`` and a
        2-D ``arr`` that is (batchsize, n_features, dim).
    """
    # Advanced indexing with an integer tensor already yields the
    # (batchsize, n_features, dim) layout, so no flatten/view round-trip (and
    # no deprecated non-in-place ``Tensor.resize``) is needed.
    return arr[idx]


def factorization_machine(v, x=None):
    """Sum of pairwise feature interactions, computed in linear time.

    For every batch row and latent dimension this returns
    ``sum_{i<j} (v_i * x_i) * (v_j * x_j)`` using Rendle's identity
    ``0.5 * ((sum_i a_i)**2 - sum_i a_i**2)``, so the cost is linear in the
    number of features instead of quadratic.

    Parameters
    ----------
    v : torch.Tensor
        Feature vectors of shape (batchsize, n_features, dim).
    x : torch.Tensor, optional
        Per-feature weights. Either (batchsize, n_features) or any shape that
        ``expand`` can turn into (batchsize, n_features, dim), such as
        (batchsize, n_features, 1). Treated as all ones when omitted.

    Returns
    -------
    torch.Tensor
        Interaction terms of shape (batchsize, dim).
    """
    if x is None:
        # Unit weights: multiplying by one is a no-op, so skip building a ones
        # tensor entirely (also avoids any device/dtype mismatch with ``v``).
        t0 = v.sum(dim=1) ** 2.0
        t1 = (v ** 2.0).sum(dim=1)
    else:
        batchsize, n_features, n_dim = v.size()
        if x.dim() == 2 and tuple(x.size()) == (batchsize, n_features):
            # ``expand`` aligns trailing dimensions, so a (batchsize,
            # n_features) weight needs an explicit trailing axis before it
            # can be broadcast over the latent dimension.
            x = x.unsqueeze(-1)
        x = x.expand(batchsize, n_features, n_dim)
        t0 = (v * x).sum(dim=1) ** 2.0
        t1 = (v ** 2.0 * x ** 2.0).sum(dim=1)
    return 0.5 * (t0 - t1)

In [13]:
import torch
from torch import nn
import torch.nn.functional as F
import pytorch_lightning as pl

from pytorch_lightning.loggers import TensorBoardLogger


class MF(AbstractModel):
    """Factorization-machine regressor over integer-coded categorical features.

    Every feature id owns a ``k``-dimensional latent vector and a scalar bias,
    both stored in embedding tables with ``n_feat`` rows. A prediction is the
    sum of the biases of the active features plus the sum over latent
    dimensions of their pairwise interactions.

    NOTE(review): the training/test loops, data loaders and the way
    ``likelihood`` and ``prior`` are combined live in ``AbstractModel``, which
    is not visible here.

    Parameters
    ----------
    n_feat : int
        Number of rows in the embedding tables (total distinct feature ids).
    train_x, train_y, test_x, test_y
        Data handed unchanged to ``self.save_data``.
    k : int
        Dimensionality of the latent vectors.
    c_feat : float
        L2 regularisation strength for the latent vectors.
    c_bias : float
        L2 regularisation strength for the per-feature biases.
    batch_size : int
        Stored on the instance as ``self.batch_size``.
    """

    def __init__(self, n_feat,
                 train_x, train_y, test_x, test_y,
                 k=18, c_feat=1.0, c_bias=1.0,
                 batch_size=128):
        super().__init__()
        self.save_data(train_x, train_y, test_x, test_y)
        self.k = k
        self.n_feat = n_feat
        self.feat = nn.Embedding(n_feat, k)
        self.bias_feat = nn.Embedding(n_feat, 1)
        self.c_feat = c_feat
        self.c_bias = c_bias
        self.batch_size = batch_size

    def forward(self, inputs):
        """Predict one score per row of ``inputs``.

        ``inputs`` is a 2-D integer tensor of feature ids (one row per
        example); the result has one entry per row.
        """
        # (batch, n_fields, 1) -> (batch, n_fields) -> (batch,). Squeeze only
        # the trailing axis: a bare ``squeeze()`` would also drop the batch
        # axis for a single-row batch and make ``sum(dim=1)`` fail.
        biases = index_into(self.bias_feat.weight, inputs).squeeze(-1).sum(dim=1)
        # (batch, n_fields, k)
        vectrs = index_into(self.feat.weight, inputs)
        # (batch, k) -> (batch,); no squeeze, so batch size 1 and k == 1 work.
        interx = factorization_machine(vectrs).sum(dim=1)
        logodds = biases + interx
        return logodds

    def likelihood(self, prediction, target):
        """Mean squared error between ``prediction`` and the squeezed ``target``."""
        loss_mse = F.mse_loss(prediction, target.squeeze())
        return loss_mse

    def prior(self):
        """L2 penalty on the latent vectors and on the biases.

        Each table is weighted by its own coefficient (``c_feat`` and
        ``c_bias``).
        """
        prior_feat = l2_regularize(self.feat.weight) * self.c_feat
        # The bias table is regularised with its own strength; previously the
        # latent-vector penalty was added twice and ``c_bias`` was ignored.
        prior_bias = l2_regularize(self.bias_feat.weight) * self.c_bias
        total = prior_feat + prior_bias
        return total


# Build the model from the arrays prepared in the data-loading cell; all other
# hyper-parameters keep their defaults.
model = MF(n_feat=n_feat,
           train_x=train_x, train_y=train_y,
           test_x=test_x, test_y=test_y)

# Send metrics to TensorBoard under tb_logs/fm_model.
logger = TensorBoardLogger("tb_logs", name="fm_model")

# Multiple GPUs could be enabled here instead, for example
# trainer = pl.Trainer(gpus=8, precision=16)
trainer = pl.Trainer(
    logger=logger,
    max_epochs=5,
    reload_dataloaders_every_epoch=True,
)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores


### Train model

Before training, evaluate the untrained model on the test set to get a baseline loss.

In [14]:
# Run the test loop before any call to trainer.fit: the reported test_loss is
# the baseline for the untrained model.
trainer.test(model)

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Testing', layout=Layout(flex='2'), max=…

--------------------------------------------------------------------------------
TEST RESULTS
{'test_loss': tensor(121.6394)}
--------------------------------------------------------------------------------



{'test_loss': 121.63938903808594}

#### Fit the model, then re-evaluate on the test set

In [None]:
# Train with the settings configured on the trainer, then run the test loop
# again so the resulting test_loss can be compared with the pre-training value.
trainer.fit(model)
trainer.test(model)


  | Name      | Type      | Params
----------------------------------------
0 | feat      | Embedding | 221 K 
1 | bias_feat | Embedding | 12 K  


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…






HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Testing', layout=Layout(flex='2'), max=…