### Load preprocessed data

The next cell runs the script that downloads and preprocesses the MovieLens data.
It is commented out by default; uncomment it to run the download & processing script.

In [1]:
#!python ../src/download.py

In [6]:
import numpy as np
# Open the archive in a context manager so the underlying file handle is
# closed as soon as every array has been read into memory.
with np.load('data/dataset.npz') as fh:
    # Features and targets are stored under separate keys.
    # Note: PyTorch is strict about index types, so the feature arrays are
    # cast to int64 explicitly.
    train_x = fh['train_x'].astype(np.int64)
    train_y = fh['train_y']

    # The data has already been split into train & test.
    test_x = fh['test_x'].astype(np.int64)
    test_y = fh['test_y']

    # Scalar metadata stored in the archive, converted to plain Python ints.
    n_user = int(fh['n_user'])
    n_item = int(fh['n_item'])
    n_occu = int(fh['n_occu'])
    n_rank = int(fh['n_ranks'])

In [7]:
# Display the value of n_user read from the dataset archive.
n_user

6041

In [8]:
from abstract_model import AbstractModel

In [15]:
import torch
from torch import nn
import torch.nn.functional as F
import pytorch_lightning as pl


def l2_regularize(array):
    """Return the sum of the squared entries of ``array`` as a scalar tensor."""
    squared = array.pow(2.0)
    return squared.sum()


def sample_gaussian(mu, log_var):
    """Draw a reparameterized sample from a Gaussian.

    Args:
        mu: Tensor of means.
        log_var: Tensor of log-variances, broadcastable against ``mu``.

    Returns:
        ``mu + eps * std`` where ``std = exp(0.5 * log_var)`` and ``eps`` is
        standard-normal noise. The result is differentiable with respect to
        both ``mu`` and ``log_var``.
    """
    # exp(0.5 * log_var) is the standard deviation, not the variance.
    # Keep it on mu's device, as before.
    std = torch.exp(0.5 * log_var).to(mu.device)
    # Sample the noise directly with std's shape, dtype and device instead of
    # allocating a float32 CPU tensor and copying it over on every call.
    eps = torch.randn_like(std)
    return mu + eps * std


def gaussian_kldiv(mu, log_var):
    """Return -0.5 * sum(1 + log_var - mu^2 - exp(log_var)) as a scalar.

    This is the KL divergence of N(mu, exp(log_var)) from a standard normal,
    summed over every element of the inputs.
    """
    # Build the per-element term out-of-place: mu^2 + exp(log_var), negate,
    # then add 1 and log_var.
    second_moment = mu.pow(2) + log_var.exp()
    per_element = -second_moment + 1 + log_var
    return torch.sum(per_element) * -0.5


class MF(AbstractModel):
    """Matrix factorization with stochastically sampled user and item vectors.

    Every user and every item has a mean embedding and a log-variance
    embedding. ``forward`` samples a latent vector from each Gaussian and
    predicts ``dot(user, item) + global bias + user bias + item bias``.
    ``prior`` penalizes the bias tables with an L2 term and the latent
    Gaussians with a KL-divergence term.
    """

    def __init__(self, n_user, n_item,
                 k=18, c_vector=1.0, c_bias=1.0,
                 c_kld=1.0, batch_size=128):
        """Create the embedding tables.

        Args:
            n_user: Number of rows in the user embedding tables.
            n_item: Number of rows in the item embedding tables.
            k: Dimensionality of the latent vectors.
            c_vector: Accepted but not used anywhere in this class.
            c_bias: Weight of the L2 penalty on the user and item biases.
            c_kld: Weight of the KL-divergence penalty on the latent vectors.
            batch_size: Stored on the instance; not read inside this class.
        """
        super().__init__()
        self.k = k
        self.n_user = n_user
        self.n_item = n_item
        self.c_bias = c_bias
        self.c_kld = c_kld
        self.batch_size = batch_size

        # Mean (mu) and log-variance (lv) tables for the latent vectors.
        self.user_mu = nn.Embedding(n_user, k)
        self.user_lv = nn.Embedding(n_user, k)
        self.item_mu = nn.Embedding(n_item, k)
        self.item_lv = nn.Embedding(n_item, k)

        self.bias_user = nn.Embedding(n_user, 1)
        self.bias_item = nn.Embedding(n_item, 1)
        self.bias = nn.Parameter(torch.ones(1))

    def forward(self, inputs):
        """Predict one score per row of ``inputs``.

        Args:
            inputs: 2-D integer tensor; column 0 holds user ids and column 1
                holds item ids.

        Returns:
            1-D tensor with one prediction per row. The latent vectors are
            sampled on every call, so repeated calls with the same inputs
            give different outputs.
        """
        user_id = inputs[:, 0]
        item_id = inputs[:, 1]

        # Stochastically-sampled user & item vectors.
        vector_user = sample_gaussian(self.user_mu(user_id), self.user_lv(user_id))
        vector_item = sample_gaussian(self.item_mu(item_id), self.item_lv(item_id))

        # Drop only the trailing size-1 embedding dim. A bare squeeze() would
        # also remove the batch dimension when the batch has a single row.
        bias_user = self.bias_user(user_id).squeeze(-1)
        bias_item = self.bias_item(item_id).squeeze(-1)

        biases = (self.bias + bias_user + bias_item)
        ui_interaction = torch.sum(vector_user * vector_item, dim=1)
        prediction = ui_interaction + biases
        return prediction

    def likelihood(self, prediction, target):
        """Mean squared error between ``prediction`` and ``target``.

        Returns:
            Tuple ``(loss, log)`` where ``log`` is ``{"mse": loss}``.
        """
        # Flatten the target to 1-D so it lines up with the 1-D prediction
        # even for a single-row batch. squeeze() would turn a (1, 1) target
        # into a 0-d tensor and make mse_loss fall back to broadcasting.
        loss_mse = F.mse_loss(prediction, target.reshape(-1))
        return loss_mse, {"mse": loss_mse}

    def prior(self):
        """Regularization over the full parameter tables.

        Returns:
            Tuple ``(total, log)``: the sum of the four penalty terms and a
            dict holding each term separately.
        """
        prior_bias_user = l2_regularize(self.bias_user.weight) * self.c_bias
        prior_bias_item = l2_regularize(self.bias_item.weight) * self.c_bias

        # KL penalty on every user and item Gaussian, not just the current batch.
        user_kld = gaussian_kldiv(self.user_mu.weight, self.user_lv.weight) * self.c_kld
        item_kld = gaussian_kldiv(self.item_mu.weight, self.item_lv.weight) * self.c_kld
        total = prior_bias_user + prior_bias_item + user_kld + item_kld
        log = dict(prior_bias_user=prior_bias_user, prior_bias_item=prior_bias_item,
                   user_kld=user_kld, item_kld=item_kld)
        return total, log

In [16]:
from pytorch_lightning.logging import WandbLogger


# Hyperparameters for this run.
k = 128
c_bias = 1e-3
c_vector = 1e-5
c_kld = 1.0
model = MF(n_user, n_item,
           k=k, c_bias=c_bias, c_vector=c_vector,
           # Pass the variable, not a repeated literal, so editing c_kld above takes effect.
           c_kld=c_kld,
           batch_size=1024)
# Use the held-out split for evaluation instead of passing the training data twice.
# NOTE(review): assumes save_data(train_x, train_y, test_x, test_y) argument
# order — confirm against AbstractModel.
model.save_data(train_x, train_y, test_x, test_y)

# add a logger
logger = WandbLogger(name="07_mf", project="simple_mf")

trainer = pl.Trainer(max_epochs=100, logger=logger,
                     early_stop_callback=True,
                     gpus=1, progress_bar_refresh_rate=1)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
CUDA_VISIBLE_DEVICES: [0]


### Test model

In [None]:
# Evaluate the model with the trainer configured above.
# NOTE(review): this cell sits before the `trainer.fit(model)` cell; run the
# fit cell first if the intent is to test a trained model — confirm the order.
trainer.test(model)

#### Run model

In [None]:
# Train the model with the trainer configured above.
trainer.fit(model)