## Colab settings

In [1]:
import os

# Name of the project folder this notebook belongs to.
prj_name = 'movielens'
# Project root under /content/drive (presumably the mounted Google Drive).
# The trailing slash is kept so sub-paths can be appended by concatenation.
prj_path = ''.join((
    '/content/drive/My Drive/colab/study/recommender_system/',
    prj_name,
    '/',
))
# Work from the project's notebooks/ folder so relative paths like '..' resolve.
os.chdir(''.join((prj_path, 'notebooks/')))

## Notebook settings (autoreload, import path)

In [2]:
# Load IPython's autoreload extension and select mode 2, which re-imports
# modules before each cell executes so edits to project code are picked up.
%load_ext autoreload
%autoreload 2

import sys

# Make the parent of the working directory importable. Guarded so that
# re-running this cell does not keep appending duplicate entries to sys.path.
if '..' not in sys.path:
    sys.path.append('..')

# main

In [3]:
# Directory the later cells read saved objects from with torch.load. The path is
# relative to the current working directory, and the trailing slash is required
# because file names are appended by plain string concatenation.
interimpath = '../data/interim/'

In [4]:
from importlib import import_module

import torch
import torch.nn as nn

# NOTE(review): unpinned install that runs again on every execution of this cell.
# The recorded output shows pytorch-ignite 0.4.1 was resolved; consider pinning
# that version (pytorch-ignite==0.4.1) in a dedicated setup cell.
!pip install pytorch-ignite
from ignite.metrics import Loss

Collecting pytorch-ignite
[?25l  Downloading https://files.pythonhosted.org/packages/c0/8e/08569347023611e40e62a14162024ca6238d42cb528b2302f84d662a2033/pytorch_ignite-0.4.1-py2.py3-none-any.whl (166kB)
[K     |██                              | 10kB 28.4MB/s eta 0:00:01[K     |████                            | 20kB 3.6MB/s eta 0:00:01[K     |██████                          | 30kB 4.7MB/s eta 0:00:01[K     |███████▉                        | 40kB 5.1MB/s eta 0:00:01[K     |█████████▉                      | 51kB 4.2MB/s eta 0:00:01[K     |███████████▉                    | 61kB 4.8MB/s eta 0:00:01[K     |█████████████▊                  | 71kB 5.2MB/s eta 0:00:01[K     |███████████████▊                | 81kB 5.5MB/s eta 0:00:01[K     |█████████████████▊              | 92kB 5.9MB/s eta 0:00:01[K     |███████████████████▋            | 102kB 5.7MB/s eta 0:00:01[K     |█████████████████████▋          | 112kB 5.7MB/s eta 0:00:01[K     |███████████████████████▋        | 12

In [5]:
# Mini-batch size handed to every DataLoader (4096 samples).
batch_size = 1 << 12

# Optimiser class (instantiated later with the model parameters) and its step size.
opt_ = torch.optim.Adam
lr = 1e-4
# Mean-squared-error criterion used for training.
loss_fn = nn.MSELoss()
# Metrics to evaluate, keyed by the label printed next to each value.
val_metrics = {'loss': Loss(loss_fn)}
# Use the first GPU when CUDA is available; otherwise fall back to the CPU so the
# notebook still runs on a runtime without a GPU instead of failing on .to(device).
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
# Number of passes over the training data.
max_epochs = 100

## model construction

In [6]:
class MatFac(nn.Module):
    """Embedding-based rating predictor.

    Looks up one embedding for the id in column 0 (user) and one for the id in
    column 1 (item), concatenates them, maps the result to a single value with a
    linear layer, and applies a sigmoid scaled by 5 so predictions lie between
    0 and 5.

    Args:
        n: indexable pair; ``n[0]`` and ``n[1]`` are the number of rows of the
            user and item embedding tables.
        n_embed: pair of embedding widths ``(user, item)``; their sum is the
            input width of the linear layer.
    """

    def __init__(self, n, n_embed):
        super().__init__()
        # Sub-modules are created in a fixed order: user table, item table, head.
        user_rows, item_rows = n[0], n[1]
        self.embed_user = nn.Embedding(user_rows, n_embed[0])
        self.embed_item = nn.Embedding(item_rows, n_embed[1])
        self.fc = nn.Linear(sum(n_embed), 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return one prediction per row of ``x``.

        Args:
            x: integer index tensor whose column 0 feeds the user embedding and
                column 1 the item embedding.

        Returns:
            Tensor with a trailing dimension of size 1 holding ``5 * sigmoid(.)``.
        """
        user_vec = self.embed_user(x[:, 0])
        item_vec = self.embed_item(x[:, 1])
        features = torch.cat((user_vec, item_vec), 1)
        return 5 * self.sigmoid(self.fc(features))

In [7]:
# Embedding width for the user table and the item table, in that order.
n_embed = (30, 30)
# Table sizes read from the interim directory; MatFac indexes this as n[0], n[1].
n = torch.load(interimpath + 'n_um.pt')

model = MatFac(n=n, n_embed=n_embed)

## load data

In [8]:
from torch.utils.data import DataLoader

In [9]:
# Read the three saved splits from the interim directory, in train/val/test order.
train_dataset, val_dataset, test_dataset = (
    torch.load(interimpath + split_file)
    for split_file in ('train_dataset.pt', 'val_dataset.pt', 'test_dataset.pt')
)

In [10]:
# Only the training loader shuffles. Evaluation metrics do not depend on sample
# order, so the validation and test loaders iterate in dataset order, which keeps
# evaluation deterministic and lets predictions line up with the dataset rows.
train_loader = DataLoader(train_dataset,
        batch_size=batch_size, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset,
        batch_size=batch_size, shuffle=False, num_workers=2)
test_loader = DataLoader(test_dataset,
        batch_size=batch_size, shuffle=False, num_workers=2)

## train

In [14]:
from ignite.engine import Events, create_supervised_trainer, create_supervised_evaluator
from ignite.handlers import Checkpoint, DiskSaver

def train_net(net, opt, loss_fn, val_metrics, train_loader, val_loader, device):
    """Build an ignite trainer that prints train/validation metrics every epoch.

    Moves ``net`` to ``device``, then creates a supervised trainer and a single
    evaluator. After each completed epoch the evaluator is run over
    ``train_loader`` and then over ``val_loader``, and every metric named in
    ``val_metrics`` is printed.

    Args:
        net: model to optimise; moved to ``device`` in place.
        opt: optimiser instance handed to the trainer.
        loss_fn: criterion handed to the trainer.
        val_metrics: dict mapping a display name to an ignite metric.
        train_loader: iterable of ``(x, y)`` batches; used here only for the
            per-epoch evaluation on the training data.
        val_loader: iterable of ``(x, y)`` batches used for validation.
        device: device the model and every batch are moved to.

    Returns:
        The configured trainer engine. It is not started here; the caller runs
        it with ``trainer.run(...)``.
    """
    net.to(device)

    def prepare_batch(batch, device, non_blocking=False):
        # Forward the non_blocking flag supplied by ignite instead of dropping it.
        x, y = batch
        return (x.to(device, non_blocking=non_blocking),
                y.to(device, non_blocking=non_blocking))

    def output_transform(x, y, y_pred, loss):
        # NOTE(review): taking the argmax over dim 1 looks like a classification
        # leftover. It is kept as-is so trainer.state.output remains this tuple.
        return (y_pred.max(1)[1], y)

    trainer = create_supervised_trainer(net, opt, loss_fn, device,
            prepare_batch=prepare_batch, output_transform=output_transform)
    evaluator = create_supervised_evaluator(net, val_metrics, device,
            prepare_batch=prepare_batch)
    s = '{}: {:.2f} '

    def _metrics_line(prefix, loader):
        # Evaluate on `loader`, then append every configured metric to `prefix`.
        evaluator.run(loader)
        return prefix + ''.join(
                s.format(m, evaluator.state.metrics[m]) for m in val_metrics)

    # Handlers fire in registration order, so the train line precedes the val line.
    @trainer.on(Events.EPOCH_COMPLETED)
    def log_training_results(trainer):
        line = _metrics_line('Train - ', train_loader)
        print('Epoch {}'.format(trainer.state.epoch))
        print(line)

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(trainer):
        print(_metrics_line('Val   - ', val_loader))

    return trainer

In [15]:
# Instantiate the selected optimiser class for this model's parameters.
opt = opt_(model.parameters(), lr=lr)

trainer = train_net(
    net=model,
    opt=opt,
    loss_fn=loss_fn,
    val_metrics=val_metrics,
    train_loader=train_loader,
    val_loader=val_loader,
    device=device,
)
# Kept as the cell's last expression so the returned engine State is displayed.
trainer.run(train_loader, max_epochs=max_epochs)

Epoch 1
Train - loss: 1.12 
Val   - loss: 1.12 
Epoch 2
Train - loss: 0.94 
Val   - loss: 0.94 
Epoch 3
Train - loss: 0.89 
Val   - loss: 0.89 
Epoch 4
Train - loss: 0.85 
Val   - loss: 0.86 
Epoch 5
Train - loss: 0.83 
Val   - loss: 0.83 
Epoch 6
Train - loss: 0.81 
Val   - loss: 0.81 
Epoch 7
Train - loss: 0.79 
Val   - loss: 0.80 
Epoch 8
Train - loss: 0.78 
Val   - loss: 0.79 
Epoch 9
Train - loss: 0.77 
Val   - loss: 0.78 
Epoch 10
Train - loss: 0.76 
Val   - loss: 0.77 
Epoch 11
Train - loss: 0.75 
Val   - loss: 0.76 
Epoch 12
Train - loss: 0.75 
Val   - loss: 0.76 
Epoch 13
Train - loss: 0.74 
Val   - loss: 0.75 
Epoch 14
Train - loss: 0.74 
Val   - loss: 0.75 
Epoch 15
Train - loss: 0.74 
Val   - loss: 0.75 
Epoch 16
Train - loss: 0.73 
Val   - loss: 0.75 
Epoch 17
Train - loss: 0.73 
Val   - loss: 0.74 
Epoch 18
Train - loss: 0.73 
Val   - loss: 0.74 
Epoch 19
Train - loss: 0.73 
Val   - loss: 0.74 
Epoch 20
Train - loss: 0.72 
Val   - loss: 0.74 
Epoch 21
Train - loss: 0.72 


State:
	iteration: 351600
	epoch: 100
	epoch_length: 3516
	max_epochs: 100
	output: <class 'tuple'>
	batch: <class 'list'>
	metrics: <class 'dict'>
	dataloader: <class 'torch.utils.data.dataloader.DataLoader'>
	seed: <class 'NoneType'>
	times: <class 'dict'>

## Test