In [3]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.utils import data

# For data preprocess
import numpy as np
import csv
import os

# For plotting
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
from utils.mylib import *
from d2l import torch as d2l


# Settings for the evaluation cells below.
# NOTE(review): the key is spelled 'sava_path' (sic). The checkpoint-loading cell
# reads the same spelling, so rename both together or not at all.
config = {
    'sava_path': 'models/MF_1M.pth',
    'batch_size': 500
}
# init_Seed / get_device / prep_dataloader are not defined in this notebook at this
# point; presumably they come from `from utils.mylib import *` — TODO confirm.
init_Seed()
device = get_device()                 # get the current available device ('cpu' or 'cuda')

# Train / test split files, given relative to the notebook's working directory.
data_train = '../data/ML1M/ML1M_copy1_train.txt'
data_test = '../data/ML1M/ML1M_copy1_test.txt'

# Called here as (path, mode, batch_size). A later cell defines another
# `prep_dataloader` with signature (X, Y, batch_size, n_jobs, mode); once that cell
# has run, re-running this one would call the wrong function.
tr_set = prep_dataloader(data_train, 'train', config['batch_size'])
dv_set = prep_dataloader(data_test, 'dev', config['batch_size'])
# tt_set = prep_dataloader("data/ML100K/ML100K_copy1_test.txt", 'test', config['batch_size'], target_only=target_only)

GPU ready!
Max user: 6040
Max item: 3952
Finished reading the train set of MoviesLen Dataset (600126 samples found, each dim = 2)
Max user: 6040
Max item: 3952
Finished reading the dev set of MoviesLen Dataset (200041 samples found, each dim = 2)


In [4]:
# Build the MF model and restore its weights from the path stored under the
# (misspelled) 'sava_path' key of `config`.
# NOTE(review): `MF` and `dev` are not defined above this cell; presumably they come
# from `from utils.mylib import *` — TODO confirm. A `dev` with the same
# (dv_set, model, device) signature is defined further down in this notebook.
model = MF().to(device)
ckpt = torch.load(config['sava_path'], map_location='cpu')  # Load your best model
model.load_state_dict(ckpt)
# plot_pred(dv_set, model, device)  # Show prediction on the validation set
# Print the value returned by dev() for the dev dataloader.
print(dev(dv_set, model, device))

0.7528189614542324


In [3]:
# Hyper-parameters for the Envoy experiment. This rebinds `config`, so the dict
# from the first cell (which held 'sava_path') is no longer reachable afterwards.
config = dict(
    n_epochs=2000,                  # maximum number of epochs
    batch_size=50,                  # mini-batch size for a dataloader
    optimizer='Adam',               # class name looked up in torch.optim
    optim_hparas=dict(              # keyword arguments forwarded to that optimizer
        lr=0.01,                    # learning rate
        # weight_decay=0.001
        # momentum=0.9              # momentum for SGD
    ),
    early_stop=3,                   # early-stopping patience (epochs since the last improvement)
    save_path='models/model.pth',   # where the model would be saved
    D=50,                           # NOTE(review): not read by any cell visible here
)

In [4]:
class Envoy(nn.Module):
    """Two-layer MLP mapping an ``n_factors``-dimensional vector to another one.

    Architecture::

        Linear(n_factors, hidden_mult * n_factors) -> ReLU
            -> Linear(hidden_mult * n_factors, n_factors)

    Args:
        n_factors: Size of the input and of the output vectors.
        hidden_mult: Hidden width expressed as a multiple of ``n_factors``.
            The default (1000) reproduces the previously hard-coded width.
        init_std: Standard deviation of the zero-mean normal distribution used
            to initialise the weight matrices of the linear layers. The default
            (0.1) is the previously hard-coded value.

    The parameter names (``net.0.*``, ``net.2.*``) are unchanged, so existing
    state dicts remain loadable.
    """

    def __init__(self, n_factors=20, hidden_mult=1000, init_std=0.1):
        super().__init__()

        hidden_dim = hidden_mult * n_factors
        self.init_std = init_std
        self.net = nn.Sequential(
            nn.Linear(n_factors, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, n_factors),
        )

        self.init_net()

        self.criterion = nn.MSELoss(reduction='mean')

    def forward(self, X):
        """Apply the network to ``X``; its last dimension must equal ``n_factors``."""
        return self.net(X)

    def cal_loss(self, pred, target):
        """Return the mean squared error between ``pred`` and ``target``."""
        return self.criterion(pred, target)

    def init_net(self):
        """Re-draw the weights of every linear layer from N(0, init_std ** 2).

        Only the weight matrices are touched; biases keep the values assigned
        by ``nn.Linear`` at construction time.
        """
        def init_weights(m):
            # isinstance (instead of an exact type comparison) also covers
            # subclasses of nn.Linear.
            if isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, std=self.init_std)
        self.net.apply(init_weights)

In [5]:
def prep_dataloader(X, Y, batch_size, n_jobs=0, mode='train'):
    """Pair ``X`` with ``Y`` in a TensorDataset and wrap it in a DataLoader.

    Batches are shuffled only when ``mode == 'train'``; ``n_jobs`` is passed on
    as the loader's ``num_workers``.

    NOTE(review): the first cell calls a function of the same name as
    ``prep_dataloader(path, mode, batch_size)``; this definition replaces it in
    the notebook namespace once this cell has run.
    """
    is_training = (mode == 'train')
    paired = data.TensorDataset(X, Y)
    loader = data.DataLoader(
        paired,
        batch_size,
        shuffle=is_training,
        num_workers=n_jobs,
    )
    return loader

In [6]:
# Target matrix. map_location='cpu' lets the file open even if it was saved from a
# CUDA device and no GPU is available here; detach() (the recommended replacement
# for `.data`) drops any autograd history.
V = torch.load("./VTensor.pt", map_location='cpu').detach()
# Random starting input drawn from U[0, 1). The shape is taken from V rather than
# hard-coded, so input and target always have matching dimensions.
V_tilde = torch.rand(V.shape)

In [7]:
# tr_set = prep_dataloader(V_tilde[:500], V[:500], config['batch_size'], n_jobs=10, mode='train')

In [8]:
def train(V_tilde, V):
    """Fit the module-level ``model`` so that ``model(V_tilde)`` approaches ``V``.

    Runs full-batch gradient steps for ``config['n_epochs']`` epochs, using the
    optimizer named by ``config['optimizer']`` (looked up in ``torch.optim``)
    with ``config['optim_hparas']`` as keyword arguments. After every 50th step
    the network input is replaced by the network's own current output.

    Relies on the module-level names ``model``, ``device`` and ``config``.
    No validation, early stopping or checkpointing is performed.

    Args:
        V_tilde: Initial input tensor. Refreshes are applied to a local copy of
            the reference only, so the caller's tensor is never modified.
        V: Target tensor passed to ``model.cal_loss`` together with the output.

    Returns:
        A ``(min_mse, loss_record)`` tuple. ``min_mse`` is the smallest training
        MSE seen (``1000.`` when no step was run). ``loss_record['train']``
        holds the MSE of every step; ``loss_record['dev']`` stays empty.
    """
    n_epochs = config['n_epochs']  # Maximum number of epochs
    refresh_every = 50             # steps between two input refreshes

    # Setup optimizer
    optimizer = getattr(torch.optim, config['optimizer'])(
        model.parameters(), **config['optim_hparas'])

    loss_record = {'train': [], 'dev': []}

    # The device transfer does not depend on the epoch, so do it once.
    X, Y = V_tilde.to(device), V.to(device)

    model.train()
    epochs_run = 0
    for epoch in range(n_epochs):
        optimizer.zero_grad()
        mse_loss = model.cal_loss(model(X), Y)
        mse_loss.backward()
        optimizer.step()

        train_mse = mse_loss.item()
        loss_record['train'].append(train_mse)

        # The reported figure is the RMSE; the recorded one is the MSE.
        print("epoch: {:4d} train_loss: {:.4f}".format(epoch, np.sqrt(train_mse)))

        if (epoch + 1) % refresh_every == 0:
            # Feed the model its own output from now on. no_grad avoids building
            # an autograd graph that would be thrown away immediately.
            with torch.no_grad():
                X = model(X)

        epochs_run = epoch + 1

    # Previously the sentinel was returned unconditionally because the code that
    # updated it was commented out; report the best training MSE instead.
    min_mse = min(loss_record['train'], default=1000.)

    print("Finish training after {} epochs".format(epochs_run))
    return min_mse, loss_record


In [9]:
def dev(dv_set, model, device):
    """Return the per-sample average of ``model.cal_loss`` over a dataloader.

    The model is switched to evaluation mode and left in it. Each batch loss is
    weighted by the batch length, and the sum is divided by
    ``len(dv_set.dataset)``.
    """
    model.eval()
    weighted_sum = 0
    for features, targets in dv_set:
        features = features.to(device)
        targets = targets.to(device)
        with torch.no_grad():  # inference only: no autograd bookkeeping
            batch_loss = model.cal_loss(model(features), targets)
        # Weight by batch length so a short final batch is not over-counted.
        weighted_sum += batch_loss.item() * len(targets)

    return weighted_sum / len(dv_set.dataset)

In [10]:
# Build an Envoy with its default n_factors and move it to `device`. This rebinds
# the module-level `model` (previously the MF instance), which train() reads.
model = Envoy().to(device)

In [11]:
# Run the training loop; `t` receives the (min_mse, loss_record) tuple returned by train().
t = train(V_tilde, V)

epoch:    0 train_loss: 0.9509
epoch:    1 train_loss: 3.3020
epoch:    2 train_loss: 1.1894
epoch:    3 train_loss: 1.2126
epoch:    4 train_loss: 1.6466
epoch:    5 train_loss: 1.4960
epoch:    6 train_loss: 1.1448
epoch:    7 train_loss: 0.8274
epoch:    8 train_loss: 0.6225
epoch:    9 train_loss: 0.5318
epoch:   10 train_loss: 0.5170
epoch:   11 train_loss: 0.5323
epoch:   12 train_loss: 0.5495
epoch:   13 train_loss: 0.5575
epoch:   14 train_loss: 0.5535
epoch:   15 train_loss: 0.5387
epoch:   16 train_loss: 0.5154
epoch:   17 train_loss: 0.4863
epoch:   18 train_loss: 0.4543
epoch:   19 train_loss: 0.4222
epoch:   20 train_loss: 0.3924
epoch:   21 train_loss: 0.3664
epoch:   22 train_loss: 0.3452
epoch:   23 train_loss: 0.3295
epoch:   24 train_loss: 0.3193
epoch:   25 train_loss: 0.3138
epoch:   26 train_loss: 0.3118
epoch:   27 train_loss: 0.3122
epoch:   28 train_loss: 0.3136
epoch:   29 train_loss: 0.3152
epoch:   30 train_loss: 0.3167
epoch:   31 train_loss: 0.3177
epoch:  

In [12]:
# plot_learning_curve(model_loss_record, title='MF model')

In [13]:
# Rows 1-3 of the target matrix V, for comparison with the model output in the next cell.
V[1:4]

tensor([[ 0.6415,  0.5864, -0.4928,  0.1856, -0.6383, -0.3647,  0.3854, -0.5441,
         -0.1961,  0.2173,  0.4751,  0.2366, -0.7045, -0.6043, -0.4009, -0.3298,
         -0.7767, -0.6387, -0.6175, -0.8895],
        [ 0.4283,  0.2832, -0.0845, -0.1470, -0.0795, -0.3145, -0.0724,  0.0375,
         -0.4534,  0.6837,  0.3904,  0.2906, -0.4650, -0.6292, -0.5667, -0.1778,
         -0.6784, -0.3649, -0.6034, -0.5467],
        [ 0.2628,  0.4229, -0.4310, -0.4454, -0.3224, -0.5331,  0.2927, -0.3426,
         -0.3448,  0.8053,  0.0897, -0.4763,  0.0247, -0.4313, -0.4606, -0.3986,
         -0.4444, -0.6274, -0.4168, -0.4746]])

In [14]:
# Apply the trained network to rows 1-3 of the module-level V_tilde, without
# tracking gradients.
# NOTE(review): train() refreshes its inputs only locally, so the V_tilde used here
# is still the original torch.rand matrix, not the inputs the model saw at the end
# of training — confirm this is the intended comparison against V[1:4].
with torch.no_grad():
    print(model.net(V_tilde[1:4].to(device)))

tensor([[ 0.4187,  0.3437, -0.1813, -0.1442, -0.3098, -0.3514,  0.3227, -0.3371,
         -0.4475,  0.3324,  0.3210,  0.2577, -0.2884, -0.2103, -0.4819, -0.1831,
         -0.1221, -0.3170, -0.2455, -0.3142],
        [ 0.5004,  0.4015, -0.2357, -0.3144, -0.3439, -0.3841,  0.5069, -0.4297,
         -0.3418,  0.2850,  0.2506,  0.3777, -0.4287, -0.2568, -0.3792, -0.2197,
         -0.2918, -0.3717, -0.1971, -0.3096],
        [ 0.8974,  0.4406, -0.3025, -0.4658, -0.4909, -0.5831,  0.4129, -0.2776,
         -0.2709,  0.3251,  0.3821,  0.2295, -0.4354, -0.3237, -0.5151, -0.4819,
         -0.2715, -0.5388, -0.5477, -0.4067]], device='cuda:0')
