In [None]:
import pandas as pd
from datetime import datetime
import numpy as np

def RMSE(ratings_pred, ratings):
    """Sum of squared errors between predictions and ratings.

    Despite the name, neither a mean nor a square root is taken here: the
    returned value is the plain sum of squared residuals as a 0-dim tensor.

    Predictions are zeroed wherever ``ratings`` is not strictly positive
    before the difference is taken.

    Args:
        ratings_pred: tensor of predicted ratings.
        ratings: tensor of target ratings, broadcast-compatible with
            ``ratings_pred``.

    Returns:
        0-dim tensor holding the summed squared error.
    """
    observed = ratings > 0
    residual = ratings_pred * observed - ratings
    return torch.sum(residual ** 2)

# Load the pre-split ratings tables.
df_train = pd.read_csv('movielens100k_train.csv')
df_test = pd.read_csv('movielens100k_test.csv')

# Shift IDs down by one so they can be used directly as 0-based matrix
# indices (presumably the CSV IDs are 1-based -- confirm against the files).
df_train['UserID'] = df_train['UserID'] - 1
df_test['UserID'] = df_test['UserID'] - 1
df_train['MovieID'] = df_train['MovieID'] - 1
df_test['MovieID'] = df_test['MovieID'] - 1

# Size the user/movie axes from BOTH splits (consistent with how the date
# bounds below are computed) so an ID that only appears in the test split
# cannot fall outside the rating-matrix shape.
USERS = max(df_train['UserID'].max(), df_test['UserID'].max()) + 1
MOVIES = max(df_train['MovieID'].max(), df_test['MovieID'].max()) + 1

# Unix-second timestamps -> naive UTC datetimes. Vectorised replacement for
# the per-row datetime.utcfromtimestamp() call, which is deprecated.
df_train['Date'] = pd.to_datetime(df_train['Timestamp'], unit='s')
df_test['Date'] = pd.to_datetime(df_test['Timestamp'], unit='s')

# Earliest rating across both splits; used as day zero.
MIN_DATE = min(df_train['Date'].min(), df_test['Date'].min())

# Whole days elapsed since MIN_DATE.
df_train['Days'] = (df_train['Date'] - MIN_DATE).dt.days
df_test['Days'] = (df_test['Date'] - MIN_DATE).dt.days

MIN_DAYS = min(df_train['Days'].min(), df_test['Days'].min())
MAX_DAYS = max(df_train['Days'].max(), df_test['Days'].max())
NO_DAY_BINS = 30
# Width (in days, possibly fractional) of each of the NO_DAY_BINS time bins
# covering day offsets 0..MAX_DAYS.
BIN_SIZE = (MAX_DAYS + 1) / NO_DAY_BINS

def get_day_bin(days, bin_size=None, n_bins=None):
  """Map a day offset to a zero-based time-bin index in ``[0, n_bins - 1]``.

  The previous formula clamped the quotient to at least 1 before subtracting
  1, which merged the first two bins into index 0 and shifted every later
  bin down by one. The quotient is now used directly and clamped to the
  valid index range.

  Args:
    days: day offset (number of days since the earliest rating).
    bin_size: width of one bin in days; defaults to the module-level
      ``BIN_SIZE``.
    n_bins: total number of bins; defaults to the module-level
      ``NO_DAY_BINS``.

  Returns:
    int bin index, clamped to ``0 .. n_bins - 1``.
  """
  if bin_size is None:
    bin_size = BIN_SIZE
  if n_bins is None:
    n_bins = NO_DAY_BINS
  # Clamp on both sides so negative or out-of-range offsets still land in a
  # valid bin.
  return int(min(max(0, days // bin_size), n_bins - 1))

# Stack both splits into one frame and expose its user / movie ID columns.
df = pd.concat([df_train, df_test])
ROWS, COLS = df['UserID'], df['MovieID']

# Quick sanity report: total row count, then size and first rows of each split.
print('Finished feature engineering...')
print('df shape', len(df))
for label, frame in (('df_train.head()', df_train), ('df_test.head()', df_test)):
    print(label, len(frame))
    print(frame.head())

Finished feature engineering...
df shape 100000
df_train.head() 80000
   Unnamed: 0  UserID  MovieID  Rating  Timestamp                Date  Days
0           0       0        0       5  874965758 1997-09-22 22:02:38     2
1           1       0        1       3  876893171 1997-10-15 05:26:11    25
2           2       0        2       4  878542960 1997-11-03 07:42:40    44
3           3       0        3       3  876893119 1997-10-15 05:25:19    25
4           4       0        4       3  889751712 1998-03-13 01:15:12   173
df_test.head() 20000
   Unnamed: 0  UserID  MovieID  Rating  Timestamp                Date  Days
0           0       0        5       5  887431973 1998-02-14 04:52:53   147
1           1       0        9       3  875693118 1997-10-01 08:05:18    11
2           2       0       11       5  878542960 1997-11-03 07:42:40    44
3           3       0       13       5  874965706 1997-09-22 22:01:46     2
4           4       0       16       3  875073198 1997-09-24 03:53:18    

In [None]:
from scipy.sparse import csr_matrix
import torch
import random
import numpy as np

# Seed every RNG used by the notebook so runs are repeatable.
seed = 1337
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# Prefer the first CUDA device when one is available, otherwise run on CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device('cpu')
print('Using device', device)
# New floating-point tensors default to float32.
# (torch.set_default_tensor_type is deprecated; set_default_dtype is the
# supported replacement.)
torch.set_default_dtype(torch.float32)

def _build_rating_matrix(df_ratings):
    """Build the sparse rating matrix for one split and min-max scale it.

    Args:
        df_ratings: frame with 'UserID', 'MovieID' and 'Rating' columns.

    Returns:
        (matrix, raw_dense, mn, mx) where
        * matrix    -- (USERS, MOVIES) float32 CSR matrix whose stored values
                       have been rescaled to (value - mn) / (mx - mn);
        * raw_dense -- dense tensor on `device` holding the UNSCALED ratings;
        * mn, mx    -- matrix.min() / matrix.max() taken before scaling. For a
                       sparse matrix these account for the implicit zeros, so
                       mn is 0 whenever any cell is empty.
    """
    matrix = csr_matrix(
        (df_ratings['Rating'], (df_ratings['UserID'], df_ratings['MovieID'])),
        shape=(USERS, MOVIES),
        dtype=np.float32,
    )
    # Snapshot the raw ratings before they are rescaled in place below.
    raw_dense = torch.Tensor(matrix.toarray()).to(device)
    mn = matrix.min()
    mx = matrix.max()
    # Rescale every stored entry at once; replaces a Python loop that
    # assigned into the CSR matrix one (i, j) cell at a time.
    matrix.data -= mn
    matrix.data /= (mx - mn)
    return matrix, raw_dense, mn, mx


train_data, RMSE_train, train_mn, train_mx = _build_rating_matrix(df_train)
test_data, RMSE_test, test_mn, test_mx = _build_rating_matrix(df_test)

# The original cell left `rows` / `cols` (coordinates of the test ratings)
# defined at module level; keep them in case later cells read them.
rows, cols = test_data.nonzero()


Using device cuda:0


In [None]:
def _rows_as_column_tensors(sparse, n_rows):
    """Return one column tensor per row holding that row's non-zero values.

    For each of the first `n_rows` rows of `sparse`, the non-zero entries are
    gathered, transposed into a column, cast to float32 and moved to `device`.
    """
    per_row = []
    for r in range(n_rows):
        nz_cols = sparse[r].nonzero()[1]
        column = sparse[r, nz_cols].todense().transpose().astype(np.float32)
        per_row.append(torch.from_numpy(column).to(device))
    return per_row


# Per-user view: one tensor of observed (scaled) ratings for every user.
R_train = train_data
R_train_tensor = _rows_as_column_tensors(R_train, USERS)
print(len(R_train_tensor), R_train_tensor[0].shape)
print(R_train.shape)

# Per-movie view: same construction on the transposed matrix.
R_train_T = train_data.transpose()
R_train_tensor_T = _rows_as_column_tensors(R_train_T, MOVIES)
print(len(R_train_tensor_T), R_train_tensor_T[0].shape)

943 torch.Size([135, 1])
(943, 1682)
1682 torch.Size([383, 1])


In [None]:
import numpy as np
from copy import deepcopy

import torch
from torch import nn
from torch.nn import functional as F
from torch.distributions.multivariate_normal import MultivariateNormal
from torch.autograd import Variable as V


def swish(x):
    """Swish activation: the input multiplied elementwise by its sigmoid."""
    gate = torch.sigmoid(x)
    return x * gate

def log_norm_pdf(x, mu, logvar):
    """Elementwise log-density of a Gaussian parameterised by mean and log-variance.

    Computes ``-0.5 * (logvar + log(2*pi) + (x - mu)^2 / exp(logvar))`` for
    every element; no reduction is applied.
    """
    squared_dev = (x - mu).pow(2)
    scaled_dev = squared_dev / logvar.exp()
    return -0.5 * (logvar + np.log(2 * np.pi) + scaled_dev)

def reparameterize(mu, logvar):
  """Draw ``mu + std * eps`` with ``eps ~ N(0, I)`` and ``std = exp(0.5 * logvar)``.

  The noise has the same shape, dtype and device as ``logvar``.
  """
  sigma = (0.5 * logvar).exp()
  sample = torch.randn_like(sigma)
  sample = sample * sigma
  # In-place add onto the freshly created tensor, as in the original.
  sample += mu
  return sample

def weights_init(m):
    """Initialise an ``nn.Linear`` in place; any other module is left untouched.

    Weights are drawn from N(0, 0.0001) (std = 1e-4) and every bias entry is
    set to 0.001. Intended to be passed to ``Module.apply``.
    """
    if not isinstance(m, nn.Linear):
        return
    nn.init.normal_(m.weight, 0, 0.0001)
    nn.init.constant_(m.bias, 0.001)

class Encoder(nn.Module):
    """Linear encoder producing the mean and log-variance of the latent code.

    Two independent linear heads map the (dropout-regularised) input straight
    to ``latent_dim`` outputs each.

    A deeper variant (fc1 -> ln1 -> fc2 -> ln2 -> fc3 -> ln3 with 40/20/10
    units, ReLU + LayerNorm) and an L2 input normalisation were previously
    sketched here but are disabled; as a result ``hidden_dim`` and ``eps``
    are accepted but not used.
    """

    def __init__(self, hidden_dim, latent_dim, input_dim, eps=1e-1):
        super().__init__()

        self.fc_mu = nn.Linear(input_dim, latent_dim)
        self.fc_logvar = nn.Linear(input_dim, latent_dim)

        # Re-initialise both heads with the small-weight scheme.
        for head in (self.fc_mu, self.fc_logvar):
            head.apply(weights_init)

    def forward(self, x, dropout_rate, calculate_loss=True):
        """Return ``(mu, logvar)`` for input ``x``.

        Dropout with probability ``dropout_rate`` is applied to the input
        only while the module is in training mode. ``calculate_loss`` is
        accepted but not used.
        """
        dropped = F.dropout(x, p=dropout_rate, training=self.training)
        return self.fc_mu(dropped), self.fc_logvar(dropped)

class Decoder(nn.Module):
    """Single linear layer followed by a sigmoid, mapping latent codes back to input space.

    A three-layer ReLU stack (10/20/40 units) was previously sketched ahead
    of the output layer but is disabled; ``hidden_dim`` and ``eps`` are
    accepted but not used.
    """

    def __init__(self, hidden_dim, latent_dim, input_dim, eps=1e-1):
        super().__init__()

        # Name kept as fc4 (the last layer of the disabled deeper stack).
        self.fc4 = nn.Linear(latent_dim, input_dim)
        self.fc4.apply(weights_init)

    def forward(self, z, calculate_loss=True):
        """Return ``sigmoid(fc4(z))``. ``calculate_loss`` is accepted but not used."""
        logits = self.fc4(z)
        return torch.sigmoid(logits)

class VAE(nn.Module):
    """Variational auto-encoder over rating vectors (Encoder + Decoder)."""

    def __init__(self, hidden_dim, latent_dim, input_dim):
        super(VAE, self).__init__()

        self.encoder = Encoder(hidden_dim, latent_dim, input_dim)
        self.decoder = Decoder(hidden_dim, latent_dim, input_dim)
        # Constant helper tensors created on the module-level `device`.
        # They are plain attributes (not registered buffers) and are not read
        # by forward().
        self.zr = torch.zeros(1, latent_dim).to(device=device)
        self.mones = torch.ones(1, latent_dim).to(device) * -1


    def forward(self, user_ratings, alpha=0.5, beta=None, gamma=1, dropout_rate=0.5, calculate_loss=True, n_epoch=1):
        """Encode ``user_ratings`` and optionally compute the training loss.

        Args:
            user_ratings: tensor of rating rows; entries > 0 are treated as
                observed when computing the reconstruction error.
            alpha: accepted but not used.
            beta: loss weight used when ``gamma`` is falsy.
            gamma: when truthy, the loss weight is ``gamma * 10``.
            dropout_rate: dropout probability passed to the encoder.
            calculate_loss: when True return ``(mu, loss)``, else just ``mu``.
            n_epoch: accepted but not used.

        Returns:
            ``mu`` when ``calculate_loss`` is False, otherwise
            ``(mu, negative_elbo)``.

        Raises:
            ValueError: if ``calculate_loss`` is True and both ``gamma`` and
                ``beta`` are falsy, so no loss weight can be derived
                (this used to surface as an UnboundLocalError).
        """
        if calculate_loss:
            # Resolve the weight up front so a bad configuration fails with a
            # clear message instead of an unbound local further down.
            if gamma:
                kl_weight = gamma * 10
            elif beta:
                kl_weight = beta
            else:
                raise ValueError(
                    'VAE.forward needs a non-zero gamma or beta when calculate_loss=True'
                )

        mu, logvar = self.encoder(user_ratings, dropout_rate=dropout_rate, calculate_loss=calculate_loss)
        # The sample and reconstruction are computed on both paths, even
        # though only `mu` is returned when calculate_loss is False.
        z = reparameterize(mu, logvar)
        x_pred = self.decoder(z, calculate_loss=calculate_loss)

        if calculate_loss:
            user_mask = user_ratings > 0
            # Squared reconstruction error on observed entries, summed per row.
            # Note the same weight multiplies this term and the one below.
            mll = torch.pow(x_pred * user_mask - user_ratings, 2).sum(dim=-1).mul(kl_weight).mean()
            # NOTE(review): this is the log-density of z under the encoder's
            # Gaussian only; no prior log-density is subtracted, so it is not
            # a full KL divergence -- confirm this is intended.
            kld = (log_norm_pdf(z, mu, logvar)).sum(dim=-1).mul(kl_weight).mean()
            negative_elbo = mll + kld

            return mu, negative_elbo
        else:
            return mu


class DualVAE(nn.Module):
    """Pair of VAEs -- one over user rows, one over movie rows -- whose latent means are combined by a dot product.

    ``users_VAE`` consumes the rating matrix as given (one row per user, so
    its input width is ``input_dim_movies``); ``movies_VAE`` consumes the
    transpose (input width ``input_dim_users``). ``eps`` is accepted but not
    used.
    """

    def __init__(self, hidden_dim, latent_dim, input_dim_users, input_dim_movies, eps=1e-1):
        super().__init__()
        self.movies_VAE = VAE(hidden_dim, latent_dim, input_dim_users)
        self.users_VAE = VAE(hidden_dim, latent_dim, input_dim_movies)

    def forward(self, ratings, beta=None, gamma=1, dropout_rate=0.5, calculate_loss=True, n_epoch=1):
        """Predict the rating matrix, or compute the training losses.

        Returns:
            With ``calculate_loss`` True:
            ``(total_loss, user_loss, movie_loss, mse)`` where ``mse`` is the
            mean (over every cell of the matrix) squared difference between
            the masked prediction and ``ratings``.
            Otherwise: ``y_hat = mu_users @ mu_movies.T``.
        """
        shared = dict(
            beta=beta,
            gamma=gamma,
            dropout_rate=dropout_rate,
            calculate_loss=calculate_loss,
            n_epoch=n_epoch,
        )
        # Users first, then movies: same call order on both paths.
        users_out = self.users_VAE(ratings, **shared)
        movies_out = self.movies_VAE(ratings.T, **shared)

        if not calculate_loss:
            return users_out @ movies_out.T

        mu_users, user_loss = users_out
        mu_movies, movie_loss = movies_out
        y_hat = mu_users @ mu_movies.T

        # Only cells with a rating > 0 keep their prediction.
        observed = ratings > 0
        mse = torch.pow(y_hat * observed - ratings, 2).mean()
        total_loss = user_loss + movie_loss + mse
        return total_loss, user_loss, movie_loss, mse

In [None]:
import numpy as np

import torch
from torch import optim

import random
from copy import deepcopy
from tqdm import trange

# Hyper-parameters and run settings for the experiment.
args = dict(
    dataset='',
    hidden_dim=40,
    latent_dim=5,
    # One "batch" is sized to the number of stored training ratings.
    batch_size=train_data.count_nonzero(),
    beta=None,
    gamma=1,
    lr=1e-6,
    n_epochs=50000,
    dropout_rate=0.5,
    # Progress is printed every `print_step` epochs.
    print_step=100,
    n_enc_epochs=3,
    n_dec_epochs=1,
    not_alternating=True,
)

# Per-step loss histories, appended to by the training routine.
user_losses, movie_losses, mse_losses = [], [], []


def generate(batch_size, device, data_in, data_out=None, shuffle=False, samples_perc_per_epoch=1):
    """Yield exactly one ``Batch`` wrapping the whole of ``data_in`` / ``data_out``.

    ``batch_size``, ``shuffle`` and ``samples_perc_per_epoch`` are accepted
    but not used: no splitting, shuffling or sub-sampling takes place.
    """
    whole_dataset = Batch(device, [], data_in, data_out)
    yield whole_dataset


class Batch:
    """Holds the input (and optional held-out) rating matrices for one batch.

    Args:
        device: torch device that dense tensors are moved to.
        idx: stored as given; not used to slice the data.
        data_in: sparse input rating matrix (must provide ``toarray()``).
        data_out: optional sparse held-out rating matrix.
    """

    def __init__(self, device, idx, data_in, data_out=None):
        self._device = device
        self._idx = idx
        self._data_in = data_in
        self._data_out = data_out

    def get_ratings(self, is_out=False):
        """Return the input matrix, or the held-out matrix when ``is_out`` is True.

        Previously ``is_out`` was ignored and the input matrix was always
        returned.

        Raises:
            ValueError: if ``is_out`` is True but no ``data_out`` was given.
        """
        if not is_out:
            return self._data_in
        if self._data_out is None:
            raise ValueError('is_out=True requires data_out to be provided')
        return self._data_out

    def get_ratings_to_dev(self, is_out=False):
        """Return the selected matrix as a dense tensor on the batch's device."""
        ratings = self.get_ratings(is_out)
        return torch.Tensor(ratings.toarray()).to(self._device)


def evaluate(model, data_in, data_out, RMSE_data, metrics, data_mn, data_mx, samples_perc_per_epoch=1, batch_size=500):
    """Score ``model`` on ``data_in`` with each metric in ``metrics``.

    The model is switched to eval mode (and left there), fed the dense form
    of ``data_in``, and its output is mapped back to the original rating
    scale via ``pred * (data_mx - data_mn) + data_mn`` before being compared
    with ``RMSE_data``.

    Args:
        model: callable as ``model(ratings, calculate_loss=False)``.
        data_in: sparse matrix given to the model; its ``count_nonzero()``
            is the divisor of the final score.
        data_out: forwarded to ``generate``.
        RMSE_data: dense target tensor passed to every metric.
        metrics: list of dicts with a ``'metric'`` callable; deep-copied, so
            the caller's list is not modified.
        data_mn, data_mx: bounds used to undo the min-max scaling.
        samples_perc_per_epoch, batch_size: forwarded to ``generate``.

    Returns:
        List with one score per metric:
        ``sqrt(sum(metric values) / data_in.count_nonzero())``.
    """
    metrics = deepcopy(metrics)
    model.eval()

    for m in metrics:
        m['score'] = []

    # Pure inference: disable autograd so no graph is built or kept alive
    # for the full-matrix forward pass.
    with torch.no_grad():
        for batch in generate(batch_size=batch_size,
                              device=device,
                              data_in=data_in,
                              data_out=data_out,
                              samples_perc_per_epoch=samples_perc_per_epoch
                             ):

            ratings = batch.get_ratings_to_dev()
            ratings_pred = model(ratings, calculate_loss=False) * (data_mx - data_mn) + data_mn
            for m in metrics:
                x = m['metric'](ratings_pred, RMSE_data)
                m['score'].append(x.cpu().detach())

    for m in metrics:
        m['score'] = np.sqrt(np.sum(m['score']) / data_in.count_nonzero())

    return [x['score'] for x in metrics]


def run(model, opts, train_data, batch_size, n_epoch, n_epochs, beta, gamma, dropout_rate):
    """Train ``model`` for ``n_epochs`` passes over ``train_data``.

    Each pass takes the batches produced by ``generate``, computes the
    model's losses, back-propagates the total loss and steps every optimizer
    in ``opts``. The per-step user / movie / mse losses are appended to the
    module-level ``user_losses``, ``movie_losses`` and ``mse_losses`` lists.

    Args:
        model: module returning ``(loss, user_loss, movie_loss, mse)``.
        opts: iterable of optimizers to zero and step.
        train_data: sparse training matrix.
        batch_size: forwarded to ``generate``.
        n_epoch: index of the current outer epoch, forwarded to the model.
        n_epochs: number of passes to run in this call.
        beta, gamma, dropout_rate: forwarded to the model.
    """
    model.train()
    for epoch in range(n_epochs):
        for batch in generate(batch_size=batch_size, device=device, data_in=train_data, shuffle=False):
            ratings = batch.get_ratings_to_dev()

            for optimizer in opts:
                optimizer.zero_grad()

            loss, user_loss, movie_loss, mse = model(ratings, beta=beta, gamma=gamma, dropout_rate=dropout_rate, n_epoch=n_epoch)
            loss.backward()

            # Store detached copies: keeping the live loss tensors would hold
            # a reference to each step's autograd graph for the whole run.
            user_losses.append(user_loss.detach())
            movie_losses.append(movie_loss.detach())
            mse_losses.append(mse.detach())

            for optimizer in opts:
                optimizer.step()


# Constructor arguments for the dual model; the input widths come from the
# shape of the training matrix.
model_kwargs = dict(
    hidden_dim=args['hidden_dim'],
    latent_dim=args['latent_dim'],
    input_dim_users=train_data.shape[0],
    input_dim_movies=train_data.shape[1],
)
metrics = [dict(metric=RMSE)]

best_ndcg = -np.inf
train_scores = []
valid_scores = []

# Instantiation order is kept (model, then model_best) so parameter
# initialisation draws from the RNG in the same sequence.
model = DualVAE(**model_kwargs).to(device)
batch = Batch(device, [], train_data, train_data)
ratings = batch.get_ratings_to_dev()
model_best = DualVAE(**model_kwargs).to(device)

# Keyword arguments shared by every call to run().
learning_kwargs = dict(
    model=model,
    train_data=train_data,
    batch_size=args['batch_size'],
    beta=args['beta'],
    gamma=args['gamma'],
)

import time

# Hand the optimizer the parameter iterator directly. Wrapping it in set()
# gave the optimizer an unordered collection, so the parameter order inside
# the optimizer (and hence its state_dict) could differ between runs.
optimizer = optim.Adam(model.parameters(), lr=args['lr'], weight_decay=1)

for epoch in range(args['n_epochs']):
    # Only logging epochs are timed and reported.
    is_logging_epoch = epoch % args['print_step'] == 0
    if is_logging_epoch:
        t = time.time()

    # One training pass over the data per outer epoch.
    run(opts=[optimizer], n_epoch=epoch, n_epochs=1, dropout_rate=args['dropout_rate'], **learning_kwargs)

    # Score on both splits after every epoch.
    train_scores.append(
        evaluate(model, train_data, train_data, RMSE_train, metrics, train_mn, train_mx, 1, batch_size=args['batch_size'])[0]
    )
    valid_scores.append(
        evaluate(model, test_data, test_data, RMSE_test, metrics, test_mn, test_mx, 1, batch_size=args['batch_size'])[0]
    )
    if is_logging_epoch:
      print('Epoch:', epoch, 'train_RMSE=', f'{train_scores[-1]:.4f}', 'test_RMSE', f'{valid_scores[-1]:.4f}')
      print('Time:', time.time() - t)

Epoch: 0 train_RMSE= 3.7014 test_RMSE 3.7193
Time: 0.2701749801635742
Epoch: 100 train_RMSE= 3.7010 test_RMSE 3.7192
Time: 0.014446735382080078
Epoch: 200 train_RMSE= 3.6955 test_RMSE 3.7185
Time: 0.01509857177734375
Epoch: 300 train_RMSE= 3.6769 test_RMSE 3.7160
Time: 0.014678716659545898
Epoch: 400 train_RMSE= 3.6365 test_RMSE 3.7106
Time: 0.014102697372436523
Epoch: 500 train_RMSE= 3.5740 test_RMSE 3.7021
Time: 0.014379501342773438
Epoch: 600 train_RMSE= 3.4926 test_RMSE 3.6907
Time: 0.014168977737426758
Epoch: 700 train_RMSE= 3.3964 test_RMSE 3.6768
Time: 0.014819622039794922
Epoch: 800 train_RMSE= 3.2897 test_RMSE 3.6607
Time: 0.014062881469726562
Epoch: 900 train_RMSE= 3.1766 test_RMSE 3.6426
Time: 0.014884233474731445
Epoch: 1000 train_RMSE= 3.0616 test_RMSE 3.6229
Time: 0.014086723327636719


KeyboardInterrupt: ignored