In [1]:
import sys
import os
repo_dir = os.path.dirname(os.getcwd())
sys.path.append(repo_dir)

In [2]:
from utils.reproducibility import seed_everything
from models.mixtures import BernoulliMixture
from torch.utils.data import DataLoader
from utils.datasets import load_debd
from tqdm import tqdm
import numpy as np
import torch
import copy

device = 'cuda' if torch.cuda.is_available() else 'cpu'
gpus = None if device == 'cpu' else 1
print(device)

cuda


In [8]:
dataset_name = 'tmovie'
batch_size = 128

In [9]:
train, valid, test = load_debd(dataset_name)
train_loader = DataLoader(train, batch_size=batch_size, shuffle=True, drop_last=True)
valid_loader = DataLoader(valid, batch_size=batch_size, drop_last=True)
print(dataset_name, train.shape, valid.shape, test.shape)

tmovie (4524, 500) (1002, 500) (591, 500)


## Instantiate mixture

In [10]:
seed_everything(42)
n_components = 1024
model = BernoulliMixture(
    logits_p=torch.randn(n_components, train.shape[1]),
    logits_w=torch.full((n_components,), 1 / n_components),
    learn_w=False
).to(device)
opt = torch.optim.Adam(params=model.parameters(), lr=1e-3, weight_decay=1e-5)

## Train

In [17]:
max_num_epochs = 600 # 150
early_stopping_epochs = 30
warmup = 30

best_model = model
best_loss = np.inf
e = 0

for epoch in range(max_num_epochs):
    model.train()
    train_loss_avg = []
    for x in train_loader:
        opt.zero_grad()
        loss = -model(x.to(device)).mean()
        loss.backward()
        opt.step()
        train_loss_avg.append(loss.item())

    model.eval()
    valid_loss_avg = []
    with torch.no_grad():
        for x in valid_loader:
            loss = -model(x.to(device)).mean()
            valid_loss_avg.append(loss.item())
    val_loss_epoch = np.mean(valid_loss_avg)
    
    # early-stopping
    if val_loss_epoch < best_loss:
        e = 0
        best_loss = val_loss_epoch
        best_model = copy.deepcopy(model)
        best_model_epoch = epoch
    else:
        e += 1
        if epoch < warmup:
            e = 0
        if e > early_stopping_epochs:
            break

    print('Epoch [%d / %d] Training loss: %f Validation Loss: %f e: %d' % 
          (epoch + 1, max_num_epochs, np.mean(train_loss_avg), val_loss_epoch, e))

print('Best model epoch: ', best_model_epoch)

Epoch [1 / 600] Training loss: 65.528224 Validation Loss: 71.643715 e: 0
Epoch [2 / 600] Training loss: 65.508934 Validation Loss: 71.635198 e: 0
Epoch [3 / 600] Training loss: 65.514985 Validation Loss: 71.625743 e: 0
Epoch [4 / 600] Training loss: 65.450450 Validation Loss: 71.617835 e: 0
Epoch [5 / 600] Training loss: 65.362695 Validation Loss: 71.607473 e: 0
Epoch [6 / 600] Training loss: 65.393344 Validation Loss: 71.598733 e: 0
Epoch [7 / 600] Training loss: 65.413456 Validation Loss: 71.588818 e: 0
Epoch [8 / 600] Training loss: 65.414789 Validation Loss: 71.579820 e: 0
Epoch [9 / 600] Training loss: 65.330715 Validation Loss: 71.570221 e: 0
Epoch [10 / 600] Training loss: 65.436517 Validation Loss: 71.559003 e: 0
Epoch [11 / 600] Training loss: 65.344182 Validation Loss: 71.550657 e: 0
Epoch [12 / 600] Training loss: 65.242291 Validation Loss: 71.539564 e: 0
Epoch [13 / 600] Training loss: 65.409569 Validation Loss: 71.530572 e: 0
Epoch [14 / 600] Training loss: 65.424750 Valid

## Test

In [18]:
# if you use a high number number of bins then you may want to decrease the batch size
test_loader = DataLoader(test, batch_size=16, drop_last=False)

test_ll = []
model.eval()
for x in tqdm(test_loader):
    test_ll.extend(list(model(x.to(device)).detach().cpu().numpy()))
assert len(test_ll) == test.shape[0]
print('Test LL: %.2f' % np.mean(test_ll))

100%|██████████| 37/37 [00:00<00:00, 551.87it/s]

Test LL: -61.15



