### 0. Prerequisites

In [1]:
import os 
import time
import torch
import optuna
import joblib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from scipy import stats
from tqdm import tqdm
from dotenv import load_dotenv
from torch import nn
from torch.optim import lr_scheduler
from collections import OrderedDict

from src.metrics import pearson_metric
from src.torch_models import MLPAE
from src.data import Dataset, load_data

In [2]:
# Load key=value pairs from a local .env file into the process environment
# (python-dotenv). The call's return value is what the cell displays.
load_dotenv()

True

In [3]:
# Run-wide configuration.
# Fall back to the CPU when no CUDA device is present so the notebook still
# runs end-to-end on machines without a GPU.
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"
EPOCHS = 150          # number of training epochs; also used as the cosine-annealing period
EXPERIMENT = "VAE"    # sub-directory name under "weights/" for this run's checkpoints

# Seed the RNGs so weight initialisation and latent sampling are repeatable
# after "Restart Kernel -> Run All".
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

### 1. Data preparation

In [4]:
# load_data returns a pair; only the first element is kept as the training
# loader and the second is discarded.
# NOTE(review): with split_data=False the second element is presumably an
# unused/empty validation loader -- confirm against src.data.load_data.
trainloader, _ = load_data(use_feather=True, split_data=False)

Loading took 6.31 seconds


### 2. Building the model

In [5]:
class BlockAE(nn.Module):
    """One fully connected layer followed by an activation.

    Args:
        input_dim: number of input features of the linear layer.
        output_dim: number of output features of the linear layer.
        activation: zero-argument callable (e.g. ``nn.ReLU``) that returns the
            activation module; it is instantiated once per block.
    """

    def __init__(self, input_dim, output_dim, activation):
        super().__init__()
        linear = nn.Linear(input_dim, output_dim)
        # Kept under the attribute name ``main`` so state_dict keys
        # ("main.0.weight", "main.0.bias") stay stable.
        self.main = nn.Sequential(linear, activation())

    def forward(self, x):
        """Apply the linear layer and then the activation to ``x``."""
        return self.main(x)

In [6]:
class VAE(nn.Module):
    """Fully connected variational autoencoder built from ``BlockAE`` layers.

    The encoder is a stack of ``BlockAE`` layers followed by a plain linear
    head that emits ``2 * dim`` values per sample. ``forward`` reshapes that
    output to ``(-1, 2, dim)`` and reads slot 0 as the latent mean and slot 1
    as ``sigma``. Although it is called ``sigma``, the value is used as a
    log-variance: the standard deviation is obtained as ``exp(0.5 * sigma)``.

    The decoder starts with a linear projection from the latent space and then
    mirrors the encoder widths in reverse order.

    NOTE(review): every decoder ``BlockAE`` -- including the last one -- ends
    with the activation, so with the default ``nn.ReLU`` reconstructions are
    non-negative; there is also no activation between the latent projection
    and the first decoder block. Confirm both are intended for the input data.

    Args:
        layers: encoder layer widths, input width first.
        dim: size of the latent vector.
        activation: zero-argument callable producing the activation module
            used inside every ``BlockAE``.
    """

    def __init__(self, layers, dim, activation=nn.ReLU):
        super().__init__()
        self.layers = layers
        self.dim = dim
        self.activation = activation

        # Encoder is built before the decoder so parameter creation order
        # (and therefore seeded initialisation) is unchanged.
        self.encoder = self.__build_encoder()
        self.decoder = self.__build_decoder()

    def __build_encoder(self):
        """Return the encoder: BlockAE stack plus a linear head of width 2*dim."""
        widths = self.layers
        stages = [
            BlockAE(n_in, n_out, self.activation)
            for n_in, n_out in zip(widths[:-1], widths[1:])
        ]
        stages.append(nn.Linear(widths[-1], self.dim * 2))
        return nn.Sequential(*stages)

    def __build_decoder(self):
        """Return the decoder: latent projection plus the reversed BlockAE stack."""
        widths = self.layers[::-1]
        latent_proj = nn.Linear(self.dim, widths[0])
        stages = [
            BlockAE(n_in, n_out, self.activation)
            for n_in, n_out in zip(widths[:-1], widths[1:])
        ]
        return nn.Sequential(latent_proj, *stages)

    def reparametrize(self, mu, sigma):
        """Sample a latent vector in training mode; return ``mu`` otherwise.

        Args:
            mu: latent means.
            sigma: latent log-variances (see class docstring).
        """
        if not self.training:
            return mu
        std = torch.exp(0.5 * sigma)
        noise = torch.randn_like(std)
        return noise * std + mu

    def forward(self, x):
        """Encode ``x``, sample a latent and decode it.

        Returns:
            Tuple ``(reconstruction, mu, sigma)``.
        """
        moments = self.encoder(x).view(-1, 2, self.dim)
        mu = moments[:, 0]
        sigma = moments[:, 1]
        latent = self.reparametrize(mu, sigma)
        return self.decoder(latent), mu, sigma


In [7]:
def vae_loss(x_hat, x, mu, sigma, beta=1):
    """Return the VAE objective: summed squared error plus ``beta`` times KL.

    Args:
        x_hat: reconstruction produced by the model.
        x: original input.
        mu: latent means.
        sigma: latent log-variances (it enters the KL term as ``exp(sigma)``).
        beta: weight of the KL term.

    Returns:
        Scalar tensor ``sum((x_hat - x)^2) + beta * KL``, where
        ``KL = 0.5 * sum(exp(sigma) - sigma - 1 + mu^2)``.
    """
    reconstruction = nn.functional.mse_loss(x_hat, x, reduction='sum')
    kl_terms = torch.exp(sigma) - sigma - 1 + mu ** 2
    divergence = 0.5 * kl_terms.sum()
    return reconstruction + beta * divergence

In [8]:
def train(model, criterion, loader, optimizer, investment_id_dropout=0.01, device='cpu'):
    """Run one training epoch and return the loss averaged over the dataset.

    Each batch ``(_x, _y)`` from ``loader`` is turned into a single input by
    appending ``_y`` as an extra last column of ``_x``; the model is expected
    to return ``(reconstruction, mu, sigma)`` for that input.

    Args:
        model: module to train; moved to ``device`` and put in train mode.
        criterion: callable ``(x_hat, x, mu, sigma) -> scalar loss tensor``.
        loader: iterable of ``(_x, _y)`` batches exposing a ``dataset``
            attribute with a length.
        optimizer: optimizer stepped once per batch.
        investment_id_dropout: accepted for interface compatibility; not used
            by this function.
        device: device on which the forward/backward pass runs.

    Returns:
        Sum of the per-batch loss values divided by ``len(loader.dataset)``.
    """
    model.to(device)
    model.train()

    running_total = 0.0
    for features, target in loader:
        optimizer.zero_grad()

        # Append the target as an extra column so it is reconstructed too.
        batch = torch.cat((features, target.unsqueeze(1)), dim=1).to(device)
        reconstruction, mu, sigma = model(batch)

        batch_loss = criterion(reconstruction, batch, mu, sigma)
        batch_loss.backward()
        optimizer.step()

        running_total += batch_loss.item()
    return running_total / len(loader.dataset)

In [9]:
# Encoder widths 302 -> 256 -> 128 -> 64 with a 20-dimensional latent space.
# The input width must equal the feature width plus one, because train()
# appends the target as an extra column before feeding the model.
model = VAE(layers=[302, 256, 128, 64], dim=20).to(DEVICE)

### 3. Training the model

In [10]:
# Directory that receives this experiment's checkpoints.
experiment_dir = os.path.join("weights", EXPERIMENT)
# exist_ok=True makes the cell idempotent and avoids the check-then-create race.
os.makedirs(experiment_dir, exist_ok=True)

In [11]:
# Loss, optimizer and learning-rate schedule for the training run.
criterion = vae_loss
optimizer = torch.optim.AdamW(params=model.parameters(), lr=1e-3, weight_decay=5e-4)
# Cosine annealing with a period of EPOCHS scheduler steps.
scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)

In [None]:
# Train for EPOCHS epochs, stepping the LR schedule once per epoch and writing
# a checkpoint every time the epoch loss matches or beats the best seen so far.
losses = []
# Track the best loss explicitly instead of rescanning `losses` each epoch;
# this also keeps a NaN first-epoch loss from blocking every later checkpoint.
best_loss = float("inf")
for epoch in range(EPOCHS):
    start_execution = time.time()
    train_loss = train(model, criterion, trainloader, optimizer, device=DEVICE)
    scheduler.step()
    print(f"Epoch: {epoch+1:02d} ({time.time()-start_execution:.1f} s.) | Train: {train_loss:.5f} |")

    losses.append(train_loss)
    if train_loss <= best_loss:
        best_loss = train_loss
        # The file name uses the 0-based epoch index (the log line above is 1-based).
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            # Extra key so a resumed run can restore the LR schedule as well.
            'scheduler_state_dict': scheduler.state_dict(),
            'loss': train_loss,
        }, os.path.join(experiment_dir, f"{epoch}.pt"))

In [33]:
# Rebuild the architecture on the CPU and restore trained weights for inference.
weights_dir = os.path.join("weights", EXPERIMENT)
# map_location="cpu": checkpoints are written from DEVICE (possibly a GPU), but
# `inferenced` lives on the CPU; without it the load fails on CPU-only hosts.
# NOTE: torch.load unpickles the file -- only load checkpoints you trust.
# NOTE(review): "1.pt" is hard-coded; checkpoint files are named by 0-based epoch
# index and only written on improvement, so this is the second epoch's weights
# (if that epoch improved), not necessarily the best checkpoint -- confirm.
checkpoint = torch.load(os.path.join(weights_dir, "1.pt"), map_location="cpu")
inferenced = VAE(layers=[302, 256, 128, 64], dim=20)
inferenced.load_state_dict(checkpoint['model_state_dict'])
# eval() makes reparametrize() return the mean instead of sampling; as the
# last expression it also displays the module structure.
inferenced.eval()

VAE(
  (encoder): Sequential(
    (0): BlockAE(
      (main): Sequential(
        (0): Linear(in_features=302, out_features=256, bias=True)
        (1): ReLU()
      )
    )
    (1): BlockAE(
      (main): Sequential(
        (0): Linear(in_features=256, out_features=128, bias=True)
        (1): ReLU()
      )
    )
    (2): BlockAE(
      (main): Sequential(
        (0): Linear(in_features=128, out_features=64, bias=True)
        (1): ReLU()
      )
    )
    (3): Linear(in_features=64, out_features=40, bias=True)
  )
  (decoder): Sequential(
    (0): Linear(in_features=20, out_features=64, bias=True)
    (1): BlockAE(
      (main): Sequential(
        (0): Linear(in_features=64, out_features=128, bias=True)
        (1): ReLU()
      )
    )
    (2): BlockAE(
      (main): Sequential(
        (0): Linear(in_features=128, out_features=256, bias=True)
        (1): ReLU()
      )
    )
    (3): BlockAE(
      (main): Sequential(
        (0): Linear(in_features=256, out_features=302, bias

In [None]:
# Decode one latent vector drawn from the standard-normal prior.
# The latent width is read from the model instead of repeating the literal 20,
# and no_grad() avoids building an autograd graph for pure inference.
with torch.no_grad():
    z = torch.randn((1, inferenced.dim))
    sample = inferenced.decoder(z)
print(sample)