# Testing the latent dimension of the 40-D LHC dataset

In [1]:
%matplotlib inline

import sys
import numpy as np
import matplotlib
from matplotlib import pyplot as plt
import corner
import logging
import torch
from torch import nn

# Root logger setup: show INFO and above as compact, column-aligned lines --
# HH:MM timestamp (5 chars), logger name (padded/truncated to 30 chars),
# level name (padded/truncated to 7 chars), then the message.
logging.basicConfig(
    level=logging.INFO,
    datefmt="%H:%M",
    format="%(asctime)-5.5s %(name)-30.30s %(levelname)-7.7s %(message)s",
)

sys.path.append("../../")
from experiments.datasets import WBFLoader, WBF40DLoader
from experiments.training import Nu
import plot_settings as ps


In [2]:
# Run the setup hook of the project's `plot_settings` module (imported as `ps`).
# NOTE(review): presumably this configures matplotlib styling for the figures
# below -- confirm against plot_settings.setup.
ps.setup()

## Autoencoder

In [45]:
def test_latent_dim(dim, epochs=50, lr=1.e-2, batchsize=200, dataset_dir="../data/samples/lhc40d"):
    """Train an autoencoder with a `dim`-wide bottleneck and return its reconstruction MSE.

    The network is a fully connected 40 -> 25 -> dim -> 25 -> 40 autoencoder with
    ReLU activations (including one directly after the bottleneck layer). It is
    trained with AdamW and a cosine-annealed learning rate on the training split
    returned by `WBF40DLoader`, then evaluated on the non-training split.

    Parameters
    ----------
    dim : int
        Width of the bottleneck (latent) layer.
    epochs : int, optional
        Number of training epochs; also used as `T_max` of the cosine schedule.
    lr : float, optional
        Initial learning rate passed to AdamW.
    batchsize : int, optional
        Mini-batch size used during training.
    dataset_dir : str, optional
        Directory handed to `WBF40DLoader.load_dataset`.

    Returns
    -------
    float
        Mean over evaluation batches of the per-batch MSE between inputs and
        reconstructions. (If the last batch is smaller than the others, this
        is not exactly the per-sample mean.)
    """
    print(f"Starting AE training with latent dim {dim}")

    autoencoder = nn.Sequential(
        nn.Linear(40, 25),
        nn.ReLU(),
        nn.Linear(25, dim),
        nn.ReLU(),
        nn.Linear(dim, 25),
        nn.ReLU(),
        nn.Linear(25, 40)
    )

    train_set = WBF40DLoader().load_dataset(train=True, dataset_dir=dataset_dir)
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batchsize, shuffle=True, num_workers=4)
    optimizer = torch.optim.AdamW(autoencoder.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)
    criterion = nn.MSELoss()

    for epoch in range(epochs):
        total_loss = 0.
        # Each dataset item is a pair; only the first element (the features) is used.
        for x, _ in train_loader:
            x_reco = autoencoder(x)
            loss = criterion(x_reco, x)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        # The cosine schedule is defined per epoch (T_max=epochs), so advance it
        # once per epoch, after the optimizer steps, and without the deprecated
        # `epoch` argument.
        scheduler.step()

        print('Epoch {}: reco error = {:.4f}'.format(epoch+1, total_loss / len(train_loader)))

    autoencoder.eval()
    # Evaluate on data not used for training, so the reported number is a real
    # test error. Assumes `train=False` selects the held-out split -- confirm
    # against WBF40DLoader.load_dataset.
    test_set = WBF40DLoader().load_dataset(train=False, dataset_dir=dataset_dir)
    test_loader = torch.utils.data.DataLoader(test_set, batch_size=1000)
    test_loss = 0.
    # No gradients are needed for evaluation; skip building the autograd graph.
    with torch.no_grad():
        for x, _ in test_loader:
            x_reco = autoencoder(x)
            test_loss += criterion(x_reco, x).item()

    mean_test_loss = test_loss / len(test_loader)
    print('Test reco error = {:.4f}'.format(mean_test_loss))
    print('')

    return mean_test_loss
    

In [None]:
# Bottleneck widths to scan: 20, 18, ..., 2 (descending in steps of two).
dims = list(range(20, 0, -2))

# One call to test_latent_dim per width; collect the value each call returns.
mses = list(map(test_latent_dim, dims))

Starting AE training with latent dim 20
Epoch 1: reco error = 0.0405
Epoch 2: reco error = 0.0229
Epoch 3: reco error = 0.0197
Epoch 4: reco error = 0.0194
Epoch 5: reco error = 0.0192
Epoch 6: reco error = 0.0190
