# Jet Compression using an Autoencoder

In [1]:
import sys
BIN = '../../'
sys.path.append(BIN)
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset

from my_nn_modules import AE_big, get_data, fit

import my_matplotlib_style as ms
mpl.rc_file(BIN + 'my_matplotlib_rcparams')

## Loading and preparing the data

In [2]:
# Pick the compute device: CUDA when it is available, unless the CPU is forced.
force_cpu = False

use_cuda = (not force_cpu) and torch.cuda.is_available()
device = torch.device('cuda' if use_cuda else 'cpu')
print('Using device:', device)

Using device: cpu


In [3]:
# Read the pre-processed train and test DataFrames from disk.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
data_dir = BIN + 'processed_data/'
train, test = (pd.read_pickle(data_dir + fname) for fname in ('train.pkl', 'test.pkl'))

train.head(10)

Unnamed: 0,pT,eta,phi,E
1367643,300.752869,0.118391,-1.460827,303.719818
1739612,79.819145,4.535507,-1.185196,3722.832764
546066,220.472305,1.314261,-0.943992,440.19519
213212,74.533775,0.010658,-0.424795,74.770332
150544,220.668121,2.43291,-2.308348,1266.68103
574837,159.841782,0.731125,-1.150598,207.133514
1171687,306.125305,0.702473,0.437615,385.713013
1608254,189.011673,-1.387443,0.490634,403.126709
1618830,194.996719,-1.022815,-1.560437,306.534515
637947,186.887146,-0.621282,-0.465523,226.002701


## Normalizing and splitting into train and test datasets

In [4]:
# Normalize using statistics computed on the training set only. The same
# train mean/std are applied to the test set so that no information from the
# test data enters the preprocessing.
train_mean = train.mean()
train_std = train.std()

# Leave the raw `train`/`test` frames untouched: overwriting them in place
# makes this cell non-idempotent (a second run would recompute mean ~0 and
# std ~1 from already-normalized data, which breaks the un-normalization
# done with train_mean/train_std in the evaluation cells).
train_x = (train - train_mean) / train_std
test_x = (test - train_mean) / train_std
train_y = train_x  # y = x since we are building an AE
test_y = test_x

train_ds = TensorDataset(torch.tensor(train_x.values), torch.tensor(train_y.values))
valid_ds = TensorDataset(torch.tensor(test_x.values), torch.tensor(test_y.values))

## Training

In [5]:
def mse_loss(input, target, size_average=None, reduce=None, reduction='mean',
             weighted_col=2, col_weight=2.4):
    r"""mse_loss(input, target, size_average=None, reduce=None, reduction='mean') -> Tensor

    Measures the element-wise squared error, with one feature column
    up-weighted before the reduction.

    See :class:`~torch.nn.MSELoss` for the meaning of ``reduction`` and of the
    deprecated ``size_average`` / ``reduce`` arguments.

    Args:
        input: predicted tensor; features are indexed along the last dimension.
        target: reference tensor, expected to have the same size as ``input``.
        size_average: deprecated, see :class:`~torch.nn.MSELoss`.
        reduce: deprecated, see :class:`~torch.nn.MSELoss`.
        reduction: ``'none'`` returns the weighted per-element errors,
            ``'mean'`` their mean; any other value returns their sum.
        weighted_col: index (along the last dimension) of the feature whose
            squared error is scaled. Defaults to 2.
        col_weight: scale factor applied to that feature. Defaults to 2.4.

    Returns:
        Tensor: the weighted squared error, reduced according to ``reduction``.
    """
    if target.size() != input.size():
        # Local import: the notebook does not import `warnings` at the top.
        import warnings
        warnings.warn("Using a target size ({}) that is different to the input size ({}). "
                      "This will likely lead to incorrect results due to broadcasting. "
                      "Please ensure they have the same size.".format(target.size(), input.size()),
                      stacklevel=2)
    if size_average is not None or reduce is not None:
        # Local import: `_Reduction` is a private torch helper that is not in
        # scope in this notebook.
        from torch.nn import _reduction as _Reduction
        reduction = _Reduction.legacy_get_string(size_average, reduce)

    ret = (input - target) ** 2
    # ones_like keeps the dtype and device of `ret`. torch.full(size, 1) would
    # create a CPU tensor whose integer dtype truncates the weight to 2.
    factor = torch.ones_like(ret)
    # `...` indexes the last dimension, so single samples (1-D) work as well.
    factor[..., weighted_col] = col_weight
    ret = ret * factor
    if reduction != 'none':
        ret = torch.mean(ret) if reduction == 'mean' else torch.sum(ret)

    return ret

In [6]:
bs = 256  # batch size
train_dl, valid_dl = get_data(train_ds, valid_ds, bs)
# Train with the custom mse_loss. The former `loss_func = nn.MSELoss()` line
# was a dead assignment that was overwritten immediately, so it is removed.
loss_func = mse_loss

In [7]:
class AE_2D_v100(nn.Module):
    """Fully connected autoencoder with a 2-dimensional latent space.

    Encoder: n_features -> 100 -> 100 -> 100 -> 2.
    Decoder: 2 -> 100 -> 100 -> 100 -> n_features.
    Tanh is applied after every hidden layer and to the latent code on entry
    to the decoder; the encoder and decoder output layers are linear.
    """

    def __init__(self, n_features=4):
        super().__init__()
        # Encoder layers
        self.en1 = nn.Linear(n_features, 100)
        self.en2 = nn.Linear(100, 100)
        self.en3 = nn.Linear(100, 100)
        self.en4 = nn.Linear(100, 2)
        # Decoder layers
        self.de1 = nn.Linear(2, 100)
        self.de2 = nn.Linear(100, 100)
        self.de3 = nn.Linear(100, 100)
        self.de4 = nn.Linear(100, n_features)
        self.tanh = nn.Tanh()

    def encode(self, x):
        """Map inputs to the latent code (no activation on the output)."""
        hidden = x
        for layer in (self.en1, self.en2, self.en3):
            hidden = self.tanh(layer(hidden))
        return self.en4(hidden)

    def decode(self, x):
        """Map a latent code back to feature space (tanh is applied to the code first)."""
        hidden = self.tanh(x)
        for layer in (self.de1, self.de2, self.de3):
            hidden = self.tanh(layer(hidden))
        return self.de4(hidden)

    def forward(self, x):
        """Encode then decode ``x``."""
        return self.decode(self.encode(x))

In [8]:
# Number of input features = number of DataFrame columns.
# (`train.loc[0]` is a label-based lookup; the index shown by train.head() is
# not 0..N-1, so it raises KeyError whenever label 0 is not in the train split.)
n_features = train.shape[1]
model = AE_2D_v100(n_features=n_features)

In [9]:
# Training: walk through a fixed learning-rate schedule, creating a new Adam
# optimizer for each stage and running the paired number of epochs.
#epochs_list = [7, 5, 3, 2, 2]
epochs_list = [40, 40, 20, 20, 20]
lrs = [1e-3, 3e-4, 1e-4, 3e-5, 1e-5]
for epochs, lr in zip(epochs_list, lrs):
    print('Setting learning rate to %.1e' % lr)
    opt = optim.Adam(model.parameters(), lr=lr)
    #opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9, nesterov=True)
    #opt = optim.RMSprop(model.parameters(), lr=lr, momentum=0.9)
    fit(epochs, model, loss_func, opt, train_dl, valid_dl, device)

Setting learning rate to 1.0e-03
Epoch 0: Validation loss = 0.10715715437662698 Time: 0:02:07.825716
Epoch 1: Validation loss = 0.08520913226633017 Time: 0:03:44.623826
Epoch 2: Validation loss = 0.07719197941080594 Time: 0:05:21.820441
Epoch 3: Validation loss = 0.06980059940911831 Time: 0:06:58.180364
Epoch 4: Validation loss = 0.07601545871071194 Time: 0:08:38.674303
Epoch 5: Validation loss = 0.06799713101155862 Time: 0:10:21.265588
Epoch 6: Validation loss = 0.05873262408653547 Time: 0:11:59.064033
Epoch 7: Validation loss = 0.052787435395288816 Time: 0:13:44.040238
Epoch 8: Validation loss = 0.04665405955833874 Time: 0:15:24.272842
Epoch 9: Validation loss = 0.0476494089195543 Time: 0:17:11.642743
Epoch 10: Validation loss = 0.06562708492934717 Time: 0:18:53.802103
Epoch 11: Validation loss = 0.050778616915632346 Time: 0:20:25.645725
Epoch 12: Validation loss = 0.06679719881466906 Time: 0:22:02.698729
Epoch 13: Validation loss = 0.06097566848592058 Time: 0:23:42.195912
Epoch 14: 

Epoch 17: Validation loss = 0.028826525231272236 Time: 0:30:12.715062
Epoch 18: Validation loss = 0.028677206208826425 Time: 0:31:57.320332
Epoch 19: Validation loss = 0.02867468887602695 Time: 0:33:44.799972
Setting learning rate to 1.0e-05
Epoch 0: Validation loss = 0.028511774212693165 Time: 0:01:51.671814
Epoch 1: Validation loss = 0.0287195306564537 Time: 0:03:38.696864
Epoch 2: Validation loss = 0.028866213287170227 Time: 0:05:24.204203
Epoch 3: Validation loss = 0.028831930792911686 Time: 0:07:13.088939
Epoch 4: Validation loss = 0.028972612178567968 Time: 0:08:56.517294
Epoch 5: Validation loss = 0.028712953232543883 Time: 0:10:39.726211
Epoch 6: Validation loss = 0.028524498217811625 Time: 0:12:26.834245
Epoch 7: Validation loss = 0.028641679951842785 Time: 0:14:06.239001
Epoch 8: Validation loss = 0.028837425765680137 Time: 0:15:44.764095
Epoch 9: Validation loss = 0.028725909130305732 Time: 0:17:31.910289
Epoch 10: Validation loss = 0.028623213746200013 Time: 0:19:07.689270


In [10]:
# Continued training: same loop as before with a 10x lower learning-rate
# schedule, creating a new Adam optimizer for each stage.
#epochs_list = [7, 5, 3, 2, 2]
epochs_list = [40, 40, 20, 20, 20]
lrs = [1e-4, 3e-5, 1e-5, 3e-6, 1e-6]
for epochs, lr in zip(epochs_list, lrs):
    print('Setting learning rate to %.1e' % lr)
    opt = optim.Adam(model.parameters(), lr=lr)
    #opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9, nesterov=True)
    #opt = optim.RMSprop(model.parameters(), lr=lr, momentum=0.9)
    fit(epochs, model, loss_func, opt, train_dl, valid_dl, device)

Setting learning rate to 1.0e-04
Epoch 0: Validation loss = 0.029498209481903716 Time: 0:01:35.521629
Epoch 1: Validation loss = 0.029518394898326278 Time: 0:03:11.895488
Epoch 2: Validation loss = 0.030685875466933445 Time: 0:04:52.572418
Epoch 3: Validation loss = 0.029901059797897835 Time: 0:06:29.298219
Epoch 4: Validation loss = 0.030063812897673854 Time: 0:08:09.411302
Epoch 5: Validation loss = 0.029402771854688 Time: 0:10:12.697034
Epoch 6: Validation loss = 0.029806307585448617 Time: 0:11:56.013013
Epoch 7: Validation loss = 0.030436819391876417 Time: 0:13:48.475972
Epoch 8: Validation loss = 0.029929202914537937 Time: 0:15:20.619372
Epoch 9: Validation loss = 0.02933105469612895 Time: 0:16:42.457148


KeyboardInterrupt: 

In [11]:
# Save the model weights for later inference (if training is to be continued, another saving method is recommended).
#save_path = './models/AE_2D_v100_loss0029.pt'
#torch.save(model.state_dict(), save_path)
# model_big = AE_big()
# model_big.load_state_dict(torch.load(save_path))
# model_big.eval()

## Evaluation

In [None]:
# Put the model into evaluation mode (nn.Module.eval) before running inference.
model.eval()

In [None]:
# Print a few tensors, still normalized
print('Comparing input and output:')
with torch.no_grad():  # inference only: do not build an autograd graph
    for ii in range(100, 105):
        data = valid_ds.tensors[0][ii]
        pred = model(data)
        print('Inp:', data)
        print('Out:', pred)
        print(' ')

In [None]:
# Print a few tensors, now not normalized
print('Comparing input and output:')
for ii in range(100, 105):
    data = valid_ds.tensors[0][ii]
    with torch.no_grad():  # inference only: do not build an autograd graph
        pred = model(data).numpy()
    # Undo the normalization (x * std + mean). The input is converted to
    # NumPy explicitly so that 'Inp' and 'Out' are both plain arrays instead
    # of relying on NumPy ufuncs being applied to a torch Tensor.
    pred = pred * train_std.values + train_mean.values
    data = data.numpy() * train_std.values + train_mean.values
    print('Inp:', data)
    print('Out:', pred)
    print(' ')

In [None]:
plt.close('all')
# Plot styling shared by the comparison plots.
# NOTE(review): eta (pseudorapidity) is normally dimensionless -- confirm the '[rad]' label.
unit_list = ['[GeV]', '[rad]', '[rad]', '[GeV]']
variable_list = [r'$p_T$', r'$\eta$', r'$\phi$', r'$E$']
line_style = ['--', '-']
colors = ['orange', 'c']
markers = ['*', 's']


# Histograms
idxs = (0, 100000)  # Choose events to compare
data = torch.tensor(test_x[idxs[0]:idxs[1]].values)
with torch.no_grad():  # inference only: do not build an autograd graph
    pred = model(data).numpy()
# Undo the normalization (x * std + mean). The input is converted to NumPy
# explicitly instead of applying NumPy ufuncs to a torch Tensor.
pred = pred * train_std.values + train_mean.values
data = data.numpy() * train_std.values + train_mean.values

alph = 0.8
n_bins = 50
for kk in range(4):
    plt.figure(kk + 4)
    # The output histogram reuses the input's bin edges so both are comparable.
    n_hist_data, bin_edges, _ = plt.hist(data[:, kk], color=colors[1], label='Input', alpha=1, bins=n_bins)
    n_hist_pred, _, _ = plt.hist(pred[:, kk], color=colors[0], label='Output', alpha=alph, bins=bin_edges)
    plt.suptitle(train.columns[kk])
    plt.xlabel(variable_list[kk] + ' ' + unit_list[kk])
    plt.ylabel('Number of events')
    plt.legend()
    ms.sciy()

In [None]:
# Plot input on top of output
idxs = (0, 100)  # Choose events to compare
data = torch.tensor(test_x[idxs[0]:idxs[1]].values)
with torch.no_grad():  # inference only: do not build an autograd graph
    pred = model(data).numpy()
# Undo the normalization (x * std + mean). The input is converted to NumPy
# explicitly instead of applying NumPy ufuncs to a torch Tensor.
pred = pred * train_std.values + train_mean.values
data = data.numpy() * train_std.values + train_mean.values

for kk in range(4):
    plt.figure(kk)
    plt.plot(data[:, kk], color=colors[1], label='Input', linestyle=line_style[1], marker=markers[1])
    plt.plot(pred[:, kk], color=colors[0], label='Output', linestyle=line_style[0], marker=markers[0])
    plt.suptitle(train.columns[kk])
    plt.xlabel('Event')
    plt.ylabel(variable_list[kk] + ' ' + unit_list[kk])
    plt.legend()
    ms.sciy()

In [None]:
# Encode test events into the latent space
idxs = (0, int(1e5))  # Choose events to encode
data = torch.tensor(test_x[idxs[0]:idxs[1]].values)
with torch.no_grad():  # inference only: do not build an autograd graph
    latent = model.encode(data).numpy()

In [None]:
# One histogram per latent dimension (labelled 1-based).
n_latent = latent.shape[1]
for ii in range(n_latent):
    z_number = ii + 1
    plt.figure()
    plt.hist(latent[:, ii], label='$z_%d$' % z_number, color='m')
    plt.suptitle('Latent variable #%d' % z_number)
    plt.legend()
    ms.sciy()

In [None]:
# Scatter plot of the first two latent variables against each other.
mksz = 1  # marker size

plt.figure()
z1, z2 = latent[:, 0], latent[:, 1]
plt.scatter(z1, z2, s=mksz)
plt.xlabel(r'$z_1$')
plt.ylabel(r'$z_2$')