# Explore M-flow latent space for LHC dataset

In [None]:
%matplotlib inline

import sys
import numpy as np
import matplotlib
from matplotlib import pyplot as plt
import logging
import torch
import numpy as np

# Configure root logging at INFO level with a compact fixed-width layout:
# time (HH:MM), logger name (padded/truncated to 30 chars), level, message.
logging.basicConfig(
    format="%(asctime)-5.5s %(name)-30.30s %(levelname)-7.7s %(message)s",
    datefmt="%H:%M",
    level=logging.INFO,
)

# Put the directory two levels up on the import path so that the project
# packages imported next (experiments, manifold_flow) can be resolved;
# presumably this is the repository root — verify if the notebook is moved.
sys.path.append("../../")
from experiments.architectures.vector_transforms import create_vector_transform
from manifold_flow.flows import ManifoldFlow, EncoderManifoldFlow
from experiments.datasets import WBF40DLoader
import plot_settings as ps


In [None]:
# Run the setup routine of the local plot_settings module (imported as ps);
# presumably this applies the project's matplotlib styling — see plot_settings.
ps.setup()

## Get data

In [None]:
# Sample-size limit handed to load_dataset as limit_samplesize.
n = 1000
# Loader instance for the dataset (project class WBF40DLoader).
sim40d = WBF40DLoader()

In [None]:
# Test-set samples for true_param_id 0, 1 and 2. load_dataset returns a pair;
# only the first element is kept here, the second is discarded.
_test_kwargs = dict(
    train=False,
    dataset_dir="../data/samples/lhc40d",
    numpy=True,
    limit_samplesize=n,
)
x0, _ = sim40d.load_dataset(true_param_id=0, **_test_kwargs)
x1, _ = sim40d.load_dataset(true_param_id=1, **_test_kwargs)
x2, _ = sim40d.load_dataset(true_param_id=2, **_test_kwargs)

# Arrays stored under ../data/results; judging by the file names these are
# samples generated by the model for the same three parameter points.
x_gen0 = np.load("../data/results/mf_14_lhc40d_june_samples.npy")
x_gen1 = np.load("../data/results/mf_14_lhc40d_june_samples_trueparam1.npy")
x_gen2 = np.load("../data/results/mf_14_lhc40d_june_samples_trueparam2.npy")


## Load model

In [None]:
def load_model(
    filename,
    outerlayers=20,
    innerlayers=15,
    splinebins=11,
    splinerange=10.0,
    dropout=0.0,
    batchnorm=False,
    outertransform="rq-coupling",
    innertransform="rq-coupling",
    lineartransform="lu",
    pieepsilon=0.1,
    pieclip=None,
    data_dim=40,
    latent_dim=14,
):
    """Build a ManifoldFlow and load saved weights into it.

    Two vector transforms are created with ``create_vector_transform``: an
    outer one over ``data_dim`` features without context, and an inner one
    over ``latent_dim`` features with ``context_features=2``. They are
    combined into a ``ManifoldFlow``, whose state dict is then read from
    ``../data/models/<filename>.pt`` onto the CPU.

    Args:
        filename: Checkpoint name without directory or ``.pt`` extension.
        outerlayers: Second positional argument of the outer transform.
        innerlayers: Second positional argument of the inner transform.
        splinebins: Forwarded as ``num_bins`` to both transforms.
        splinerange: Forwarded as ``tail_bound`` to both transforms.
        dropout: Forwarded as ``dropout_probability`` to both transforms.
        batchnorm: Forwarded as ``use_batch_norm`` to both transforms.
        outertransform: ``base_transform_type`` of the outer transform.
        innertransform: ``base_transform_type`` of the inner transform.
        lineartransform: ``linear_transform_type`` of both transforms.
        pieepsilon: Forwarded as ``pie_epsilon`` to ``ManifoldFlow``.
        pieclip: Forwarded as ``clip_pie`` to ``ManifoldFlow``.
        data_dim: Dimensionality of the data space (previously fixed at 40).
        latent_dim: Dimensionality of the latent space (previously fixed
            at 14).

    Returns:
        The ``ManifoldFlow`` instance with loaded weights, in eval mode.

    The architecture arguments have to match those the checkpoint was
    trained with, otherwise ``load_state_dict`` cannot map the weights.
    """
    shared_kwargs = dict(
        linear_transform_type=lineartransform,
        dropout_probability=dropout,
        tail_bound=splinerange,
        num_bins=splinebins,
        use_batch_norm=batchnorm,
    )
    outer_transform = create_vector_transform(
        data_dim,
        outerlayers,
        base_transform_type=outertransform,
        context_features=None,
        **shared_kwargs,
    )
    inner_transform = create_vector_transform(
        latent_dim,
        innerlayers,
        base_transform_type=innertransform,
        context_features=2,
        **shared_kwargs,
    )
    model = ManifoldFlow(
        data_dim=data_dim,
        latent_dim=latent_dim,
        outer_transform=outer_transform,
        inner_transform=inner_transform,
        apply_context_to_outer=False,
        pie_epsilon=pieepsilon,
        clip_pie=pieclip,
    )

    # NOTE: torch.load unpickles the file, so only load checkpoints from a
    # trusted source. map_location keeps everything on the CPU.
    state_dict = torch.load(
        "../data/models/{}.pt".format(filename), map_location=torch.device("cpu")
    )
    model.load_state_dict(state_dict)
    model.eval()

    return model

In [None]:
# Instantiate the model with load_model's default architecture settings and
# the weights stored in ../data/models/mf_14_lhc40d_june.pt.
mf = load_model("mf_14_lhc40d_june")

## Project test data into latent space

In [None]:
def compute_uv(x, model=mf):
    """Map data through the model's outer transform and projection.

    Args:
        x: Array-like input that ``torch.tensor`` can convert to a float
            tensor (assumed to have shape (n_samples, data_dim) — confirm
            against the loader).
        model: Model providing ``outer_transform`` and ``projection``. The
            default is the ``mf`` object that existed when this function
            was defined; rebinding ``mf`` later does not change it.

    Returns:
        Tuple ``(u, v)`` of NumPy arrays: the two outputs of
        ``model.projection`` applied to the outer-transform output.
    """
    model.eval()
    x_ = torch.tensor(x, dtype=torch.float)

    # Inference only: disable autograd so no computation graph is built and
    # kept alive for results that are converted to NumPy right away.
    with torch.no_grad():
        h, _ = model.outer_transform(x_, full_jacobian=False, context=None)
        u, v = model.projection(h)

    return u.detach().numpy(), v.detach().numpy()

In [None]:
# Apply compute_uv (with its default model) to each of the three test samples;
# every call yields a (u, v) pair of NumPy arrays.
u0, v0 = compute_uv(x0)
u1, v1 = compute_uv(x1)
u2, v2 = compute_uv(x2)


In [None]:
# Show the shape of v0 as the cell output (sanity check).
v0.shape

## Marginals of latents

In [None]:
# One panel per column i = 0..13 of the u arrays on a 4x4 grid, overlaying
# the step histograms of u0, u1 and u2 in colours ps.COLORS[1..3].
ncols = 4
nrows = 4

fig = plt.figure(figsize=(3*ncols, 3*nrows))

for i in range(14):
    ax = plt.subplot(nrows, ncols, i+1)
    for latents, color_idx in ((u0, 1), (u1, 2), (u2, 3)):
        plt.hist(
            latents[:,i],
            range=(-1.5, 1.5),
            bins=50,
            density=True,
            histtype="step",
            color=[ps.COLORS[color_idx]],
            ls="-",
            lw=1.5,
        )
    plt.xlabel("$u_{" + str(i) + "}$")
    plt.ylabel("Density")

plt.tight_layout()
plt.savefig("../figures/lhc_u_histos.pdf")


In [None]:
# One panel per column i = 0..13 of the v arrays on a 4x4 grid, overlaying
# the step histograms of v0, v1 and v2 in colours ps.COLORS[1..3].
ncols = 4
nrows = 4

fig = plt.figure(figsize=(3*ncols, 3*nrows))

for i in range(14):
    ax = plt.subplot(nrows, ncols, i+1)
    for offmanifold, color_idx in ((v0, 1), (v1, 2), (v2, 3)):
        plt.hist(
            offmanifold[:,i],
            range=(-0.2, 0.2),
            bins=50,
            density=True,
            histtype="step",
            color=[ps.COLORS[color_idx]],
            ls="-",
            lw=1.5,
        )
    # Brace the subscript: without braces mathtext subscripts only the first
    # character, so indices >= 10 would render as v_1 followed by a digit.
    plt.xlabel("$v_{" + str(i) + "}$")
    plt.ylabel("Density")

plt.tight_layout()
plt.savefig("../figures/lhc_v_histos.pdf")



## Scatter plot

In [None]:
# Lower-triangular scatter matrix of the u columns: row ip shows feature
# i = features[ip + 1] on the y axis, column jp shows feature j = features[jp]
# on the x axis, using the first m points of each of the three samples.
features = list(range(14))
n = len(features)
m = 250

fig = plt.figure(figsize=(2*(n-1), 2*(n-1)))
for ip, i in enumerate(features[1:]):
    # range(ip + 1) rather than range(ip): the diagonal panel jp == ip pairs
    # feature i with its direct predecessor. Stopping one short would skip
    # every adjacent pair and leave the first row and last column of the
    # (n-1) x (n-1) grid empty.
    for jp in range(ip + 1):
        j = features[jp]
        ax = plt.subplot(n-1, n-1, ip*(n-1) + jp + 1)
        for latents, color_idx in ((u0, 1), (u1, 2), (u2, 3)):
            plt.scatter(
                latents[:m,j],
                latents[:m,i],
                s=0.5,
                c=[ps.COLORS[color_idx]],
                rasterized=True,
            )
        ax.get_xaxis().set_ticks([])
        ax.get_yaxis().set_ticks([])
        plt.xlim(-1.5,1.5)
        plt.ylim(-1.5,1.5)

        # Label only the outer edge: x labels on the bottom row, y labels on
        # the left-most column.
        if ip == n - 2:
            plt.xlabel(str(j))
        if jp == 0:
            plt.ylabel(str(i))

plt.tight_layout()
plt.savefig("../figures/lhc_u_scatter.pdf")
