# Explore LHC dataset

In [None]:
%matplotlib inline

import sys
import numpy as np
import matplotlib
from matplotlib import pyplot as plt
import corner
import logging
from sklearn.manifold import TSNE

logging.basicConfig(
    format="%(asctime)-5.5s %(name)-30.30s %(levelname)-7.7s %(message)s",
    datefmt="%H:%M",
    level=logging.INFO,
)

sys.path.append("../../")
from experiments.simulators.collider import WBFLoader


## Get data

In [2]:
# Sample-size cap; passed as limit_samplesize when the dataset is loaded.
n = 10_000

In [3]:
# Instantiate WBFLoader (from experiments.simulators.collider) with its default arguments.
sim = WBFLoader()
# Trailing expression so the notebook displays the value returned by latent_dim().
sim.latent_dim()

14

In [4]:
# Load the training split from disk, capped at n samples, and unpack the
# returned pair into the samples `x` and the accompanying `params`.
x, params = sim.load_dataset(
    train=True,
    dataset_dir="../data/samples/lhc",
    numpy=True,
    limit_samplesize=n,
)

10:16 experiments.simulators.collide INFO    Only using 10000 of 1000000 available samples


In [5]:
# Call the loader's private _preprocess helper with inverse=True.
# Presumably this maps the loaded samples back to their un-preprocessed scale -- TODO confirm
# against WBFLoader._preprocess.
# NOTE(review): x_ is not referenced by any other cell visible in this notebook.
x_ = sim._preprocess(x, inverse=True)

## Visualization

In [None]:
# Embed the samples in two dimensions with t-SNE for plotting.
# The iteration count is deliberately left at its default of 1000 instead of being passed
# explicitly: the keyword was renamed from `n_iter` to `max_iter` in scikit-learn 1.5 and the
# old spelling was removed in 1.7, so omitting it keeps this cell working on old and new releases.
# NOTE(review): this embeds `x` as returned by load_dataset, not the `x_` array computed above.
tsne = TSNE(n_components=2, verbose=1, perplexity=40).fit_transform(x)


In [None]:
# Scatter plot of the two t-SNE components, written to a PDF under ../figures.
fig, ax = plt.subplots(figsize=(5, 5))

# Small, mostly transparent markers so that dense regions remain readable.
ax.scatter(tsne[:, 0], tsne[:, 1], s=12.0, alpha=0.15, c="C3")

ax.set_xlabel("t-SNE component 0")
ax.set_ylabel("t-SNE component 1")

fig.tight_layout()
fig.savefig("../figures/lhc_tsne.pdf")

## Determine weights for individual closure tests

In [6]:
# Standard-normal noise with the same shape as the loaded samples.
# NOTE: drawn from NumPy's global RNG, which is not seeded in this notebook.
x_noise = np.random.normal(loc=0.0, scale=1.0, size=x.shape)

In [7]:
# Evaluate the loader's closure tests on the noise samples and average over axis 1,
# leaving one mean value per entry along the first axis.
random_closure_tests = np.mean(sim._closure_tests(x_noise), axis=1)

# Weight each test by the reciprocal of its mean on noise, divided by the number of tests,
# so that the weighted means on this noise sample add up to one.
n_closure_tests = len(random_closure_tests)
CLOSURE_TEST_WEIGHTS = 1. / random_closure_tests / n_closure_tests

# Print as a comma-separated list that can be copied elsewhere.
print(", ".join(map(str, CLOSURE_TEST_WEIGHTS)))

0.00016862130580152395, 0.0005698125734999572, 0.00016476021640861034, 0.00038959698689980843, 0.00015144917089564335, 0.00015546849282418505, 0.01598404016101232, 0.016163353627730328, 0.016134544959993, 0.016175428996361076, 0.016009764414657544, 0.016110206673750016, 0.022091777568568415, 0.020110148081289986, 0.014102815661450822, 0.010415164730794945, 0.02042471739499227, 0.013473722413637186, 9.950444927035824e-05, 0.0002644129479577595, 8.659921089120665e-05, 3.385228330290218e-05, 7.960868587401298e-05, 2.9947050919587682e-05, 0.00011065865105580433, 0.000109861012078321, 0.00011101511937214564, 0.0001095160928750233, 5.586835199655386e-05, 2.3088904124757297e-05, 0.03259948233660189, 0.0183717856562062, 0.013882968297972451, 0.017246399344132194


## Closure test vs noise

In [8]:
# Mean of distance_from_manifold over the loaded samples; displayed as the cell output.
np.mean(sim.distance_from_manifold(x))

2.2684057161733498e-08

In [9]:
# Same metric evaluated on the Gaussian noise samples, for comparison with the data value.
np.mean(sim.distance_from_manifold(x_noise))

1.0009224818481945

In [None]:
# Noise amplitudes: 100 log-spaced values from 1e-9 to 1.
noises = np.geomspace(1.e-9, 1., 100)

# For each amplitude, perturb the samples with freshly drawn Gaussian noise scaled by that
# amplitude and record the mean distance from the manifold. Amplitudes are visited in the
# order of `noises`, so the sequence of random draws matches a plain for-loop.
closure = [
    np.mean(sim.distance_from_manifold(x + noise * np.random.normal(size=x.shape)))
    for noise in noises
]

In [None]:
# Log-log curve of the mean closure metric against the noise amplitude, written to a PDF.
fig, ax = plt.subplots(figsize=(5, 5))

ax.plot(noises, closure)

# Both axes are logarithmic: the amplitudes span nine orders of magnitude.
ax.set_xscale("log")
ax.set_yscale("log")

ax.set_xlabel("Noise")
ax.set_ylabel("Closure metric")

fig.tight_layout()
fig.savefig("../figures/lhc_closure_vs_noise.pdf")
