# Plot input variables

This script will help us make some nice plots that show the input variables for training

## Setup and Config

Modify the settings below to change what is plotted.

In [1]:
# Designed not to be modified
import sys
sys.path.append("../")
from adl_func_client.event_dataset import EventDataset
from adl_func_client.use_exe_func_adl_server import use_exe_func_adl_server
from calratio_perjet_training.fetch_training_data import fetch_perjet_data
import glob
import numpy as np
import asyncio

import matplotlib.pyplot as plt
import matplotlib.lines as mlines
# Use a 14pt default font for every figure in this notebook.
plt.rcParams['font.size'] = 14

import pandas as pd

# Relative path of the CSV file that is loaded with pd.read_csv further down.
df_loc = "../data/datasets.csv"

## Load Datasets

In [2]:
# Read the dataset table from the CSV at df_loc; one fetch is issued per row later on.
datasets = pd.read_csv(df_loc)

In [3]:
async def fetch_data_async(info):
    """Fetch the per-jet data for a single row of the datasets table.

    Args:
        info: One row as yielded by ``datasets.iterrows()``. The fields
            ``mH``, ``mS``, ``Lifetime``, ``MCCampaign`` and ``RucioDSName``
            are read from it.

    Returns:
        A three-element list ``[info, tag, data]``, where ``tag`` is the
        string ``"{mH}_{mS}_{Lifetime}_{MCCampaign}"`` and ``data`` is the
        awaited result of ``fetch_perjet_data`` for that dataset.
    """
    # The same tag is both returned to the caller and handed to the fetcher.
    tag = f'{info.mH}_{info.mS}_{info.Lifetime}_{info.MCCampaign}'
    source = EventDataset(f'localds://{info.RucioDSName}')
    data = await fetch_perjet_data(source, tag)
    return [info, tag, data]
all_datasets_future = [fetch_data_async(info) for index, info in datasets.iterrows()]
r = await asyncio.gather(*all_datasets_future)

In [4]:
# Sanity check: number of results returned by asyncio.gather (one per coroutine passed in).
len(r)

58

## Standard Kinematic Plots

Regular plots of jet pt, etc.

In [None]:
# Histogram of the JetPt column, 30 bins over 0-300.
# NOTE(review): m125_m55_ltlow is not defined in any cell visible in this notebook -
# confirm it is created (e.g. from the fetched results in `r`) before this cell runs.
ax = plt.gca()
ax.hist(m125_m55_ltlow.JetPt, bins=30, range=(0, 300))
ax.set_title('m125_m55_ltlow')
_ = ax.set_xlabel('$p_T$ [GeV]')

In [None]:
# Histogram of the JetEta column: 40 bins over [-4, 4].
plt.hist(m125_m55_ltlow.JetEta, bins=8*5, range=(-4.0,4.0))
plt.title('m125_m55_ltlow')
# Raw string: '\e' is not a valid Python escape and triggers a SyntaxWarning on
# recent Python versions; the label text itself is unchanged.
_ = plt.xlabel(r'$\eta$')

In [None]:
# Histogram of the JetPhi column: 40 bins over [-4, 4].
plt.hist(m125_m55_ltlow.JetPhi, bins=8*5, range=(-4.0,4.0))
plt.title('m125_m55_ltlow')
# Raw string: '\p' is not a valid Python escape and triggers a SyntaxWarning on
# recent Python versions; the label text itself is unchanged.
_ = plt.xlabel(r'$\phi$')

## Long Lived Particle Truth

Look at the various truth variables that have come out

In [None]:
# Distribution of the nLLPs_Near_Jets column (default matplotlib binning).
ax = plt.gca()
ax.hist(m125_m55_ltlow.nLLPs_Near_Jets)
ax.set_title('m125_m55_ltlow')
_ = ax.set_xlabel('Count')

In [None]:
# Lx distribution in 20 bins, drawn with a logarithmic y axis.
ax = plt.gca()
ax.hist(m125_m55_ltlow.Lx, bins=20)
ax.set_title('m125_m55_ltlow')
ax.set_yscale('log')
_ = ax.set_xlabel('$L_x$ [mm]')

In [None]:
# Ly distribution in 20 bins, drawn with a logarithmic y axis.
ax = plt.gca()
ax.hist(m125_m55_ltlow.Ly, bins=20)
ax.set_title('m125_m55_ltlow')
ax.set_yscale('log')
_ = ax.set_xlabel('$L_y$ [mm]')

In [None]:
# Transverse distance Lxy = sqrt(Lx^2 + Ly^2), in 20 bins on a log y axis.
lx_all = m125_m55_ltlow.Lx
ly_all = m125_m55_ltlow.Ly
lxy_all = np.sqrt(lx_all*lx_all + ly_all*ly_all)

ax = plt.gca()
ax.hist(lxy_all, bins=20)
ax.set_title('m125_m55_ltlow')
ax.set_yscale('log')
_ = ax.set_xlabel('$L_{xy}$ [mm]')

In [None]:
# Lz distribution in 20 bins, drawn with a logarithmic y axis.
ax = plt.gca()
ax.hist(m125_m55_ltlow.Lz, bins=20)
ax.set_title('m125_m55_ltlow')
ax.set_yscale('log')
_ = ax.set_xlabel('$L_z$ [mm]')

Let's look at how this varies as a function of $\eta$.

When Rachel looked at this, she recommended paying attention to $L_{xy}$ when $|\eta|<1.5$ and $L_z$ for $|\eta|\geq 1.5$. So, given we have the MC results here, let's look at how the distributions look as a function of jet $\eta$.

The funny shapes (or dips or cut-outs, depending on which two plots you are looking at) are explained by the angle w.r.t. the vertical, and how long in $L_z$ that line can be when it intersects with the outer barrel of the calorimeter. Where the behavior fundamentally changes (around $|\eta|=1.2$) is where the transition between the barrel and the endcap or extended barrel occurs.

In [None]:
# Scatter of Lz against jet eta, with vertical guide lines at |eta| = 1.2.
plt.scatter(m125_m55_ltlow.JetEta, m125_m55_ltlow.Lz)
plt.ylabel('$L_z$ [mm]')
# Raw string: '\e' is not a valid Python escape and triggers a SyntaxWarning on
# recent Python versions; the label text itself is unchanged.
plt.xlabel(r'Jet $\eta$')
plt.ylim((0.0, 20000))
for eta_edge in (-1.2, 1.2):
    plt.plot((eta_edge, eta_edge), (0, 15000), color="black")
plt.show()

In [None]:
# Scatter of Lxy = sqrt(Lx^2 + Ly^2) against jet eta, with vertical guide lines at |eta| = 1.2.
plt.scatter(m125_m55_ltlow.JetEta, np.sqrt(np.square(m125_m55_ltlow.Lx)+np.square(m125_m55_ltlow.Ly)))
plt.ylabel('$L_{xy}$ [mm]')
# Raw string: '\e' is not a valid Python escape and triggers a SyntaxWarning on
# recent Python versions; the label text itself is unchanged.
plt.xlabel(r'Jet $\eta$')
plt.ylim((0.0, 20000))
for eta_edge in (-1.2, 1.2):
    plt.plot((eta_edge, eta_edge), (0, 15000), color="black")
plt.show()

## What are the large $R$ variables?

In the $L_{xy}$ vs $\eta$ plots above, there is a significant number of particles that have very large $L_{xy}$ (and $L_z$). I suspect those are LLPs that are randomly associated with jets. We can probably see this by looking at the $\Delta R$ between the LLP and the jet.

In [None]:
# Keep only the entries selected by the IsLLP column; these names are reused by later cells.
is_llp = m125_m55_ltlow.IsLLP
Lx = m125_m55_ltlow.Lx[is_llp]
Ly = m125_m55_ltlow.Ly[is_llp]
Lxy = np.sqrt(np.square(Lx)+np.square(Ly))
Lz = m125_m55_ltlow.Lz[is_llp]
Leta = m125_m55_ltlow.Leta[is_llp]
Lphi = m125_m55_ltlow.Lphi[is_llp]

# One log-scale histogram per quantity, each with a vertical black line at the
# value used as the outlier threshold in the following cells.
for lengths, axis_label, cut_mm in (
    (Lz, '$L_z$ [mm]', 7500),
    (Lxy, '$L_{xy}$ [mm]', 4400),
):
    plt.hist(lengths, bins=40)
    plt.title('m125_m55_ltlow')
    plt.yscale('log')
    plt.xlabel(axis_label)
    plt.plot((cut_mm, cut_mm), (0, 1e5), color="black")
    plt.show()

In [None]:
# Boolean masks flagging entries beyond the thresholds drawn on the histograms above.
too_far_lz = Lz > 7500
too_far_lxy = Lxy > 4400
# The second count is for Lz (the original message mislabelled it as "Lx").
print(f'outliers in Lxy: {sum(too_far_lxy)}, outliers in Lz: {sum(too_far_lz)}')

In [None]:
# Lz values of the entries flagged as Lz outliers.
plt.hist(Lz[too_far_lz], bins=40)
plt.title('m125_m55_ltlow - $L_z$ outliers')
plt.xlabel('$L_z$ [mm]')
plt.show()

# Lxy values of the entries flagged as Lxy outliers. The original indexed with
# too_far_lz here, which does not match the "$L_{xy}$ outliers" title.
plt.hist(Lxy[too_far_lxy], bins=40)
plt.title('m125_m55_ltlow - $L_{xy}$ outliers')
plt.xlabel('$L_{xy}$ [mm]')
plt.show()

It took a few iterations to get $\Delta R$ calculated properly. There were bugs: ironically, not in the back-end, but in how I was coding up the request!

Here is proof that $\Delta R$ is getting correctly calculated

In [None]:
# Jet-minus-LLP differences in eta and phi for the IsLLP-selected entries.
llp_mask = m125_m55_ltlow.IsLLP
JL_D_eta = m125_m55_ltlow.JetEta[llp_mask] - Leta
JL_D_phi = m125_m55_ltlow.JetPhi[llp_mask] - Lphi

# Wrap the phi difference into [-pi, pi): keep shifting by 2*pi until nothing is out of range.
two_pi = 2*np.pi
while np.any(JL_D_phi >= np.pi):
    JL_D_phi = JL_D_phi - (JL_D_phi >= np.pi)*two_pi
while np.any(JL_D_phi < -np.pi):
    JL_D_phi = JL_D_phi + (JL_D_phi < -np.pi)*two_pi

# Delta R = sqrt(d_eta^2 + d_phi^2); JL_DeltaR is reused by the following cell.
JL_DeltaR = np.sqrt(np.square(JL_D_eta) + np.square(JL_D_phi))
plt.hist(JL_DeltaR, bins=40)
plt.show()

In [None]:
# 40 evenly spaced bin edges between 0 and 0.4 (i.e. 39 bins), shared by both histograms.
bins = np.linspace(0.0, 0.4, 40)

# For each outlier selection, overlay the Delta R of the outliers on that of all entries.
for outlier_mask, plot_title in (
    (too_far_lz, 'Outliers in $L_z$ ($L_z>7500$ mm)'),
    (too_far_lxy, 'Outliers in $L_{xy}$ ($L_{xy}>4400$ mm)'),
):
    plt.hist(JL_DeltaR, bins, label='all')
    plt.hist(JL_DeltaR[outlier_mask], bins, label='outlier')
    plt.yscale('log')
    # Raw string: '\D' is not a valid Python escape and triggers a SyntaxWarning on
    # recent Python versions; the label text itself is unchanged.
    plt.xlabel(r'$\Delta R$')
    plt.title(plot_title)
    plt.legend()
    plt.show()

## MLP Input Variables

Look at the input variables for the $L_{xy}$ predictor.

In [None]:
# EMM_BL0 column in 20 bins over [0, 1], drawn with a logarithmic y axis.
ax = plt.gca()
ax.hist(m125_m55_ltlow.EMM_BL0, bins=20, range=(0.0, 1.0))
ax.set_title('m125_m55_ltlow')
ax.set_yscale('log')
_ = ax.set_xlabel('EMM BL0 [Fraction]')