In [16]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [17]:
# Load the raw table and tag every dimensional column with its unit.
# The "px1 " key keeps its trailing space deliberately: the displayed header
# shows the rename took effect, so the CSV column name carries that space.
data = pd.read_csv("../data/data.csv").rename(
    columns={
        # energies
        "E1": "E1 [GeV]",
        "E2": "E2 [GeV]",
        # Cartesian momentum components
        "px1 ": "px1 [GeV]",
        "py1": "py1 [GeV]",
        "pz1": "pz1 [GeV]",
        "px2": "px2 [GeV]",
        "py2": "py2 [GeV]",
        "pz2": "pz2 [GeV]",
        # transverse momenta
        "pt1": "pt1 [GeV]",
        "pt2": "pt2 [GeV]",
        # azimuthal angles
        "phi1": "phi1 [rad]",
        "phi2": "phi2 [rad]",
        # invariant mass of the pair
        "M": "M [GeV]",
    }
)
# Preview the first ten rows.
data.head(10)

Unnamed: 0,Run,Event,E1 [GeV],px1 [GeV],py1 [GeV],pz1 [GeV],pt1 [GeV],eta1,phi1 [rad],Q1,E2 [GeV],px2 [GeV],py2 [GeV],pz2 [GeV],pt2 [GeV],eta2,phi2 [rad],Q2,M [GeV]
0,147115,366639895,58.7141,-7.31132,10.531,-57.2974,12.8202,-2.20267,2.17766,1,11.2836,-1.03234,-1.88066,-11.0778,2.14537,-2.34403,-2.07281,-1,8.94841
1,147115,366704169,6.61188,-4.15213,-0.579855,-5.11278,4.19242,-1.02842,-3.00284,-1,17.1492,-11.7135,5.04474,11.4647,12.7536,0.808077,2.73492,1,15.893
2,147115,367112316,25.5419,-11.4809,2.04168,22.7246,11.661,1.42048,2.9656,1,15.8203,-1.4728,2.25895,-15.5888,2.69667,-2.45508,2.14857,1,38.3877
3,147115,366952149,65.3959,7.51214,11.8871,63.8662,14.0619,2.21838,1.00721,1,25.1273,4.08786,2.59641,24.6563,4.84272,2.33021,0.565865,-1,3.72862
4,147115,366523212,61.4504,2.95284,-14.6227,-59.6121,14.9179,-2.09375,-1.37154,-1,13.8871,-0.277757,-2.4256,-13.6708,2.44145,-2.4237,-1.68481,-1,2.74718
5,147115,366663412,6.39616,-5.45672,-2.09068,-2.60078,5.84352,-0.431551,-2.77571,-1,21.3865,15.1698,-8.8703,-12.1893,17.5728,-0.64745,-0.52912,-1,18.4023
6,147115,366639101,84.5058,8.82436,10.5789,83.3753,13.7761,2.50032,0.875576,1,12.6784,-1.13446,-3.20939,-12.2128,3.404,-1.98956,-1.91057,-1,65.3239
7,147115,367133576,77.0057,10.0029,9.17545,-75.8,13.5737,-2.42103,0.742282,1,9.11623,-1.72295,-1.48674,-8.82761,2.27574,-2.06494,-2.42965,1,11.2912
8,147115,368639137,9.68787,1.11192,2.05064,-9.40284,2.3327,-2.10218,1.07394,1,63.4597,-1.85844,12.7946,-62.1287,12.9289,-2.27355,1.71504,1,3.58678
9,147115,367825395,27.8812,11.939,-18.3462,17.2696,21.8888,0.724032,-0.993887,1,12.9218,-5.0263,11.6026,2.66263,12.6445,0.20905,1.9796,-1,34.2685


## Data Description
- Run: The run number of the event.
- Event: The event number.
- E1, E2: The total energy of electrons 1 and 2 (GeV).
- px1, py1, pz1, px2, py2, pz2: The momentum components of electrons 1 and 2 (GeV).
- pt1, pt2: The transverse momentum of electrons 1 and 2 (GeV).
- eta1, eta2: The pseudorapidity of electrons 1 and 2.
- phi1, phi2: The phi angle of electrons 1 and 2 (rad).
- Q1, Q2: The charge of electrons 1 and 2.
- M: The invariant mass of the two electrons (GeV).


In [18]:
# (rows, columns) of the table as loaded, before any cleaning step.
data.shape

(100000, 19)

## Cleaning the data

Count how often each Event number occurs; any count above 1 means that event appears in more than one row.

In [19]:
# Occurrences of each Event number, most frequent first. The length of the
# resulting Series (99976 in the output) is the number of distinct Event values.
data.value_counts(subset="Event")

# To list the rows that repeat an earlier Event number, run this code:
# duplicateCheck = data.duplicated(subset=["Event"], keep="first")
# print(data[duplicateCheck])

Event
440012942    3
439950319    3
519135297    2
542302783    2
542369397    2
            ..
264030150    1
264011215    1
264010599    1
264009979    1
264072987    1
Name: count, Length: 99976, dtype: int64

Delete the duplicated rows, keeping only the first occurrence of each Event number.

In [20]:
# Keep one row per Event number. drop_duplicates retains the first occurrence
# by default, and the surviving rows keep their original index labels.
# NOTE(review): this treats Event alone as the identifying key — confirm that
# Event numbers cannot repeat across different Run values in this dataset.
data = data.drop_duplicates(subset="Event")
data

Unnamed: 0,Run,Event,E1 [GeV],px1 [GeV],py1 [GeV],pz1 [GeV],pt1 [GeV],eta1,phi1 [rad],Q1,E2 [GeV],px2 [GeV],py2 [GeV],pz2 [GeV],pt2 [GeV],eta2,phi2 [rad],Q2,M [GeV]
0,147115,366639895,58.71410,-7.311320,10.531000,-57.29740,12.82020,-2.202670,2.177660,1,11.28360,-1.032340,-1.88066,-11.077800,2.14537,-2.344030,-2.072810,-1,8.94841
1,147115,366704169,6.61188,-4.152130,-0.579855,-5.11278,4.19242,-1.028420,-3.002840,-1,17.14920,-11.713500,5.04474,11.464700,12.75360,0.808077,2.734920,1,15.89300
2,147115,367112316,25.54190,-11.480900,2.041680,22.72460,11.66100,1.420480,2.965600,1,15.82030,-1.472800,2.25895,-15.588800,2.69667,-2.455080,2.148570,1,38.38770
3,147115,366952149,65.39590,7.512140,11.887100,63.86620,14.06190,2.218380,1.007210,1,25.12730,4.087860,2.59641,24.656300,4.84272,2.330210,0.565865,-1,3.72862
4,147115,366523212,61.45040,2.952840,-14.622700,-59.61210,14.91790,-2.093750,-1.371540,-1,13.88710,-0.277757,-2.42560,-13.670800,2.44145,-2.423700,-1.684810,-1,2.74718
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,146511,522575834,12.31310,-10.658000,5.164440,3.36858,11.84330,0.280727,2.690370,-1,1.80181,0.668609,-1.58437,0.537805,1.71967,0.307851,-1.171470,1,8.44779
99996,146511,522786431,18.46420,7.854990,15.133000,-7.08659,17.05020,-0.404510,1.092010,1,14.69110,-1.418020,-2.28117,-14.443500,2.68598,-2.383880,-2.126960,1,20.71540
99997,146511,522906124,4.18566,-3.273500,-0.308507,-2.59013,3.28801,-0.723075,-3.047630,1,72.81740,-11.074900,-9.28179,-71.369300,14.45010,-2.300410,-2.444050,-1,12.71350
99998,146511,523243830,54.46220,11.352600,11.880900,51.92400,16.43280,1.867800,0.808132,-1,8.58671,0.378009,3.07828,8.007050,3.10141,1.677170,1.448610,1,4.69670


In [9]:
# Per-column dtype and non-null count; a non-null count below the number of
# entries reveals a column with missing values.
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 99976 entries, 0 to 99999
Data columns (total 19 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Run         99976 non-null  int64  
 1   Event       99976 non-null  int64  
 2   E1 [GeV]    99976 non-null  float64
 3   px1 [GeV]   99976 non-null  float64
 4   py1 [GeV]   99976 non-null  float64
 5   pz1 [GeV]   99976 non-null  float64
 6   pt1 [GeV]   99976 non-null  float64
 7   eta1        99976 non-null  float64
 8   phi1 [rad]  99976 non-null  float64
 9   Q1          99976 non-null  int64  
 10  E2 [GeV]    99976 non-null  float64
 11  px2 [GeV]   99976 non-null  float64
 12  py2 [GeV]   99976 non-null  float64
 13  pz2 [GeV]   99976 non-null  float64
 14  pt2 [GeV]   99976 non-null  float64
 15  eta2        99976 non-null  float64
 16  phi2 [rad]  99976 non-null  float64
 17  Q2          99976 non-null  int64  
 18  M [GeV]     99891 non-null  float64
dtypes: float64(15), int64(4)
memor

In [22]:
# Remove every row that has a missing value in any column. According to the
# info() summary, only "M [GeV]" has fewer non-null values than there are rows.
data = data.dropna(axis="index", how="any")
data

Unnamed: 0,Run,Event,E1 [GeV],px1 [GeV],py1 [GeV],pz1 [GeV],pt1 [GeV],eta1,phi1 [rad],Q1,E2 [GeV],px2 [GeV],py2 [GeV],pz2 [GeV],pt2 [GeV],eta2,phi2 [rad],Q2,M [GeV]
0,147115,366639895,58.71410,-7.311320,10.531000,-57.29740,12.82020,-2.202670,2.177660,1,11.28360,-1.032340,-1.88066,-11.077800,2.14537,-2.344030,-2.072810,-1,8.94841
1,147115,366704169,6.61188,-4.152130,-0.579855,-5.11278,4.19242,-1.028420,-3.002840,-1,17.14920,-11.713500,5.04474,11.464700,12.75360,0.808077,2.734920,1,15.89300
2,147115,367112316,25.54190,-11.480900,2.041680,22.72460,11.66100,1.420480,2.965600,1,15.82030,-1.472800,2.25895,-15.588800,2.69667,-2.455080,2.148570,1,38.38770
3,147115,366952149,65.39590,7.512140,11.887100,63.86620,14.06190,2.218380,1.007210,1,25.12730,4.087860,2.59641,24.656300,4.84272,2.330210,0.565865,-1,3.72862
4,147115,366523212,61.45040,2.952840,-14.622700,-59.61210,14.91790,-2.093750,-1.371540,-1,13.88710,-0.277757,-2.42560,-13.670800,2.44145,-2.423700,-1.684810,-1,2.74718
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,146511,522575834,12.31310,-10.658000,5.164440,3.36858,11.84330,0.280727,2.690370,-1,1.80181,0.668609,-1.58437,0.537805,1.71967,0.307851,-1.171470,1,8.44779
99996,146511,522786431,18.46420,7.854990,15.133000,-7.08659,17.05020,-0.404510,1.092010,1,14.69110,-1.418020,-2.28117,-14.443500,2.68598,-2.383880,-2.126960,1,20.71540
99997,146511,522906124,4.18566,-3.273500,-0.308507,-2.59013,3.28801,-0.723075,-3.047630,1,72.81740,-11.074900,-9.28179,-71.369300,14.45010,-2.300410,-2.444050,-1,12.71350
99998,146511,523243830,54.46220,11.352600,11.880900,51.92400,16.43280,1.867800,0.808132,-1,8.58671,0.378009,3.07828,8.007050,3.10141,1.677170,1.448610,1,4.69670


In [23]:
# (rows, columns) after removing duplicate events and rows with missing values.
data.shape

(99891, 19)

We are left with 99891 rows, each with a distinct Event number and no missing values.

In [25]:
# Persist the cleaned table. index=False leaves the row labels out of the file;
# after the row drops above they are no longer contiguous.
data.to_csv("../data/electron.csv", index=False)