In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import gc

In [2]:
# Paths to the raw .npz archives, one per sample (pi0, eta, bck).
# They are relative paths, so they resolve against the kernel's working directory.
pi0_data_path = 'Data-Raw/pi0.npz'
eta_data_path = 'Data-Raw/eta.npz'
bck_data_path = 'Data-Raw/bck.npz'

Read in the raw data and split it into train and test sets.
This is done inside functions so that the large intermediate arrays go out of scope when the function returns, which lets Python's garbage collector free their memory.

In [3]:
def load_split_pi0(test_size):
    """Load the raw pi0 archive and split every field into train/test parts.

    All 18 fields are handed to a single ``train_test_split`` call with
    ``random_state=42`` so that every array is partitioned by the same
    call and the split is reproducible.

    Parameters
    ----------
    test_size
        Forwarded unchanged to ``train_test_split`` (previously this
        argument was ignored and 0.2 was always used).

    Returns
    -------
    tuple
        36 arrays: a ``(train, test)`` pair for each field, in the order
        ClusterN, Cluster, ClusterTiming, ClusterE, ClusterType, ClusterPt,
        ClusterModuleNumber, ClusterCol, ClusterRow, ClusterM02, ClusterM20,
        ClusterDistFromVert, PartE, PartPt, PartEta, PartPhi, PartIsPrimary,
        PartPID.
    """
    # The order here fixes the order of the returned tuple; callers unpack
    # it positionally, so do not reorder.
    field_order = (
        "ClusterN", "Cluster", "ClusterTiming", "ClusterE", "ClusterType",
        "ClusterPt", "ClusterModuleNumber", "ClusterCol", "ClusterRow",
        "ClusterM02", "ClusterM20", "ClusterDistFromVert",
        "PartE", "PartPt", "PartEta", "PartPhi", "PartIsPrimary", "PartPID",
    )

    # NOTE(review): allow_pickle=True unpickles object arrays, which can run
    # arbitrary code -- only load archives from a trusted source.
    # The context manager closes the underlying zip file once every field
    # has been read into memory.
    with np.load(pi0_data_path, allow_pickle=True) as data_pi0:
        arrays = [data_pi0[key] for key in field_order]

    # train_test_split returns [a_train, a_test, b_train, b_test, ...].
    return tuple(train_test_split(*arrays, test_size=test_size, random_state=42))

In [4]:
def split_pi0():
    """Split the pi0 sample with a 0.2 test fraction and save both parts.

    Calls ``load_split_pi0(0.2)`` and writes two compressed archives,
    ``Data-Split/pi0_train`` and ``Data-Split/pi0_test``. Each archive
    holds a ``Size`` entry (the element count of the ClusterE array)
    followed by the 18 per-field arrays.
    """
    # Order in which load_split_pi0 returns its (train, test) pairs.
    returned_fields = (
        "ClusterN", "Cluster", "ClusterTiming", "ClusterE", "ClusterType",
        "ClusterPt", "ClusterModuleNumber", "ClusterCol", "ClusterRow",
        "ClusterM02", "ClusterM20", "ClusterDistFromVert",
        "PartE", "PartPt", "PartEta", "PartPhi", "PartIsPrimary", "PartPID",
    )
    # Order in which the fields are written into each archive.
    saved_fields = (
        "ClusterN", "Cluster", "ClusterTiming", "ClusterE", "ClusterPt",
        "ClusterModuleNumber", "ClusterType", "ClusterRow", "ClusterCol",
        "ClusterM02", "ClusterM20", "ClusterDistFromVert",
        "PartE", "PartPt", "PartEta", "PartPhi", "PartIsPrimary", "PartPID",
    )

    pieces = load_split_pi0(0.2)

    # Even positions of the result are train parts, odd positions test parts.
    for target, offset in (('Data-Split/pi0_train', 0), ('Data-Split/pi0_test', 1)):
        by_name = dict(zip(returned_fields, pieces[offset::2]))
        np.savez_compressed(
            target,
            Size=by_name["ClusterE"].size,
            **{key: by_name[key] for key in saved_fields}
        )

In [5]:
# Run the pi0 split; this writes the Data-Split/pi0_train and Data-Split/pi0_test archives.
split_pi0()

In [6]:
def load_split_eta(test_size):
    """Load the raw eta archive and split every field into train/test parts.

    All 18 fields are handed to a single ``train_test_split`` call with
    ``random_state=42`` so that every array is partitioned by the same
    call and the split is reproducible.

    Parameters
    ----------
    test_size
        Forwarded unchanged to ``train_test_split`` (previously this
        argument was ignored and 0.2 was always used).

    Returns
    -------
    tuple
        36 arrays: a ``(train, test)`` pair for each field, in the order
        ClusterN, Cluster, ClusterTiming, ClusterE, ClusterType, ClusterPt,
        ClusterModuleNumber, ClusterCol, ClusterRow, ClusterM02, ClusterM20,
        ClusterDistFromVert, PartE, PartPt, PartEta, PartPhi, PartIsPrimary,
        PartPID.
    """
    # The order here fixes the order of the returned tuple; callers unpack
    # it positionally, so do not reorder.
    field_order = (
        "ClusterN", "Cluster", "ClusterTiming", "ClusterE", "ClusterType",
        "ClusterPt", "ClusterModuleNumber", "ClusterCol", "ClusterRow",
        "ClusterM02", "ClusterM20", "ClusterDistFromVert",
        "PartE", "PartPt", "PartEta", "PartPhi", "PartIsPrimary", "PartPID",
    )

    # NOTE(review): allow_pickle=True unpickles object arrays, which can run
    # arbitrary code -- only load archives from a trusted source.
    # The context manager closes the underlying zip file once every field
    # has been read into memory.
    with np.load(eta_data_path, allow_pickle=True) as data_eta:
        arrays = [data_eta[key] for key in field_order]

    # train_test_split returns [a_train, a_test, b_train, b_test, ...].
    return tuple(train_test_split(*arrays, test_size=test_size, random_state=42))

In [7]:
def split_eta():
    """Split the eta sample with a 0.2 test fraction and save both parts.

    Calls ``load_split_eta(0.2)`` and writes two compressed archives,
    ``Data-Split/eta_train`` and ``Data-Split/eta_test``. Each archive
    holds a ``Size`` entry (the element count of the ClusterE array)
    followed by the 18 per-field arrays.
    """
    # Order in which load_split_eta returns its (train, test) pairs.
    returned_fields = (
        "ClusterN", "Cluster", "ClusterTiming", "ClusterE", "ClusterType",
        "ClusterPt", "ClusterModuleNumber", "ClusterCol", "ClusterRow",
        "ClusterM02", "ClusterM20", "ClusterDistFromVert",
        "PartE", "PartPt", "PartEta", "PartPhi", "PartIsPrimary", "PartPID",
    )
    # Order in which the fields are written into each archive.
    saved_fields = (
        "ClusterN", "Cluster", "ClusterTiming", "ClusterE", "ClusterPt",
        "ClusterModuleNumber", "ClusterType", "ClusterRow", "ClusterCol",
        "ClusterM02", "ClusterM20", "ClusterDistFromVert",
        "PartE", "PartPt", "PartEta", "PartPhi", "PartIsPrimary", "PartPID",
    )

    pieces = load_split_eta(0.2)

    # Even positions of the result are train parts, odd positions test parts.
    for target, offset in (('Data-Split/eta_train', 0), ('Data-Split/eta_test', 1)):
        by_name = dict(zip(returned_fields, pieces[offset::2]))
        np.savez_compressed(
            target,
            Size=by_name["ClusterE"].size,
            **{key: by_name[key] for key in saved_fields}
        )
        

In [8]:
# Run the eta split; this writes the Data-Split/eta_train and Data-Split/eta_test archives.
split_eta()

In [6]:
 # Unpack the 36 eta train/test arrays into notebook-level names; the save cell that follows reads them.
 # NOTE(review): this repeats what split_eta() already does inside a function scope -- confirm whether this cell is still needed.
 eta_ClusterN_train, eta_ClusterN_test, eta_Cluster_train, eta_Cluster_test, eta_ClusterTiming_train, eta_ClusterTiming_test, eta_ClusterE_train, eta_ClusterE_test, eta_ClusterType_train, eta_ClusterType_test,eta_ClusterPt_train, eta_ClusterPt_test, eta_ClusterModuleNumber_train, eta_ClusterModuleNumber_test, eta_ClusterCol_train, eta_ClusterCol_test, eta_ClusterRow_train, eta_ClusterRow_test, eta_ClusterM02_train, eta_ClusterM02_test, eta_ClusterM20_train, eta_ClusterM20_test, eta_ClusterDistFromVert_train, eta_ClusterDistFromVert_test, eta_PartE_train, eta_PartE_test, eta_PartPt_train, eta_PartPt_test, eta_PartEta_train, eta_PartEta_test, eta_PartPhi_train, eta_PartPhi_test, eta_PartIsPrimary_train, eta_PartIsPrimary_test, eta_PartPID_train, eta_PartPID_test = load_split_eta(0.2)

In [None]:
# Write the eta training partition from the notebook-level eta_*_train arrays.
# Size records the element count of the ClusterE array.
np.savez_compressed(
    'Data-Split/eta_train',
    Size=eta_ClusterE_train.size,
    ClusterN=eta_ClusterN_train,
    Cluster=eta_Cluster_train,
    ClusterTiming=eta_ClusterTiming_train,
    ClusterE=eta_ClusterE_train,
    ClusterPt=eta_ClusterPt_train,
    ClusterModuleNumber=eta_ClusterModuleNumber_train,
    ClusterType=eta_ClusterType_train,
    ClusterRow=eta_ClusterRow_train,
    ClusterCol=eta_ClusterCol_train,
    ClusterM02=eta_ClusterM02_train,
    ClusterM20=eta_ClusterM20_train,
    ClusterDistFromVert=eta_ClusterDistFromVert_train,
    PartE=eta_PartE_train,
    PartPt=eta_PartPt_train,
    PartEta=eta_PartEta_train,
    PartPhi=eta_PartPhi_train,
    PartIsPrimary=eta_PartIsPrimary_train,
    PartPID=eta_PartPID_train,
)

# Write the eta test partition with the same layout.
np.savez_compressed(
    'Data-Split/eta_test',
    Size=eta_ClusterE_test.size,
    ClusterN=eta_ClusterN_test,
    Cluster=eta_Cluster_test,
    ClusterTiming=eta_ClusterTiming_test,
    ClusterE=eta_ClusterE_test,
    ClusterPt=eta_ClusterPt_test,
    ClusterModuleNumber=eta_ClusterModuleNumber_test,
    ClusterType=eta_ClusterType_test,
    ClusterRow=eta_ClusterRow_test,
    ClusterCol=eta_ClusterCol_test,
    ClusterM02=eta_ClusterM02_test,
    ClusterM20=eta_ClusterM20_test,
    ClusterDistFromVert=eta_ClusterDistFromVert_test,
    PartE=eta_PartE_test,
    PartPt=eta_PartPt_test,
    PartEta=eta_PartEta_test,
    PartPhi=eta_PartPhi_test,
    PartIsPrimary=eta_PartIsPrimary_test,
    PartPID=eta_PartPID_test,
)

In [7]:
def load_split_bck(test_size):
    """Load the raw bck archive and split every field into train/test parts.

    ``PartIsPrimary`` is not read from the archive; it is synthesized as an
    all-False boolean array shaped like ``PartE``. All 18 fields are then
    handed to a single ``train_test_split`` call with ``random_state=42``
    so that every array is partitioned by the same call and the split is
    reproducible.

    Parameters
    ----------
    test_size
        Forwarded unchanged to ``train_test_split`` (previously this
        argument was ignored and 0.2 was always used).

    Returns
    -------
    tuple
        36 arrays: a ``(train, test)`` pair for each field, in the order
        ClusterN, Cluster, ClusterTiming, ClusterE, ClusterType, ClusterPt,
        ClusterModuleNumber, ClusterCol, ClusterRow, ClusterM02, ClusterM20,
        ClusterDistFromVert, PartE, PartPt, PartEta, PartPhi, PartIsPrimary,
        PartPID.
    """
    # The order here fixes the order of the returned tuple; callers unpack
    # it positionally, so do not reorder.
    field_order = (
        "ClusterN", "Cluster", "ClusterTiming", "ClusterE", "ClusterType",
        "ClusterPt", "ClusterModuleNumber", "ClusterCol", "ClusterRow",
        "ClusterM02", "ClusterM20", "ClusterDistFromVert",
        "PartE", "PartPt", "PartEta", "PartPhi", "PartIsPrimary", "PartPID",
    )

    # NOTE(review): allow_pickle=True unpickles object arrays, which can run
    # arbitrary code -- only load archives from a trusted source.
    # The context manager closes the underlying zip file once every stored
    # field has been read into memory.
    with np.load(bck_data_path, allow_pickle=True) as data_bck:
        fields = {
            key: data_bck[key] for key in field_order if key != "PartIsPrimary"
        }

    # The bck archive's own PartIsPrimary (if any) is never consulted.
    fields["PartIsPrimary"] = np.zeros_like(fields["PartE"], dtype=bool)

    arrays = [fields[key] for key in field_order]
    # train_test_split returns [a_train, a_test, b_train, b_test, ...].
    return tuple(train_test_split(*arrays, test_size=test_size, random_state=42))

In [None]:
 # Unpack the 36 bck train/test arrays into notebook-level names; the save cell that follows reads them.
 bck_ClusterN_train, bck_ClusterN_test, bck_Cluster_train, bck_Cluster_test, bck_ClusterTiming_train, bck_ClusterTiming_test, bck_ClusterE_train, bck_ClusterE_test, bck_ClusterType_train, bck_ClusterType_test,bck_ClusterPt_train, bck_ClusterPt_test, bck_ClusterModuleNumber_train, bck_ClusterModuleNumber_test, bck_ClusterCol_train, bck_ClusterCol_test, bck_ClusterRow_train, bck_ClusterRow_test, bck_ClusterM02_train, bck_ClusterM02_test, bck_ClusterM20_train, bck_ClusterM20_test, bck_ClusterDistFromVert_train, bck_ClusterDistFromVert_test, bck_PartE_train, bck_PartE_test, bck_PartPt_train, bck_PartPt_test, bck_PartEta_train, bck_PartEta_test, bck_PartPhi_train, bck_PartPhi_test, bck_PartIsPrimary_train, bck_PartIsPrimary_test, bck_PartPID_train, bck_PartPID_test = load_split_bck(0.2)

In [None]:
# Write the bck training partition from the notebook-level bck_*_train arrays.
# Size records the element count of the ClusterE array.
np.savez_compressed(
    'Data-Split/bck_train',
    Size=bck_ClusterE_train.size,
    ClusterN=bck_ClusterN_train,
    Cluster=bck_Cluster_train,
    ClusterTiming=bck_ClusterTiming_train,
    ClusterE=bck_ClusterE_train,
    ClusterPt=bck_ClusterPt_train,
    ClusterModuleNumber=bck_ClusterModuleNumber_train,
    ClusterType=bck_ClusterType_train,
    ClusterRow=bck_ClusterRow_train,
    ClusterCol=bck_ClusterCol_train,
    ClusterM02=bck_ClusterM02_train,
    ClusterM20=bck_ClusterM20_train,
    ClusterDistFromVert=bck_ClusterDistFromVert_train,
    PartE=bck_PartE_train,
    PartPt=bck_PartPt_train,
    PartEta=bck_PartEta_train,
    PartPhi=bck_PartPhi_train,
    PartIsPrimary=bck_PartIsPrimary_train,
    PartPID=bck_PartPID_train,
)

# Write the bck test partition with the same layout.
np.savez_compressed(
    'Data-Split/bck_test',
    Size=bck_ClusterE_test.size,
    ClusterN=bck_ClusterN_test,
    Cluster=bck_Cluster_test,
    ClusterTiming=bck_ClusterTiming_test,
    ClusterE=bck_ClusterE_test,
    ClusterPt=bck_ClusterPt_test,
    ClusterModuleNumber=bck_ClusterModuleNumber_test,
    ClusterType=bck_ClusterType_test,
    ClusterRow=bck_ClusterRow_test,
    ClusterCol=bck_ClusterCol_test,
    ClusterM02=bck_ClusterM02_test,
    ClusterM20=bck_ClusterM20_test,
    ClusterDistFromVert=bck_ClusterDistFromVert_test,
    PartE=bck_PartE_test,
    PartPt=bck_PartPt_test,
    PartEta=bck_PartEta_test,
    PartPhi=bck_PartPhi_test,
    PartIsPrimary=bck_PartIsPrimary_test,
    PartPID=bck_PartPID_test,
)

In [13]:
def load_data(path, test_size):
    """Load one raw .npz archive and split every field into train/test parts.

    When the substring ``'bck'`` occurs in ``path``, ``PartIsPrimary`` is not
    read from the archive but synthesized as an all-False boolean array
    shaped like ``PartE``; otherwise it is read like every other field.
    All 18 fields go through a single ``train_test_split`` call with
    ``random_state=42`` so the split is reproducible.

    Parameters
    ----------
    path : str or path-like
        Location of the archive, passed to ``np.load``.
    test_size
        Forwarded unchanged to ``train_test_split``.

    Returns
    -------
    tuple
        36 arrays: a ``(train, test)`` pair for each field, in the order
        ClusterN, Cluster, ClusterTiming, ClusterE, ClusterType, ClusterPt,
        ClusterModuleNumber, ClusterCol, ClusterRow, ClusterM02, ClusterM20,
        ClusterDistFromVert, PartE, PartPt, PartEta, PartPhi, PartIsPrimary,
        PartPID.
    """
    # The order here fixes the order of the returned tuple; callers unpack
    # it positionally, so do not reorder.
    field_order = (
        "ClusterN", "Cluster", "ClusterTiming", "ClusterE", "ClusterType",
        "ClusterPt", "ClusterModuleNumber", "ClusterCol", "ClusterRow",
        "ClusterM02", "ClusterM20", "ClusterDistFromVert",
        "PartE", "PartPt", "PartEta", "PartPhi", "PartIsPrimary", "PartPID",
    )

    # NOTE(review): allow_pickle=True unpickles object arrays, which can run
    # arbitrary code -- only load archives from a trusted source.
    # The context manager closes the underlying zip file once every field
    # has been read into memory.
    with np.load(path, allow_pickle=True) as data:
        fields = {
            key: data[key] for key in field_order if key != "PartIsPrimary"
        }
        # str() keeps the substring check working for pathlib.Path inputs.
        # NOTE(review): matching on the file name is fragile (any path that
        # merely contains "bck" triggers it) -- confirm this is intended.
        if 'bck' in str(path):
            fields["PartIsPrimary"] = np.zeros_like(fields["PartE"], dtype=bool)
        else:
            fields["PartIsPrimary"] = data["PartIsPrimary"]

    arrays = [fields[key] for key in field_order]
    # train_test_split returns [a_train, a_test, b_train, b_test, ...].
    return tuple(train_test_split(*arrays, test_size=test_size, random_state=42))

In [14]:
def split_data(path, name, test_size=0.2):
    """Split one raw archive and save the train and test parts.

    Parameters
    ----------
    path
        Raw archive location, passed straight to ``load_data``.
    name : str
        Sample tag used to build the output names
        ``Data-Split/<name>_train`` and ``Data-Split/<name>_test``.
    test_size
        Forwarded to ``load_data``; defaults to 0.2.

    Each output archive holds a ``Size`` entry (the element count of the
    ClusterE array) followed by the 18 per-field arrays.
    """
    # Order in which load_data returns its (train, test) pairs.
    returned_fields = (
        "ClusterN", "Cluster", "ClusterTiming", "ClusterE", "ClusterType",
        "ClusterPt", "ClusterModuleNumber", "ClusterCol", "ClusterRow",
        "ClusterM02", "ClusterM20", "ClusterDistFromVert",
        "PartE", "PartPt", "PartEta", "PartPhi", "PartIsPrimary", "PartPID",
    )
    # Order in which the fields are written into each archive.
    saved_fields = (
        "ClusterN", "Cluster", "ClusterTiming", "ClusterE", "ClusterPt",
        "ClusterModuleNumber", "ClusterType", "ClusterRow", "ClusterCol",
        "ClusterM02", "ClusterM20", "ClusterDistFromVert",
        "PartE", "PartPt", "PartEta", "PartPhi", "PartIsPrimary", "PartPID",
    )

    pieces = load_data(path, test_size)
    # Even positions of the result are train parts, odd positions test parts.
    train_parts = dict(zip(returned_fields, pieces[0::2]))
    test_parts = dict(zip(returned_fields, pieces[1::2]))

    np.savez_compressed(
        'Data-Split/'+name+'_train',
        Size=train_parts["ClusterE"].size,
        **{key: train_parts[key] for key in saved_fields}
    )

    np.savez_compressed(
        'Data-Split/'+name+'_test',
        Size=test_parts["ClusterE"].size,
        **{key: test_parts[key] for key in saved_fields}
    )

In [10]:
# pi0 sample: default test_size of 0.2, written to Data-Split/pi0_train and Data-Split/pi0_test.
split_data(pi0_data_path, 'pi0')

In [11]:
# eta sample: default test_size of 0.2, written to Data-Split/eta_train and Data-Split/eta_test.
split_data(eta_data_path, 'eta')

In [15]:
# bck sample: default test_size of 0.2, written to Data-Split/bck_train and Data-Split/bck_test.
# Because the path contains 'bck', load_data fills PartIsPrimary with False.
split_data(bck_data_path, 'bck')