## Notebook Merge_Datasets
This notebook takes the pi0, eta and bck samples, which were previously split into separate train and test sets, and merges them into a single train dataset and a single test dataset.

In [1]:
import numpy as np

In [2]:
# Locations of the per-class input archives, one .npz file per class and split.
pi0_train, eta_train, bck_train = (
    'Data-Split/pi0_train.npz',
    'Data-Split/eta_train.npz',
    'Data-Split/bck_train.npz',
)

pi0_test, eta_test, bck_test = (
    'Data-Split/pi0_test.npz',
    'Data-Split/eta_test.npz',
    'Data-Split/bck_test.npz',
)

In [3]:
# Open the six .npz archives. np.load returns a dict-like NpzFile whose arrays
# are read when their key is accessed.
# NOTE(review): allow_pickle=True permits unpickling object arrays, which can
# execute arbitrary code - only load archives from a trusted source.
data_pi0_train, data_eta_train, data_bck_train = (
    np.load(path, allow_pickle=True)
    for path in (pi0_train, eta_train, bck_train)
)

data_pi0_test, data_eta_test, data_bck_test = (
    np.load(path, allow_pickle=True)
    for path in (pi0_test, eta_test, bck_test)
)

# List the keys stored in one archive; the merge further down reads the same
# keys from every archive.
print(list(data_bck_train.keys()))

['Size', 'Cluster', 'ClusterE', 'ClusterPt', 'ClusterModuleNumber', 'ClusterType', 'ClusterX', 'ClusterY', 'ClusterM02', 'ClusterM20', 'PartE', 'PartPt', 'PartEta', 'PartPhi', 'PartIsPrimary', 'PartPID']


## Calculate size of dataset

In [4]:
# Number of examples each class contributes to the train set, exactly as stored
# under the 'Size' key of its archive.
pi0_size_train, eta_size_train, bck_size_train = (
    data_pi0_train['Size'],
    data_eta_train['Size'],
    data_bck_train['Size'],
)

# Total number of train examples; .item() converts each stored value to a
# plain Python scalar before summing.
size_train = sum(
    count.item() for count in (pi0_size_train, eta_size_train, bck_size_train)
)

In [5]:
# Number of examples each class contributes to the test set, exactly as stored
# under the 'Size' key of its archive.
pi0_size_test, eta_size_test, bck_size_test = (
    data_pi0_test['Size'],
    data_eta_test['Size'],
    data_bck_test['Size'],
)

# Total number of test examples; .item() converts each stored value to a
# plain Python scalar before summing.
size_test = sum(
    count.item() for count in (pi0_size_test, eta_size_test, bck_size_test)
)

## Initialise arrays 
The arrays below are pre-allocated with zeros and sized to hold the pi0, eta and bck entries together, once for the train set and once for the test set.

In [6]:
# Zero-filled destination arrays for the merged train set; every array has
# size_train entries along its first axis.

# One 50x50 float32 slab per example.
Cluster_train = np.zeros((size_train, 50, 50), dtype=np.float32)

# float32 quantities, one value per example.
ClusterE_train = np.zeros(size_train, dtype=np.float32)
ClusterPt_train = np.zeros(size_train, dtype=np.float32)
ClusterM20_train = np.zeros(size_train, dtype=np.float32)
ClusterM02_train = np.zeros(size_train, dtype=np.float32)
PartE_train = np.zeros(size_train, dtype=np.float32)
PartPt_train = np.zeros(size_train, dtype=np.float32)
PartEta_train = np.zeros(size_train, dtype=np.float32)
PartPhi_train = np.zeros(size_train, dtype=np.float32)

# Unsigned-byte quantities, one value per example.
ClusterType_train = np.zeros(size_train, dtype=np.ubyte)
ClusterModuleNumber_train = np.zeros(size_train, dtype=np.ubyte)
ClusterX_train = np.zeros(size_train, dtype=np.ubyte)
ClusterY_train = np.zeros(size_train, dtype=np.ubyte)

# Boolean flag and short-integer code, one value per example.
PartIsPrimary_train = np.zeros(size_train, dtype=bool)
PartPID_train = np.zeros(size_train, dtype=np.short)

In [7]:
# Zero-filled destination arrays for the merged test set; every array has
# size_test entries along its first axis.

# One 50x50 float32 slab per example.
Cluster_test = np.zeros((size_test, 50, 50), dtype=np.float32)

# float32 quantities, one value per example.
ClusterE_test = np.zeros(size_test, dtype=np.float32)
ClusterPt_test = np.zeros(size_test, dtype=np.float32)
ClusterM20_test = np.zeros(size_test, dtype=np.float32)
ClusterM02_test = np.zeros(size_test, dtype=np.float32)
PartE_test = np.zeros(size_test, dtype=np.float32)
PartPt_test = np.zeros(size_test, dtype=np.float32)
PartEta_test = np.zeros(size_test, dtype=np.float32)
PartPhi_test = np.zeros(size_test, dtype=np.float32)

# Unsigned-byte quantities, one value per example.
ClusterType_test = np.zeros(size_test, dtype=np.ubyte)
ClusterModuleNumber_test = np.zeros(size_test, dtype=np.ubyte)
ClusterX_test = np.zeros(size_test, dtype=np.ubyte)
ClusterY_test = np.zeros(size_test, dtype=np.ubyte)

# Boolean flag and short-integer code, one value per example.
PartIsPrimary_test = np.zeros(size_test, dtype=bool)
PartPID_test = np.zeros(size_test, dtype=np.short)

## Load data into arrays

In [8]:
def load_into(arr, keyword, dataset):
    """Copy one field of the pi0, eta and bck archives into ``arr``, back to back.

    ``arr`` is filled in place in the fixed order pi0, eta, bck: the first
    ``pi0_size_*`` entries come from the pi0 archive, the next ``eta_size_*``
    entries from the eta archive and the last ``bck_size_*`` entries from the
    bck archive.

    Relies on the notebook-level globals ``data_{pi0,eta,bck}_{train,test}``
    and ``{pi0,eta,bck}_size_{train,test}``; only the globals of the requested
    split are touched.

    Parameters
    ----------
    arr : numpy.ndarray
        Pre-allocated destination. Its first axis must be exactly as long as
        the three class sizes of the chosen split added together.
    keyword : str
        Key to read from each archive, e.g. ``'ClusterE'``.
    dataset : str
        Which split to merge: ``'train'`` or ``'test'``.

    Raises
    ------
    ValueError
        If ``dataset`` is neither ``'train'`` nor ``'test'``, or if the length
        of ``arr`` does not match the combined class sizes.
    """
    if dataset == 'train':
        parts = (
            (data_pi0_train, pi0_size_train),
            (data_eta_train, eta_size_train),
            (data_bck_train, bck_size_train),
        )
    elif dataset == 'test':
        parts = (
            (data_pi0_test, pi0_size_test),
            (data_eta_test, eta_size_test),
            (data_bck_test, bck_size_test),
        )
    else:
        # Fail loudly instead of silently leaving ``arr`` untouched.
        raise ValueError(
            "dataset must be 'train' or 'test', got {!r}".format(dataset)
        )

    # Work with plain Python ints so the offset arithmetic cannot wrap around
    # if the stored sizes are unsigned NumPy scalars.
    counts = [int(size) for _, size in parts]

    # Check before writing so a mis-sized destination is never partially filled.
    total = sum(counts)
    if len(arr) != total:
        raise ValueError(
            "arr has length {} but the {} split holds {} entries".format(
                len(arr), dataset, total
            )
        )

    # Explicit running offsets instead of a negative-index slice for the last
    # class: ``arr[-0:]`` selects the whole array, so an empty class would
    # otherwise trigger a broadcast error.
    start = 0
    for (source, _), count in zip(parts, counts):
        stop = start + count
        arr[start:stop] = source[keyword]
        start = stop

In [9]:
# Fill every train array: each destination receives the entries stored under
# its key in the pi0, eta and bck archives via load_into.
for _keyword, _target in (
    ('Cluster', Cluster_train),
    ('ClusterType', ClusterType_train),
    ('ClusterE', ClusterE_train),
    ('ClusterPt', ClusterPt_train),
    ('ClusterModuleNumber', ClusterModuleNumber_train),
    ('ClusterX', ClusterX_train),
    ('ClusterY', ClusterY_train),
    ('ClusterM20', ClusterM20_train),
    ('ClusterM02', ClusterM02_train),
    ('PartE', PartE_train),
    ('PartPt', PartPt_train),
    ('PartEta', PartEta_train),
    ('PartPhi', PartPhi_train),
    ('PartIsPrimary', PartIsPrimary_train),
    ('PartPID', PartPID_train),
):
    load_into(_target, _keyword, 'train')

In [10]:
# Fill every test array: each destination receives the entries stored under
# its key in the pi0, eta and bck archives via load_into.
for _keyword, _target in (
    ('Cluster', Cluster_test),
    ('ClusterType', ClusterType_test),
    ('ClusterE', ClusterE_test),
    ('ClusterPt', ClusterPt_test),
    ('ClusterModuleNumber', ClusterModuleNumber_test),
    ('ClusterX', ClusterX_test),
    ('ClusterY', ClusterY_test),
    ('ClusterM20', ClusterM20_test),
    ('ClusterM02', ClusterM02_test),
    ('PartE', PartE_test),
    ('PartPt', PartPt_test),
    ('PartEta', PartEta_test),
    ('PartPhi', PartPhi_test),
    ('PartIsPrimary', PartIsPrimary_test),
    ('PartPID', PartPID_test),
):
    load_into(_target, _keyword, 'test')

## Save the train and test datasets for further use

In [13]:
# Write the merged train set as a single compressed archive; each keyword
# below becomes the key of the corresponding array inside the file
# (np.savez_compressed appends '.npz' to the given name).
np.savez_compressed(
    '../CNN/Data/data_train',
    Cluster=Cluster_train,
    ClusterE=ClusterE_train,
    ClusterPt=ClusterPt_train,
    ClusterModuleNumber=ClusterModuleNumber_train,
    ClusterType=ClusterType_train,
    ClusterX=ClusterX_train,
    ClusterY=ClusterY_train,
    ClusterM02=ClusterM02_train,
    ClusterM20=ClusterM20_train,
    PartE=PartE_train,
    PartPt=PartPt_train,
    PartEta=PartEta_train,
    PartPhi=PartPhi_train,
    PartIsPrimary=PartIsPrimary_train,
    PartPID=PartPID_train,
)

In [14]:
# Write the merged test set as a single compressed archive; each keyword
# below becomes the key of the corresponding array inside the file
# (np.savez_compressed appends '.npz' to the given name).
np.savez_compressed(
    '../CNN/Data/data_test',
    Cluster=Cluster_test,
    ClusterE=ClusterE_test,
    ClusterPt=ClusterPt_test,
    ClusterModuleNumber=ClusterModuleNumber_test,
    ClusterType=ClusterType_test,
    ClusterX=ClusterX_test,
    ClusterY=ClusterY_test,
    ClusterM02=ClusterM02_test,
    ClusterM20=ClusterM20_test,
    PartE=PartE_test,
    PartPt=PartPt_test,
    PartEta=PartEta_test,
    PartPhi=PartPhi_test,
    PartIsPrimary=PartIsPrimary_test,
    PartPID=PartPID_test,
)