In [1]:
import os
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
import pandas as pd

In [2]:
path_prefix = '/Work/EPE/ML4pi/'
plotpath = path_prefix+'plots/'
modelpath_c = path_prefix+''
modelpath = path_prefix+''

In [3]:
cell_meta = {
    'EMB1': {
        'cell_size_phi': 0.098,
        'cell_size_eta': 0.0031,
        'len_phi': 4,
        'len_eta': 128
    },
    'EMB2': {
        'cell_size_phi': 0.0245,
        'cell_size_eta': 0.025,
        'len_phi': 16,
        'len_eta': 16
    },
    'EMB3': {
        'cell_size_phi': 0.0245,
        'cell_size_eta': 0.05,
        'len_phi': 16,
        'len_eta': 8
    },
    'TileBar0': {
        'cell_size_phi': 0.1,
        'cell_size_eta': 0.1,
        'len_phi': 4,
        'len_eta': 4
    },
    'TileBar1': {
        'cell_size_phi': 0.1,
        'cell_size_eta': 0.1,
        'len_phi': 4,
        'len_eta': 4
    },
    'TileBar2': {
        'cell_size_phi': 0.1,
        'cell_size_eta': 0.2,
        'len_phi': 4,
        'len_eta': 2
    },
}

In [4]:
layers = ["EMB1", "EMB2", "EMB3", "TileBar0", "TileBar1", "TileBar2"]
cell_size_phi = [0.098, 0.0245, 0.0245, 0.1, 0.1, 0.1]
cell_size_eta = [0.0031, 0.025, 0.05, 0.1, 0.1, 0.2]
len_phi = [4, 16, 16, 4, 4, 4]
len_eta = [128, 16, 8, 4, 4, 2]

In [5]:
import sys
sys.path.append(path_prefix)
from util import resolution_util as ru
from util import plot_util as pu
from util import ml_util as mu
import uproot3 as ur

In [6]:
data_path = path_prefix + "v7/"
pionp_file = ur.open(data_path + "piplus.root")
pp_tree = pionp_file["ClusterTree"]
pp_tree.show()

runNumber                  (no streamer)              asdtype('>i4')
eventNumber                (no streamer)              asdtype('>i4')
truthE                     (no streamer)              asdtype('>f4')
truthPt                    (no streamer)              asdtype('>f4')
truthEta                   (no streamer)              asdtype('>f4')
truthPhi                   (no streamer)              asdtype('>f4')
clusterIndex               (no streamer)              asdtype('>i4')
nCluster                   (no streamer)              asdtype('>i4')
clusterE                   (no streamer)              asdtype('>f4')
clusterECalib              (no streamer)              asdtype('>f4')
clusterPt                  (no streamer)              asdtype('>f4')
clusterEta                 (no streamer)              asdtype('>f4')
clusterPhi                 (no streamer)              asdtype('>f4')
cluster_nCells             (no streamer)              asdtype('>i4')
cluster_sumCellE           (no str

In [7]:
branches = ['clusterIndex', 'truthE', 'nCluster', 'clusterE', 'clusterECalib', 'clusterPt',
            'clusterEta', 'clusterPhi', 'cluster_nCells', 'cluster_sumCellE', 'cluster_ENG_CALIB_TOT', 
            'cluster_ENG_CALIB_OUT_T', 'cluster_ENG_CALIB_DEAD_TOT', 'cluster_EM_PROBABILITY', 'cluster_HAD_WEIGHT', 
            'cluster_CENTER_MAG', 'cluster_FIRST_ENG_DENS', 'cluster_cellE_norm']
pp = pp_tree.pandas.df(branches, flatten=False)
pp.head()

Unnamed: 0_level_0,clusterIndex,truthE,nCluster,clusterE,clusterECalib,clusterPt,clusterEta,clusterPhi,cluster_nCells,cluster_sumCellE,cluster_ENG_CALIB_TOT,cluster_ENG_CALIB_OUT_T,cluster_ENG_CALIB_DEAD_TOT,cluster_EM_PROBABILITY,cluster_HAD_WEIGHT,cluster_CENTER_MAG,cluster_FIRST_ENG_DENS,cluster_cellE_norm
entry,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
0,0,1950.726807,3,1706.589233,1847.573242,1393.735718,0.658092,-2.112122,353,1705.227661,1790.404785,7.076038,44.067638,0.01813,1.024731,2423.317871,4.277404e-07,"[0.31853524, 0.0076545365, 0.25650582, 0.02611..."
1,1,1950.726807,3,58.192909,78.681816,46.81514,0.683789,-2.178753,45,54.173908,73.329506,0.75551,11.988488,0.001,1.191867,3425.696777,1.755061e-10,"[0.027513273, 0.0021719737, 0.0013425057, 0.00..."
2,2,1950.726807,3,0.34919,0.769498,0.315112,0.460979,-2.405282,2,0.285924,0.259033,0.692205,0.078698,0.000623,1.21251,3913.996338,1.60322e-12,"[0.6538643, 0.18118072, 0.16495493]"
3,0,18.619093,2,11.992596,21.729856,11.850097,0.154927,-1.119611,33,11.971533,12.955562,1.463615,2.49996,0.00657,1.167025,2252.05127,8.310836e-10,"[0.16594508, 0.0015814339, 0.05095686, 0.01340..."
4,1,18.619093,2,0.398268,0.827007,0.393829,0.15,-0.93266,1,0.398268,0.116087,0.330997,0.014711,0.0036,1.109439,3054.038818,5.170689e-12,[1.0]


In [8]:
pp['clusterEoverCalib'] = pp.clusterE / pp.cluster_ENG_CALIB_TOT
pp['clusterEoverTruth'] = pp.clusterE / pp.truthE
pp['cluster_ENG_ALL'] = pp.cluster_ENG_CALIB_TOT + pp.cluster_ENG_CALIB_OUT_T + pp.cluster_ENG_CALIB_DEAD_TOT
# regression variables
pp['logE'] = np.log(pp.clusterE)
pp['logECalib'] = np.log(pp.cluster_ENG_CALIB_TOT)
pp['logECalibAll'] = np.log(pp.cluster_ENG_ALL)

In [9]:
from sklearn.preprocessing import StandardScaler
# create scaler
scaler_e = StandardScaler()
scaler_cal = StandardScaler()
scaler_calAll = StandardScaler()
# fit and transform in one step
pp['s_logE'] = scaler_e.fit_transform(pp['logE'].to_numpy().reshape(-1,1))
pp['s_logECalib'] = scaler_cal.fit_transform(pp['logECalib'].to_numpy().reshape(-1,1))
pp['s_logECalibAll'] = scaler_calAll.fit_transform(pp['logECalibAll'].to_numpy().reshape(-1,1))

In [26]:
epsilon = 0.000000000001
pp['cluster_fixEM'] = pp.cluster_EM_PROBABILITY.replace(to_replace=0,value=epsilon)
pp['cluster_logEM'] = np.log(pp.cluster_fixEM)
scaler_EM = StandardScaler()
pp['s_logEM'] = scaler_EM.fit_transform(pp.cluster_logEM.to_numpy().reshape(-1,1))
scaler_eta = StandardScaler()
pp['s_eta'] = scaler_eta.fit_transform(pp.clusterEta.to_numpy().reshape(-1,1))
pp['logCenter'] = np.log(pp.cluster_CENTER_MAG)
scaler_center = StandardScaler()
pp['s_logCenter'] = scaler_center.fit_transform(pp.logCenter.to_numpy().reshape(-1,1))
scaler_density = StandardScaler()
pp['s_Density'] = scaler_density.fit_transform(pp.cluster_FIRST_ENG_DENS.to_numpy().reshape(-1,1))

In [10]:
def setupCells(tree, layer, nrows = -1, indices = [], flatten=True):
    array = tree.array(layer)
    if nrows > 0:
        array = array[:nrows]
    elif len(indices) > 0:
        array = array[indices]
    num_pixels = cell_meta[layer]['len_phi'] * cell_meta[layer]['len_eta']
    if flatten:
        array = array.reshape(len(array), num_pixels)
    
    return array

In [34]:
a = pp_tree["EMB1"].show()

EMB1                       (no streamer)              asdtype("('>f4', (128, 4))")


In [11]:
EMB1_cells = setupCells(pp_tree, 'EMB1') # implicitly flatten
EMB2_cells = setupCells(pp_tree, 'EMB2')
EMB3_cells = setupCells(pp_tree, 'EMB3') # implicitly flatten

TileBar0_cells = setupCells(pp_tree, 'TileBar0') # implicitly flatten
TileBar1_cells = setupCells(pp_tree, 'TileBar1') # implicitly flatten
TileBar2_cells = setupCells(pp_tree, 'TileBar2') # implicitly flatten

In [33]:
# [len(a) for a in EMB1_cells]

In [17]:
def standardCells(array, layer, nrows = -1):
    if nrows > 0:
        working_array = array[:nrows]
    else:
        working_array = array

    scaler = StandardScaler()
    if type(layer) == str:
        num_pixels = cell_meta[layer]['len_phi'] * cell_meta[layer]['len_eta']
    elif type(layer) == list:
        num_pixels = 0
        for l in layer:
            num_pixels += cell_meta[l]['len_phi'] * cell_meta[l]['len_eta']
    else:
        print('you should not be here')

    num_clusters = len(working_array)

    flat_array = np.array(working_array.reshape(num_clusters * num_pixels, 1))


    scaled = scaler.fit_transform(flat_array)

    reshaped = scaled.reshape(num_clusters, num_pixels)
    return reshaped, scaler

In [27]:
combine_All_cells = np.concatenate((EMB1_cells, EMB2_cells, EMB3_cells, TileBar0_cells, TileBar1_cells, TileBar2_cells), axis=1)
s_All_cells, scaler_All_cells = standardCells(combine_All_cells, ['EMB1', 'EMB2', 'EMB3', 'TileBar0', 'TileBar1', 'TileBar2'])
All_input = np.column_stack((pp['s_logE'],pp['s_eta'],s_All_cells))

In [31]:
[len(a) for a in All_input]

TypeError: 'tuple' object is not callable