In [1]:
import h5py
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pylab as py

# Walk through of script: correct_padnumbers_usinglookupfile.py

## Edit h5 file of data set
### Goal: replace the values of 20,000 in column 4 with the correct pad numbers from the lookup table
#### Save the lookup table as a 2D NumPy array

In [2]:
# Read the first five columns of the lookup file and label them, in file
# order, as: CoBo, AsAd, AGET, Channel, pad number.
lookup = pd.read_csv(
    'flatlookup.csv',
    usecols=[0,1,2,3,4],
    names=['CoBo','AsAd','AGET','Channel','pad_number'],
)
# Expose each column as its own Series for the cells that follow.
CoBo, AsAd, AGET, Channel, pad_number = (
    lookup[column] for column in ('CoBo','AsAd','AGET','Channel','pad_number')
)

In [3]:
# Assemble the five lookup columns into the 2-D integer array 'lookuparray':
# one row per lookup entry, columns = CoBo, AsAd, AGET, Channel, pad number.
# Rows keep the order of the file; they are NOT sorted by pad number.
lookuparray = np.zeros(shape=(10240,5), dtype=int)
# Stack the columns side by side and copy the first 10240 rows in one step.
lookuparray[:] = np.column_stack((CoBo, AsAd, AGET, Channel, pad_number))[:10240]

In [4]:
# Keep only the four address columns (CoBo, AsAd, AGET, Channel) of the
# lookup table, i.e. every column except the pad number.
lookuparrayfirst4col = np.zeros(shape=(10240,4), dtype=int)
lookuparrayfirst4col[:] = lookuparray[:10240, :4]

### extract the data from the h5 file

In [5]:
# Dataset names of the events inside the h5 file: 'get/1' through 'get/74'.
events = ['get/' + str(event_number) for event_number in range(1, 75)]

In [6]:
# Load every event dataset of the run file into the 3-D array
# 'dataarrayevent', indexed as [event, row, column].
dataarrayevent = np.zeros(shape=(74,10240,517), dtype=int)
dataarray = np.zeros(shape=(10240,517), dtype=int)
# Mode 'r' opens the file read-only; 'r+' would open it for read/write.
with h5py.File('run_0210.h5', 'r') as run_file:
    for slot in range(74):
        # Copy the dataset into memory before the file is closed.
        dataarray = np.copy(run_file[events[slot]])
        dataarrayevent[slot] = dataarray

In [7]:
# For every event keep only the first four columns of each row in the 3-D
# array 'dataarrayeventfirst4col' (these are the columns later compared
# against the lookup table).
dataarrayeventfirst4col = np.zeros(shape=(74,10240,4), dtype=int)
dataarrayeventfirst4col[:] = dataarrayevent[:74, :, :4]

### compare 'lookuparray' with each 'dataarray' for each of 74 events in 'dataarrayevent'
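# Matching method (note: brute-force comparison of every lookup row with every data row takes 74 × 10240 × 10240 checks and is very slow)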

In [None]:
# Replace column 4 of every data row with the pad number of the lookup row
# whose first four columns (CoBo, AsAd, AGET, Channel) match that data row.
#
# Comparing every lookup row with every data row of every event would take
# 74 * 10240 * 10240 array comparisons. Instead, index the lookup table once
# by its four-column address and resolve each data row with one dictionary
# lookup.
pad_by_address = {}
for address, pad in zip(lookuparrayfirst4col.tolist(), lookuparray[:, 4].tolist()):
    # If an address occurs more than once in the lookup table, the later row
    # overwrites the earlier one, so the last matching lookup row wins.
    pad_by_address[tuple(address)] = pad

for event_index, event_addresses in enumerate(dataarrayeventfirst4col):   # events
    for row_index, address in enumerate(event_addresses.tolist()):         # data rows
        pad = pad_by_address.get(tuple(address))
        # Rows whose address is not in the lookup table are left unchanged.
        if pad is not None:
            # set the dataarrayevent pad number value as the lookuparray pad number value
            dataarrayevent[event_index, row_index, 4] = pad


### dataarrayevent now has the corrected pad numbers

In [26]:
# Back up each event of 'dataarrayevent' to its own file on the cluster in
# case the data is lost. Files are named event_1 ... event_74 (np.save adds
# the .npy extension).
for event_number, event_data in enumerate(dataarrayevent[:74], start=1):
    np.save("event_" + str(event_number), event_data)

## Write a corrected h5 file

In [55]:
# Write the events of 'dataarrayevent' to a new file, using the same dataset
# names ('get/1' ... 'get/74') that were read from the original file
# 'run_0210.h5'.
# The 'with' block closes the file even if writing a dataset raises, so a
# failed run cannot leave the output file open.
with h5py.File('corrected_run_0210.h5', 'w') as h5:
    for i in range(74):
        name = 'get/'+str(i+1)
        h5.create_dataset(name, data=dataarrayevent[i])

### Check that the h5 file is formatted as expected

In [40]:
# Rebuild the list of event dataset names ('get/1' through 'get/74') used to
# address each event in the h5 file.
events = ['get/' + str(event_number) for event_number in range(1, 75)]

In [54]:
# Spot-check the corrected file: print column 4 of the row at index 1 in the
# first event ('get/1').
with h5py.File('corrected_run_0210.h5', 'r') as corrected_file:
    first_event = corrected_file['get/1']
    print(first_event[1][4])
        

6865
