In [1]:
import random
import h5py
import numpy as np
import tqdm
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
from sklearn.preprocessing import StandardScaler
import math
import sys
import os


In [3]:
# Get the current working directory
current_directory = os.getcwd()

# Define the sample size
sample_size = 512

# Define the paths relative to the current working directory
data_raw_where = os.path.join(current_directory, 'data/')
data_sampled_where = os.path.join(current_directory, 'data/data_sampled/')
file_path = os.path.join(current_directory, 'data/run210.h5')

##### Check Directories

In [4]:
print("Current working directory:", current_directory)
print("Data Raw Directory:", data_raw_where)
print("Data Sampled Directory:", data_sampled_where)
print("H5 File Path:", file_path)

Current working directory: /home/DAVIDSON/dmkurdydyk/FRIB_Distant_Transfer
Data Raw Directory: /home/DAVIDSON/dmkurdydyk/FRIB_Distant_Transfer/data/
Data Sampled Directory: /home/DAVIDSON/dmkurdydyk/FRIB_Distant_Transfer/data/data_sampled/
H5 File Path: /home/DAVIDSON/dmkurdydyk/FRIB_Distant_Transfer/data/run210.h5


In [5]:
# Open the H5 file
file = h5py.File(file_path, 'r')

# The following will be needed to run later cells to align event ids
original_keys = list(file.keys()) # the .keys() function lists the labels in a dictionary
original_length = len(original_keys)

#making an array of the lengths of events
event_lens = np.zeros(original_length, int)

In [6]:
#For experimental data, some events in the h5 file might be empty, the following code remove empty events:

#count non-empty events
count = 0
#record non-empty events index
index = np.zeros(original_length, int)

for i in range(original_length):
    event = original_keys[i]
    event_lens[i-count] = len(file[event])
    if event_lens[i-count] < 5:
        count += 1
    else:
        index[i-count] = i
#remove empty event index
original_length = original_length - count


event_lens = event_lens[:original_length]
index = index[:original_length]

np.save(data_raw_where + 'event_lens_old.npy', event_lens)

In [7]:
#making an array of the events data-- [event #, instance, data value]
#length of each event is based on the longest event in dataset
#5th index now corresponds to index of event id in original_keys
# each instance will index according to the following 
# [0]x,[1]y,[2]z, [3]time, [4]Amplitude, [5]Event_id index

event_data = np.zeros((original_length, np.max(event_lens), 6), float) 

j = 0
for n in tqdm.tqdm(index):
    name = original_keys[n]
    event = file[name]
    ev_len = len(event)
    #converting event into an array
    for i,e in enumerate(event):
        instant = np.array(list(e))
        event_data[j][i][:3] = np.array(instant[:3]) #X, Y, Z
        event_data[j][i][3] = np.array(instant[4]) #Amplitude (charge)
        event_data[j][i][4] = ev_len # #points in events 
        event_data[j][i][-1] = int(n) #event Id
    j += 1
np.save(data_raw_where + 'more_than_5_old', event_data)

100%|██████████| 77659/77659 [14:57<00:00, 86.51it/s] 


In [8]:
#Randomly choose sample_size of events
event_lens = np.load(data_raw_where + 'event_lens_old.npy')
data = np.load(data_raw_where + 'more_than_5_old.npy')
#insert desired array to sample from 

new_data = np.zeros((original_length, sample_size, 6), float) 
for i in tqdm.tqdm(range(original_length)):
    instant = 0
    ev_len = event_lens[i]    #length of event-- i.e. number of instances
    random_points = np.random.choice(range(ev_len), sample_size, replace = True if sample_size > ev_len else False)    #choosing the random instances to sample
    for r in random_points:
        new_data[i,instant] = data[i,r]
        instant += 1
np.save(data_sampled_where + str(sample_size)+'_sampled_old', new_data)      #creating new dataset within the h5 file for the event

100%|██████████| 77659/77659 [00:31<00:00, 2470.17it/s]
