# Background

The experiments record the trajectories (a series of x,y,z coordinates, with associated amplitude of each point) of products resulting from collision/reaction between Mg22 and alpha particles in ATTPC.

# User-Desired Settings

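`ISOTOPE` labels the dataset and is used as the prefix of every output file name. Note: this text originally stated that the isotope used in this experiment is Mg22, but the value set below is `'Fission'` — confirm which is intended.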

In [None]:
# Dataset label; prefixed to the name of every file this notebook saves or reloads.
ISOTOPE = 'Fission'

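For each point, a high amplitude (of voltage?) suggests a high-quality detection, whereas a low amplitude suggests the detection may result from noise. `amp_threshold` is the lowest amplitude a point needs in order to be counted as a high-amplitude point in our analysis. Note: this text originally gave the threshold as 50, but the value set below is 0 — confirm which is intended.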

In [None]:
# Amplitude cut-off: plot_events (mode 0) treats points with amplitude >= this
# value as high-amplitude. The value is also embedded in the sampled/scaled
# output file names ("_exp_A<threshold>_...").
amp_threshold = 0

The neural network model requires a fixed number of inputs. Because the actual events contain different numbers of points, we select exactly 512 points per event (some points may be repeated) as the final input for each event.

In [None]:
# Number of points kept per event in the sampled (fixed-size) arrays.
sample_size = 512

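Outputs are stored in the folder named by `dir_name`. Note: this text originally referred to a folder named "test", but the value set below is `'fission_data/'`. The notebook reads its input `.h5` file from this folder and does not create it, so the folder must already exist.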

In [None]:
# Folder (with trailing slash) that holds the input .h5 file and receives all
# .npy outputs; paths are built by plain string concatenation.
dir_name = 'fission_data/'

# Data Processing

## Import Libraries

In [None]:
import h5py
import numpy as np
import tqdm
import math
import random
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib.colors import LogNorm
from mpl_toolkits import mplot3d

## Import Data

In [None]:
# Open the HDF5 input read-only. The handle is intentionally left open:
# the following cells read each event's 'HitArray' through `file`.
path = dir_name + 'Bi200.h5'
file = h5py.File(path, 'r')
# Top-level keys of the file; each key is treated as one event.
event_ids = list(file.keys())

Make a 1d array of event lengths (number of points in each event).

In [None]:
# One entry per event: the number of hits stored in that event's 'HitArray'.
num_of_event = len(event_ids)
ev_lens = np.zeros(num_of_event, int)
for i, event_id in enumerate(event_ids):
    ev_lens[i] = len(file[event_id]['HitArray'])

# np.save appends the '.npy' extension to this path.
evlen_path = dir_name + ISOTOPE + '_exp_A0_ev_lens'
np.save(evlen_path, ev_lens)

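Make a 3d array of data.\
axis 0 (size `num_of_event`, e.g. 4330): events \
axis 1 (size `max_ev_len`, the length of the longest event, e.g. 1852): points within each event; shorter events are zero-padded\
axis 2 (size 4): x[0], y[1], z[2], amplitude[3] of each point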

In [None]:
# Zero-padded container: every event gets max_ev_len rows of (x, y, z, amplitude);
# rows beyond an event's own length stay at zero.
max_ev_len = np.max(ev_lens)
data = np.zeros((num_of_event, max_ev_len, 4), float)
for n in tqdm.tqdm(range(num_of_event)):
    hits = file[event_ids[n]]['HitArray']
    for i, hit in enumerate(hits):
        # Turn one hit record into a plain numeric array.
        values = np.array(list(hit))
        data[n, i, 0:3] = values[0:3]  # x, y, z
        data[n, i, 3] = values[4]      # amplitude (field 3 of the record is skipped)

# np.save appends the '.npy' extension to this path.
data_path = dir_name + ISOTOPE + '_exp_A0_XYZC'
np.save(data_path, data)

When running this notebook the second time, simply reload the data (instead of spending 10 min to repeat the step above).

In [None]:
# Reload the arrays written by the two cells above instead of rebuilding them.
evlen_path = f'{dir_name}{ISOTOPE}_exp_A0_ev_lens.npy'
data_path = f'{dir_name}{ISOTOPE}_exp_A0_XYZC.npy'
ev_lens = np.load(evlen_path)
data = np.load(data_path)
# Recover the array dimensions from the loaded data.
num_of_event = data.shape[0]
max_ev_len = data[0].shape[0]

In [None]:
# Per-event null flag, all initialised to 0 (plot_events shows an event as
# "Null" only when its flag equals 1). np.save appends '.npy' to the path.
nullInd_path = f'{dir_name}{ISOTOPE}_exp_nullInd'
null_ind = np.zeros(num_of_event, dtype=int)
np.save(nullInd_path, null_ind)

### Plot distributions

In [None]:
# Select only the real (non-padding) hits: row i of the mask has its first
# ev_lens[i] entries set. Boolean indexing walks the mask row by row, so the
# flattened arrays are ordered by event, then by hit within the event.
valid = np.arange(data.shape[1]) < np.asarray(ev_lens)[:, None]

# Total number of hits across all events
count = int(valid.sum())

# Flattened x, y, and amplitude (charge) of every hit
x_data = data[:, :, 0][valid]
y_data = data[:, :, 1][valid]
a_data = data[:, :, 3][valid]

# Clamp amplitudes for display: non-positive values become 1 and values of
# 10000 or more become 10000.
a_data = np.where(a_data > 0, a_data, 1)
a_data = np.where(a_data < 10000, a_data, 10000)

# Plot the 2D histogram for (x, y)
plt.figure(figsize=(6, 5))
plt.hist2d(x_data, y_data, bins=(100, 100), range=[[-300, 300], [-300, 300]], norm=LogNorm())
plt.xlabel('x')
plt.ylabel('y')
plt.title('Distribution of (x, y)')
plt.colorbar()
plt.show()

# Plot the histogram for amplitude. density=True normalises the histogram, so
# the y axis is a density rather than a raw count.
plt.figure(figsize=(6, 3))
plt.hist(a_data, bins=100, density=True)
plt.ylabel('Density')
plt.xlabel('Amplitude')
plt.title('Distribution of Amplitude')
plt.show()

### Plot Events

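This plotting function is shared by the datasets in this notebook; `mode` selects how the points are coloured and how each subplot is titled: \
mode = 0: raw data (points coloured by the amplitude threshold)\
mode = 1: null vs no-null data\
mode = 2 (or 4): no-null data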

In [None]:
# palette[0]: default / high-amplitude colour; palette[1]: low-amplitude colour
palette = ['k', 'c']


def plot_events(evlen_path, data_path, nullInd_path, amp_threshold, mode, rows):
    """Draw the first ``rows * 5`` events as 3D scatter plots on a ``rows`` x 5 grid.

    Parameters
    ----------
    evlen_path : path of the ``.npy`` file holding the number of points per event.
    data_path : path of the ``.npy`` file holding the event array, indexed as
        ``[event, point, (x, y, z, amplitude)]``.
    nullInd_path : path of the ``.npy`` file holding one flag per event; a value
        of 1 marks the event as null (only consulted when ``mode == 1``).
    amp_threshold : amplitude cut-off, only used when ``mode == 0``.
    mode : selects colouring and title.
        0 - points with amplitude >= ``amp_threshold`` are drawn in
            ``palette[0]``, the rest in ``palette[1]``; the title shows
            "wedge / above-threshold / total" point counts.
        1 - title shows "Null" for flagged events, else the point count.
        2 or 4 - title shows ``int(data[n, 0, 3])`` as the event number,
            followed by the point count.
        Any other value plots the points without a title.
    rows : number of subplot rows (five events per row).

    If fewer than ``rows * 5`` events are stored, only the available events are
    drawn and the remaining grid cells stay empty.
    """
    ev_lens = np.load(evlen_path)
    data = np.load(data_path)
    null_inds = np.load(nullInd_path)

    fig = plt.figure(figsize=(17, rows * 4))

    # Never index past the stored events when the grid has more cells than events.
    n_plots = min(rows * 5, len(ev_lens), len(data))

    for n in range(n_plots):
        ax = fig.add_subplot(rows, 5, n + 1, projection='3d')
        ev_len = ev_lens[n]
        evt = data[n, :ev_len, :]
        x, y, z, a = evt[:, 0], evt[:, 1], evt[:, 2], evt[:, 3]

        pt_count = 0
        wedge_pt_count = 0

        if mode == 0:
            high = a >= amp_threshold
            # "Wedge" region; only points at or above the threshold are counted.
            in_wedge = (
                high
                & (-130 < x) & (x < 10)
                & (-10 < y) & (y < 130)
                & (-x / 3 < y) & (y < -3 * x)
            )
            pt_count = int(np.count_nonzero(high))
            wedge_pt_count = int(np.count_nonzero(in_wedge))
            colors = [palette[0] if is_high else palette[1] for is_high in high]
        else:
            colors = palette[0]

        if len(evt) > 0:
            # One call per event instead of one per point. Depth shading is
            # disabled so every point is drawn in its plain palette colour.
            ax.scatter3D(x, y, z, color=colors, s=1, depthshade=False)

        ax.set_xlabel('x')
        ax.set_ylabel('y')
        ax.set_zlabel('z')

        if mode == 0:
            high_amp = patches.Patch(color=palette[0], label='A>=' + str(amp_threshold))
            low_amp = patches.Patch(color=palette[1], label='A<' + str(amp_threshold))
            ax.legend(handles=[high_amp, low_amp], fontsize='small')
            ax.set_title('Event {} \n'.format(n) + str(wedge_pt_count) + ' / ' + str(pt_count) + ' / ' + str(ev_len) + ' points')
        elif mode == 1:
            if null_inds[n] == 1:
                ax.set_title('Event {} \n'.format(n) + 'Null')
            else:
                ax.set_title('Event {} \n'.format(n) + str(ev_len) + ' points')
        elif mode == 2 or mode == 4:
            # NOTE(review): column 3 of the first point is used as the event
            # number here; confirm the input array really stores an event id
            # there rather than an amplitude.
            ax.set_title('Event {} \n'.format(int(data[n, 0, 3])) + str(ev_len) + ' points')

The subplots below display the raw xyz data. (mode = 0.)\
The three numbers in each title are, in order: (1) the number of above-threshold points detected in the "wedge" region; (2) the number of points with amplitudes at or above the user-desired threshold; (3) the total number of points.

In [None]:
evlen_path = dir_name + ISOTOPE + '_exp_A0_ev_lens.npy'
data_path = dir_name + ISOTOPE + '_exp_A0_XYZC.npy'
nullInd_path = dir_name + ISOTOPE + '_exp_nullInd.npy'
# Raw data is plotted with mode 0 (threshold colouring and the three point
# counts in the title). Mode 2 would title each subplot with the integer part
# of the first hit's amplitude, which is not an event number for this array.
plot_events(evlen_path, data_path, nullInd_path, amp_threshold, 0, 2)

## Sample Points

In [None]:
evlen_path = dir_name + ISOTOPE + '_exp_A0_ev_lens.npy'
data_path = dir_name + ISOTOPE + '_exp_A0_XYZC.npy'
data_noNull = np.load(data_path)
num_of_event = len(data_noNull)
max_ev_len = len(data_noNull[0])
ev_lens = np.load(evlen_path)
data_sampled = np.zeros((num_of_event, sample_size, 4), float)  # XYZC

for n in tqdm.tqdm(range(num_of_event)):
    ev_len = int(ev_lens[n])
    if ev_len >= sample_size:
        # Long event: keep its first sample_size points.
        data_sampled[n, :sample_size, :] = data_noNull[n, :sample_size, :]
    elif ev_len > 0:
        # Short event: keep every point, then fill the remaining slots with
        # points drawn at random from the same event. Draw without replacement
        # when the event has enough points to fill the gap, with replacement
        # otherwise.
        data_sampled[n, :ev_len, :] = data_noNull[n, :ev_len, :]
        need = sample_size - ev_len
        random_points = np.random.choice(ev_len, need, replace=bool(need > ev_len))
        data_sampled[n, ev_len:, :] = data_noNull[n, random_points, :]
    # ev_len == 0: there is nothing to draw from (np.random.choice would raise
    # on an empty population), so the event is left as all zeros.

# np.save appends the '.npy' extension to this path.
data_path = dir_name + ISOTOPE + '_exp_A' + str(amp_threshold) + '_noNull_size' + str(sample_size) + '_XYZC'
np.save(data_path, data_sampled)

## Normalize x, y, z, a

In [None]:
data_path = f'{dir_name}{ISOTOPE}_exp_A{amp_threshold}_noNull_size{sample_size}_XYZC.npy'
data_sampled = np.load(data_path)
data_scaled = np.copy(data_sampled)

# Clamp the amplitude column of data_sampled in place (through a view):
# anything that is not > 0 becomes 1, anything that is not < 10000 becomes 10000.
amplitude = data_sampled[:, :, 3]
amplitude[~(amplitude > 0)] = 1
amplitude[~(amplitude < 10000)] = 10000
# The scaled amplitude is the base-10 logarithm of the clamped value.
data_scaled[:, :, 3] = np.log10(amplitude)

# Rescale the coordinates: x and y are divided by 250; z is divided by 500
# and shifted by +1.
data_scaled[:, :, 0] /= 250
data_scaled[:, :, 1] /= 250
data_scaled[:, :, 2] = data_scaled[:, :, 2] / 500 + 1

# Save the scaled data
data_path_scaled = f'{dir_name}{ISOTOPE}_exp_A{amp_threshold}_noNull_size{sample_size}_scaled_XYZC.npy'
np.save(data_path_scaled, data_scaled)


The subplots below display normalized no-null events. (mode = 2.)

In [None]:
data_path = dir_name + ISOTOPE + '_exp_A' + str(amp_threshold) + '_noNull_size' + str(sample_size) + '_scaled_XYZC.npy'
evlen_path = dir_name + ISOTOPE + '_exp_A0_ev_lens.npy'
nullInd_path = dir_name + ISOTOPE + '_exp_nullInd.npy'
# `amp_threshold_scaled` is not defined anywhere in this notebook, so passing it
# raised NameError. plot_events only uses the threshold in mode 0, so the
# existing `amp_threshold` is passed for this mode-2 call.
plot_events(evlen_path, data_path, nullInd_path, amp_threshold, 2, 2)