# Notebook dedicated to computing geometric observables that discriminate directional patterns using the spatial distribution of hits

Ideas: Centroid, Inertia Tensor.

In [1]:
import numpy as np
import pandas as pd
import random 

import glob
import re
import os

import seaborn as sn
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import cm

# Load Numpy files

In [2]:
# ============ Data Cuts Settings ============
# These values are interpolated verbatim into the dataset directory name,
# so their literal form (2.5, 12, -1, ...) must match the folder on disk.

# Energy window: (lower, upper) bounds -- labelled MeV in the directory name
energy_inf_cut, energy_sup_cut = 2.5, 12

# Radial position cut -- labelled mm in the directory name
posr_cut = 5500

# Time-residual window: (lower, upper) bounds -- labelled ns in the directory name
time_res_inf_cut, time_res_sup_cut = -1, 5

# Nhits cut, meant to be applied once the time-residual window has been imposed
nhits_cut = 20

In [20]:
# Directory with the flattened .npy arrays; its name encodes the cuts chosen in the settings cell
main_dir = f'E:/Data/solars/mc/ML Data/np_arrays/Train/E_{energy_inf_cut}_{energy_sup_cut}_MeV_R_{posr_cut}_mm_Delta_{time_res_inf_cut}_{time_res_sup_cut}_ns/'
solar_fpattern = 'SolarNue_'
tl208_fpattern = '208Tl_'

observable_name = ['evtid', 'energy', 'posr', 'cos_alpha', 'hit_residual', 'hit_pmtQHS', 'position', 'hitpmt_xyz']


def _load_flat_observable(file_prefix, var_name):
    """Load every `<file_prefix><var_name>*.npy` file in `main_dir` into one flat array.

    Parameters
    ----------
    file_prefix : str
        Dataset prefix of the file names (e.g. `solar_fpattern`).
    var_name : str
        Observable name that follows the prefix in the file names.

    Returns
    -------
    numpy.ndarray
        1-D array with the flattened content of all matching files, in sorted
        file-name order. Empty (float64) when no file matches.
    """
    # glob returns files in arbitrary order; sorting makes every observable be
    # concatenated in the same file sequence, which keeps the arrays aligned
    # (assumes all observables share the same set of file suffixes).
    file_list = sorted(glob.glob(main_dir + file_prefix + var_name + '*.npy'))

    # Flatten each chunk and concatenate once, instead of re-copying the whole
    # accumulated array for every file with np.append.
    chunks = [np.ravel(np.load(file_i)) for file_i in file_list]

    # The leading empty float64 array reproduces the dtype promotion obtained when
    # appending to np.array([]) and makes the no-file case return an empty array.
    return np.concatenate([np.array([])] + chunks)


# Load each observable and fill one dictionary per dataset
solar_dict = {var_i: _load_flat_observable(solar_fpattern, var_i) for var_i in observable_name}
tl208_dict = {var_i: _load_flat_observable(tl208_fpattern, var_i) for var_i in observable_name}

# Reshape the coordinate geometric observables into rows of (x, y, z)
for coord_var_i in ('position', 'hitpmt_xyz'):
    solar_dict[coord_var_i] = solar_dict[coord_var_i].reshape(-1, 3)
    tl208_dict[coord_var_i] = tl208_dict[coord_var_i].reshape(-1, 3)

In [21]:
# Sanity check: largest Tl208 energy in the loaded sample.
# ndarray.max() is a vectorised reduction; the builtin max() walks the array element by element.
tl208_dict['energy'].max()

5.350650787353516

## Remove Redundant Values

In [22]:
# List of Observables
# Event-level observables: stored once per hit in the flat arrays, so one value per event is kept
nonper_hit_observables_name = ['evtid', 'posr', 'energy', 'position'] 
# Hit-level observables: every hit of the event is kept
per_hit_observables = ['cos_alpha', 'hit_residual', 'hit_pmtQHS', 'hitpmt_xyz']


# ============= Solar Processing =============

# Extract the redundant evID (repeated for each hit of an event)
solar_evID = solar_dict['evtid']

# Per-event containers: one list entry per event for every observable
solar_dict_ev = {var_i: [] for var_i in nonper_hit_observables_name + per_hit_observables}

solar_N_data = len(solar_evID)

print('Looking for break index in solars evID')
# evID index break list: element k is the index of the first hit of event k.
# A new event starts wherever the evID differs from the one of the previous hit.
# NOTE(review): two consecutive events sharing the same evID (e.g. across input
# files) would be merged into one -- confirm this cannot happen in the data.
solar_data_break_i = [0] + (np.flatnonzero(solar_evID[1:] != solar_evID[:-1]) + 1).tolist()

# Close the last event. Slices are end-exclusive, so the closing boundary must be
# the total number of hits (N - 1 would drop the last hit of the last event and
# fail with an empty slice when that event has a single hit).
if solar_N_data > 0:
    solar_data_break_i.append(solar_N_data)

N_solar_terms = len(solar_data_break_i)

# Save the non-redundant (non-per-hit) values and separate the per-hit values for each event
# Solar Loop
print(f'saving the observables for solars')
for init_i, final_i in zip(solar_data_break_i[:-1], solar_data_break_i[1:]):

    # Event-level values are identical for all hits of the event: keep the first one
    for var_i in nonper_hit_observables_name:
        solar_dict_ev[var_i].append(solar_dict[var_i][init_i])

    # Hit-level values: keep the full block of hits of this event
    for var_i in per_hit_observables:
        solar_dict_ev[var_i].append(solar_dict[var_i][init_i : final_i])

# Transform to array (only event-level observables; per-hit lists are ragged)
print('Transforming solars to np.array')
for var_i in nonper_hit_observables_name:
    print(f'transforming to np.array {var_i}')
    solar_dict_ev[var_i] = np.array(solar_dict_ev[var_i])

Looking for break index in solars evID
saving the observables for solars
Transforming solars to np.array
transforming to np.array evtid
transforming to np.array posr
transforming to np.array energy
transforming to np.array position


In [23]:
# ============= Tl208 Processing =============

# Extract the redundant evID (repeated for each hit of an event)
tl208_evID = tl208_dict['evtid']

# Per-event containers: one list entry per event for every observable
tl208_dict_ev = {var_i: [] for var_i in nonper_hit_observables_name + per_hit_observables}

tl208_N_data = len(tl208_evID)

print('Looking for break index in Tl208 evID')
# evID index break list: element k is the index of the first hit of event k.
# A new event starts wherever the evID differs from the one of the previous hit.
# NOTE(review): two consecutive events sharing the same evID (e.g. across input
# files) would be merged into one -- confirm this cannot happen in the data.
tl208_data_break_i = [0] + (np.flatnonzero(tl208_evID[1:] != tl208_evID[:-1]) + 1).tolist()

# Close the last event. Slices are end-exclusive, so the closing boundary must be
# the total number of hits (N - 1 would drop the last hit of the last event and
# fail with an empty slice when that event has a single hit).
if tl208_N_data > 0:
    tl208_data_break_i.append(tl208_N_data)

N_tl208_terms = len(tl208_data_break_i)

# Save the non-per-hit values and separate the per-hit values for each event
# Loop on Events
print(f'saving the observables for Tl208')
for init_i, final_i in zip(tl208_data_break_i[:-1], tl208_data_break_i[1:]):

    # Event-level values are identical for all hits of the event: keep the first one
    for var_i in nonper_hit_observables_name:
        tl208_dict_ev[var_i].append(tl208_dict[var_i][init_i])

    # Hit-level values: keep the full block of hits of this event
    for var_i in per_hit_observables:
        tl208_dict_ev[var_i].append(tl208_dict[var_i][init_i : final_i])

# Transform to array (only event-level observables; per-hit lists are ragged)
print('Transforming Tl208 to np.array')
for var_i in nonper_hit_observables_name:
    print(f'transforming to np.array {var_i}')
    tl208_dict_ev[var_i] = np.array(tl208_dict_ev[var_i])

Looking for break index in Tl208 evID
saving the observables for Tl208
Transforming Tl208 to np.array
transforming to np.array evtid
transforming to np.array posr
transforming to np.array energy
transforming to np.array position


# Test with Geometric-Charge Weighted Observables 

## Centroid
The idea is to find the center of mass (CM) coordinates of the PMTs taking the charge as the mass. Then, build a direction vector using the CM and the reconstructed position. It can be used to:

- Compute the modulus of the vector $|D|$. If $|D| \sim 0$ the event is more isotropic and is a probable background;
- Compare the direction of $D$ with the Sun direction and evaluate whether they match for solar events. For example, the scalar product $D \cdot v_{sun}$ (with both vectors normalized) should be close to one if $D$ describes the event direction;

## Inertia Tensor

The idea is to compute the matrix elements of the Inertia Tensor (IT) using the hit PMT coordinates, taking the charge as the mass of each PMT. Then, compute the principal values of inertia, which should describe the charge-weighted distribution of the hit pattern and may distinguish directional events from isotropic events.