# Notebook Dedicated to create the list of candidates (GTID + runID + subrunID)

In [1]:
import numpy as np
import glob
import pandas as pd
import seaborn as sn

# Load Data

## Load all Data Separately from Analysis15, Analysis15_bMR and Analysis20_bMR

In [18]:
main_dir = '/home/joankl/data/solars/real_data/bisMSB/first_candidates/'

runID_analysis15 = np.load(main_dir + 'analysis15/resume_files/runID.npy')
subrunID_analysis15 = np.load(main_dir + 'analysis15/resume_files/subrunID.npy')

runID_analysis15_bMR = np.load(main_dir + 'analysis15_bMR/resume_files/runID.npy')
subrunID_analysis15_bMR = np.load(main_dir + 'analysis15_bMR/resume_files/subrunID.npy')

runID_analysis20_bMR = np.load(main_dir + 'analysis20_bMR/resume_files/runID.npy')
subrunID_analysis20_bMR = np.load(main_dir + 'analysis20_bMR/resume_files/subrunID.npy')

print(f'for Analysis15, the run ID range is: [{min(runID_analysis15)}, {max(runID_analysis15)}]')
print(f'for Analysis15_bMR, the run ID range is: [{min(runID_analysis15_bMR)}, {max(runID_analysis15_bMR)}]')
print(f'for Analysis20_bMR, the run ID range is: [{min(runID_analysis20_bMR)}, {max(runID_analysis20_bMR)}]')

for Analysis15, the run ID range is: [364500.0, 371216.0]
for Analysis15_bMR, the run ID range is: [358084.0, 364307.0]
for Analysis20_bMR, the run ID range is: [354099.0, 358072.0]


## Load all Data Simultaneously

In [2]:
# Observabels to load
obs_list = ['energy_corrected', 'eventID', 'runID', 'subrunID']

# Define Directories
pattern_dir = '/home/joankl/data/solars/real_data/bisMSB/first_candidates/analysis*/resume_files/'
full_fdir = glob.glob(pattern_dir)

# Create empty dictionary to save the data
obs_dict = {var: np.array([]) for var in obs_list}

# Loop over the pattern dir:
for fdir_i in full_fdir:
    # Loop on the obs_list
    for obs_i in obs_list:
        obs_arr = np.load(fdir_i + obs_i + '.npy')

        #Save the observables
        obs_dict[obs_i] = np.append(obs_dict[obs_i], obs_arr)

# Extract data of interest and apply cuts
energy = obs_dict['energy_corrected']

en_cut = 5.0

condition = (energy >= en_cut)

energy = obs_dict['energy_corrected'][condition]
eventID = obs_dict['eventID'][condition]
runID = obs_dict['runID'][condition]
subrunID = obs_dict['subrunID'][condition]

In [9]:
max(runID)

np.float64(371216.0)

In [7]:
min(runID)

np.float64(354099.0)

In [20]:
subrunID

array([12., 11., 11.,  5., 10.,  4.,  1.,  5.,  2.,  9., 11.,  2.,  5.,
        5., 16.,  8.,  1.,  2.,  1., 15., 12.,  3.,  6., 10., 14., 10.,
        9., 15., 12.,  1., 11.,  7., 12.,  6.,  2.,  1., 11., 14., 15.,
       13.,  4.,  8., 12.,  4.,  4.,  4., 10., 12.,  5.,  3.,  4.,  1.,
        2.,  9.,  3.,  0.,  8.,  3., 13.,  3., 11., 11.,  3.,  2.,  2.,
       11., 14., 14., 12.,  4., 11.,  5.,  9., 16.,  8., 11.,  4., 14.,
        7.,  5., 11.,  3., 14.,  1., 11.,  3.,  7.,  5.,  3.,  0.,  9.,
        9.,  1.,  5.,  2.,  4.,  4., 13.,  9.,  3.,  9., 11.,  7., 13.,
        8.,  3.,  5.,  6., 14.,  2.,  7.,  8.,  2., 10.,  1.,  7.,  7.,
        5.,  1.,  6.,  9., 11.,  2., 16.,  9., 14., 10.,  2.,  6., 11.,
        7.,  7., 12.,  9., 16.,  7.,  0.,  8., 14., 12.,  9.,  9., 10.,
       16., 10., 12.,  5.,  3.,  3., 14.,  5.,  0., 11.,  1.,  1.,  7.,
        8.,  1.,  9., 11.,  3.,  1.,  5.,  8.,  1., 10., 14.,  7., 14.,
       14., 16.,  4., 13.,  3., 12., 16., 15.,  9.,  8.,  6.,  5

# Create the Excel Folder

In [14]:
f_outname = f'filtered_solar_analysis_E_cut_{en_cut}_MeV_R_cut_5500_mm'  # Output file name

df = pd.DataFrame({'eventID': np.array(eventID, dtype = np.int64),
                   'runID': runID,
                   'subrunID': subrunID})

df.to_excel(f_outname + '.xlsx', index = False)

# Create txt list

In [17]:
data = np.column_stack((runID, eventID))
np.savetxt(f"filtered_solar_analysis_E_cut_{en_cut}_MeV_R_cut_5500_mm.txt", data,
           fmt="%d, %d",          # enteros
           delimiter=", ",
           comments="")