# Notebook for Building a Filtered file.dat to Selectively Download RATDS Files from the Grid Server

In [22]:
import numpy as np
import re
import uproot

# Create a Reduced Data List to Download from the Grid

The idea is to take the file.dat (in txt format), read all of its entries, and compare the runID and subrunID of each entry with those of the candidates. Then, only the lines of file.dat that contain the run and subrun of a candidate are saved.

## Filtering Function

In [35]:
def filter_filelist(in_file_dir, out_file_name, candidate_list):
    """
    Filter a file list, keeping only the lines that belong to candidate
    (run, subrun) pairs.

    The file name is taken from the first tab-separated column of each input
    line and must embed the run and subrun as ``_r<digits>_s<digits>_``.
    Lines whose (run, subrun) pair is in ``candidate_list`` are copied
    unchanged to the output file; all other lines, including lines without
    the run/subrun pattern, are dropped.

    Parameters:
    - in_file_dir: Directory and name of the file.dat.txt
    - out_file_name: name of the output file (created or overwritten)
    - candidate_list: Iterable of hashable (runID, subrunID) pairs of the
      candidates. Float values that equal an integer (e.g. 6.0) match the
      integer IDs parsed from the file names.

    Returns:
    - int: number of lines written to the output file.

    Raises:
    - ValueError: if input and output resolve to the same file, because
      opening the output for writing would truncate the input before it
      is read.
    """
    # Local import so the function does not depend on a notebook-level import.
    import os

    # Guard against destroying the input: mode 'w' empties the file on open.
    if os.path.realpath(in_file_dir) == os.path.realpath(out_file_name):
        raise ValueError(
            "in_file_dir and out_file_name refer to the same file; "
            "writing the output would truncate the input."
        )

    # A set gives constant-time membership tests for every line.
    candidate_set = set(candidate_list)
    lines_found = 0

    # Captures the run ('_r' + digits) and the subrun ('_s' + digits).
    patron = re.compile(r'_r(\d+)_s(\d+)_')

    with open(in_file_dir, 'r') as f_in, open(out_file_name, 'w') as f_out:
        for linea in f_in:
            # The file name is the first tab-separated column.
            filename = linea.split('\t', 1)[0]

            match = patron.search(filename)
            if not match:
                continue

            # int() drops the zero padding (0000366261 -> 366261).
            run_id = int(match.group(1))
            subrun_id = int(match.group(2))

            # Keep the line only when this (run, subrun) pair is a candidate.
            if (run_id, subrun_id) in candidate_set:
                f_out.write(linea)
                lines_found += 1

    print(f"Proceso finalizado. Se encontraron {lines_found} archivos coincidentes.")
    return lines_found

# Load Data: runID and subrunID Candidates

In [30]:
main_dir = '/home/joankl/data/solars/real_data/bisMSB/first_candidates/'


def _load_candidates(analysis_name):
    """
    Load the runID and subrunID arrays saved for one analysis.

    Parameters:
    - analysis_name: Name of the sub-directory of main_dir that contains
      the 'resume_files/' folder (e.g. 'analysis15').

    Returns:
    - (run_ids, subrun_ids, candidates): the two loaded arrays and the list
      of (runID, subrunID) tuples obtained by pairing them element-wise.

    Raises:
    - ValueError: if the two arrays have different lengths; zip() would
      otherwise silently drop the unpaired entries.
    """
    resume_dir = main_dir + analysis_name + '/resume_files/'
    run_ids = np.load(resume_dir + 'runID.npy')
    subrun_ids = np.load(resume_dir + 'subrunID.npy')

    if len(run_ids) != len(subrun_ids):
        raise ValueError(
            f"runID and subrunID lengths differ for {analysis_name}: "
            f"{len(run_ids)} vs {len(subrun_ids)}"
        )

    return run_ids, subrun_ids, list(zip(run_ids, subrun_ids))


runID_analysis15, subrunID_analysis15, analysis15_candidates = _load_candidates('analysis15')

runID_analysis15_bMR, subrunID_analysis15_bMR, analysis15bMR_candidates = _load_candidates('analysis15_bMR')

runID_analysis20_bMR, subrunID_analysis20_bMR, analysis20bMR_candidates = _load_candidates('analysis20_bMR')

## Start Filtering File Names

In [36]:
# Analysis15: write the subset of the file list whose (run, subrun) is a candidate
in_file_dir = '/home/joankl/jupyter-projects/Solar-Neutrinos-Project/real data analysis/bisMSB/dat_files_for_RATDS/Analysis15_802_Bronze_364311_371216_July2025_ratds_RAL_updatedSept25.dat.txt'
out_file_name = 'Analysis15_802_Bronze_364311_371216_July2025_ratds_RAL_updatedSept25_solar_filtered.dat.txt'
filter_filelist(
    in_file_dir=in_file_dir,
    out_file_name=out_file_name,
    candidate_list=analysis15_candidates,
)

Proceso finalizado. Se encontraron 3895 archivos coincidentes.


In [45]:
# Subrun ID stored at the position of the first entry whose run ID is 366266
subrunID_analysis15[np.nonzero(runID_analysis15 == 366266)[0][0]]

np.float64(6.0)

In [40]:
# Smallest subrun ID among the Analysis15 candidates.
# np.min is a vectorized reduction; the builtin min() iterates the array
# element by element in Python. Unlike builtin min(), np.min propagates NaN.
np.min(subrunID_analysis15)

np.float64(0.0)

In [11]:
# Path of the ROOT file to inspect
fdir = '/home/joankl/data/solars/real_data/bisMSB/first_candidates/ratds/out.root'

# Open the file with uproot and take the object stored under the key 'T;1'
file = uproot.open(fdir)
data = file['T;1']

In [12]:
# List the keys of the objects stored in the opened file
file.keys()

['T;1', 'runT;1', 'log;1', 'macro;1', 'db;1', 'meta;1']

In [13]:
# List the keys available inside `data` (the 'T;1' object)
data.keys()

['ds',
 'ds/TObject',
 'ds/TObject/fUniqueID',
 'ds/TObject/fBits',
 'ds/mc',
 'ds/mc/mc.fUniqueID',
 'ds/mc/mc.fBits',
 'ds/mc/mc.particles',
 'ds/mc/mc.parents',
 'ds/mc/mc.tracks',
 'ds/mc/mc.trackIDs',
 'ds/mc/mc.pmts',
 'ds/mc/mc.unbuiltHits.fUniqueID',
 'ds/mc/mc.unbuiltHits.fBits',
 'ds/mc/mc.unbuiltHits.normal',
 'ds/mc/mc.unbuiltHits.inward',
 'ds/mc/mc.unbuiltHits.owl',
 'ds/mc/mc.unbuiltHits.lowGain',
 'ds/mc/mc.unbuiltHits.butt',
 'ds/mc/mc.unbuiltHits.neck',
 'ds/mc/mc.unbuiltHits.fecd',
 'ds/mc/mc.unbuiltHits.spare',
 'ds/mc/mc.unbuiltHits.hqe',
 'ds/mc/mc.unbuiltHits.invalid',
 'ds/mc/mc.universalTime.fUniqueID',
 'ds/mc/mc.universalTime.fBits',
 'ds/mc/mc.universalTime.days',
 'ds/mc/mc.universalTime.seconds',
 'ds/mc/mc.universalTime.nanoSeconds',
 'ds/mc/mc.mcTime',
 'ds/mc/mc.mcid',
 'ds/mc/mc.mcPECount',
 'ds/mc/mc.nCherPhotons',
 'ds/mc/mc.nScintPhotons',
 'ds/mc/mc.nReemittedPhotons',
 'ds/mc/mc.numDirPE',
 'ds/mc/mc.intialScintTime',
 'ds/mc/mc.scintEnergyDeposit

In [16]:
# Convert the 'ds/evs/evs.gtid' and 'ds/runID' entries of `data` to NumPy arrays
gtid = np.array(data['ds/evs/evs.gtid'])
runID = np.array(data['ds/runID'])
# Display the run ID array (gtid is stored but not displayed in this cell)
runID

array([355159], dtype=uint32)