This notebook computes the reconstruction efficiencies and prepares the datasets for further analysis.

In [86]:
import os
import glob
import numpy  as np
import pandas as pd

from invisible_cities.evm.mixer            import get_file_number
from invisible_cities.io.dst_io            import load_dst
from invisible_cities.database.load_db     import RadioactivityData

In [104]:
# Event-energy window. The same bounds are applied to the nexus (truth)
# energy deposition and to the reconstructed energy after IC processing.
Emin = 2.4
Emax = 2.5

# Isotopes to process and location of the production.
# Expected layout below `path`: <g4volume>/nexus for the nexus files and
# <g4volume>/prod/<city> for the IC output.
isotopes = ["214Bi", "208Tl", "0nubb", "muons"]
path     = os.path.expandvars("$LUSTRE/NEXT100/{isotope}/")

# Output files; their names carry the energy window used to produce them.
efficiencies_outfile = "eficiencies_{}_{}_MeV.csv".format(Emin, Emax)
tracks_outfile       = "tracks_{}_{}_MeV.h5".format(Emin, Emax)

## Check reconstruction chain 1-1 correspondence

In [None]:
# For every (isotope, G4 volume) pair, make sure that the set of file numbers
# found among the nexus files equals the one found among the isaura files.
for isotope in isotopes:
    isotope_dir = path.format(isotope=isotope)

    for g4volume in os.listdir(isotope_dir):
        nexus_pattern  = os.path.join(isotope_dir, f"{g4volume}/nexus/nexus_*.h5")
        isaura_pattern = os.path.join(isotope_dir, f"{g4volume}/prod/isaura/isaura_*.h5")

        nexus_file_n  = list(map(get_file_number, glob.glob( nexus_pattern)))
        isaura_file_n = list(map(get_file_number, glob.glob(isaura_pattern)))

        # file numbers present on one side only
        unmatched = np.setxor1d(nexus_file_n, isaura_file_n)
        if len(unmatched) == 0:
            continue

        raise Exception(f"({isotope}, {g4volume}): not 1-1 correspondence for file numbers" + str(unmatched))

## MC Efficiencies

In [None]:
# Count, for every (isotope, G4 volume), the number of simulated events (nsim)
# and the number of events whose energy deposited in ACTIVE lies within
# [Emin, Emax] (nsaved).
# Rows are collected in a list and the DataFrame is built once at the end:
# this avoids enlarging the frame row by row and keeps the counters as integers.
eff_records = []
for isotope in isotopes:

    print("Processing", isotope)

    isotope_dir = path.format(isotope=isotope)
    g4volumes   = os.listdir(isotope_dir)
    for i, g4volume in enumerate(g4volumes, 1):

        print("Processed", f"{i}/{len(g4volumes)}".ljust(30), end="\r")

        filenames = glob.glob(os.path.join(isotope_dir, f"{g4volume}/nexus/nexus_*.h5"))
        nsim   = 0
        nsaved = 0
        for filename in filenames:
            configuration = load_dst(filename, "MC", "configuration").set_index("param_key")
            hits          = load_dst(filename, "MC",          "hits")

            # truth energy per event, restricted to hits labelled ACTIVE
            mc_energy = hits[hits.label == "ACTIVE"].groupby("event_id").energy.sum()

            nsim   += int(configuration.loc["num_events"].param_value)
            nsaved += int(mc_energy.between(Emin, Emax).sum())

        eff_records.append({ "Isotope" : isotope
                           , "G4Volume": g4volume
                           , "nsim"    : nsim
                           , "nsaved"  : nsaved})

# explicit columns so the frame has the expected schema even with no records
eff = pd.DataFrame(eff_records, columns=["Isotope", "G4Volume", "nsim", "nsaved"])

## IC efficiencies

In [None]:
# Add to `eff` the number of events (npass) that pass the truth-energy cut and
# have at least one entry in the isaura Tracking/Tracks table.

# Index by (Isotope, G4Volume) only if not done already, so that this cell can
# be re-run without failing on the missing columns.
eff_index = ["Isotope", "G4Volume"]
if list(eff.index.names) != eff_index:
    eff = eff.set_index(eff_index)

for isotope in isotopes:

    print("Processing", isotope)

    isotope_dir = path.format(isotope=isotope)
    g4volumes   = os.listdir(isotope_dir)
    for i, g4volume in enumerate(g4volumes, 1):

        print("Processed", f"{i}/{len(g4volumes)}".ljust(30), end="\r")

        filenames = glob.glob(os.path.join(isotope_dir, f"{g4volume}/prod/isaura/isaura_*.h5"))
        npass = 0
        for filename in filenames:
            eventMap = load_dst(filename,  "Run", "eventMap").set_index("nexus_evt")
            hits     = load_dst(filename,   "MC",     "hits")
            # Files whose tracks cannot be loaded contribute no passing events.
            # `Exception` (instead of a bare except) keeps that best-effort
            # behaviour while still letting KeyboardInterrupt/SystemExit through.
            try:
                tracks = load_dst(filename, "Tracking", "Tracks").set_index("event")
            except Exception:
                continue

            # truth energy per event, restricted to hits labelled ACTIVE
            mc_energy = hits[hits.label == "ACTIVE"].groupby("event_id").energy.sum()
            sel       = mc_energy.between(Emin, Emax)

            # event numbers of the truth-selected events that also appear in the tracks table
            # NOTE(review): assumes eventMap's nexus_evt index aligns with the MC event_id — confirm
            selected_evts = eventMap.loc[sel].evt_number.values
            npass += len(np.unique(tracks.index.intersection(selected_evts)))

        eff.loc[(isotope, g4volume), "npass"] = npass

Processed 1/20                          

In [148]:
# After the IC-efficiency step the Isotope/G4Volume keys live in the index of
# `eff`; writing with index=False would drop them and leave rows that cannot be
# attributed. Write the index whenever it is named; skip it only when it is an
# anonymous positional index.
write_index = any(name is not None for name in eff.index.names)
eff.to_csv(efficiencies_outfile, index=write_index)

## Create analysis file

In [96]:
def load_tracks_with_info(path, isotope, Emin=2.4, Emax=2.5):
    """Gather the isaura tracks of one isotope that pass the event-energy cut.

    Parameters
    ----------
    path : str
        Template with an ``{isotope}`` placeholder. Once formatted it must be
        a directory whose entries are the G4 volumes, each one holding the
        files ``prod/isaura/isaura_*.h5``.
    isotope : str
        Value substituted into ``path``.
    Emin, Emax : float
        Inclusive bounds on the event energy, computed as the sum of the
        ``energy`` column over all tracks sharing the same ``event``.

    Returns
    -------
    pandas.DataFrame
        Rows of every ``Tracking/Tracks`` table belonging to the selected
        events, with two extra columns: ``G4Volume`` (name of the volume
        directory) and ``file`` (result of ``get_file_number`` on the file).
        Each file's rows keep their own 0-based index, so the index of the
        result is not unique. An empty DataFrame is returned when nothing is
        selected.

    Notes
    -----
    Files without a ``Tracking/Tracks`` node (``KeyError``) are skipped and
    their name is printed.
    """
    isotope_dir = path.format(isotope=isotope)
    g4volumes   = os.listdir(isotope_dir)

    # per-file selections; concatenated once at the end instead of growing a
    # DataFrame inside the loop (which copies all previous rows every time)
    selected = []

    for i, g4volume in enumerate(g4volumes, 1):

        proc = f"Processing volume: {i} / {len(g4volumes)}."
        print(proc.ljust(100), end="\r")

        filenames = glob.glob(os.path.join(isotope_dir, f"{g4volume}/prod/isaura/isaura_*.h5"))

        for j, filename in enumerate(filenames, 1):

            proc = f"(Volume {i} / {len(g4volumes)}) Processed files: {j} / {len(filenames)}"
            print(proc.ljust(100), end="\r")

            try:
                tracks_ = pd.read_hdf(filename, "Tracking/Tracks").set_index("event")
            except KeyError:
                print(filename)
                continue

            # energy cut on the summed track energy of each event
            energies = tracks_.groupby(level=0).energy.sum()
            sel      = (Emin <= energies) & (energies <= Emax)
            tracks_  = tracks_.loc[sel].reset_index()

            tracks_["G4Volume"] = g4volume
            tracks_["file"]     = get_file_number(filename)
            selected.append(tracks_)

    # pd.concat raises on an empty list
    if not selected:
        return pd.DataFrame()
    return pd.concat(selected)

In [107]:
# Write one table per isotope into the analysis file.
# The cut uses the configured Emin/Emax (not literals) so that it always agrees
# with the energy window encoded in the name of `tracks_outfile`.
for isotope in isotopes:
    tracks = load_tracks_with_info(path, isotope, Emin=Emin, Emax=Emax)
    tracks.to_hdf(tracks_outfile, key=isotope, index=False)