In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import pickle
import os, sys
import numpy as np
import matplotlib as mat
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib.colors as colors
import matplotlib.gridspec as gridspec
import pandas as pd
import tables
import h5py
import math
from scipy.stats import mstats
import matplotlib as mpl
import matplotlib.font_manager as font_manager


In [4]:
sys.path.append('/data/user/tvaneede/GlobalFit/reco_processing/notebooks/weighting')

from datasets import datasets

Check if Neha's NuE, NuMu and NuTau files are the sum of all her per-dataset files (AllHESE, no 60 TeV cut!).

Conclusion: No, they are not! I am trusting the separate track, cascade, and double-cascade files.

In [5]:
# Dataset ID strings grouped by neutrino flavor. The cells below combine these
# IDs with a channel or flavor name to build the per-dataset HDF file names.
flavor_datasets_spice = dict(
    NuE=["22046", "22047", "22082", "22083"],
    NuMu=["22043", "22044", "22079", "22080"],
    NuTau=["22049", "22050", "22085", "22086"],
)

In [7]:
# Compare the number of rows in each combined per-flavor file with the sum of
# rows over the per-dataset, per-channel files found in the same directory.
hdf_path = "/data/ana/Diffuse/GlobalFit_Flavor/taupede/SnowStorm/RecowithBfr/Baseline/hdf_files/NoDeepCore/AllHESE/"

for flavor in ["NuE", "NuMu", "NuTau"]:

    print(20*"-")
    print(f"checking for flavor {flavor}")

    # Every store is opened through a context manager so its file handle is
    # released as soon as the count is taken, even if a read raises.
    with pd.HDFStore(f"{hdf_path}/{flavor}.hdf5", 'r') as hdf_total:
        print( "Combined hdf", len(hdf_total["I3MCWeightDict"]) )

    total = 0
    for filetype in flavor_datasets_spice[flavor]:
        for channel in ["Tracks", "Cascades", "DoubleCascades"]:
            name = f"{filetype}_{channel}"
            with pd.HDFStore(f"{hdf_path}/{name}.hdf5", 'r') as hdf:
                total += len(hdf["I3MCWeightDict"])

    print("summed", total)


--------------------
checking for flavor NuE
Combined hdf 45861
summed 65680
--------------------
checking for flavor NuMu
Combined hdf 32728
summed 46848
--------------------
checking for flavor NuTau
Combined hdf 40816
summed 58597


Check if Neha's 60 TeV cut files could be manually obtained from her ALL files

Conclusion: yes that works!

In [8]:
# For every per-dataset, per-channel file, compare three counts:
#   * all rows of the uncut (AllHESE) file,
#   * rows of the uncut file that pass the RecoETot cut applied here,
#   * all rows of the corresponding file in the 60 TeV directory.
hdf_path_all = "/data/ana/Diffuse/GlobalFit_Flavor/taupede/SnowStorm/RecowithBfr/Baseline/hdf_files/NoDeepCore/AllHESE/"
hdf_path_60TeV = "/data/ana/Diffuse/GlobalFit_Flavor/taupede/SnowStorm/RecowithBfr/Baseline/hdf_files/NoDeepCore/"

# Threshold on RecoETot; presumably in GeV (i.e. 60 TeV) -- TODO confirm units.
reco_etot_cut = 60e3

for flavor in ["NuE", "NuMu", "NuTau"]:

    print(20*"-")

    total_all = 0
    total_60TeV = 0
    total_all_cut_60TeV = 0

    print(f"checking for flavor {flavor}")

    for filetype in flavor_datasets_spice[flavor]:
        for channel in ["Tracks", "Cascades", "DoubleCascades"]:
            name = f"{filetype}_{channel}"

            # Uncut file: read each table once, then close the store before counting.
            with pd.HDFStore(f"{hdf_path_all}/{name}.hdf5", 'r') as hdf_all:
                weights = hdf_all["I3MCWeightDict"]
                mask = hdf_all["RecoETot"]["value"] > reco_etot_cut
            total_all += len(weights)
            total_all_cut_60TeV += len(weights[mask])

            # Pre-cut 60 TeV file.
            with pd.HDFStore(f"{hdf_path_60TeV}/{name}.hdf5", 'r') as hdf_60TeV:
                total_60TeV += len(hdf_60TeV["I3MCWeightDict"])

    print("total_all", total_all)
    print("total_60TeV", total_60TeV)
    print("total_all_cut_60TeV", total_all_cut_60TeV)



--------------------
checking for flavor NuE
total_all 65680
total_60TeV 61716
total_all_cut_60TeV 61716
--------------------
checking for flavor NuMu
total_all 46848
total_60TeV 44388
total_all_cut_60TeV 44388
--------------------
checking for flavor NuTau
total_all 58597
total_60TeV 56249
total_all_cut_60TeV 56249


Now let's check the h5 files that I made from Neha's reco files. See if the sum over my per-dataset files is the same as my combined files. Then also compare with Neha.

Conclusion: yes it matches Neha. Now I can use my own hdf5 files that I created.

In [9]:
# For each flavor, print the row count of the combined file, the number of
# rows with RecoETot above 60e3, and the summed row count of the per-dataset files.
hdf_path = "/data/user/tvaneede/GlobalFit/reco_processing/hdf/output/spice_tau_reco/"

for flavor in ["NuE", "NuMu", "NuTau"]:

    print(20*"-")
    print(f"checking for flavor {flavor}")

    # Read each table once and release the file handle before counting.
    with pd.HDFStore(f"{hdf_path}/{flavor}.h5", 'r') as hdf_total:
        weights = hdf_total["I3MCWeightDict"]
        mask = hdf_total["RecoETot"]["value"] > 60e3

    print( "Combined hdf", len(weights) )
    print( "60 TeV", len(weights[mask]) )

    total = 0
    for filetype in flavor_datasets_spice[flavor]:
        name = f"{flavor}_{filetype}"
        with pd.HDFStore(f"{hdf_path}/{name}.h5", 'r') as hdf:
            total += len(hdf["I3MCWeightDict"])

    print("summed", total)


--------------------
checking for flavor NuE
Combined hdf 65680
60 TeV 61716
summed 65680
--------------------
checking for flavor NuMu
Combined hdf 46848
60 TeV 44388
summed 46848
--------------------
checking for flavor NuTau
Combined hdf 58597
60 TeV 56249
summed 58597


Now let's check the muons, first from Neha's files.

In [10]:
# Register the MuonGun dataset IDs next to the neutrino flavors, then count the
# rows of the RecoL table in every per-dataset, per-channel MuonGun file.
flavor_datasets_spice["MuonGun"] = ["21315", "21316", "21317"]

hdf_path = "/data/ana/Diffuse/GlobalFit_Flavor/taupede/MuonGun/RecowithBfr/hdf_files/NoDeepCore/"

for flavor in ["MuonGun"]:

    print(20*"-")

    total = 0

    print(f"checking for flavor {flavor}")

    for filetype in flavor_datasets_spice[flavor]:
        for channel in ["Tracks", "Cascades", "DoubleCascades"]:
            name = f"{filetype}_{channel}"
            with pd.HDFStore(f"{hdf_path}/{name}.hdf5", 'r') as hdf:
                try:
                    nevts = len( hdf["RecoL"] )
                except KeyError:
                    # HDFStore raises KeyError when the table is absent; that is
                    # counted as zero events. Any other failure (corrupt file,
                    # interrupt, ...) is allowed to surface instead of being hidden.
                    nevts = 0
            total += nevts
            print( filetype, channel, nevts )

    print("summed", total)

--------------------
checking for flavor MuonGun
21315 Tracks 19
21315 Cascades 0
21315 DoubleCascades 0
21316 Tracks 1
21316 Cascades 0
21316 DoubleCascades 0
21317 Tracks 0
21317 Cascades 0
21317 DoubleCascades 0
summed 20


Now let's check my muon files.

In [11]:
# Count the rows of the RecoL table in each per-dataset MuonGun file.
hdf_path = "/data/user/tvaneede/GlobalFit/reco_processing/hdf/output/spice_tau_reco"

for flavor in ["MuonGun"]:

    print(20*"-")

    total = 0

    print(f"checking for flavor {flavor}")

    for filetype in flavor_datasets_spice[flavor]:
        name = f"{flavor}_{filetype}"
        with pd.HDFStore(f"{hdf_path}/{name}.h5", 'r') as hdf:
            try:
                nevts = len( hdf["RecoL"] )
            except KeyError:
                # Only a missing RecoL table is treated as "no events"; other
                # errors propagate so that a real read problem cannot be
                # mistaken for an empty file.
                nevts = 0
        total += nevts
        print( filetype, nevts )

    print("summed", total)

--------------------
checking for flavor MuonGun
21315 0
21316 0
21317 0
summed 0


Let's make a table of the number of Neha's events for each flavor.

In [12]:
# Print a per-flavor table: total rows in the combined file and rows with
# RecoETot above 60e3.
hdf_path = "/data/user/tvaneede/GlobalFit/reco_processing/hdf/output/spice_tau_reco/"

# Table header
print(f"{'Flavor':<6} | {'HESE':>10} | {'RecoETot > 60 TeV':>18}")
print("-" * 40)

for flavor in ["NuE", "NuMu", "NuTau"]:
    # The context manager closes the store even when a read raises; each
    # table is read from disk exactly once.
    with pd.HDFStore(f"{hdf_path}/{flavor}.h5", 'r') as hdf_total:
        weights = hdf_total["I3MCWeightDict"]
        mask = hdf_total["RecoETot"]["value"] > 60e3

    total_count = len(weights)
    masked_count = len(weights[mask])

    print(f"{flavor:<6} | {total_count:>10} | {masked_count:>18}")


Flavor |       HESE |  RecoETot > 60 TeV
----------------------------------------
NuE    |      65680 |              61716
NuMu   |      46848 |              44388
NuTau  |      58597 |              56249


Now let's do the same for Neha's SnowStorm ensemble simulations.

In [13]:
# Dataset ID strings per neutrino flavor for the "spice ensemble" sample
# (presumably the datasets behind spice_tau_reco_ensemble -- TODO confirm).
flavor_datasets_spice_ensemble = dict([
    ("NuE", ["22014", "22015"]),
    ("NuMu", ["22011", "22012"]),
    ("NuTau", ["22017", "22018"]),
])

In [14]:
# Print a per-flavor table for the ensemble files: total rows in the combined
# file and rows with RecoETot above 60e3.
hdf_path = "/data/user/tvaneede/GlobalFit/reco_processing/hdf/output/spice_tau_reco_ensemble/"

# Table header
print(f"{'Flavor':<6} | {'HESE':>10} | {'RecoETot > 60 TeV':>18}")
print("-" * 40)

for flavor in ["NuE", "NuMu", "NuTau"]:
    # Closing is handled by the context manager, so a failing read cannot
    # leave the file open; each table is loaded once.
    with pd.HDFStore(f"{hdf_path}/{flavor}.h5", 'r') as hdf_total:
        weights = hdf_total["I3MCWeightDict"]
        mask = hdf_total["RecoETot"]["value"] > 60e3

    total_count = len(weights)
    masked_count = len(weights[mask])

    print(f"{flavor:<6} | {total_count:>10} | {masked_count:>18}")

Flavor |       HESE |  RecoETot > 60 TeV
----------------------------------------
NuE    |     131712 |             123581
NuMu   |      93963 |              89092
NuTau  |     116876 |             112284


Now let's do the ftp files.

In [15]:
# Dataset ID strings per neutrino flavor for the "ftp" sample.
flavor_datasets_ftp = {}
flavor_datasets_ftp["NuE"] = ["22612", "22613", "22663", "22664"]
flavor_datasets_ftp["NuMu"] = ["22644", "22645", "22670", "22671"]
flavor_datasets_ftp["NuTau"] = ["22634", "22635", "22667", "22666"]

In [27]:
# Print a per-flavor table: total rows in I3MCWeightDict and the number of
# rows whose HESE_CausalQTot value exceeds 6000.
hdf_path = "/data/user/tvaneede/GlobalFit/reco_processing/hdf/output/ftp_l3casc/"

# Table header
print(f"{'Flavor':<6} | {'Total':>10} | {'HESE':>18}")
print("-" * 40)

for flavor in ["NuE", "NuMu", "NuTau"]:
    # The store is closed by the context manager even if a read raises.
    with pd.HDFStore(f"{hdf_path}/{flavor}.h5", 'r') as hdf_total:
        total_count = len(hdf_total["I3MCWeightDict"])
        causal_qtot = hdf_total["HESE_CausalQTot"]["value"]

    # Charge threshold; presumably in photoelectrons -- TODO confirm.
    # Summing the boolean mask counts passing rows without building a filtered copy.
    masked_count = int((causal_qtot > 6000).sum())

    print(f"{flavor:<6} | {total_count:>10} | {masked_count:>18}")

Flavor |      Total |               HESE
----------------------------------------
NuE    |    2125204 |             244095
NuMu   |    1961223 |             112840
NuTau  |    2009543 |             212300


In [28]:
# Print a per-flavor table for the ensemble files: total rows in
# I3MCWeightDict and the number of rows whose HESE_CausalQTot value exceeds 6000.
hdf_path = "/data/user/tvaneede/GlobalFit/reco_processing/hdf/output/ftp_l3casc_ensemble/"

# Table header
print(f"{'Flavor':<6} | {'Total':>10} | {'HESE':>18}")
print("-" * 40)

for flavor in ["NuE", "NuMu", "NuTau"]:
    # Using a context manager guarantees the file handle is released on error.
    with pd.HDFStore(f"{hdf_path}/{flavor}.h5", 'r') as hdf_total:
        total_count = len(hdf_total["I3MCWeightDict"])
        causal_qtot = hdf_total["HESE_CausalQTot"]["value"]

    # Charge threshold; presumably in photoelectrons -- TODO confirm.
    # The passing rows are counted directly from the boolean mask.
    masked_count = int((causal_qtot > 6000).sum())

    print(f"{flavor:<6} | {total_count:>10} | {masked_count:>18}")

Flavor |      Total |               HESE
----------------------------------------
NuE    |    1281899 |             154383
NuMu   |    1192064 |              71187
NuTau  |    1115825 |             120820
