In [1]:
# import libraries
import os, sys, glob, pickle, tables

import numpy as np
import matplotlib.pyplot as plt

from pandas import DataFrame

# pip install simweights <--- run this once to install the package
# then replace <username> with your username and <version> with your Python version
sys.path.append('/home/<username>/.local/lib/python3.<version>/site-packages')
import simweights

In [2]:
# Paths to h5 files
# All four datasets share one directory layout; only the primary label and the
# dataset number change, so the glob patterns are generated from a template.
_SIM_GLOB_TEMPLATE = '/data/ana/CosmicRay/IceTop_level3/sim/IC86.2012/SIBYLL2.1/{primary}/{dataset}_v1s/h5files/*.h5'
_SIM_DATASETS = (('p', 12360), ('He', 12630), ('O', 12631), ('Fe', 12362))

PATHS = [_SIM_GLOB_TEMPLATE.format(primary=primary, dataset=dataset)
         for primary, dataset in _SIM_DATASETS]

In [3]:
# Define important functions
def weighting(path):
    """
    Build one combined IceTop weighter from every file matching a glob pattern.

    Parameters:
    - path: glob pattern (str) selecting the h5 files to include.

    Returns:
    - The sum of one simweights.IceTopWeighter per matching file, or None
      when the pattern matches nothing.

    The opened files are not closed here.
    NOTE(review): presumably the weighter reads from them later (get_column /
    get_weights), so they have to stay open - confirm before adding a close.
    """
    combined = None

    for h5_path in glob.glob(path):
        handle = tables.open_file(h5_path, 'r')
        file_weighter = simweights.IceTopWeighter(handle)

        if combined is None:
            # The first file seeds the accumulator.
            combined = file_weighter
        else:
            combined += file_weighter

    return combined

def weighted_quantiles(values, weights, quantiles=0.5):
    """
    Weighted quantile(s) of `values`, without interpolation.

    Parameters:
    - values: NumPy array of data values.
    - weights: NumPy array of weights, same length as `values`.
    - quantiles: scalar or sequence of fractions of the total weight
      (default 0.5, i.e. the weighted median).

    Returns:
    - For each requested quantile, the first value (in ascending order) whose
      cumulative weight reaches that fraction of the total weight.
    """
    order = np.argsort(values)
    running_weight = np.cumsum(weights[order])

    # Convert the requested fractions into absolute cumulative-weight targets.
    targets = np.array(quantiles) * running_weight[-1]
    positions = np.searchsorted(running_weight, targets)

    return values[order[positions]]

def weighted_percentile(data, weights, percentile):
    """
    Weighted percentile of a dataset, linearly interpolated.

    Parameters:
    - data: array-like of data values.
    - weights: array-like of weights, same length as data.
    - percentile: requested percentile on a 0-100 scale.

    Returns:
    - The data value at which the normalised cumulative weight equals
      percentile / 100, interpolated linearly between neighbouring samples.
      Requests below the first cumulative fraction return the smallest value.
    """
    data = np.asarray(data)
    weights = np.asarray(weights)

    # Put the samples in ascending order and carry the weights along.
    order = np.argsort(data)
    ordered_data = data[order]

    # Cumulative weight fraction; the last entry is 1 by construction.
    cumulative = np.cumsum(weights[order])
    fraction = cumulative / cumulative[-1]

    return np.interp(percentile / 100, fraction, ordered_data)

In [4]:
# Load in the SIBYLL2.1 particle sims
#
# Each notebook variable is cached in pickles/<stem>.pkl. Cached files are
# loaded directly; the (expensive) combined simulation weighter is only built
# when a column that can be regenerated from it is missing.

# Notebook variable name -> stem of its cache file.
PICKLE_STEMS = {
    'primary_energy': 'energy',
    'hits': 'hits',
    'particle_type': 'particle_type',
    'beta_cut': 'beta_cut_pass',
    'event': 'event',
    'laputop_frac': 'laputop_frac',
    'max_inside': 'max_inside',
    'max_signal_6': 'max_signal_6',
    'neighbour_max_signal_4': 'neighbour_max_signal_4',
    'reco_pass': 'reco_pass',
    'runs': 'runs',
    'standard_filter': 'standard_filter',
    'station_density_pass': 'station_density_pass',
    'sub_event_stream': 'sub_event_stream',
    'sub_event': 'sub_event',
    'Hweights': 'Hweights',
    # Previously loaded into `sub_event`, silently overwriting it.
    'Gweights': 'Gweights',
}

# Cache files that the "missing pkl files" cell rebuilds from the weighter.
REGENERABLE_STEMS = ('energy', 'particle_type', 'hits', 'Hweights')

weighter = None

for variable_name, stem in PICKLE_STEMS.items():
    pickle_path = f'pickles/{stem}.pkl'
    if os.path.isfile(pickle_path):
        # NOTE: pickle.load can execute arbitrary code - only use it on cache
        # files written by this analysis.
        with open(pickle_path, 'rb') as file:
            globals()[variable_name] = pickle.load(file)

if any(not os.path.isfile(f'pickles/{stem}.pkl') for stem in REGENERABLE_STEMS):
    # Combine all datasets into ONE weighter. Assigning inside the loop (as
    # before) kept only the last dataset and dropped the other primaries.
    for path in PATHS:
        dataset_weighter = weighting(path)
        if dataset_weighter is None:
            # No files matched this pattern; nothing to add.
            continue
        if weighter is None:
            weighter = dataset_weighter
        else:
            weighter += dataset_weighter

In [None]:
# Define quality cuts
# An event passes only if every individual flag is non-zero: the flags are
# multiplied together (left to right) and the product is cast to bool.
quality_flags = (
    max_inside,
    station_density_pass,
    standard_filter,
    reco_pass,
    neighbour_max_signal_4,
    max_signal_6,
    laputop_frac,
    beta_cut,
)

quality_cut = quality_flags[0]
for flag in quality_flags[1:]:
    quality_cut = quality_cut * flag

quality_cut = quality_cut.astype(bool)

In [6]:
# If there are missing pkl files, load them with weighter
def _pickle_absent(pickle_path):
    """Return True when there is no file at `pickle_path`."""
    return not os.path.isfile(pickle_path)


# Columns read straight from the simulation tables.
if _pickle_absent('pickles/energy.pkl'):
    primary_energy = weighter.get_column('MCPrimary', 'energy')

if _pickle_absent('pickles/particle_type.pkl'):
    particle_type = weighter.get_column('MCPrimary', 'type')

if _pickle_absent('pickles/hits.pkl'):
    hits = weighter.get_column('IceTopHLCSeedRTPulses_SnowUnAttenuated_info', 'nstrings')

# Event weights for the IceTop variant of the Gaisser H4a flux model.
# NOTE(review): an earlier comment called this the "(p, He, N, Al, Fe)"
# version; the _IT variant is presumably the four-component one - confirm
# against the simweights documentation.
if _pickle_absent('pickles/Hweights.pkl'):
    fluxH = simweights.GaisserH4a_IT()

    # Passing the flux model to the weighter yields one weight per event.
    Hweights = weighter.get_weights(fluxH)

In [7]:
# Dictionary set-ups
# particle_type code that selects each simulated primary.
# NOTE(review): these look like PDG Monte Carlo codes - confirm.
_PRIMARY_CODES = (
    ('proton', 2.21200000e+03),
    ('helium', 1.00002004e+09),
    ('oxygen', 1.00008016e+09),
    ('iron', 1.00026056e+09),
)

# Boolean event masks, one per primary, plus 'true' which accepts every event.
CUTS = {name: particle_type == code for name, code in _PRIMARY_CODES}
CUTS['true'] = np.ones(len(primary_energy), dtype=bool)

# Mass assigned to each primary.
A = dict(
    proton=1.0073,
    helium=4.0026,
    oxygen=16,
    iron=55.845,
)

# Charge assigned to each primary; 'true' (all events) gets 0.
Z = dict(
    true=0,
    proton=1,
    helium=2,
    oxygen=8,
    iron=26,
)
# Reference table of the hit-count ranges defining each tier, by season.
'''
Tier 1: 2011-14 is 3≤n<5
Tier 2: 2011 is 5≤n<10; 2012-2013 is 5≤n<9; 2014 is 5≤n<8
Tier 3: 2011 is 10≤n<14, from 2012-13 9≤n<16; 2014-15 is 8≤n<15; 
Tier 4: 2011 is 14≤n, 2012-13 16≤n; 2014-15 is 15≤n; 2016-17 is 14≤n; 2018-19 is 13≤n; 2020-21 is 12≤n
'''
# Boolean event masks per tier, built from the `hits` column.
TIERS = dict([
    ('Tier 3', (hits >= 8) * (hits < 15)),
    ('Tier 4', hits >= 15),
])

# Line colour used for each composition in the plots.
COLORS = dict(
    true='black',
    proton='red',
    helium='orange',
    oxygen='purple',
    iron='blue',
)

# Legend label used for each composition in the plots.
LABEL = dict(
    true='Data',
    proton='P',
    helium='He',
    oxygen='O',
    iron='Fe',
)
# Placeholder tables indexed as WEIGHTS[composition][tier] and
# BINS[composition][tier]. The histogram cell overwrites the entries of the
# tiers it plots with the per-bin weighted counts and the bin edges.
_TABLE_COMPOSITIONS = ('true', 'proton', 'helium', 'oxygen', 'iron')
# 'Tier 1' was previously spelled 'Tier1', unlike every other tier key.
_TABLE_TIERS = ('Tier 1', 'Tier 2', 'Tier 3', 'Tier 4')


def _placeholder_table():
    """Return a fresh {composition: {tier: array}} table of placeholder arrays."""
    return {
        comp: {tier: np.arange(5, 8.5, .1) for tier in _TABLE_TIERS}
        for comp in _TABLE_COMPOSITIONS
    }


WEIGHTS = _placeholder_table()
BINS = _placeholder_table()

In [None]:
# Graph Log10 Energy vs Log 10 weighted counts
#
# Fills WEIGHTS[comp][tier] (weighted counts per bin) and BINS[comp][tier]
# (bin edges) for every composition and tier, and prints the weighted median
# and central 68% energy interval for proton, iron and the full sample.
ncols = len(TIERS)
N_ENERGY_BINS = 50

fig, axs = plt.subplots(figsize = (13 * ncols, 8), ncols=ncols)
# plt.subplots returns a bare Axes (not an array) when ncols == 1.
axs = np.atleast_1d(axs)

# Loop-invariant transforms of the energy column. primary_energy is divided
# by 1e6 to get PeV, i.e. it is stored in GeV.
log_energy = np.log10(primary_energy)
energy_pev = primary_energy / 1000000

for (tier, tier_cut), ax in zip(TIERS.items(), axs):
    print(tier)
    print()

    tier_selection = tier_cut * quality_cut

    # One set of bin edges per tier, spanning all selected events. With
    # bins=50 passed per histogram, each composition got edges fitted to its
    # own energy range, so the per-bin sums and ratios taken across
    # compositions in later cells compared different energy intervals.
    bin_edges = np.histogram_bin_edges(log_energy[tier_selection], bins=N_ENERGY_BINS)

    for comp, comp_cut in CUTS.items():
        combined_cut = comp_cut * tier_selection
        WEIGHTS[comp][tier], BINS[comp][tier], _patches = ax.hist(
            log_energy[combined_cut],
            bins=bin_edges,
            weights=Hweights[combined_cut],
            log=True,
            label=LABEL[comp],
            histtype='step',
            color=COLORS[comp],
        )

        if comp in ('proton', 'iron', 'true'):
            selected_energy = energy_pev[combined_cut]
            selected_weights = Hweights[combined_cut]

            median = round(weighted_quantiles(selected_energy, selected_weights), 2)
            lower = round(weighted_percentile(selected_energy, selected_weights, 16), 2)
            upper = round(weighted_percentile(selected_energy, selected_weights, 84), 2)

            print(f'    {comp}')
            print(f'        Median: {median} PeV')
            print(f'        68%: {lower}-{upper} PeV')
            print()

    # The x axis is log10 of the energy, matching the label of the later plots.
    ax.set_xlabel('log10(Energy/GeV)')
    ax.set_ylabel('Counts')
    ax.set_title(f'Log10 Energy vs Log 10 Weighted Counts for {tier}')
    ax.legend()

In [None]:
# Mean Atomic Charge
#
# For every tier, plots the weight-averaged natural log of the charge Z per
# energy bin, using the per-composition histograms stored in WEIGHTS / BINS.

# Recomputed here so the cell does not depend on a variable from another cell.
ncols = len(TIERS)

fig, axs = plt.subplots(figsize=(13 * ncols, 8), ncols=ncols)
# plt.subplots returns a bare Axes (not an array) when ncols == 1.
axs = np.atleast_1d(axs)

for tier, ax in zip(TIERS, axs):
    totZ = 0
    totH = 0

    for comp in CUTS:
        # 'true' is the sum of all primaries and has Z == 0, so it is skipped.
        if comp != 'true':
            totZ += WEIGHTS[comp][tier] * np.log(Z[comp])
            totH += WEIGHTS[comp][tier]

    # Bins without any weight become NaN (a gap in the line) instead of
    # triggering a 0/0 division warning.
    meanZ = np.divide(totZ, totH, out=np.full_like(totH, np.nan, dtype=float), where=totH != 0)

    ax.plot(BINS['true'][tier][:-1], meanZ, label = 'Mean Z')

    ax.set_xlabel('log10(Energy/GeV)')
    # np.log is the natural logarithm, so the axis shows ln(Z), not log10(Z).
    ax.set_ylabel('Mean ln(Z)')
    ax.set_title(f'Mean logZ vs. Energy for {tier}')

    ax.legend()

In [None]:
# Particle Fraction vs. Energy
#
# For every tier, plots each primary's share of the summed weighted counts
# per energy bin, using the histograms stored in WEIGHTS / BINS.

# Recomputed here so the cell does not depend on a variable from another cell.
ncols = len(TIERS)

fig, axs = plt.subplots(figsize=(13 * ncols, 8), ncols=ncols)
# plt.subplots returns a bare Axes (not an array) when ncols == 1.
axs = np.atleast_1d(axs)

# Every composition except 'true', which is the sum of all primaries.
primaries = [comp for comp in CUTS if comp != 'true']

for tier, ax in zip(TIERS, axs):
    # Total weight per bin for THIS tier. The previous version divided by a
    # `totH` left over from another cell's last loop iteration, so every tier
    # was normalised by the final tier's totals.
    tier_total = sum(WEIGHTS[comp][tier] for comp in primaries)

    for comp in primaries:
        # Bins without any weight become NaN instead of a 0/0 warning.
        fraction = np.divide(
            WEIGHTS[comp][tier],
            tier_total,
            out=np.full_like(tier_total, np.nan, dtype=float),
            where=tier_total != 0,
        )
        ax.plot(BINS[comp][tier][:-1], fraction, label=LABEL[comp])

    ax.set_xlabel('log10(Energy/GeV)')
    ax.set_ylabel('Fraction of Particles')
    ax.set_title(f'Particle Fraction vs. Energy for {tier}')

    ax.legend()

In [None]:
# Table of the overall percentage of particles, ln(A), and average rigidity
#
# All three tables use raw (unweighted) event counts after the tier and
# quality cuts.
# NOTE(review): Hweights is not applied here - confirm that is intended.

# Every composition except 'true', which is the sum of all primaries.
primaries = [comp for comp in CUTS if comp != 'true']

# Result tables keyed by tier, derived from TIERS so they cannot drift apart.
PCOMP = {tier: {comp: None for comp in primaries} for tier in TIERS}
LNA = {tier: {'ln(A)': None} for tier in TIERS}
AVG_RIGIDITY = {tier: {comp: None for comp in primaries} for tier in TIERS}

for tier, tier_cut in TIERS.items():
    tier_selection = tier_cut * quality_cut
    n_selected = np.count_nonzero(tier_selection)

    ln_a_sum = 0.0
    n_classified = 0

    for comp in primaries:
        combined_cut = CUTS[comp] * tier_selection
        n_comp = np.count_nonzero(combined_cut)

        # Fraction of the selected events that belong to this primary.
        PCOMP[tier][comp] = round(n_comp / n_selected, 3) if n_selected else float('nan')

        # log10 of the mean of energy / Z over this primary's events.
        if n_comp:
            AVG_RIGIDITY[tier][comp] = np.log10(np.average(primary_energy[combined_cut] / Z[comp]))
        else:
            AVG_RIGIDITY[tier][comp] = float('nan')

        ln_a_sum += n_comp * np.log(A[comp])
        n_classified += n_comp

    # Mean ln(A) over the selected events, using each primary's mass from A.
    # The previous version took np.log of particle_type itself, i.e. the log
    # of the particle code rather than of the mass.
    LNA[tier]['ln(A)'] = ln_a_sum / n_classified if n_classified else float('nan')

print('Partial Comp:')
print()
print(DataFrame(PCOMP))
print()
print('ln(A):')
print(DataFrame(LNA))
print()
print('log10(Avg. Rigidity):')
print()
print(DataFrame(AVG_RIGIDITY))

In [None]:
# Instantiate the GlobalSplineFit_IT flux model and print its `pdgids` and
# `groups` attributes.
fluxG = simweights.GlobalSplineFit_IT()

for attribute in ('pdgids', 'groups'):
    print(getattr(fluxG, attribute))