In [2]:
# import libraries
import os, sys, glob, pickle, tables

import numpy as np
import matplotlib.pyplot as plt

from pandas import DataFrame

# pip install simweights <--- run this once to install the package
# then replace <username> with your username and <version> with your Python version
sys.path.append('/home/<username>/.local/lib/python3.<version>/site-packages')
import simweights

In [3]:
# Glob patterns for the simulation HDF5 files, one pattern per primary
# (p, He, O, Fe -- in that order). Every pattern shares the same root and
# differs only in the primary sub-directory and the dataset number.
_SIM_ROOT = '/data/ana/CosmicRay/IceTop_level3/sim/IC86.2012/SIBYLL2.1'
_DATASETS = (('p', '12360'), ('He', '12630'), ('O', '12631'), ('Fe', '12362'))
PATHS = [f'{_SIM_ROOT}/{primary}/{dataset}_v1s/h5files/*.h5'
         for primary, dataset in _DATASETS]

In [4]:
def weighting(path):
    """Build one combined IceTop weighter from every file matching a glob pattern.

    Parameters:
    - path: glob pattern selecting the HDF5 files to load.

    Returns:
    - The per-file `simweights.IceTopWeighter` objects accumulated with `+=`,
      or None when the pattern matches no files.

    The PyTables file handles opened here are never closed.
    NOTE(review): presumably the weighter still reads from them afterwards --
    confirm before adding a close().
    """
    combined = None

    for h5_path in glob.glob(path):
        handle = tables.open_file(h5_path, 'r')
        file_weighter = simweights.IceTopWeighter(handle)

        if combined is not None:
            combined += file_weighter
        else:
            # First file: it seeds the accumulator.
            combined = file_weighter

    return combined

In [24]:
# Load in the SIBYLL2.1 particle sims
#
# Each quantity is restored from its pickle cache when that file exists.
# If ANY cache the notebook relies on is missing (energy, hits, type or
# weights), one weighter covering ALL datasets in PATHS is built so the
# missing quantities can be read from it afterwards.
#
# Fixes relative to the previous version of this cell:
#   * the per-dataset weighters are now combined; before, every loop iteration
#     overwrote `weighter`, so only the last dataset in PATHS was kept;
#   * a weighter is built whenever a cache is missing; the old
#     `elif not weighter==None` guards skipped the build exactly when it was
#     needed (e.g. energy cached but hits not);
#   * 'pickles/weights.pkl' is taken into account, because computing the
#     weights needs the weighter too.

def _load_cached(cache_path, default):
    """Return the unpickled content of cache_path, or default if it is not a file."""
    if not os.path.isfile(cache_path):
        return default
    # NOTE: pickle.load can execute arbitrary code -- only point this at cache
    # files you created yourself.
    with open(cache_path, 'rb') as file:
        return pickle.load(file)


primary_energy = _load_cached('pickles/energy.pkl', [])
hits = _load_cached('pickles/hits.pkl', [])
particle_type = _load_cached('pickles/type.pkl', [])

weighter = None

_required_caches = ('pickles/energy.pkl', 'pickles/hits.pkl',
                    'pickles/type.pkl', 'pickles/weights.pkl')

if not all(os.path.isfile(cache_path) for cache_path in _required_caches):
    for path in PATHS:
        dataset_weighter = weighting(path)
        if dataset_weighter is None:
            # The glob matched no files for this dataset; nothing to add.
            continue
        if weighter is None:
            weighter = dataset_weighter
        else:
            weighter += dataset_weighter

In [6]:
# Read from the weighter every quantity whose pickle cache is absent.
# TODO: nothing in this cell writes the pickle files yet -- the caches have to
# be produced elsewhere.

def _cache_missing(name):
    """True when 'pickles/<name>.pkl' is not an existing file."""
    return not os.path.isfile(f'pickles/{name}.pkl')


if _cache_missing('energy'):
    primary_energy = weighter.get_column('MCPrimary', 'energy')
if _cache_missing('type'):
    particle_type = weighter.get_column('MCPrimary', 'type')
if _cache_missing('hits'):
    hits = weighter.get_column('IceTopHLCSeedRTPulses_SnowUnAttenuated_info', 'nstrings')

weights = []
if os.path.isfile('pickles/weights.pkl'):
    with open('pickles/weights.pkl', 'rb') as file:
        weights = pickle.load(file)
else:
    # IceTop variant of the Gaisser H4a flux model.
    # NOTE(review): the previous comment called this the (p, He, N, Al, Fe)
    # version -- check the simweights flux-model docs for the actual groups.
    fluxH = simweights.GaisserH4a_IT()

    # One weight per event, obtained by handing the flux model to the weighter.
    weights = weighter.get_weights(fluxH)

In [7]:
# Lookup tables used by the plotting cells. Composition tables are keyed by
# 'proton' / 'helium' / 'oxygen' / 'iron' plus 'true' (= every event);
# TIERS is keyed by tier name.

# Boolean event masks per primary. The literals are the particle codes compared
# against MCPrimary.type; they follow the PDG numbering scheme
# (2212 = proton, 10LZZZAAAI for nuclei: He-4, O-16, Fe-56).
CUTS = {
    'proton': particle_type == 2212,
    'helium': particle_type == 1000020040,
    'oxygen': particle_type == 1000080160,
    'iron': particle_type == 1000260560,
    'true': np.ones(len(primary_energy), dtype=bool),
}

# Charge number assigned to each composition ('true' has no single charge, so 0).
Z = dict(true=0, proton=1, helium=2, oxygen=8, iron=26)

# Event masks per tier, defined by ranges of `hits`.
TIERS = {
    'Tier 1': (hits >= 3) & (hits < 5),
    'Tier 2': (hits >= 5) & (hits < 9),
    'Tier 3': (hits >= 9) & (hits < 16),
    'Tier 4': hits >= 16,
}

# Line colour per composition.
COLORS = dict(true='black', proton='red', helium='orange', oxygen='green', iron='blue')

# Legend label per composition.
LABEL = dict(true='Data', proton='P', helium='He', oxygen='O', iron='Fe')
def _placeholder_table():
    """Return {composition: {tier: placeholder array}} with an independent array per slot.

    The placeholders are overwritten with histogram contents / bin edges by the
    histogram cell. The tier keys must match the keys of TIERS exactly: the
    previous version spelled the first one 'Tier1' (no space), which left a
    stale extra key behind once 'Tier 1' was filled in.
    """
    compositions = ('true', 'proton', 'helium', 'oxygen', 'iron')
    tier_names = ('Tier 1', 'Tier 2', 'Tier 3', 'Tier 4')
    return {
        comp: {tier: np.arange(5, 8.5, .1) for tier in tier_names}
        for comp in compositions
    }


# Per-composition, per-tier weighted histogram contents.
WEIGHTS = _placeholder_table()
# Per-composition, per-tier histogram bin edges.
BINS = _placeholder_table()

In [8]:
def weighted_quantiles(values, weights, quantiles=0.5):
    """
    Compute weighted quantiles of a dataset without interpolation.

    Parameters:
    - values: array-like, the data values.
    - weights: array-like, same length as values.
    - quantiles: float or array-like of floats between 0 and 1 (default 0.5,
      i.e. the weighted median).

    Returns:
    - For each requested quantile q, the first value (in sorted order) whose
      cumulative weight reaches q times the total weight. A scalar for a scalar
      `quantiles`, otherwise an array.
    """
    # Accept plain sequences as well as arrays (same as weighted_percentile);
    # fancy indexing below does not work on Python lists.
    values = np.asarray(values)
    weights = np.asarray(weights)

    order = np.argsort(values)
    cumulative = np.cumsum(weights[order])

    # cumulative[-1] is the total weight; locate each target in the running sum.
    targets = np.array(quantiles) * cumulative[-1]
    positions = np.searchsorted(cumulative, targets)
    return values[order[positions]]

def weighted_percentile(data, weights, percentile):
    """
    Weighted percentile of a dataset, linearly interpolated.

    Parameters:
    - data: array-like, the data values.
    - weights: array-like, same length as data.
    - percentile: float between 0 and 100.

    Returns:
    - The data value at which the normalized cumulative weight equals
      percentile / 100, interpolated linearly between neighbouring samples.
    """
    samples = np.asarray(data)
    sample_weights = np.asarray(weights)

    # Order both arrays by increasing data value.
    order = np.argsort(samples)
    ordered_samples = samples[order]

    # Running weight sum, scaled so that its last entry is 1.
    running = np.cumsum(sample_weights[order])
    cdf = running / running[-1]

    # Read the requested fraction off the (cdf -> value) curve.
    return np.interp(percentile / 100, cdf, ordered_samples)

In [None]:
# Graph Log10 Energy vs Log 10 weighted counts
#
# One panel per tier, one step histogram per composition. The histogram
# contents and edges are stored in WEIGHTS / BINS for the later cells.

fig, axs = plt.subplots(figsize = (52, 8), ncols=4)

# Loop-invariant arrays: compute once instead of once per histogram.
log_energy = np.log10(primary_energy)
energy_pev = primary_energy / 1000000  # the printout below is in PeV (axis is in GeV)

for (tier, tier_cut), ax in zip(TIERS.items(), axs):
    print(tier)
    print()

    # Use ONE set of bin edges for every composition in this tier. With a bare
    # `bins=50` each histogram got edges spanning only its own events, so the
    # WEIGHTS arrays of different compositions did not refer to the same energy
    # bins even though later cells combine them bin-by-bin. These edges span
    # all events of the tier, i.e. exactly what the 'true' histogram used before.
    tier_edges = np.histogram_bin_edges(log_energy[tier_cut], bins=50)

    for comp, comp_cut in CUTS.items():
        selection = comp_cut * tier_cut
        WEIGHTS[comp][tier], BINS[comp][tier], _patches = ax.hist(
            log_energy[selection], bins=tier_edges, weights=weights[selection],
            log=True, label=LABEL[comp], histtype='step', color=COLORS[comp])

        if comp in ('proton', 'iron', 'true'):
            selected_energy = energy_pev[selection]
            selected_weights = weights[selection]
            median = weighted_quantiles(selected_energy, selected_weights)
            lower = weighted_percentile(selected_energy, selected_weights, 16)
            upper = weighted_percentile(selected_energy, selected_weights, 84)

            print(f'    {comp}')
            print(f'        Median: {round(median, 2)} PeV')
            print(f'        68%: {round(lower, 2)}-{round(upper, 2)} PeV')
            print()

    # The x axis shows log10 of the energy, not the energy itself.
    ax.set_xlabel('log10(Energy/GeV)')
    ax.set_ylabel('Counts')
    ax.set_title(f'Log10 Energy vs Log 10 Weighted Counts for {tier}')
    ax.legend()

In [None]:
### Particle Fraction vs. Energy
#
# For every tier, plot each composition's weighted histogram as a fraction of
# the tier's total weight.
fig, axs = plt.subplots(figsize=(52, 8), ncols=4)

for (tier, tier_cut), ax in zip(TIERS.items(), axs):
    # WEIGHTS holds sums of event weights, so the normalisation has to be a sum
    # of weights as well. Dividing by the number of simulated events (as was
    # done before) mixes weighted and unweighted quantities and does not give
    # a fraction.
    tier_total_weight = np.sum(weights[tier_cut])

    for comp in CUTS:
        if comp == 'true':
            continue
        # x = left bin edges of this composition's histogram.
        ax.plot(BINS[comp][tier][:-1], WEIGHTS[comp][tier] / tier_total_weight, label=LABEL[comp])

    ax.set_xlabel('log10(Energy/GeV)')
    ax.set_ylabel('Fraction of Particles')
    ax.set_title(f'Particle Fraction vs. Energy for {tier}')

    ax.legend()

In [None]:
# Mean Atomic Charge
#
# For every tier, plot the weighted mean of ln(Z) in each energy bin:
#     <ln Z>(bin) = sum_comp( W_comp(bin) * ln Z_comp ) / sum_comp( W_comp(bin) )
#
# Fixes relative to the previous version of this cell:
#   * the per-composition histograms are recomputed on ONE common set of edges
#     (those of the 'true' histogram); the stored WEIGHTS arrays may have been
#     filled with different edges per composition and then cannot be added
#     bin-by-bin;
#   * the mean is normalised per bin by the summed weight, not by the number
#     of simulated events in the tier;
#   * np.log is the natural logarithm, so the labels now say ln(Z), not log10.

fig, axs = plt.subplots(figsize=(52, 8), ncols=4)

log_energy = np.log10(primary_energy)

for (tier, tier_cut), ax in zip(TIERS.items(), axs):
    edges = BINS['true'][tier]
    n_bins = len(edges) - 1

    totZ = np.zeros(n_bins)          # sum over compositions of weight * ln(Z)
    total_weight = np.zeros(n_bins)  # sum over compositions of weight

    for comp, comp_cut in CUTS.items():
        if comp == 'true':
            # 'true' is the union of all events and has no charge of its own.
            continue
        selection = comp_cut * tier_cut
        comp_counts, _edges = np.histogram(log_energy[selection], bins=edges, weights=weights[selection])
        totZ += comp_counts * np.log(Z[comp])
        total_weight += comp_counts

    # Bins without any weight have no defined mean: leave them as NaN so they
    # show up as gaps in the curve instead of raising a divide warning.
    meanZ = np.full(n_bins, np.nan)
    np.divide(totZ, total_weight, out=meanZ, where=total_weight > 0)

    ax.plot(edges[:-1], meanZ, label = 'Mean ln(Z)')

    ax.set_xlabel('log10(Energy/GeV)')
    ax.set_ylabel('Mean ln(Z)')
    ax.set_title(f'Mean ln(Z) vs. Energy for {tier}')

    ax.legend()

In [None]:
# Table of the overall share of each composition per tier.
# Each entry is (number of events passing both the composition and the tier
# mask) / (number of events passing the tier mask), rounded to 3 decimals.
# These are plain event counts -- the event weights are not used here.
# The 'ln(A)' row is not filled yet and stays None.
TABLE = {
    tier_name: {'proton': None, 'helium': None, 'oxygen': None, 'iron': None, 'ln(A)': None}
    for tier_name in ('Tier 1', 'Tier 2', 'Tier 3', 'Tier 4')
}

for tier, tier_cut in TIERS.items():
    n_in_tier = len(primary_energy[tier_cut])

    for comp, comp_cut in CUTS.items():
        if comp == 'true':
            # 'true' selects everything, so its share would trivially be 1.
            continue
        n_selected = len(primary_energy[comp_cut * tier_cut])
        TABLE[tier][comp] = round(n_selected / n_in_tier, 3)

print(DataFrame(TABLE))

In [None]:
# Instantiate simweights' GlobalSplineFit_IT flux model and print its `pdgids`
# and `groups` attributes for inspection.
# NOTE(review): presumably these list the particle ids the model covers and how
# they are grouped -- confirm against the simweights documentation.
fluxG = simweights.GlobalSplineFit_IT()

print(fluxG.pdgids)
print(fluxG.groups)