# IceTop Rigidity Analysis

Before running this notebook, please make sure you have the following .npy files in `saved_data/<year>/<simulation>/` (the folder for the year and interaction model selected in the Control Panel cell):

- energy.npy

- Gweights.npy

- nstrings.npy

- Hweights.npy

- type.npy

- IceTop_reco_succeeded.npy

- zenith.npy

If not, please run save_data.py for the year and interaction model you would like to analyze.

# Table of Contents
##### 1. [Set-up](#set-up)
##### 2. [Rate vs. Energy](#weighted-rate-vs-energy)
##### 3. [Mean Atomic Charge](#mean-atomic-charge)
##### 4. [Fractional Composition vs. Energy](#fractional-compositions-vs-energy)
##### 5. [Table of fractional comp, ln(A), <log(R)>](#table-of-percentage-of-particles--and-mean--of-rigidity)
##### 6. [Rate vs. Rigidity](#weighted-rate-vs-rigidity)
##### 7. [Energy vs. Rigidity](#energy-vs-rigidity)

In [1]:
# Import libraries
import os

import numpy as np
import matplotlib.pyplot as plt

from pandas import DataFrame

In [2]:
# Control panel: pick the dataset to analyze (detector year and interaction
# model) and the number of bins used for every histogram in this notebook.
year = 2015
simulation = 'SIBYLL2.1'
nbins = 50

## Set-Up

In [3]:
# Weighted quantiles (the default quantile of 0.5 gives the weighted median)
def weighted_quantiles(values, weights, quantiles=0.5):
    """Return the weighted quantile(s) of ``values``.

    The values are sorted, their weights are cumulatively summed, and for each
    requested quantile ``q`` the first value whose cumulative weight reaches
    ``q * total_weight`` is returned (no interpolation between samples).

    Parameters
    ----------
    values : array-like, 1-D
        Sample values.
    weights : array-like, same shape as ``values``
        Weight of each sample.
    quantiles : float or array-like of float, optional
        Quantile(s) to evaluate as fractions of the total weight.
        Defaults to 0.5, i.e. the weighted median.

    Returns
    -------
    numpy scalar or numpy.ndarray
        A single element of ``values`` for a scalar ``quantiles``, otherwise an
        array with one element of ``values`` per requested quantile.

    Raises
    ------
    ValueError
        If ``values`` and ``weights`` differ in shape, or if they are empty
        (an empty selection has no quantiles).
    """
    # Accept plain lists/tuples as well as arrays: fancy indexing needs ndarrays
    values = np.asarray(values)
    weights = np.asarray(weights)

    if values.shape != weights.shape:
        raise ValueError(
            f'values and weights must have the same shape, got {values.shape} and {weights.shape}'
        )
    if values.size == 0:
        raise ValueError('cannot compute weighted quantiles of an empty selection')

    order = np.argsort(values)
    cumulative_weight = np.cumsum(weights[order])

    # Position of the first sample whose cumulative weight reaches each target
    targets = np.asarray(quantiles) * cumulative_weight[-1]
    positions = np.searchsorted(cumulative_weight, targets)

    return values[order[positions]]

In [4]:
# Load the saved simulation arrays for the year / interaction model chosen above.
# Every array is published as a notebook-level variable named after its key
# (energy, type, zenith, ...); the cells below read those names directly.
# NOTE(review): the 'type' key rebinds Python's built-in `type` in this notebook.
KEYS = ['energy', 'type', 'zenith', 'IceTop_reco_succeeded', 'nstrings', 'Hweights', 'Gweights']

data_dir = f'saved_data/{year}/{simulation}'
for array_name in KEYS:
    with open(f'{data_dir}/{array_name}.npy', 'rb') as handle:
        globals()[array_name] = np.load(handle)

In [5]:
# Composition look-up tables
# Which primaries are available depends on the dataset: the 2012 EPOS-LHC and
# SIBYLL2.3 sets only have proton and iron, everything else has all four.
if year == 2012 and simulation in ('EPOS-LHC', 'SIBYLL2.3'):
    _species = ('proton', 'iron')
else:
    _species = ('proton', 'helium', 'oxygen', 'iron')

# Particle-type codes as stored in the `type` array
_type_codes = {
    'proton': 2.21200000e+03,
    'helium': 1.00002004e+09,
    'oxygen': 1.00008016e+09,
    'iron': 1.00026056e+09,
}
_species_colors = {'proton': 'red', 'helium': 'orange', 'oxygen': 'purple', 'iron': 'blue'}
_species_labels = {'proton': 'p', 'helium': 'He', 'oxygen': 'O', 'iron': 'Fe'}

# Boolean event masks per composition; 'true' keeps every event
CUTS = {name: type == _type_codes[name] for name in _species}
CUTS['true'] = np.ones(len(energy), dtype=bool)

# Plot colour per composition
COLORS = {'true': 'black', **{name: _species_colors[name] for name in _species}}

# Legend label per composition
LABEL = {'true': 'Data', **{name: _species_labels[name] for name in _species}}

# Atomic mass (amu) of each primary
A = dict(proton=1.0073, helium=4.0026, oxygen=16, iron=55.845)

# Charge number (number of protons) of each primary
Z = dict(proton=1, helium=2, oxygen=8, iron=26)

# IceTop Tiers and quality cuts
# Quality cuts may not work well with some years and Tiers (e.g. reco_succeeded
# cancels out Tier 4 for 2012), so a few Tiers only get the zenith cut.
#
# The masks are combined with `&`. Do NOT pass a third positional array to
# np.logical_and: that argument is `out=`, so it would be overwritten with the
# result instead of being AND-ed in, and the quality cuts would silently be lost.
zenith_cut = zenith < np.radians(55)
quality_cuts = zenith_cut & (IceTop_reco_succeeded == 1)

# nstrings range [low, high) of every Tier for each year (None = no upper bound)
_TIER_EDGES = {
    2011: {'Tier 1': (3, 5), 'Tier 2': (5, 10), 'Tier 3': (10, 14), 'Tier 4': (14, None)},
    2012: {'Tier 1': (3, 5), 'Tier 2': (5, 9), 'Tier 3': (9, 16), 'Tier 4': (16, None)},
    2013: {'Tier 1': (3, 5), 'Tier 2': (5, 9), 'Tier 3': (9, 16), 'Tier 4': (16, None)},
    2014: {'Tier 1': (3, 5), 'Tier 2': (5, 8), 'Tier 3': (8, 15), 'Tier 4': (15, None)},
    2015: {'Tier 3': (8, 15), 'Tier 4': (15, None)},
    2016: {'Tier 3': (7, 14), 'Tier 4': (14, None)},
    2017: {'Tier 3': (7, 14), 'Tier 4': (14, None)},
    2018: {'Tier 3': (6, 13), 'Tier 4': (13, None)},
    2019: {'Tier 3': (6, 13), 'Tier 4': (13, None)},
    2020: {'Tier 3': (5, 12), 'Tier 4': (12, None)},
    2021: {'Tier 3': (5, 12), 'Tier 4': (12, None)},
}

# (year, Tier) pairs that use only the zenith cut instead of the full quality cuts
_ZENITH_ONLY_TIERS = {(2012, 'Tier 4'), (2015, 'Tier 4')}

if year not in _TIER_EDGES:
    # Fail here with a clear message rather than with a NameError on TIERS later
    raise ValueError(f'No Tier definitions for year {year}; supported years: {sorted(_TIER_EDGES)}')

TIERS = {}
for _tier, (_low, _high) in _TIER_EDGES[year].items():
    _mask = nstrings >= _low
    if _high is not None:
        _mask = _mask & (nstrings < _high)
    _event_cut = zenith_cut if (year, _tier) in _ZENITH_ONLY_TIERS else quality_cuts
    TIERS[_tier] = _mask & _event_cut


# Event weights for each flux model
SIMS = {'H4a': Hweights, 'GSF': Gweights}

# Key sets shared by the nested containers below
_all_comps = ('true', 'proton', 'helium', 'oxygen', 'iron')
_all_tiers = ('Tier 1', 'Tier 2', 'Tier 3', 'Tier 4')
_models = ('H4a', 'GSF')

# Histogram contents per composition / Tier / weight. The np.arange values are
# only placeholders; the rate-vs-energy cell overwrites them with real histograms.
FLUX = {
    comp: {tier: {model: np.arange(5, 8.5, .1) for model in _models} for tier in _all_tiers}
    for comp in _all_comps
}

# Histogram bin edges per composition / Tier / weight (placeholders, as above)
BINS = {
    comp: {tier: {model: np.arange(5, 8.5, .1) for model in _models} for tier in _all_tiers}
    for comp in _all_comps
}

# Line style per weight
LINESTYLE = {'H4a': None, 'GSF': '--'}

# Per-bin sums over compositions for each Tier and weight:
# 'denominator' is the total flux, 'numerator' the flux times log10(Z).
# Each leaf gets its own zero array because later cells update them in place.
TOTALS = {
    part: {tier: {model: np.zeros(nbins) for model in _models} for tier in _all_tiers}
    for part in ('numerator', 'denominator')
}

# Summary table (fractional compositions, ln(A), mean/median logs) per weight and Tier.
# Years before 2015 have four Tiers, later years only Tier 3 and Tier 4.
_table_tiers = _all_tiers if year < 2015 else ('Tier 3', 'Tier 4')
TABLE = {model: {tier: {} for tier in _table_tiers} for model in _models}
        

In [6]:
# log10(rigidity): start from log10(energy) - 3 for every event, then subtract
# log10(Z) of the event's primary, i.e. log10(E / Z) in log space.
rigidity = np.log10(energy) - 3

for species in CUTS:
    if species == 'true':
        # 'true' selects all events and has no charge of its own
        continue
    rigidity[CUTS[species]] -= np.log10(Z[species])

## Weighted rate vs. energy

In [None]:
# Weighted rate vs. log10(energy): one panel per Tier, one step histogram per
# composition and weight. The histogram contents and edges are stored in
# FLUX / BINS for the cells below.
ncols = len(TIERS)
fig, axs = plt.subplots(figsize=(13 * ncols, 8), ncols=ncols)

log_energy = np.log10(energy)

# np.atleast_1d keeps the loop working if only a single Tier (single Axes) exists
for (tier, tier_cut), ax in zip(TIERS.items(), np.atleast_1d(axs)):
    # Print Tier for table
    print(tier)
    print('--------------------------------------------------')

    # Use one set of bin edges for every composition in this Tier. With
    # bins=nbins each histogram would get its own range, and the per-bin sums
    # over compositions computed later would mix different energy intervals.
    # These edges equal the ones the all-event ('true') histogram gets by default.
    shared_edges = np.histogram_bin_edges(log_energy[tier_cut], bins=nbins)

    for weight_name, weights in SIMS.items():
        # Print weights for table
        print(weight_name)

        for comp, comp_cut in CUTS.items():
            # Combine cuts
            combined_cut = np.logical_and(comp_cut, tier_cut)

            # Graph primary energy and keep the histogram for later cells
            FLUX[comp][tier][weight_name], BINS[comp][tier][weight_name], _ = ax.hist(
                log_energy[combined_cut],
                weights=weights[combined_cut],
                bins=shared_edges,
                linestyle=LINESTYLE[weight_name],
                log=True,
                label=f'{LABEL[comp]} ({weight_name})',
                histtype='step',
                color=COLORS[comp],
            )

            # Print median energy and its central 68% interval
            if comp in ('proton', 'iron', 'true'):
                # The axis labels give energy in GeV, so dividing by 1e6 yields PeV
                energy_scaled = energy[combined_cut] / 1000000
                median = weighted_quantiles(energy_scaled, weights[combined_cut])
                # Both interval bounds use the weights of the current model
                low, high = weighted_quantiles(energy_scaled, weights[combined_cut], quantiles=[.16, .84])

                print(f'    {comp}')
                print(f'        Median: {round(median, 2)} PeV')
                print(f'        68%: {round(low, 2)}-{round(high, 2)} PeV')
                print()

    # Label axes and add legend and title (raw strings: '\l' is not a valid escape)
    ax.set_xlabel(r'$\log_{{10}}$(Energy) (GeV)')
    ax.set_ylabel(r'$\log_{{10}}$(Rates) (Hz)')
    ax.set_title(f'{year} {simulation} Energy vs. Weighted Counts for {tier}')
    ax.legend()

## Mean Atomic Charge

In [None]:
# Mean log10(Z) vs. energy: flux-weighted average of log10(Z) over the
# compositions in every energy bin, for each Tier and weight.
fig, axs = plt.subplots(figsize=(13 * ncols, 8), ncols=ncols)

for tier, ax in zip(TIERS, np.atleast_1d(axs)):
    for weight_name in SIMS:
        # Start from zero on every run. Accumulating into the existing TOTALS
        # arrays would double count whenever this cell is executed again.
        numerator = np.zeros(nbins)
        denominator = np.zeros(nbins)

        for comp in CUTS:
            if comp != 'true':
                # Flux of this composition times log10 of its charge number
                numerator += FLUX[comp][tier][weight_name] * np.log10(Z[comp])

                # Total flux over compositions
                denominator += FLUX[comp][tier][weight_name]

        # Later cells read these sums from TOTALS
        TOTALS['numerator'][tier][weight_name] = numerator
        TOTALS['denominator'][tier][weight_name] = denominator

        # <log10(Z)> per bin; bins without flux become NaN and leave a gap in the line
        with np.errstate(divide='ignore', invalid='ignore'):
            mean_log_z = numerator / denominator

        # Plot <log10(Z)> against the lower edge of each energy bin
        ax.plot(BINS['true'][tier][weight_name][:-1], mean_log_z, label=weight_name)

    ax.set_xlabel(r'$\log_{{10}}$(Energy/GeV)')
    ax.set_ylabel(r'Mean $\log_{{10}}(Z)$')
    ax.set_title(rf'{year} {simulation} Mean $\log_{{10}}(Z)$ vs. Energy for {tier}')

    ax.legend()

## Fractional Compositions vs. Energy

In [None]:
# Particle fraction vs. energy: flux of each composition divided by the total
# flux over compositions (TOTALS['denominator'], filled by the mean log10(Z) cell).
fig, axs = plt.subplots(figsize=(13 * ncols, 8), ncols=ncols)

for tier, ax in zip(TIERS, np.atleast_1d(axs)):
    for weight_name in SIMS:
        total_flux = TOTALS['denominator'][tier][weight_name]

        for comp in CUTS:
            # 'true' is the sum of all compositions, so it has no fraction of its own
            if comp == 'true':
                continue

            # Bins without any flux become NaN and leave a gap in the line
            with np.errstate(divide='ignore', invalid='ignore'):
                fraction = FLUX[comp][tier][weight_name] / total_flux

            # Plot fractional composition against the lower edge of each energy bin
            ax.plot(BINS[comp][tier][weight_name][:-1], fraction,
                    label=f'{LABEL[comp]} ({weight_name})', linestyle=LINESTYLE[weight_name], color=COLORS[comp])

    # '\log' (with backslash) so the label renders like the other energy axes
    ax.set_xlabel(r'$\log_{{10}}$(Energy/GeV)')
    ax.set_ylabel('Fraction of Particles')
    ax.set_title(f'{year} {simulation} Particle Fraction vs. Energy for {tier}')

    ax.legend()

## Table of fractional comp, ln(A), and <log_10> of rigidity

In [None]:
# Summary table per weight: fractional compositions, ln(A), <log(E)>, <log(Z)>
# and the median log10 rigidity for every Tier.
# Relies on FLUX and TOTALS having been filled by the cells above.
print(f'{year} {simulation}')
print()

for weight_name, weights in SIMS.items():
    # Print weight name for data table
    print(weight_name)
    print()

    for tier, tier_cut in TIERS.items():
        row = TABLE[weight_name][tier]
        mean_total_flux = np.mean(TOTALS['denominator'][tier][weight_name])

        # Start ln_A at zero
        ln_A = 0

        for comp in CUTS:
            if comp != 'true':
                fraction = np.mean(FLUX[comp][tier][weight_name]) / mean_total_flux

                # Save the partial composition to the table (rounded for display only)
                row[f'{comp} part'] = round(fraction, 3)

                # Accumulate mean ln(A) from the unrounded fraction so rounding
                # errors of the individual fractions do not add up in ln(A)
                ln_A += fraction * np.log(A[comp])

        # Save ln(A) to the table
        row['ln(A)'] = round(ln_A, 3)

        # Mean log E (scaled to TeV), mean log Z, and median log rigidity
        row['<log(E)>'] = round(np.average(np.log10(energy)[tier_cut] - 3, weights=weights[tier_cut]), 3)
        row['<log(Z)>'] = round(np.mean(TOTALS['numerator'][tier][weight_name]) / mean_total_flux, 3)
        row['median(log(R))'] = round(weighted_quantiles(rigidity[tier_cut], weights=weights[tier_cut]), 3)

    # Print out the table for that weight
    print(DataFrame(TABLE[weight_name]))
    print()

In [None]:
# Alternate median(log(R)) calculation: weighted mean and weighted median of
# log10 rigidity for every Tier and weight, printed and drawn as one marker each.
COLS = {
    'H4a': 'blue',
    'GSF': 'orange'
}
fig, axs = plt.subplots(figsize=(13, 8), ncols=1)

for tier_index, (tier, tier_cut) in enumerate(TIERS.items()):
    print(tier)
    print()

    # The loop variable must be spelled `weights`; otherwise the stale `weights`
    # left over from an earlier cell would be used for both models.
    for weight_name, weights in SIMS.items():
        mean_log_r = np.average(rigidity[tier_cut], weights=weights[tier_cut])
        median_log_r = weighted_quantiles(rigidity[tier_cut], weights=weights[tier_cut])

        print(f'    {weight_name}')
        print(f'        Mean: {round(mean_log_r, 3)}')
        print(f'        Median: {round(median_log_r, 3)}')
        print()

        # Only the first Tier contributes legend entries, so each marker style
        # is listed once instead of once per Tier
        show_in_legend = tier_index == 0
        mean_label = f'Mean ({weight_name})' if show_in_legend else '_nolegend_'
        median_label = f'Median ({weight_name})' if show_in_legend else '_nolegend_'

        # Filled marker = mean, hollow marker (transparent face) = median
        axs.scatter(tier, mean_log_r, color=COLS[weight_name], label=mean_label, antialiased=True)
        axs.scatter(tier, median_log_r, color=[0, 0, 0, 0], label=median_label, edgecolors=COLS[weight_name], antialiased=True)

axs.set_xlabel('Tier')
# The plotted values are means and medians of log10(rigidity), not the rigidity itself
axs.set_ylabel(r'Mean / Median $\log_{10}$(Rigidity/TV)')
axs.legend()

## Weighted rate vs rigidity

In [None]:
# Weighted rate vs. log10(rigidity): one panel per Tier, one step histogram per
# composition and weight, plus a marker at each weighted median rigidity.
fig, axs = plt.subplots(figsize=(13 * ncols, 8), ncols=ncols)

for (tier, tier_cut), ax in zip(TIERS.items(), np.atleast_1d(axs)):
    for weight_name, weights in SIMS.items():
        for comp, comp_cut in CUTS.items():
            # 'true' (all events) is not drawn here, so skip it before building the mask
            if comp == 'true':
                continue

            combined_cut = np.logical_and(tier_cut, comp_cut)

            # Graph rate vs. rigidity; ax.hist returns (bin contents, bin edges, patches)
            rate = ax.hist(rigidity[combined_cut], weights=weights[combined_cut], bins=nbins, color=COLORS[comp], log=True,
                           label=f'{comp} ({weight_name})', histtype='step', linestyle=LINESTYLE[weight_name])

            # Mark the weighted median rigidity, drawn at the median bin height
            ax.scatter(weighted_quantiles(rigidity[combined_cut], weights=weights[combined_cut]), np.median(rate[0]),
                       label=f'Median {comp} ({weight_name})', c=COLORS[comp])

    # Raw strings: '\l' is not a valid escape sequence in a normal string literal
    ax.set_xlabel(r'$\log_{{10}}$(Rigidity) (TV)')
    ax.set_ylabel(r'$\log_{{10}}$(Rate) (Hz)')
    ax.set_title(f'{year} {simulation} Rate vs. Rigidity for {tier}')
    ax.legend()

## Energy vs. rigidity

In [None]:
# Energy vs. rigidity: log10(energy) on the x axis against log10(rigidity) on
# the y axis, one line per composition and weight, one panel per Tier.
fig, axs = plt.subplots(figsize=(13 * ncols, 8), ncols=ncols)

for (tier, tier_cut), ax in zip(TIERS.items(), np.atleast_1d(axs)):
    for weight_name, weights in SIMS.items():
        for comp, comp_cut in CUTS.items():
            if comp == 'true':
                continue

            combined_cut = np.logical_and(comp_cut, tier_cut)

            # The weights do not enter this plot; each weight only selects the
            # line style and legend entry
            ax.plot(np.log10(energy[combined_cut]), rigidity[combined_cut], linestyle=LINESTYLE[weight_name],
                    label=f'{LABEL[comp]} ({weight_name})', color=COLORS[comp])

    # x is the energy and y the rigidity, so label the axes accordingly
    ax.set_xlabel(r'$\log_{{10}}$(Energy) (GeV)')
    ax.set_ylabel(r'$\log_{{10}}$(Rigidity) (TV)')
    ax.set_title(f'{year} {simulation} Energy vs. Rigidity for {tier}')
    ax.legend()