# Energy Reconstruction Using CNN

## General Notes:
This notebook is designed for regression convolutional neural networks used to reconstruct the energy of a cosmic-ray primary. It assumes all input has already been normalized, as is best practice.

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import os
from glob import glob
from keras.models import load_model
from data_tools import load_preprocessed, dataPrep, getCut

## Model Selection

In [None]:
# Edit this file path to the models folder containing .h5 and .npy files for each model.
# os.path.join inserts the separator of the host OS, so the notebook is not tied to Windows.
modelPrefix = os.path.join(os.getcwd(), 'models')

# Edit this file path to the folder containing the simulation data.
simPrefix = os.path.join(os.getcwd(), 'simdata')

# Option to change font size for all labels within this notebook
labelParams = {'fontsize':16}

In [None]:
# List of available models
# A model is reported as available only when both its .h5 file and its .npy file
# are present in modelPrefix; names are the file names without extension.

# os.path.join keeps the glob pattern valid on any OS; os.path.splitext removes the
# extension without relying on its length.
modelList = sorted(glob(os.path.join(modelPrefix, '*.h5')))
modelList = [os.path.splitext(os.path.basename(m))[0] for m in modelList]

paramList = sorted(glob(os.path.join(modelPrefix, '*.npy')))
paramList = [os.path.splitext(os.path.basename(f))[0] for f in paramList]

print('Available models:', sorted(set(modelList).intersection(paramList)))
print('\nModels without parameter files:', sorted(set(modelList).difference(paramList)))

In [None]:
# Keys you want to study
# Available models will appear above

keyList = ['', '']
# Example: keyList = ['test0', 'test1']

labels = {'':'', '':''}
# Example: labels = {'test0':'label0', 'test1':'label1'}

# Automatic intake of parameters from parameter files
# NOTE(review): allow_pickle=True unpickles arbitrary Python objects, which can execute
# code on load. Only point modelPrefix at parameter files you trust.
p = {}
for key in keyList:
    # os.path.join keeps the path valid on any OS
    d = np.load(os.path.join(modelPrefix, '%s.npy' % key), allow_pickle=True)
    # .item() unwraps the single object stored in the array (later passed to dataPrep as keyword arguments)
    p[key] = d.item()
    print(key, ':', p[key])

## Assessment Data

In [None]:
# Load data, x in four layers (q1, q2, t1, t2) and y as a dictionary with event-level parameters
# NOTE(review): 'assessment' presumably selects the held-out data set stored under simPrefix -- confirm in data_tools
x, y = load_preprocessed(simPrefix, 'assessment')

In [None]:
# Event-level parameters (true values)
# Transposing y['dir'] lets the two direction components be unpacked as separate arrays
rawTheta, phi = y['dir'].transpose()
# Define 0 degrees as overhead
theta = np.pi - rawTheta

## Notes

This notebook is designed for beginners in machine learning who want to compare multiple models.

## Model Preparation

In [None]:
# Load each selected model and compute its reconstructed energies.
#
# Results from an earlier run of this cell are reused when they exist: re-creating the
# dictionaries unconditionally would empty them on every run and the skip check below
# could never trigger. On a fresh kernel both start out empty.
models = globals().get('models', {})
recoE = globals().get('recoE', {})

# True values do not depend on the model, so read them once, outside the loop.
# This also guarantees `energy` is defined when every key is skipped.
comp = y['comp']
energy = y['energy']

# Calculate reconstructed energies. This can take a bit, but should print out info on each key as it works
for key in keyList:

    # Comment these two lines if you want to rerun your energy reconstructions each time
    if key in recoE:
        continue

    print('Working on %s...' % key)
    # Note: very sensitive to tensorflow/keras version.
    if key not in models:
        # os.path.join keeps the path valid on any OS
        models[key] = load_model(os.path.join(modelPrefix, '%s.h5' % key)) # Edit file path

    # Configure input data
    x_i = dataPrep(x, y, **p[key])

    print('Model parameters')
    print(p[key])

    # Models should only output energy
    recoE[key] = models[key].predict([x_i[0], x_i[1]]).flatten()

## Plotting Parameters

In [None]:
# Energy bin edges: 180 equal-width bins between 5 and 8
ebins = np.linspace(5, 8, 181)
# Bin centers, i.e. the midpoint of each pair of neighboring edges
evalues = 0.5 * (ebins[1:] + ebins[:-1])

# Figure grids use one column per cut and one row per model
cutNames = ['No Cut', 'Quality Cut']
nrows = len(keyList)
ncols = len(cutNames)

## Energy Reconstruction Plots

### Total Energy Resolution

In [None]:
# Residual (reconstructed minus true) histograms with a logarithmic count axis, one panel per cut
histArgs = dict(range=(-2,2), bins=121, histtype='step', log=True, linewidth=4)
fig, axs = plt.subplots(ncols=ncols, figsize=(13*ncols, 8))

for ax, cutName in zip(axs, cutNames):
    # Overlay every selected model on the same panel
    for key in keyList:
        cut = getCut(cutName, x, y, p, recoE, key)
        residual = recoE[key][cut] - energy[cut]
        ax.hist(residual, label=labels[key], **histArgs)
    ax.set_title('Energy Resolution (%s)' % cutName, **labelParams)
    ax.set_xlabel(r'$\log_{10}(E_{\mathrm{reco}}/\mathrm{GeV}) - \log_{10}(E_{\mathrm{true}}/\mathrm{GeV})$', **labelParams)
    ax.set_ylabel('Counts', **labelParams)
    ax.legend()

In [None]:
# Plot energy resolution
# Linear count axis, residuals restricted to (-1, 1); one panel per cut

histArgs = {
    'range': (-1,1),
    'bins': 121,
    'histtype': 'step',
    'linewidth': 4,
}
fig, axs = plt.subplots(ncols=ncols, figsize=(13*ncols, 8))
residualLabel = r'$\log_{10}(E_{\mathrm{reco}}/\mathrm{GeV}) - \log_{10}(E_{\mathrm{true}}/\mathrm{GeV})$'

for ax, cutName in zip(axs, cutNames):
    # Draw one curve per model
    for key in keyList:
        cut = getCut(cutName, x, y, p, recoE, key)
        ax.hist(recoE[key][cut] - energy[cut], label=labels[key], **histArgs)
    ax.set_title('Energy Resolution (%s)' % cutName, **labelParams)
    ax.set_xlabel(residualLabel, **labelParams)
    ax.set_ylabel('Counts', **labelParams)
    ax.legend()

In [None]:
# Summary parameters
# For every model and cut, print the median residual (reco - true) together with the
# distance from the median to the 84th and 16th percentiles, i.e. "median +up -down".
# The raw percentiles used to be printed after the '+', which only reads correctly
# when the median is exactly zero and the 16th percentile happens to be negative.

for key in keyList:
    for cutName in cutNames:
        cut = getCut(cutName, x, y, p, recoE, key)
        median, err_min, err_max = np.percentile(recoE[key][cut] - energy[cut], (50,16,84))
        # Offsets are non-negative by construction, so the signs can live in the format string
        print('Energy resolution for %s (%s): %.03f +%.03f -%.03f' % (key, cutName, median, err_max - median, median - err_min))
    print()

### Two-Dimensional Visualization

In [None]:
# Reconstructed vs. true energy as a 2-D histogram, one row per model and one column per cut.
# Every true-energy column is normalized to unit sum; dashed red steps mark where the
# cumulative reconstructed-energy distribution of that column crosses each level in
# contourValues, and the dotted black line is reco == true.

# Empty bins give log10(0) below; silence the divide-by-zero warning (kernel-wide setting)
np.seterr(divide = 'ignore')

# squeeze=False always returns a 2-D grid of axes, so a single model needs no special case
fig, axs = plt.subplots(figsize=(13*ncols, 10*nrows), ncols=ncols, nrows=nrows,
                        sharex=True, sharey=True, squeeze=False)

contourValues = [0.025, 0.16, 0.84, 0.975]

for i, key in enumerate(keyList):
    for j, cutName in enumerate(cutNames):

        ax = axs[i, j]
        cut = getCut(cutName, x, y, p, recoE, key)

        # The `normed` keyword no longer exists in current NumPy; it was only passing the
        # default (raw counts), so it is simply dropped together with weights=None.
        h, xedges, yedges = np.histogram2d(recoE[key][cut], energy[cut], bins=(ebins, ebins))
        # Normalize each true-energy column; empty columns are divided by 1 and stay zero
        ntot = np.sum(h, axis=0).astype(float)
        ntot[ntot==0] = 1.
        h /= ntot

        # Create contours: for each true-energy column, the first reco bin whose
        # cumulative fraction exceeds the level (0 when no bin does)
        contourList = [[] for _ in contourValues]
        for col in h.transpose():
            ccol = col.cumsum()
            for contour, val in zip(contourList, contourValues):
                above = np.where(ccol > val)[0]
                contour.append(above[0] if len(above) else 0)
        for k, contour in enumerate(contourList):
            # Repeat the first entry so there is one value per bin edge for ax.step
            contour.insert(0, contour[0])
            # Upper-half levels are drawn at the upper edge of their bin. The check must use
            # the contour index (not the model index) and update the list in place;
            # the earlier version did neither, so the shift never happened.
            if k >= len(contourList) / 2:
                contour[:] = [idx+1 for idx in contour]
        ax.plot(evalues, evalues, 'k', ls=':')
        for contour in contourList:
            ax.step(ebins, ebins[contour], color='red', linestyle='--')

        # Plot on a log scale
        extent = [yedges[0], yedges[-1], xedges[0], xedges[-1]]
        im = ax.imshow(np.log10(h), extent=extent, origin='lower', interpolation='none', vmin=-3.5, vmax=-0.5)
        ax.set_title('%s (%s)' % (key, cutName), **labelParams)
        ax.set_xlabel(r'$\log_{10}(E_{\mathrm{true}}/\mathrm{GeV})$', **labelParams)
        ax.set_ylabel(r'$\log_{10}(E_{\mathrm{reco}}/\mathrm{GeV})$', **labelParams)
        fig.colorbar(im, ax=ax)