# Cosmic Ray Energy Reconstruction Using Convolutional Neural Network
## Utilizing Charge, Time, and Zenith

In [None]:
from csv import writer
import os

from keras.callbacks import CSVLogger, EarlyStopping
from keras.layers import BatchNormalization, Concatenate, Conv2D, Dense, Flatten, Input, MaxPooling2D
from keras.models import Model, clone_model
import numpy as np

from data_tools import data_prep, get_reco_nan_filter, load_preprocessed

## Model Design

In [None]:
### Baseline data prep ###
# User-editable configuration for this notebook. Everything below is read by
# the later cells; nothing here touches data or builds a model.

# Edit these parameters
# This dict is forwarded as keyword arguments to data_prep(x, y, **prep) and is
# saved next to each trained model so the preparation can be reproduced.
prep = {'clc':True, 'sta5':False, 'q':None, 't':None, 't_shift':True, 't_clip':0, 'normed':True, 'reco':None, 'cosz':False}

# Set the number of models to train under this CNN
num_models_to_train = 1

# Name for model(s)
# A running integer is appended to this name when saving, so it may stay empty
model_name = ''

# Set the number of epochs the model(s) should run for
# Actual result may differ due to early stopping
num_epochs = 100

# Loss metric to use for training
# Suggestion to experiment with 'huber_loss', especially for MASTER'S THESIS model
loss_function = 'mean_squared_error'

# Optimizer to use for training
optimizer = 'adam'

# Other loss metrics to analyze while training
# Only for user to monitor - have no effect on model training
metrics = ['mae','mse']

# File directory to folder that holds models
model_prefix = os.getcwd()+'/models'

# File directory to folder that holds simulation data 
sim_prefix = os.getcwd()+'/simdata'

# Booleans for easier to read conditionals - no need to change this
# A reconstruction is used whenever 'reco' is set to anything other than None
has_reco = prep['reco'] is not None
# Time is dropped only when 't' is explicitly False; the default None keeps it.
# The equality test (rather than `is not False`) is kept on purpose so that any
# value comparing equal to False still disables time, exactly as before.
has_time = prep['t'] != False

In [None]:
# Load simulation data from files for training
# The 'train' split is read from the sim_prefix folder. Later cells index y by
# key (y.keys(), y['energy']), so y behaves like a dict of per-event arrays.
x, y = load_preprocessed(sim_prefix, 'train')

# Prepare simulation data
# The prep dict is expanded into keyword arguments. idx is used by later cells
# as the channel boundary: [..., :idx] is fed to the 'charge' input and
# [..., idx:] to the 'time' input.
# NOTE(review): x_i appears to be a [detector_array, zenith] pair when a
# reconstruction is requested and a bare array otherwise - confirm in data_prep.
x_i, idx = data_prep(x, y, **prep)

In [None]:
### Filter NaNs from reconstruction data ###
# Only runs when a reconstruction is part of the inputs (has_reco). The same
# mask is applied to every entry of x_i and to every array in y so that inputs
# and targets stay aligned event by event.
if has_reco:
    # presumably a boolean keep-mask with one entry per event - verify against get_reco_nan_filter
    reco_nan_filter = get_reco_nan_filter(prep['reco'], y)

    # Replace the entries in place so x_i stays the same list object
    for i, component in enumerate(x_i):
        x_i[i] = component[reco_nan_filter]

    # Only values are reassigned, so iterating the dict directly is safe
    for key in y:
        y[key] = y[key][reco_nan_filter]

    # Share of events removed by the mask. np.sum counts the kept events in
    # native code instead of looping over every event in Python, and the guard
    # avoids a ZeroDivisionError when there are no events at all.
    n_events = len(reco_nan_filter)
    n_kept = np.sum(reco_nan_filter)
    nan_loss = (n_events - n_kept) / n_events * 100 if n_events else 0.0
    print('Percentage of events with a NaN in reconstruction: %.02f' % nan_loss)

## Creating the Convolutional Neural Network

In [None]:
### \/ MASTER'S THESIS \/ ###
# Disabled alternative architecture. The triple-quoted string below is a bare
# string expression, so none of the code inside it is executed; the BASELINE
# section that follows is what actually defines charge_input, time_input,
# q_flat_layer and t_flat_layer.
# To switch architectures, remove the quotes around this section and around the
# matching MASTER'S THESIS dense section further down, and disable both
# BASELINE sections instead.
"""
# Charge is always included as a parameter in the model - highest correlation with energy of all inputs
# Charge input layer
charge_input = Input(shape=(10,10,idx), name='charge')
# Starts off with two Convolutional layers, each one has double the neurons of the previous
q_conv1_layer = BatchNormalization()(Conv2D(64, kernel_size=3, padding='same', activation='relu')(charge_input))
q_conv2_layer = BatchNormalization()(Conv2D(128, kernel_size=3, padding='same', activation='relu')(q_conv1_layer))
# A Maxpooling layer is applied after two Convolutional layers
q_maxpooling1_layer = MaxPooling2D(pool_size=2, strides=1, padding='same')(q_conv2_layer)
# Continues with two more Convolutional layers, each one has double the neurons of the previous
q_conv3_layer = BatchNormalization()(Conv2D(256, kernel_size=3, padding='same', activation='relu')(q_maxpooling1_layer))
q_conv4_layer = BatchNormalization()(Conv2D(512, kernel_size=3, padding='same', activation='relu')(q_conv3_layer))
# A second Maxpooling layer is applied after two additional Convolutional layers
q_maxpooling2_layer = MaxPooling2D(pool_size=2, strides=1, padding='same')(q_conv4_layer)
# Continues with one more Convolutional layer with again double the neurons
q_conv5_layer = BatchNormalization()(Conv2D(1024, kernel_size=3, padding='same', activation='relu')(q_maxpooling2_layer))
# A third maxpooling layer is applied after the final Convolutional layer
q_maxpooling3_layer = MaxPooling2D(pool_size=2, strides=1, padding='same')(q_conv5_layer)
# Layer is Flattened before Concatenated
q_flat_layer = Flatten()(q_maxpooling3_layer)

# Time has been found to provide information comparable to Zenith when given to more advanced CNNs
if has_time: # Whether time has been included as a parameter in the model
    # Time input layer 
    time_input = Input(shape=(10,10,x_i[0].shape[-1]-idx), name='time')
    # Starts off with two Convolutional layers, each one has double the neurons of the previous
    t_conv1_layer = BatchNormalization()(Conv2D(64, kernel_size=3, padding='same', activation='relu')(time_input))
    t_conv2_layer = BatchNormalization()(Conv2D(128, kernel_size=3, padding='same', activation='relu')(t_conv1_layer))
    # A Maxpooling layer is applied after two Convolutional layers
    t_maxpooling1_layer = MaxPooling2D(pool_size=2, strides=1, padding='same')(t_conv2_layer)
    # Continues with two more Convolutional layers, each one has double the neurons of the previous
    t_conv3_layer = BatchNormalization()(Conv2D(256, kernel_size=3, padding='same', activation='relu')(t_maxpooling1_layer))
    t_conv4_layer = BatchNormalization()(Conv2D(512, kernel_size=3, padding='same', activation='relu')(t_conv3_layer))
    # A second Maxpooling layer is applied after two additional Convolutional layers
    t_maxpooling2_layer = MaxPooling2D(pool_size=2, strides=1, padding='same')(t_conv4_layer)
    # Continues with one more Convolutional layer with again double the neurons
    t_conv5_layer = BatchNormalization()(Conv2D(1024, kernel_size=3, padding='same', activation='relu')(t_maxpooling2_layer))
    # A third maxpooling layer is applied after the final Convolutional layer
    t_maxpooling3_layer = MaxPooling2D(pool_size=2, strides=1, padding='same')(t_conv5_layer)
    # Layer is Flattened before Concatenated
    t_flat_layer = Flatten()(t_maxpooling3_layer)
"""
### \/ BASELINE \/ ###
def _relu_conv(filters):
    """Return a new 3x3, same-padded, ReLU-activated Conv2D layer with `filters` filters."""
    return Conv2D(filters, kernel_size=3, padding='same', activation='relu')


# --- Charge branch ---
# Charge is always part of the model: it has the highest correlation with
# energy of all inputs. The first idx channels of the detector array feed it.
charge_input = Input(shape=(10,10,idx), name='charge')
# Three stacked convolutions, halving the filter count each time (64 -> 32 -> 16)
q_conv1_layer = _relu_conv(64)(charge_input)
q_conv2_layer = _relu_conv(32)(q_conv1_layer)
q_conv3_layer = _relu_conv(16)(q_conv2_layer)
# Flattened so it can be concatenated with the other branches
q_flat_layer = Flatten()(q_conv3_layer)

# --- Time branch (optional) ---
# Time has been found to provide information comparable to Zenith when given
# to more advanced CNNs. Built only when time is part of the model.
if has_time:
    # Every channel after the first idx ones belongs to time
    time_input = Input(shape=(10,10,x_i[0].shape[-1]-idx), name='time')
    # Same 64 -> 32 -> 16 stack as the charge branch
    t_conv1_layer = _relu_conv(64)(time_input)
    t_conv2_layer = _relu_conv(32)(t_conv1_layer)
    t_conv3_layer = _relu_conv(16)(t_conv2_layer)
    # Flattened so it can be concatenated with the other branches
    t_flat_layer = Flatten()(t_conv3_layer)



### \/ BOTH \/ ###
# Joins the flattened branches (charge, optionally time, optionally zenith)
# into the single tensor that feeds the Dense layers.

# Charge on its own is not a supported configuration, so fail before building
# anything else. ValueError is a subclass of Exception, so existing handlers
# that catch Exception keep working.
if not (has_time or has_reco):
    raise ValueError('Why train the model on charge alone? It is not worth it, promise.')

# Charge is always present; the remaining branches are appended in the fixed
# order charge, time, zenith.
concat_branches = [q_flat_layer]

if has_time:
    concat_branches.append(t_flat_layer)

# Including Zenith, although cheatsy, is (as of now), the best way to improve the model
if has_reco:
    # Zenith input layer: one scalar per event. The shape must be a tuple;
    # `(1)` is just the integer 1 and is rejected by Keras versions that
    # require an iterable shape.
    zenith_input = Input(shape=(1,), name='zenith')
    concat_branches.append(zenith_input)

# Flat layers are Concatenated before being passed into Dense layers
concat_layer = Concatenate()(concat_branches)


### \/ MASTER'S THESIS \/ ###
# Disabled alternative head: the string below is never executed.
"""
# The Concatenated layers run through one Dense layer
dense_layer = BatchNormalization()(Dense(2048, activation='relu')(concat_layer))
# This last Dense layer is the output of the model
output = Dense(1)(dense_layer)
"""
### \/ BASELINE \/ ###
def _hidden_dense(tensor):
    """Feed `tensor` through a new 256-unit ReLU Dense layer and return the result."""
    return Dense(256, activation='relu')(tensor)


# The concatenated features pass through three identical hidden layers
dense1_layer = _hidden_dense(concat_layer)
dense2_layer = _hidden_dense(dense1_layer)
dense3_layer = _hidden_dense(dense2_layer)
# A single linear unit produces the model output
output = Dense(1)(dense3_layer)

## Preparing, Training, and Saving Models

In [None]:
# Trains num_models_to_train models on the architecture defined above. Each
# model is saved as <name>.h5 with its prep settings as <name>.npy, gets a
# per-epoch training log, and has its best epoch appended to results.csv.

# Make sure the output folder exists before anything tries to write into it
os.makedirs(model_prefix, exist_ok=True)

# The graph inputs and the training arrays do not change between models, so
# they are selected once. The dict keys match the names of the Input layers.
if has_time and has_reco:
    model_inputs = [charge_input, time_input, zenith_input]
    fit_inputs = {'charge':x_i[0][...,:idx], 'time':x_i[0][...,idx:], 'zenith':x_i[1].reshape(-1,1)}
elif has_time:
    model_inputs = [charge_input, time_input]
    # NOTE(review): x_i is indexed directly here but as x_i[0] in the other
    # branches - presumably data_prep returns a bare array when no
    # reconstruction is requested; confirm against data_prep.
    fit_inputs = {'charge':x_i[...,:idx], 'time':x_i[...,idx:]}
else:
    model_inputs = [charge_input, zenith_input]
    fit_inputs = {'charge':x_i[0][...,:idx], 'zenith':x_i[1].reshape(-1,1)}

for num_model in range(num_models_to_train):

    ### Prepare model for compilation ###
    # Make sure we don't overwrite any models: append the first integer for
    # which no saved .h5 file exists yet
    i = 0
    while os.path.exists('%s/%s%d.h5' % (model_prefix, model_name, i)):
        i += 1
    name = '%s%d' % (model_name, i)

    # The layers above are created only once. Wrapping them directly would make
    # every model in this loop (and every re-run of this cell) share, and keep
    # training, the same weights. clone_model rebuilds the same architecture
    # with newly initialised layers, so each model starts from scratch.
    template = Model(inputs=model_inputs, outputs=output, name=name)
    model = clone_model(template)

    model.compile(loss=loss_function, optimizer=optimizer, metrics=metrics)
    #model.summary()


    # Training
    # EarlyStopping stops the model from training when it starts to overfit to the data
    # The main parameter to change is patience - number of epochs where val_loss does not improve before stopping
    # Should experiment with restore_best_weights
    # NOTE(review): with restore_best_weights=False the saved model holds the
    # weights of the last epoch run, while results.csv below records the best epoch.
    early_stop = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=0, mode='min', baseline=None, restore_best_weights=False)
    # Per-epoch log, written next to the model file (same name, no extension)
    csv_logger = CSVLogger('%s/%s' % (model_prefix, name))

    history = model.fit(fit_inputs, y=y['energy'], epochs=num_epochs, validation_split=0.15, callbacks=[early_stop, csv_logger])

    # Save the model as .h5 and the prep settings used to build its inputs as .npy
    model.save('%s/%s.h5' % (model_prefix, name))
    np.save('%s/%s.npy' % (model_prefix, name), prep)

    # Append the results of the best epoch (lowest validation loss) to a .csv file
    val_losses = history.history['val_loss']
    index = int(np.argmin(val_losses))
    val_loss = val_losses[index]
    loss = history.history['loss'][index]
    new_row = [name, index, loss, val_loss]
    # newline='' is what the csv module expects; without it some platforms
    # write a blank line after every row. The with block closes the file.
    with open('%s/results.csv' % model_prefix, 'a', newline='') as f:
        writer(f).writerow(new_row)