# Notebook version of CNN_TestOnly

In [None]:
#########################
# Version of CNN on 12 May 2020
# 
# Evaluates net for given model and plots
# Takes in ONE file to Test on, can compare to old reco
# Runs Energy, Zenith, Track length (1 variable energy or zenith, 2 = energy then zenith, 3 = EZT)
#   Inputs:
#       -i input_file:  name of ONE file 
#       -d path:        path to input files
#       -o output_dir:  path to output_plots directory
#       -n name:        name for folder in output_plots that has the model you want to load
#       -e epochs:      epoch number of the model you want to load
#       --variables:    Number of variables to train for (1 = energy or zenith, 2 = EZ, 3 = EZT)
#       --first_variable: Which variable to train for, energy or zenith (for num_var = 1 only)
#       --compare_reco: boolean flag, true means you want to compare to an old reco (pegleg, retro, etc.)
#       -t test:        Name of reco to compare against, with "oscnext" used for no reco to compare with
####################################

In [2]:
import argparse
import os, sys
import time

import h5py
import matplotlib
import matplotlib.pyplot as plt
import numpy

## Setup parameters

In [3]:
# --- Switches a reader is expected to edit ---------------------------------
contained = False       # True selects the "_contained" test file and model
first_var = "zenith" #zenith or energy
compare_reco = False    # True selects a file that also stores an old reco ("reco_test")

indir = "/data/icecube/jmicallef/processed_CNN_files/"
outdir = "/home/users/jmicallef/LowEnergyNeuralNetwork/"

# --- Pick the ONE input file ------------------------------------------------
# The file name must be final BEFORE test_file is built; previously test_file
# was assembled first, so switching compare_reco on never changed the file
# that was actually opened.
if compare_reco:
    reco_name = "PegLeg"
    if contained:
        input_file = "Level5p_IC86.2013_genie_numu.014640.IC19_vertexDC_CC.lt100.transformedinputstatic_transformed3output_file00_contained.testonly.hdf5"
    else:
        input_file = "Level5p_IC86.2013_genie_numu.014640.lt200_vertexDCCC.lt100.transformedinputstatic_transformed3output.testonly.hdf5"
else:
    # "oscnext" is the label used when there is no old reco to compare with
    reco_name = "oscnext"
    if contained:
        input_file = "NuMu_140000_level2_uncleaned_cleanedpulsesonly_vertexDC_IC19_flat_95bins_36034evtperbin_CC.lt100_contained.testonly.hdf5"
    else:
        input_file = "NuMu_140000_level2_uncleaned_cleanedpulsesonly_vertexDC_IC19_flat_95bins_36034evtperbin_CC.lt100.transformedinputstatic_transformed3output.testonly.hdf5"
test_file = indir + input_file

# --- Pick the trained model (folder name + epoch) ---------------------------
# The model folder name encodes the trained variable as a single letter.
if first_var == "zenith":
    letter="Z"
elif first_var == "energy":
    letter="E"
else:
    # Fail here with a clear message instead of a NameError on `letter` below
    raise ValueError("first_var must be 'zenith' or 'energy', got %r"%(first_var,))
if contained:
    filename = "numu_flat_%s_5_100_CC_uncleaned_cleanedpulsesonly_3600kevents_nologcharge_oldvertexDC_lrEpochs50_containedIC19"%letter
    epoch = 252
else:
    filename = "numu_flat_%s_5_100_CC_uncleaned_cleanedpulsesonly_3600kevents_nologcharge_IC19_lrEpochs50"%letter
    epoch = 175

# Number of network outputs (1 = energy or zenith, 2 = EZ, 3 = EZT)
output_variables = 1

# --- Network hyper-parameters (passed to make_network / the optimizer) ------
dropout = 0.2
learning_rate = 1e-3
DC_drop_value = dropout
IC_drop_value = dropout
connected_drop_value = dropout
min_energy = 5
max_energy = 100.

# --- Output locations -------------------------------------------------------
save = True
save_folder_name = "%soutput_plots/%s/"%(outdir,filename)
if save==True:
    if not os.path.isdir(save_folder_name):
        # makedirs also creates a missing "output_plots/" parent
        os.makedirs(save_folder_name)
# The weights file lives in the model folder, one level above the plot folder
load_model_name = "%s%s_%iepochs_model.hdf5"%(save_folder_name,filename,epoch) 
use_old_weights = True

# Plots go into a sub-folder named after the compared reco and the epoch
save_folder_name += "%s_%sepochs/"%(reco_name,epoch)
if not os.path.isdir(save_folder_name):
    os.makedirs(save_folder_name)
print(save_folder_name)
print("Test type: %s"%reco_name)

/home/users/jmicallef/LowEnergyNeuralNetwork/output_plots/numu_flat_Z_5_100_CC_uncleaned_cleanedpulsesonly_3600kevents_nologcharge_IC19_lrEpochs50/oscnext_175epochs/
Test type: oscnext


## Load Data from testonly file

In [4]:
print("Testing on %s"%test_file)
# Read every dataset fully into memory ([:]) while the file is open. The
# context manager closes the HDF5 handle even if a dataset is missing; the
# previous bare `f.close` (no parentheses) never actually closed it.
with h5py.File(test_file, 'r') as f:
    Y_test_use = f['Y_test'][:]
    X_test_DC_use = f['X_test_DC'][:]
    X_test_IC_use = f['X_test_IC'][:]
    if compare_reco:
        # Only read when comparing against an old reconstruction
        reco_test_use = f['reco_test'][:]
del f
print(X_test_DC_use.shape,X_test_IC_use.shape)

Testing on /data/icecube/jmicallef/processed_CNN_files/NuMu_140000_level2_uncleaned_cleanedpulsesonly_vertexDC_IC19_flat_95bins_36034evtperbin_CC.lt100.transformedinputstatic_transformed3output.testonly.hdf5
(342328, 8, 60, 5) (342328, 19, 60, 5)


In [None]:
# Plot the truth array of the test set into the plot folder, tagged "test".
# NOTE(review): plot_output is a project helper not visible here; presumably it
# draws the distributions of the output variables - confirm in PlottingChecks.
from PlottingChecks import plot_output
plot_output(Y_test_use,save_folder_name,filenumber="test")

## Set Up Net (Load Model, Set Loss Function, Compile)

In [5]:
# Make network and load model
# Adam is used by the compile cell below; SGD and the two callbacks are not
# referenced anywhere in the visible part of this notebook.
from keras.optimizers import SGD
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint

from cnn_model import make_network
print(X_test_DC_use.shape,X_test_IC_use.shape)
# Build the network from the two test input arrays, the number of outputs and
# the three dropout values set in the parameter cell.
# NOTE(review): make_network is a project function; presumably it only uses the
# arrays to read the input shapes - confirm in cnn_model.
model_DC = make_network(X_test_DC_use,X_test_IC_use,output_variables,DC_drop_value,IC_drop_value,connected_drop_value)
print("Loading model %s"%load_model_name)
# Uncomment to print the layer-by-layer summary
#print(model_DC.summary())
# Only the weights are loaded from the file; the model is compiled with its
# loss in the next cell.
model_DC.load_weights(load_model_name)


Using TensorFlow backend.


(342328, 8, 60, 5) (342328, 19, 60, 5)












Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Loading model /home/users/jmicallef/LowEnergyNeuralNetwork/output_plots/numu_flat_Z_5_100_CC_uncleaned_cleanedpulsesonly_3600kevents_nologcharge_IC19_lrEpochs50/numu_flat_Z_5_100_CC_uncleaned_cleanedpulsesonly_3600kevents_nologcharge_IC19_lrEpochs50_175epochs_model.hdf5


In [6]:
# WRITE OWN LOSS FOR MORE THAN ONE REGRESSION OUTPUT
# Each loss picks one column of the truth array and one column of the network
# output. Truth columns used here: 0 = energy, 1 = zenith, 2 = track.
from keras.losses import mean_squared_error
from keras.losses import mean_absolute_percentage_error
# Needed by TrackLoss; it was previously missing, so the three-output branch
# failed with a NameError once the loss was evaluated.
from keras.losses import mean_squared_logarithmic_error

if first_var == "zenith":
    # Zenith-only network: the single network output (column 0) is compared
    # against truth column 1.
    def ZenithLoss(y_truth,y_predicted):
        #return logcosh(y_truth[:,1],y_predicted[:,1])
        return mean_squared_error(y_truth[:,1],y_predicted[:,0])

    # Defined for parity with the other branches; compile() below is given
    # ZenithLoss directly.
    def CustomLoss(y_truth,y_predicted):
            zenith_loss = ZenithLoss(y_truth,y_predicted)
            return zenith_loss

    model_DC.compile(loss=ZenithLoss,
                optimizer=Adam(lr=learning_rate),
                metrics=[ZenithLoss])
    
    print("zenith first")


else: 
    # Energy-first network: output column i is compared against truth column i.
    def EnergyLoss(y_truth,y_predicted):
        return mean_absolute_percentage_error(y_truth[:,0],y_predicted[:,0])

    def ZenithLoss(y_truth,y_predicted):
        return mean_squared_error(y_truth[:,1],y_predicted[:,1])

    def TrackLoss(y_truth,y_predicted):
        return mean_squared_logarithmic_error(y_truth[:,2],y_predicted[:,2])

    if output_variables == 3:
        # Energy + zenith + track: unweighted sum of the three losses
        def CustomLoss(y_truth,y_predicted):
            energy_loss = EnergyLoss(y_truth,y_predicted)
            zenith_loss = ZenithLoss(y_truth,y_predicted)
            track_loss = TrackLoss(y_truth,y_predicted)
            return energy_loss + zenith_loss + track_loss

        model_DC.compile(loss=CustomLoss,
                  optimizer=Adam(lr=learning_rate),
                  metrics=[EnergyLoss,ZenithLoss,TrackLoss])

    elif output_variables == 2:
        # Energy + zenith: unweighted sum of the two losses
        def CustomLoss(y_truth,y_predicted):
            energy_loss = EnergyLoss(y_truth,y_predicted)
            zenith_loss = ZenithLoss(y_truth,y_predicted)
            return energy_loss + zenith_loss

        model_DC.compile(loss=CustomLoss,
                  optimizer=Adam(lr=learning_rate),
                  metrics=[EnergyLoss,ZenithLoss])
    else:
        # Energy only; compile() below is given EnergyLoss directly.
        def CustomLoss(y_truth,y_predicted):
            energy_loss = EnergyLoss(y_truth,y_predicted)
            return energy_loss

        model_DC.compile(loss=EnergyLoss,
                    optimizer=Adam(lr=learning_rate),
                    metrics=[EnergyLoss])



zenith first


## Run Network

In [None]:
# Run prediction
# Feed both test input arrays loaded from the test file to the network in one
# predict() call and report the wall-clock time it took.
t0 = time.time()
Y_test_predicted = model_DC.predict([X_test_DC_use,X_test_IC_use])
t1 = time.time()
# The event count is read from the first axis of the prediction array
print("This took me %f seconds for %i events"%(((t1-t0)),Y_test_predicted.shape[0]))

## Plot 

In [None]:
def get_RMS(resolution):
    """
    Root-mean-square spread of the values around their mean.
    Receives:
        resolution = array (or list) of values, e.g. reco - truth
    Returns:
        sqrt( mean( (mean(resolution) - resolution)**2 ) ); the mean of the
        squared deviations is taken along the first axis, the subtracted
        mean is taken over all elements. Empty input gives nan.
    """
    values = numpy.asarray(resolution)
    # Vectorised: the Python builtin sum() walked the array element by element
    deviations = values - numpy.mean(values)
    return numpy.sqrt(numpy.mean(deviations**2, axis=0))

def plot_length_energy(truth, nn_reco, emax=100., track_index=2,tmax=200.,\
                        save=False,savefolder=None,use_fraction=False,\
                        bins=60,minval=None,maxval=None,ylim=None,\
                        cut_truth = False, axis_square =False, zmax=None,
                        variable="Energy", units = "GeV", epochs=None,reco_name="CNN"):
    """
    2D histogram of true energy vs true track length, weighted by the resolution
    Receives:
        truth = array, Y_test truth; column 0 is multiplied by emax and
                column track_index by tmax
        nn_reco = array, reconstructed values, in the same units as
                  truth[:,0]*emax; an (N,1) column is flattened to (N,)
        emax = float, factor applied to truth[:,0]
        track_index = int, column of truth holding the track length
        tmax = float, factor applied to truth[:,track_index]
        save = optional, bool to save plot
        savefolder = optional, output folder to save to, if not in current dir
        use_fraction = bool, weight by (reco - truth)/truth instead of reco - truth
        bins = int, number of bins plot (will use for both the x and y direction)
        zmax = float, passed to hist2d as cmax; also appended to the file name
        variable = string, name of the variable, used in the plot title
        units = string, units for the variable, used in the colorbar label
        reco_name = string, name of the reconstruction, used in the file name
        minval, maxval, ylim, cut_truth, axis_square, epochs = accepted but
                  not used by this function
    Returns:
        None; draws the figure and, if save is True, writes
        <savefolder>TrueEnergyTrackReco<reco_name>_2DHist[_zmax<zmax>].png
    """

    true_energy = truth[:,0]*emax
    true_track_length =  truth[:,track_index]*tmax

    # A single-output prediction can arrive as an (N,1) column; subtracting an
    # (N,) array from it would broadcast to (N,N), so flatten it first.
    nn_reco = numpy.asarray(nn_reco)
    if nn_reco.ndim == 2 and nn_reco.shape[1] == 1:
        nn_reco = nn_reco[:,0]
    
    if use_fraction:
        nn_resolution = (nn_reco - true_energy)/true_energy
        title = "Fractional %s Resolution"%variable
        zlabel = "(reconstruction - truth) / truth" 
    else:
        nn_resolution = nn_reco - true_energy
        title = "%s Resolution"%variable
        zlabel = "reconstruction - truth (%s)"%units
    
    plt.figure(figsize=(10,7))
    # With weights, each bin holds the SUM of the resolution of its events
    cts,xbin,ybin,img = plt.hist2d(true_energy, true_track_length, bins=bins, weights=nn_resolution, cmax=zmax)
    cbar = plt.colorbar()
    # pyplot has no zlabel(); the colour axis is labelled through the colorbar
    cbar.ax.set_ylabel(zlabel, rotation=90, fontsize=15)
    plt.set_cmap('viridis_r')
    plt.xlabel("True Neutrino Energy (GeV)",fontsize=15)
    plt.ylabel("True Track Length (m)",fontsize=15)
    plt.title(title,fontsize=15)
    
    # Suffix for the output file name; must exist before it is extended
    nocut_name = ""
    if zmax:
        nocut_name += "_zmax%i"%zmax    
    if save:
        # savefolder=None means "current directory", not a folder called "None"
        folder = savefolder if savefolder is not None else ""
        plt.savefig("%sTrueEnergyTrackReco%s_2DHist%s.png"%(folder,reco_name,nocut_name))

In [None]:
# NOTE(review): plot_length_energy subtracts truth[:,0]*emax from its second
# argument, so it expects an energy prediction on the same scale. Here the raw
# model.predict output is passed unscaled, and first_var is "zenith" in the
# parameter cell - confirm this is the intended input before trusting the plot.
plot_length_energy(Y_test_use, Y_test_predicted)

In [None]:
### MAKE THE PLOTS ###
from PlottingFunctions import plot_single_resolution
from PlottingFunctions import plot_2D_prediction
from PlottingFunctions import plot_2D_prediction_fraction
from PlottingFunctions import plot_bin_slices
from PlottingFunctions import plot_distributions

# Per-variable plot settings, indexed like the truth columns:
# 0 = energy, 1 = cos(zenith), 2 = track
plots_names = ["Energy", "CosZenith", "Track"]
plots_units = ["GeV", "", "m"]
# Factor that converts the stored (scaled) values back to physical units
maxabs_factors = [100., 1., 200.]
# Axis/cut limits. These were only present as commented-out lines, which left
# maxvals/minvals undefined in the loop below. The track entry of maxvals is a
# placeholder that is replaced from the data when the track is an output.
maxvals = [max_energy, 1., 0.]
minvals = [min_energy, -1., 0.]
use_fractions = [True, False, True]
bins_array = [95,100,100]
if output_variables == 3: 
    # was maxabs_factor[2]: a name that only exists inside the loop below
    maxvals = [max_energy, 1., Y_test_use[:,2].max()*maxabs_factors[2]]

# Truth column / settings index used when the network was trained on zenith
# (matches "CosZenith" in plots_names and the column ZenithLoss reads).
first_var_index = 1

for num in range(0,output_variables):

    # Column in the network output vs column in the truth array
    NN_index = num
    if first_var == "energy":
        true_index = num
        name_index = num
    if first_var == "zenith":
        true_index = first_var_index
        name_index = first_var_index
    plot_name = plots_names[name_index]
    plot_units = plots_units[name_index]
    maxabs_factor = maxabs_factors[name_index]
    maxval = maxvals[name_index]
    minval = minvals[name_index]
    use_frac = use_fractions[name_index]
    bins = bins_array[name_index]
    print("Plotting %s at position %i in true test output and %i in NN test output"%(plot_name, true_index,NN_index))

    # Truth and prediction in physical units, shared by the plots below
    truth_scaled = Y_test_use[:,true_index]*maxabs_factor
    reco_scaled = Y_test_predicted[:,NN_index]*maxabs_factor
    
    # 2D truth vs reco, once with the value cut and once without
    plot_2D_prediction(truth_scaled,\
                        reco_scaled,\
                        save,save_folder_name,bins=bins,\
                        minval=minval,maxval=maxval,\
                        variable=plot_name,units=plot_units, epochs=epoch)
    plot_2D_prediction(truth_scaled, reco_scaled,\
                        save,save_folder_name,bins=bins,\
                        minval=None,maxval=None,\
                        variable=plot_name,units=plot_units, epochs = epoch)
    plot_single_resolution(truth_scaled,\
                    reco_scaled,\
                   minaxis=-2*maxval,maxaxis=maxval*2,
                   save=save,savefolder=save_folder_name,\
                   variable=plot_name,units=plot_units, epochs = epoch)
    plot_bin_slices(truth_scaled, reco_scaled,\
                    use_fraction = use_frac,\
                    bins=10,min_val=minval,max_val=maxval,\
                    save=True,savefolder=save_folder_name,\
                    variable=plot_name,units=plot_units, epochs = epoch)
    if compare_reco:
        # Same plots with the old reconstruction overlaid
        plot_single_resolution(truth_scaled,\
                   reco_scaled,\
                   use_old_reco = True, old_reco = reco_test_use[:,true_index],\
                   minaxis=-2*maxval,maxaxis=maxval*2,
                   save=save,savefolder=save_folder_name,\
                   variable=plot_name,units=plot_units, epochs = epoch,reco_name=reco_name)
        plot_bin_slices(truth_scaled, reco_scaled,\
                    old_reco = reco_test_use[:,true_index],\
                    use_fraction = use_frac,\
                    bins=10,min_val=minval,max_val=maxval,\
                    save=True,savefolder=save_folder_name,\
                    variable=plot_name,units=plot_units, epochs = epoch,reco_name=reco_name)
    if first_var == "energy" and num ==0:
        plot_2D_prediction_fraction(truth_scaled,\
                        reco_scaled,\
                        save,save_folder_name,bins=bins,\
                        minval=0,maxval=2,\
                        variable=plot_name,units=plot_units)
    if num > 0 or first_var == "zenith":
        # Non-energy variable: slice in bins of TRUE ENERGY; the variable
        # itself is passed unscaled here
        plot_bin_slices(Y_test_use[:,true_index], Y_test_predicted[:,NN_index], \
                       energy_truth=Y_test_use[:,0]*max_energy, \
                       use_fraction = False, \
                       bins=10,min_val=min_energy,max_val=max_energy,\
                       save=True,savefolder=save_folder_name,\
                       variable=plot_name,units=plot_units, epochs=epoch)
        if compare_reco:
            plot_bin_slices(Y_test_use[:,true_index], Y_test_predicted[:,NN_index], \
                       energy_truth=Y_test_use[:,0]*max_energy, \
                       old_reco = reco_test_use[:,true_index],\
                       use_fraction = False, \
                       bins=10,min_val=min_energy,max_val=max_energy,\
                       save=True,savefolder=save_folder_name,\
                       variable=plot_name,units=plot_units, epochs = epoch,reco_name=reco_name)