In [11]:
import numpy as np
import pandas as pd
import h5py
import tensorflow as tf
import keras
import os
import matplotlib.pyplot as plt
import csv
import math

from datetime import datetime

#from tensorflow import keras
from keras import layers
from keras import models
#from tensorflow.keras.models import Sequential
#from tensorflow.keras.layers import Dense, Conv2D, Flatten
#from tensorflow.keras.callbacks import ModelCheckpoint

from old_data_tools import load_preprocessed, dataPrep, nameModel

#simPrefix = os.getcwd()+'\\simdata'


In [8]:
# Root directory of the simulation files. Set the SIM_PREFIX environment variable to
# point the notebook at another location; when it is unset the original path is used.
simPrefix = os.environ.get('SIM_PREFIX', '/Users/kmays/simFiles')

In [14]:
# Load the preprocessed 'train' data found under simPrefix; nanCut=False is handed to the loader as-is.
# NOTE(review): y is indexed by string keys further down ('energy', 'comp', 'small_dir');
# the exact layout of x and y is defined in old_data_tools — confirm there.
x, y = load_preprocessed(simPrefix, 'train', nanCut=False)

DATA INPUT

In [5]:
#print(x.shape)
#print(y.keys())
# each station has 2 tanks, each tank has 2 DOMs (high/log gain)
# each tank measures charge and time
# each station gives 2 charges and 2 times, 4 total pieces of data per station
# stations arranged in 10x10 square lattice, 2 corners of square unused
# charge measured in VEM, vertical equivalent muon

# 'dir' is the true direction; the other *_dir entries are reconstructed by simulations
# 'plane_dir' assumes shower is flat plane
# 'laputop_dir' performs likelihood analysis
# 'small_dir' compromises between plane and laputop
#print(y)

In [6]:
# Pull the per-event fields used below out of y, then draw a random boolean mask that
# puts roughly 85% of the events in the training set and the rest in the test set.
# TODO (carried over from the original cell): change cosz=false & reco=plane
energy, comp = y['energy'], y['comp']
theta, phi = y['small_dir'].T
nevents = len(energy)
trainCut = np.random.uniform(size=nevents) < 0.85
testCut = ~trainCut

Model Training

Alpha Model
    Input: no charge merge, no time layers included, normalized data, combined with cosine of zenith angle
    Layers: Two convolutional layers for charge, then combined with zenith
    Output: Energy

In [7]:
# Run configuration: number of training epochs and the options forwarded to dataPrep.
numepochs = 6

# q=None  -> no merging of charge
# t=False -> no time layers included
# normed  -> data normalized from 0-1
prep = dict(q=None, t=False, normed=True, reco='small', cosz=False)

In [8]:
# Establish arrays to be trained on.
# dataPrep receives the raw x, y and the prep options as keyword arguments; its result is
# indexed later as x_i[0] (fed to the "charge" input) and x_i[1] (fed to the "zenith" input).
x_i = dataPrep(x, y, **prep)
# Regression target: the energy values taken from y.
temp_y = energy
#print(prep)

In [9]:
# Display the second element of the prepared inputs (the array later passed as "zenith").
x_i[1]

array([0.08067318, 0.07357016, 0.11979118, ..., 0.30931591, 0.29842941,
       0.30189343])

In [10]:
# Naming system: pick a model key that does not collide with an already-saved model.
# Candidates are the base name followed by 0, 1, 2, ...; the first candidate without a
# matching .h5 file under models/ becomes the key for this run.
key = 'functionalcosZ'
i = 0
while True:
    candidate = key + str(i)
    if not os.path.isfile('models/{}.h5'.format(candidate)):
        break
    i += 1
key = candidate
print(key)

functionalcosZ5


In [11]:
# Create model using functional API for multiple inputs:
# a (10, 10, 2) "charge" input that goes through the convolutional branch, and a
# single-value "zenith" input that is concatenated with the flattened features.
charge_input=keras.Input(shape=(10,10,2,),name="charge")
conv1_layer = layers.Conv2D(4,kernel_size=3,activation='relu')(charge_input)
drop1_layer=layers.Dropout(0.8)(conv1_layer)
conv2_layer = layers.Conv2D(2,kernel_size=3,activation='relu')(drop1_layer)
drop2_layer=layers.Dropout(0.5)(conv2_layer)
conv3_layer = layers.Conv2D(8,kernel_size=3,activation='relu')(drop2_layer)
# NOTE(review): Flatten is applied to drop1_layer, so conv2_layer, drop2_layer and
# conv3_layer are created but never reach `output` and are not part of the model.
# Confirm whether this is a deliberate depth variation or whether the
# deepest layer (conv3_layer) was meant to be flattened here.
flat_layer = layers.Flatten()(drop1_layer)

zenith_input=keras.Input(shape=(1,),name="zenith")

# Join the flattened charge features with the zenith value along the feature axis.
concat_layer = layers.concatenate([flat_layer,zenith_input])
#concat2_layer=flat_layer*zenith_input
#output = layers.Dense(1)(concat_layer)

#dense1_layer = layers.Dense(1)(concat_layer)
#dense2_layer = layers.Dense(1)(dense1_layer)
# Single linear unit (no activation) producing the regression output.
output = layers.Dense(1)(concat_layer)

# The model is named after `key`, the same string used for the saved file names.
model = models.Model(inputs=[charge_input,zenith_input],outputs=output,name=key)

# Mean squared error is the training loss; MAE and MSE are also reported as metrics.
model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mae','mse'])

## Old model used for reference
#model = Sequential(name=nameModel(prep, 'test'))  # Automatic naming for flexible assessment later
## Add model layers
#model.add(Conv2D(64, kernel_size=3, activation='relu', input_shape=(10,10,2)))
#model.add(Conv2D(32, kernel_size=3, activation='relu'))
#model.add(Flatten())
#model.add(Dense(1)) # No activation function for last layer of regression model

## Compile model
#model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mae','mse'])

In [12]:
# Count missing entries in the array that is fed to the "zenith" input.
# Comparing a float array with `== None` is False for every element (NaN included), so the
# original check could only ever report 0; pd.isna flags both None and NaN.
np.count_nonzero(pd.isna(x_i[1]))

0

In [13]:
# Print the layers that are actually part of the compiled model, with output shapes and parameter counts.
model.summary()

Model: "functionalcosZ5"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
charge (InputLayer)             [(None, 10, 10, 2)]  0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 8, 8, 4)      76          charge[0][0]                     
__________________________________________________________________________________________________
dropout (Dropout)               (None, 8, 8, 4)      0           conv2d[0][0]                     
__________________________________________________________________________________________________
flatten (Flatten)               (None, 256)          0           dropout[0][0]                    
____________________________________________________________________________________

In [14]:
#keras.utils.plot_model(model,"model.png")

In [15]:
# Re-display the array passed as the "zenith" input just before training.
x_i[1]

array([0.08067318, 0.07357016, 0.11979118, ..., 0.30931591, 0.29842941,
       0.30189343])

In [16]:
# Train the model. The CSVLogger callback records the training log in the loss/ folder
# under the key name. Early stopping is not enabled.
csv_logger = keras.callbacks.CSVLogger('loss/{}'.format(key))
callbacks = [csv_logger]

# Inputs are matched to the model by the names given to the keras.Input layers;
# the zenith array is reshaped to a single column.
train_inputs = {
    "charge": x_i[0],
    "zenith": x_i[1].reshape(-1, 1),
}

history = model.fit(
    train_inputs,
    temp_y,
    epochs=numepochs,
    validation_split=0.15,
    callbacks=callbacks,
)

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


In [17]:
# Save the trained model, the data-prep settings used for it, and a one-line run summary.

# Create the output folders up front so the first save on a fresh checkout does not fail.
for out_dir in ('models', 'prep'):
    os.makedirs(out_dir, exist_ok=True)

model.save('models/%s.h5' % key)
# Sanity check, if needed: keras.models.load_model('models/{}.h5'.format(key)).get_config()
# should equal model.get_config().

# Opening with "w" always starts from an empty file, so no size check is needed before
# writing; the `with` block closes the file even if the write raises.
with open('prep/%s.txt' % key, "w") as info:
    info.write("q:{}, t:{}, normed:{}, reco:{}, cosz:{} \n".format(
        prep['q'], prep['t'], prep['normed'], prep['reco'], prep['cosz']))

# Append the final training and validation loss to the shared results log.
# Index -1 is the last epoch that actually ran, which stays valid even if training
# ends before `numepochs` epochs (e.g. when an early-stopping callback is added).
now = datetime.now()
with open("results.txt", "a") as f:
    f.write("{}\t{}\tepochs:{}\tloss:{},{}\n".format(
        now.strftime("%m/%d/%Y %H:%M:%S"),
        key,
        numepochs,
        history.history['loss'][-1],
        history.history['val_loss'][-1]
    ))

#this is how to read the prep dictionary
#info=open('prep_%s.h5' % key, "r")
#print(info.read())
#info.read()
#info.close()

MODEL COMPARISON

In [33]:
#dataframes that will compare the results
#start with 5 variations (represented as depth/dimensions)
#add column for overfitting, true/false (determined if value loss increases down the column)
#models (dimensions) that include true under overfitting will be cut out
#save model and prep as note attached to the csv to see which did best
#look up other ways to interpret loss information

# Load the per-epoch training log of one model and flag the epochs whose validation loss
# went up compared with the previous epoch.
# NOTE(review): this file name ends in a bare '.', while the training cell logs to
# 'loss/<key>' with no suffix — confirm the name of the file on disk.
v1 = pd.read_csv('loss/functionalcosZ3.', delimiter=",")

# diff() compares each row with the one before it. The first row has no predecessor
# (NaN), and NaN > 0 is False, so epoch 0 is never flagged. Selecting the column by name
# works for any number of epochs and does not depend on the column order in the log.
v1['Overfits'] = v1['val_loss'].diff().gt(0).astype(int)

v1

1
2
3
4
5


Unnamed: 0,epoch,loss,mae,mse,val_loss,val_mae,val_mse,Overfits
0,0,0.496864,0.574958,0.496864,0.391251,0.530421,0.391251,0
1,1,0.444086,0.560462,0.444086,0.388192,0.529159,0.388192,0
2,2,0.441152,0.55902,0.441152,0.379669,0.525364,0.379669,0
3,3,0.440389,0.558665,0.440389,0.37958,0.524606,0.37958,0
4,4,0.438933,0.557943,0.438933,0.419087,0.544954,0.419087,1
5,5,0.438544,0.557782,0.438544,0.384367,0.527659,0.384367,0


In [None]:
#try batch regularization, drop out and relu activation at once
#google other versions
#how can i make comparing multiple dimensions simple?
#
#if(i!=0):
#    if("q:{}, t:{}, normed:{}, reco:{}, cosz:{}".format(prep['q'], prep['t'], prep['normed'], prep['reco'], prep['cosz']) in info.read()
#& keras.models.load_model('models/{}.h5'.format(key)).get_config()==model.get_config()):
#        print('theyre the same')
#        #os.remove(the file)
#    else:
#        print('theyre not')
#    print('positive')
#else:
#    print('no')