In [10]:
import pandas as pd
import numpy as np
import os
import sys
import datetime
from sklearn.model_selection import StratifiedKFold, ParameterGrid
sys.path.append(os.environ['CMS_ROOT'])
from cms_modules.utils import model_summary_to_string, args_to_dict
from cms_modules.logging import Logger

import tensorflow as tf
TensorBoard = tf.keras.callbacks.TensorBoard

# Root of the ECBDL14-Classification checkout. Like CMS_ROOT above, it can be
# supplied through the environment (ECBDL14_ROOT); the original hard-coded
# location is kept as the fallback so existing setups behave as before.
ecbdl14_root = os.environ.get('ECBDL14_ROOT', '/home/jjohn273/git/ECBDL14-Classification/')
# Make the checkout importable for the `model` / `CustomCallbacks` imports that
# follow; skip the append when re-running the cell so sys.path does not grow.
if ecbdl14_root not in sys.path:
    sys.path.append(ecbdl14_root)
from model import create_model
from CustomCallbacks import KerasThresholdMonitoringCallback

### Define DNN Config

In [11]:
# Hyper-parameters for the network; the whole dict is passed to create_model.
config = {
    'hidden_layers': [32, 32],
    'learn_rate': 0.001,
    'batch_size': 128,
    'dropout_rate': 0.5,
    'batchnorm': True,
}
# Text label for the layer layout (e.g. '32+32'), used when naming run outputs.
# Derived from the list above so the label cannot drift from the real layout.
hidden_layers_markup = '+'.join(str(width) for width in config['hidden_layers'])
# Number of training epochs passed to fit().
epochs = 30
# When True, only the first 10000 rows of the data set are used.
debug = True

### Define I/O Paths

In [12]:
# inputs
# HDF store (and the key inside it) holding the one-hot encoded sample.
data_path = os.path.join(ecbdl14_root, 'data/ecbdl14.onehot.sample.hdf')
data_key = 'train'
# outputs
# Timestamp (MMDDYY-HHMMSS) used to make this run's output names unique.
now = datetime.datetime.now()
ts = now.strftime("%m%d%y-%H%M%S")
# Result files referenced by the logger cell; they were previously undefined,
# which raised NameError on a fresh kernel.
# NOTE(review): file names are placeholders - confirm the intended locations.
train_auc_outputs = 'train_auc_results.csv'
validation_auc_outputs = 'validation_auc_results.csv'

### Init Output Files

In [13]:
# Human-readable label describing this configuration; it is embedded in the
# TensorBoard directory and log file names. The last key was misspelled
# 'bathcnorm' and is corrected to 'batchnorm' here.
config_value = (
    f'layers:{hidden_layers_markup}'
    f'-learn_rate:{config.get("learn_rate")}'
    f'-batch_size:{config.get("batch_size")}'
    f'-dropout_rate:{config.get("dropout_rate")}'
    f'-batchnorm:{config.get("batchnorm")}'
)

# if not os.path.isfile(train_auc_outputs):
#     results_header = 'config,fold,' + ','.join([f'ep_{i}' for i in range(epochs)])
#     output_files = [train_auc_outputs, validation_auc_outputs]
#     output_headers = [results_header,results_header]
#     for file, header in zip(output_files, output_headers):
#         with open(file, 'w') as fout:
#             fout.write(header + '\n')

def write_results(file, results):
    """Append one line of results to a text file.

    Args:
        file: Path of the file to append to; it is created if missing.
        results: String to write; a newline is added after it.
    """
    with open(file, 'a') as handle:
        handle.writelines([results, '\n'])

### Init Logger

In [5]:
# Per-run output locations, named after the timestamp and configuration label.
tensorboard_dir = 'tensorboard/{}-{}/'.format(ts, config_value)
log_file = 'logs/{}-{}.txt'.format(ts, config_value)

# Open the run log and record where results are expected to go.
# NOTE(review): validation_auc_outputs / train_auc_outputs must already be
# defined by an earlier cell for this one to run.
logger = Logger(log_file)
logger.log_time('Starting grid search job')
output_targets = [validation_auc_outputs, train_auc_outputs]
logger.log_time(f'Outputs being written to {output_targets}')
logger.write_to_file()

### Load Data

In [6]:
# Read the data set from the HDF store and log its dimensions.
df = pd.read_hdf(data_path, data_key)
logger.log_time(f'Loaded data with shape {df.shape}').write_to_file()

# Debug runs use only the first 10000 rows; otherwise the full frame is used.
subset = df[:10000] if debug else df
# Split into the label column and the remaining feature columns.
y = subset['target']
x = subset.drop(columns=['target'])

In [7]:
# Project callback that is given the features, labels and the run logger.
cb = KerasThresholdMonitoringCallback(x, y, logger)
# Write TensorBoard event files to this run's own directory. It previously
# wrote to the shared 'logs' directory, leaving tensorboard_dir unused and
# mixing every run's events with the text log files.
tb = TensorBoard(log_dir=tensorboard_dir, histogram_freq=1)
callbacks = [cb, tb]

In [8]:
# The model's input width is the number of feature columns in x.
input_dim = len(x.columns)
# Build the network from the hyper-parameter dict and print its layer summary.
dnn = create_model(input_dim, config)
dnn.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 32)                6432      
_________________________________________________________________
batch_normalization (BatchNo (None, 32)                128       
_________________________________________________________________
activation (Activation)      (None, 32)                0         
_________________________________________________________________
dropout (Dropout)            (None, 32)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 32)                1056      
_________________________________________________________________
batch_normalization_1 (Batch (None, 32)                128       
_________________________________________________________________
activation_1 (Activation)    (None, 32)                0

In [9]:
# Train the network. The configured batch size is passed explicitly: it was
# previously omitted, so fit() fell back to its own default while the run
# label still reported config['batch_size'].
history = dnn.fit(
    x,
    y,
    epochs=epochs,
    batch_size=config['batch_size'],
    callbacks=callbacks,
    verbose=1,
)

Train on 10000 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
