In [1]:
import os
import gc
import sys
import glob
import time
import math
import numpy as np
import uproot
import pandas
from functools import partial
from concurrent.futures import ThreadPoolExecutor

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import regularizers
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.layers import Input, Dense, Conv2D, Dropout, AlphaDropout, Activation, BatchNormalization, Flatten, \
                                    Concatenate, PReLU
from tensorflow.keras.callbacks import Callback, ModelCheckpoint, CSVLogger

sys.path.insert(0, "../../python")
from common import *
from DataLoader import DataLoader

In [2]:
class NetSetup:
    """Hyper-parameters shared by the layers of one sub-network.

    The activation name selects the dropout layer type, the kernel
    initializer and whether batch normalisation is applied:

    * ``'relu'`` / ``'PReLU'``: ``Dropout``, ``'he_uniform'``, batch norm on.
    * ``'selu'``: ``AlphaDropout``, ``'lecun_normal'``, batch norm off.

    Any other activation name raises ``RuntimeError``.
    """

    def __init__(self, activation, activation_shared_axes, dropout_rate, first_layer_size, last_layer_size, decay_factor,
                 kernel_regularizer):
        self.activation = activation
        self.activation_shared_axes = activation_shared_axes

        if activation in ('relu', 'PReLU'):
            self.DropoutType, self.kernel_init, self.apply_batch_norm = Dropout, 'he_uniform', True
        elif activation == 'selu':
            self.DropoutType, self.kernel_init, self.apply_batch_norm = AlphaDropout, 'lecun_normal', False
        else:
            raise RuntimeError('Activation "{}" not supported.'.format(activation))

        self.dropout_rate = dropout_rate
        self.first_layer_size = first_layer_size
        self.last_layer_size = last_layer_size
        self.decay_factor = decay_factor
        self.kernel_regularizer = kernel_regularizer

    def RecalcLayerSizes(self, n_input_features, width_factor, compression_factor):
        """Derive the first/last layer sizes from the number of input features.

        Both sizes are ``ceil(n_input_features * (1 + dropout_rate) * factor)``,
        using `width_factor` for the first layer and `compression_factor`
        for the last one. The previous sizes are overwritten.
        """
        scaled_inputs = n_input_features * (1 + self.dropout_rate)
        self.first_layer_size = int(math.ceil(scaled_inputs * width_factor))
        self.last_layer_size = int(math.ceil(scaled_inputs * compression_factor))
    
def add_block_ending(net_setup, name_format, layer):
    """Append the common tail of a block: batch norm, activation, dropout.

    Args:
        net_setup: NetSetup controlling which of the three stages are used.
        name_format: format string with one ``{}`` placeholder; it is filled
            with ``'norm'``, ``'activation'`` and ``'dropout'`` to name the layers.
        layer: tensor produced by the dense/convolution layer of the block.

    Returns:
        The output tensor of the last stage that was applied.
    """
    output = layer

    # Batch normalisation is applied only when the setup enables it.
    if net_setup.apply_batch_norm:
        output = BatchNormalization(name=name_format.format('norm'))(output)

    # PReLU is a dedicated layer; every other activation goes through Activation.
    activation_name = name_format.format('activation')
    if net_setup.activation == 'PReLU':
        activation = PReLU(shared_axes=net_setup.activation_shared_axes, name=activation_name)
    else:
        activation = Activation(net_setup.activation, name=activation_name)
    output = activation(output)

    # Dropout is skipped entirely for a non-positive rate.
    if net_setup.dropout_rate > 0:
        dropout = net_setup.DropoutType(net_setup.dropout_rate, name=name_format.format('dropout'))
        output = dropout(output)
    return output

def dense_block(prev_layer, kernel_size, net_setup, block_name, n):
    """Add a Dense layer followed by the standard block ending.

    Args:
        prev_layer: input tensor.
        kernel_size: passed to ``Dense`` as its first argument (number of units).
        net_setup: NetSetup providing initializer, regularizer and block ending.
        block_name: prefix used in the layer names.
        n: index of the block, used as the suffix of the layer names.

    Returns:
        Output tensor of the block.
    """
    layer_name = "{}_dense_{}".format(block_name, n)
    # Leaves one '{}' placeholder for add_block_ending, e.g. "tau_{}_3".
    ending_format = '{}_{{}}_{}'.format(block_name, n)
    dense_layer = Dense(kernel_size,
                        name=layer_name,
                        kernel_initializer=net_setup.kernel_init,
                        kernel_regularizer=net_setup.kernel_regularizer)
    return add_block_ending(net_setup, ending_format, dense_layer(prev_layer))

def conv_block(prev_layer, filters, kernel_size, net_setup, block_name, n):
    """Add a Conv2D layer followed by the standard block ending.

    Unlike dense_block, the kernel regularizer of `net_setup` is not passed
    to the convolution.

    Args:
        prev_layer: input tensor.
        filters: number of convolution filters.
        kernel_size: convolution window, e.g. ``(1, 1)``.
        net_setup: NetSetup providing the initializer and the block ending.
        block_name: prefix used in the layer names.
        n: index of the block, used as the suffix of the layer names.

    Returns:
        Output tensor of the block.
    """
    layer_name = "{}_conv_{}".format(block_name, n)
    # Leaves one '{}' placeholder for add_block_ending, e.g. "inner_all_{}_2".
    ending_format = '{}_{{}}_{}'.format(block_name, n)
    conv_layer = Conv2D(filters, kernel_size, name=layer_name, kernel_initializer=net_setup.kernel_init)
    return add_block_ending(net_setup, ending_format, conv_layer(prev_layer))

def reduce_n_features_1d(input_layer, net_setup, block_name):
    """Stack dense blocks whose widths go from first_layer_size to last_layer_size.

    The first block has ``net_setup.first_layer_size`` units. Each following
    block has ``int(previous / net_setup.decay_factor)`` units, clamped from
    below to ``net_setup.last_layer_size``; the block with exactly
    ``last_layer_size`` units is the last one.

    Args:
        input_layer: input tensor.
        net_setup: NetSetup with the layer sizes and the decay factor.
        block_name: prefix used in the layer names.

    Returns:
        Output tensor of the last dense block.

    Raises:
        ValueError: if the width has to shrink but ``0 < decay_factor <= 1``,
            in which case the width would never reach ``last_layer_size``.
    """
    # Without this guard the loop below would add layers forever.
    if net_setup.first_layer_size > net_setup.last_layer_size and 0 < net_setup.decay_factor <= 1:
        raise ValueError(
            'decay_factor must be > 1 to shrink from {} to {} units, got {}.'.format(
                net_setup.first_layer_size, net_setup.last_layer_size, net_setup.decay_factor))

    prev_layer = input_layer
    current_size = net_setup.first_layer_size
    n = 1
    while True:
        prev_layer = dense_block(prev_layer, current_size, net_setup, block_name, n)
        if current_size == net_setup.last_layer_size:
            break
        current_size = max(net_setup.last_layer_size, int(current_size / net_setup.decay_factor))
        n += 1
    return prev_layer


def reduce_n_features_2d(input_layer, net_setup, block_name):
    """Stack 1x1 convolution blocks whose filter counts go from first to last layer size.

    The first block has ``net_setup.first_layer_size`` filters. Each following
    block has ``int(previous / net_setup.decay_factor)`` filters, clamped from
    below to ``net_setup.last_layer_size``; the block with exactly
    ``last_layer_size`` filters is the last one. All convolutions use a
    ``(1, 1)`` kernel.

    Args:
        input_layer: input tensor.
        net_setup: NetSetup with the layer sizes and the decay factor.
        block_name: prefix used in the layer names.

    Returns:
        Output tensor of the last convolution block.

    Raises:
        ValueError: if the filter count has to shrink but ``0 < decay_factor <= 1``,
            in which case it would never reach ``last_layer_size``.
    """
    # Without this guard the loop below would add layers forever.
    if net_setup.first_layer_size > net_setup.last_layer_size and 0 < net_setup.decay_factor <= 1:
        raise ValueError(
            'decay_factor must be > 1 to shrink from {} to {} filters, got {}.'.format(
                net_setup.first_layer_size, net_setup.last_layer_size, net_setup.decay_factor))

    conv_kernel = (1, 1)
    prev_layer = input_layer
    current_size = net_setup.first_layer_size
    n = 1
    while True:
        prev_layer = conv_block(prev_layer, current_size, conv_kernel, net_setup, block_name, n)
        if current_size == net_setup.last_layer_size:
            break
        current_size = max(net_setup.last_layer_size, int(current_size / net_setup.decay_factor))
        n += 1
    return prev_layer

def create_model(net_config):
    """Build the Keras model described by `net_config`.

    The network has up to three stages:

    1. An optional dense sub-network over the tau-level inputs
       (``net_config.tau_branches``).
    2. For every entry of ``net_config.cell_locations``: one 1x1-convolution
       sub-network per component (``net_config.comp_names`` /
       ``net_config.comp_branches``), optionally merged across components,
       followed by window convolutions that shrink the grid to one cell.
    3. A final dense stage over the concatenated features and a softmax output.

    The names ``input_cell_external_branches``, ``n_cells_eta``, ``n_cells_phi``
    and ``n_outputs`` are read from module scope (presumably provided by
    ``from common import *`` - confirm).

    Args:
        net_config: object exposing ``tau_branches``, ``cell_locations``,
            ``comp_names``, ``comp_branches`` and ``final``.

    Returns:
        Tuple ``(model, model_name)``.
    """
    tau_net_setup = NetSetup('PReLU', None, 0.25, 128, 128, 1.4, None)
    comp_net_setup = NetSetup('PReLU', [1, 2], 0.25, 1024, 64, 1.4, None)
    dense_net_setup = NetSetup('PReLU', None, 0.25, 512, 64, 1.4, None)

    model_name = "DeepTau2017v2p3"
    input_layers = []
    high_level_features = []

    if len(net_config.tau_branches) > 0:
        input_layer_tau = Input(name="input_tau", shape=(len(net_config.tau_branches),))
        input_layers.append(input_layer_tau)
        # The layer sizes given to the constructor are replaced by sizes derived from the input width.
        tau_net_setup.RecalcLayerSizes(len(net_config.tau_branches), 1.5, 1.5)
        reduced_tau = reduce_n_features_1d(input_layer_tau, tau_net_setup, 'tau')
        high_level_features.append(reduced_tau)

    for loc in net_config.cell_locations:
        reduced_inputs = []
        for comp_id in range(len(net_config.comp_names)):
            comp_name = net_config.comp_names[comp_id]
            n_comp_features = len(input_cell_external_branches) + len(net_config.comp_branches[comp_id])
            # Conv2D, the axis=3 concatenation and the window convolutions below all need a
            # (eta, phi, features) grid per example, not a flattened list of cells.
            input_layer_comp = Input(name="input_{}_{}".format(loc, comp_name),
                                     shape=(n_cells_eta[loc], n_cells_phi[loc], n_comp_features))
            input_layers.append(input_layer_comp)
            comp_net_setup.RecalcLayerSizes(n_comp_features, 4, 0.8)
            reduced_comp = reduce_n_features_2d(input_layer_comp, comp_net_setup, "{}_{}".format(loc, comp_name))
            reduced_inputs.append(reduced_comp)

        cell_output_size = 64
        if len(net_config.comp_names) > 1:
            # Merge the per-component feature maps along the channel axis and reduce them jointly.
            conv_all_start = Concatenate(name="{}_cell_concat".format(loc), axis=3)(reduced_inputs)
            comp_net_setup.first_layer_size = 512
            comp_net_setup.last_layer_size = 64
            prev_layer = reduce_n_features_2d(conv_all_start, comp_net_setup, "{}_all".format(loc))
        else:
            prev_layer = reduced_inputs[0]

        # Apply window convolutions until the tracked grid size reaches 1.
        # NOTE(review): only n_cells_eta drives the loop and the window is square, so this
        # presumably assumes n_cells_eta[loc] == n_cells_phi[loc] - confirm.
        window_size = 2
        current_size = n_cells_eta[loc]
        n = 1
        while current_size > 1:
            win_size = min(current_size, window_size)
            prev_layer = conv_block(prev_layer, cell_output_size, (win_size, win_size), comp_net_setup,
                                    "{}_all_{}x{}".format(loc, win_size, win_size), n)
            n += 1
            current_size -= window_size - 1

        cells_flatten = Flatten(name="{}_cells_flatten".format(loc))(prev_layer)
        high_level_features.append(cells_flatten)

    if len(high_level_features) > 1:
        features_concat = Concatenate(name="features_concat")(high_level_features)
    else:
        features_concat = high_level_features[0]

    if net_config.final:
        final_dense = reduce_n_features_1d(features_concat, dense_net_setup, 'final')
    else:
        final_dense = dense_block(features_concat, 2048, dense_net_setup, 'tmp', 1)

    # Fixed name: it must not depend on the window-loop counter, which is undefined without
    # cell locations and could clash with the "final_dense_<k>" layers created above.
    output_layer = Dense(n_outputs, name="final_dense_last",
                         kernel_initializer=dense_net_setup.kernel_init)(final_dense)
    softmax_output = Activation("softmax", name="main_output")(output_layer)

    model = Model(input_layers, softmax_output, name=model_name)
    return model, model_name

In [3]:
def compile_model(model, learning_rate):
    """Compile `model` with the Adam optimizer and the tau cross-entropy loss.

    The same list (accuracy plus the TauLosses terms) is passed both as
    ``metrics`` and as ``weighted_metrics``.

    Args:
        model: Keras model to compile (modified in place).
        learning_rate: value passed to Adam as ``lr``.
    """
    optimizer = keras.optimizers.Adam(lr=learning_rate)
    tracked_metrics = [
        "accuracy",
        TauLosses.tau_crossentropy,
        TauLosses.Le,
        TauLosses.Lmu,
        TauLosses.Ljet,
        TauLosses.sLe,
        TauLosses.sLmu,
        TauLosses.sLjet,
    ]
    model.compile(
        loss=TauLosses.tau_crossentropy,
        optimizer=optimizer,
        metrics=tracked_metrics,
        weighted_metrics=tracked_metrics,
    )

In [4]:
# Presumably sets the three scale factors used by the TauLosses terms to 1 -
# TODO confirm against the TauLosses definition in common.
TauLosses.SetSFs(1, 1, 1)
# NOTE(review): create_model is defined with a required `net_config` parameter, but no
# argument is passed here; on a fresh kernel this call raises TypeError. Pass the intended
# network configuration object.
model, model_name = create_model()
# Compile with an initial learning rate of 1e-3 and print the layer table.
compile_model(model, 1e-3)
model.summary()

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_inner_cmb (InputLayer)    (None, 11, 11, 180)  0                                            
__________________________________________________________________________________________________
input_outer_cmb (InputLayer)    (None, 11, 11, 180)  0                                            
__________________________________________________________________________________________________
inner_cmb_conv_1 (Conv2D)       (None, 11, 11, 1024) 185344      input_inner_cmb[0][0]            
__________________________________________________________________________________________________
outer_cmb_conv_1 (Con

In [5]:
def close_file(f_name):
    """Close every live text file object whose ``name`` equals `f_name`.

    The garbage collector's list of tracked objects is scanned for objects
    whose type name contains ``TextIOWrapper``. Wrappers whose ``name`` cannot
    be read are skipped instead of aborting the scan.

    Args:
        f_name: file name to match against the ``name`` attribute of the
            open file objects.
    """
    for obj in gc.get_objects():
        if "TextIOWrapper" not in str(type(obj)):
            continue
        try:
            obj_name = obj.name
        except (AttributeError, ValueError):
            # `name` is forwarded to the underlying buffer: it raises AttributeError when the
            # buffer has no name (e.g. BytesIO) and ValueError when the wrapper is detached.
            continue
        if obj_name == f_name:
            obj.close()

In [6]:
class TimeCheckpoint(Callback):
    """Callback that saves the full model at (roughly) fixed wall-clock intervals.

    The elapsed time is only checked on batches whose index is a multiple of
    100, so a snapshot is written at the first such batch after
    `time_interval` seconds have passed since the previous snapshot (or since
    the callback was created).

    Args:
        time_interval: minimum time between two snapshots, in seconds.
        file_name_prefix: prefix of the snapshot files; the batch index and the
            hours elapsed since creation are appended to it.
    """

    def __init__(self, time_interval, file_name_prefix):
        # Initialise the state the Keras base class sets up in its own constructor.
        super().__init__()
        self.time_interval = time_interval
        self.file_name_prefix = file_name_prefix
        self.initial_time = time.time()
        self.last_check_time = self.initial_time

    def on_batch_end(self, batch, logs=None):
        """Save a snapshot if `time_interval` seconds elapsed since the last one."""
        # Look at the clock only every 100th batch.
        if batch % 100 != 0:
            return
        current_time = time.time()
        delta_t = current_time - self.last_check_time
        if delta_t >= self.time_interval:
            # Total running time in hours, used in the snapshot file name.
            abs_delta_t_h = (current_time - self.initial_time) / 60. / 60.
            self.model.save('{}_b{}_{:.1f}h.h5'.format(self.file_name_prefix, batch, abs_delta_t_h))
            self.last_check_time = current_time

In [7]:
def run_training(train_suffix, model_name, data_loader, epoch, n_epochs):
    """Train the module-level `model` and save checkpoints, a CSV log and the final model.

    All output files are prefixed with ``<model_name>_<train_suffix>``:

    * one best-model checkpoint per monitored validation metric
      (``<prefix>_<metric>.hdf5``),
    * time-based snapshots (``<prefix>_historic_...``),
    * the CSV training log (``<prefix>.log``), recreated on every call,
    * the model after the last epoch (``<prefix>_final.hdf5``).

    Note that the model itself is not a parameter: the function trains the
    global variable ``model``.

    Args:
        train_suffix: suffix identifying this training step.
        model_name: model name used in the output file names.
        data_loader: object providing ``generator(bool)``, ``steps_per_epoch``
            and ``validation_steps``.
        epoch: index of the first epoch (``initial_epoch``).
        n_epochs: index of the last epoch to run (``epochs``).

    Returns:
        The history object returned by ``fit_generator``.
    """
    train_name = '%s_%s' % (model_name, train_suffix)

    # One checkpoint file per accuracy metric, so that the best val_acc model and the best
    # val_weighted_acc model do not overwrite each other.
    cb_acc = []
    for acc_name in ["acc", "weighted_acc"]:
        cb_acc.append(ModelCheckpoint("%s_%s.hdf5" % (train_name, acc_name), monitor="val_%s" % acc_name,
                                      save_best_only=True, save_weights_only=False, mode="max", verbose=1))

    cb_losses = []
    for loss_name in ["loss", "tau_crossentropy", "Le", "Lmu", "Ljet",
                      "weighted_tau_crossentropy", "weighted_Le", "weighted_Lmu", "weighted_Ljet"]:
        cb_losses.append(ModelCheckpoint("%s_%s.hdf5" % (train_name, loss_name), monitor="val_%s" % loss_name,
                                         save_best_only=True, save_weights_only=False, mode="min", verbose=1))

    # Start from an empty log: close any handle still open on the old file before deleting it.
    log_name = "%s.log" % train_name
    if os.path.isfile(log_name):
        close_file(log_name)
        os.remove(log_name)
    csv_log = CSVLogger(log_name, append=True)

    # Snapshot of the full model every 4 hours of wall-clock time.
    time_checkpoint = TimeCheckpoint(4*60*60, '{}_historic'.format(train_name))
    callbacks = [time_checkpoint, csv_log, *cb_acc, *cb_losses]
    fit_hist = model.fit_generator(data_loader.generator(True), validation_data=data_loader.generator(False),
                                   steps_per_epoch=data_loader.steps_per_epoch, validation_steps=data_loader.validation_steps,
                                   callbacks=callbacks, epochs=n_epochs, initial_epoch=epoch, verbose=1)

    model.save("%s_final.hdf5" % train_name)
    return fit_hist

In [8]:
# Create the data loader over all training tuple parts.
# NOTE(review): the absolute N:/ path ties this cell to one machine, and the meaning of the
# positional arguments 200 and 10000 is defined by DataLoader - confirm against DataLoader.py.
loader = DataLoader(
    'N:/tau-ml/tuples-v2-t3/training/part_*.h5',
    200,
    10000,
    validation_size=6100000,
    max_queue_size=100,
    n_passes=-1,
)

# Total, training and validation sizes as reported by the loader.
print(loader.total_size, loader.data_size, loader.validation_size)

72491602 66391602 6100000


In [9]:
# First training step: epochs 0 and 1 (initial_epoch=0, epochs=2).
fit_hist = run_training(
    train_suffix='step{}'.format(1),
    model_name=model_name,
    data_loader=loader,
    epoch=0,
    n_epochs=2,
)

Instructions for updating:
Use tf.cast instead.
Epoch 1/2

Epoch 00001: val_acc improved from -inf to 0.95009, saving model to DeepTau2017v2p3_step1_acc.hdf5

Epoch 00001: val_weighted_acc improved from -inf to 0.94984, saving model to DeepTau2017v2p3_step1_acc.hdf5

Epoch 00001: val_loss improved from inf to 0.13229, saving model to DeepTau2017v2p3_step1_loss.hdf5

Epoch 00001: val_tau_crossentropy improved from inf to 0.13692, saving model to DeepTau2017v2p3_step1_tau_crossentropy.hdf5

Epoch 00001: val_Le improved from inf to 0.04344, saving model to DeepTau2017v2p3_step1_Le.hdf5

Epoch 00001: val_Lmu improved from inf to 0.00550, saving model to DeepTau2017v2p3_step1_Lmu.hdf5

Epoch 00001: val_Ljet improved from inf to 0.08799, saving model to DeepTau2017v2p3_step1_Ljet.hdf5

Epoch 00001: val_weighted_tau_crossentropy improved from inf to 0.13543, saving model to DeepTau2017v2p3_step1_weighted_tau_crossentropy.hdf5

Epoch 00001: val_weighted_Le improved from inf to 0.05572, saving 


Epoch 00002: val_Lmu improved from 0.00550 to 0.00531, saving model to DeepTau2017v2p3_step1_Lmu.hdf5

Epoch 00002: val_Ljet improved from 0.08799 to 0.08504, saving model to DeepTau2017v2p3_step1_Ljet.hdf5

Epoch 00002: val_weighted_tau_crossentropy improved from 0.13543 to 0.12729, saving model to DeepTau2017v2p3_step1_weighted_tau_crossentropy.hdf5


Exception in thread Thread-26:
Traceback (most recent call last):
  File "c:\program files (x86)\microsoft visual studio\shared\python36_64\lib\threading.py", line 916, in _bootstrap_inner
    self.run()
  File "c:\program files (x86)\microsoft visual studio\shared\python36_64\lib\threading.py", line 864, in run
    self._target(*self._args, **self._kwargs)
  File "../../python\DataLoader.py", line 41, in LoaderThread
  File "../../python\DataLoader.py", line 17, in read_hdf
    read_hdf_lock.release()
  File "C:\Users\konst\AppData\Roaming\Python\Python36\site-packages\pandas\io\pytables.py", line 389, in read_hdf
    return store.select(key, auto_close=auto_close, **kwargs)
  File "C:\Users\konst\AppData\Roaming\Python\Python36\site-packages\pandas\io\pytables.py", line 740, in select
    return it.get_result()
  File "C:\Users\konst\AppData\Roaming\Python\Python36\site-packages\pandas\io\pytables.py", line 1518, in get_result
    results = self.func(self.start, self.stop, where)
  F


Epoch 00002: val_weighted_Le improved from 0.05572 to 0.05308, saving model to DeepTau2017v2p3_step1_weighted_Le.hdf5

Epoch 00002: val_weighted_Lmu improved from 0.00655 to 0.00599, saving model to DeepTau2017v2p3_step1_weighted_Lmu.hdf5

Epoch 00002: val_weighted_Ljet improved from 0.07316 to 0.06823, saving model to DeepTau2017v2p3_step1_weighted_Ljet.hdf5
