In [None]:
import os
import logging
import struct
import random
import math
import numpy as np
import pandas as pd
import tensorflow as tf
import pydot
import pydotplus
import graphviz
import scipy
import scipy.stats as sts
import numpy.random as npr
from logging.config import dictConfig
from scipy.stats import norm
from sklearn import metrics
from keras import regularizers
from keras.layers import Dense, Activation, Input, Dropout, BatchNormalization
from keras.models import Sequential
from keras_tqdm import TQDMNotebookCallback as ktqdm
from tqdm import tqdm_notebook as tqdm
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from statsmodels.distributions.empirical_distribution import ECDF
from matplotlib import pyplot as plt
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.metrics import precision_recall_curve, average_precision_score, confusion_matrix

%matplotlib notebook

## Network Metrics Function

In [None]:
def plot_history_graph(history, index, label, name):
    """Plot the training and validation curves of one metric and save the figure.

    Parameters
    ----------
    history : object
        Object whose ``history`` attribute maps metric names to per-epoch
        value sequences (e.g. the value returned by ``model.fit``).
    index : str
        Key of the metric inside ``history.history``; the validation series
        is read from ``'val_<index>'``.
    label : str
        Short metric name used in the legend, the title and the y-axis label.
    name : str
        Path handed to ``savefig``.
    """
    train_values = history.history[index]
    val_values = history.history['val_%s' % index]

    # Use a dedicated figure per call: with the interactive notebook backend
    # the "current" figure stays alive between cells, so drawing through the
    # implicit pyplot state would stack every metric onto the same axes and
    # save the accumulated curves.
    fig, ax = plt.subplots()
    ax.plot(train_values, label='Training %s = %0.6f' % (label, train_values[-1]))
    ax.plot(val_values, label='Testing %s = %0.6f' % (label, val_values[-1]))
    ax.set_title('Model %s' % label, fontsize=15)
    ax.set_ylabel(label)
    ax.set_xlabel('epoch')
    ax.legend(loc='upper left')
    fig.savefig(name, bbox_inches=None)

## Custom Activation Function

In [1]:
from keras import backend as K
from keras.utils.generic_utils import get_custom_objects

# Candidate steepness (beta) values for the scaled sigmoid defined below.
beta005 = 0.05
beta02 = 0.2
beta05 = 0.5
beta1 = 1
beta3 = 3
beta10 = 10

def custom_activation(x, beta=beta3):
    """Sigmoid applied to ``beta * x``.

    Parameters
    ----------
    x : backend tensor
        Pre-activation values.
    beta : number, optional
        Multiplier applied to ``x`` before the sigmoid. Defaults to ``beta3``
        (3), the value that used to be hard-coded, so calling the function
        with a single argument behaves exactly as before.

    Returns
    -------
    The result of ``K.sigmoid(x * beta)``.
    """
    return K.sigmoid(x * beta)

## Reproducibility

In [None]:
# Single source of truth for every random number generator seeded below.
SEED = 42

# NOTE: CPython reads PYTHONHASHSEED when the interpreter starts, so setting it
# here only reaches processes spawned from this kernel; it does not change
# string hashing in the kernel that is already running.
os.environ['PYTHONHASHSEED'] = '0'
np.random.seed(SEED)      # NumPy global generator (also backs numpy.random.permutation)
random.seed(SEED)         # Python standard-library generator
tf.set_random_seed(SEED)  # TensorFlow graph-level seed

## Logging Discrete Uniform

In [None]:
# Problem dimensions: `size` keys, spaced `step` apart, inside the universe [0, n).
step = 50
size = 10000
n = step * size

# Fractions of the data used for training and for testing.
train_size = 0.70
test_size = 1 - train_size

# Number of bits needed to write any value below n in binary.
width = np.ceil(np.log2(n)).astype(int)

# The keys themselves: 0, step, 2*step, ... (exactly `size` of them).
predecessors = np.arange(0, n, step).astype(int)
predecessors, predecessors.shape, n

In [None]:
# Regression target for the key at position i: its normalised rank i / size.
cumulative_predecessors = np.arange(size) / size

cumulative_predecessors, cumulative_predecessors.size

In [None]:
# Keep the cell output short: arrays with more than 100 elements are summarised.
np.set_printoptions(threshold=100)

# One row per key: its zero-padded, `width`-character binary representation,
# split into characters and converted to 0/1 integers.
predecessors_bits = np.array(
    [list(np.binary_repr(key, width)) for key in np.arange(0, n, step)]
).astype(int)
predecessors_bits, predecessors_bits.shape

In [None]:
## Create training + testing set
## The idea is to draw a random permutation of the indices 0 .. size-1;
## it is used further down to shuffle the keys before the train/test split.
rnd_ind = npr.permutation(size)
rnd_ind

In [None]:
# Shuffle keys and labels with the same permutation so each key stays paired
# with its own label. The index array is used directly (no list round-trip).
permutated_preds = predecessors[rnd_ind]
permutated_cums = cumulative_predecessors[rnd_ind]

# Derive the split point from the configured train fraction instead of
# repeating the literal 0.70, so changing `train_size` actually takes effect.
split_at = int(size * train_size)
train_ind, train_lab = permutated_preds[:split_at], permutated_cums[:split_at]
test_ind, test_lab = permutated_preds[split_at:], permutated_cums[split_at:]
permutated_preds, permutated_cums

In [None]:
# Encode every training key as a fixed-width row of 0/1 integers.
train_ind_2 = np.array(
    [list(np.binary_repr(key, width)) for key in train_ind]
).astype(int)
train_ind_2

In [None]:
# Encode every test key as a fixed-width row of 0/1 integers.
test_ind_2 = np.array(
    [list(np.binary_repr(key, width)) for key in test_ind]
).astype(int)
test_ind_2

## Functions to build the model

In [None]:
from keras import metrics, optimizers

def build_model(neurons):
    """Build and compile a small fully connected regression network.

    Architecture: a sigmoid Dense layer with ``neurons`` units fed by
    ``neurons`` input features, batch normalisation, and a single sigmoid
    output unit. The model is compiled with mean-squared-error loss, the
    ``"sgd"`` optimizer and mean absolute error as an extra metric.

    Parameters
    ----------
    neurons : int or integer-like
        Number of input features, also used as the width of the hidden layer.

    Returns
    -------
    The compiled Keras ``Sequential`` model.
    """
    # Normalise once so both the unit count and the input dimension receive a
    # plain Python int (previously only the unit count was cast, while the
    # notebook passes a NumPy integer here).
    n_units = int(neurons)

    model = Sequential()
    model.add(Dense(n_units, input_dim=n_units, name="Input_dense_layer", activation="sigmoid"))
    model.add(BatchNormalization())
    model.add(Dense(1, name="Output_dense_layer", activation="sigmoid"))
    model.compile(
        loss='mean_squared_error',
        optimizer="sgd",
        metrics=['mean_absolute_error'])

    return model

In [None]:
from keras.utils import plot_model

# Four identically configured networks, one per training length (20/50/75/100 epochs).
model_20, model_50, model_75, model_100 = [build_model(width) for _ in range(4)]

In [None]:
# Train the first model for 20 epochs; the returned history is plotted later.
history_20 = model_20.fit(
    x=train_ind_2,
    y=train_lab,
    batch_size=256,
    epochs=20,
    verbose=0,  # built-in Keras log is off; the tqdm callback is passed instead
    callbacks=[ktqdm(metric_format="{name}: {value:e}")],
    validation_data=(test_ind_2, test_lab),
    shuffle=True,
)

In [None]:
# Train the second model for 50 epochs; the returned history is plotted later.
history_50 = model_50.fit(
    x=train_ind_2,
    y=train_lab,
    batch_size=256,
    epochs=50,
    verbose=0,  # built-in Keras log is off; the tqdm callback is passed instead
    callbacks=[ktqdm(metric_format="{name}: {value:e}")],
    validation_data=(test_ind_2, test_lab),
    shuffle=True,
)

In [None]:
# Train the third model for 75 epochs; the returned history is plotted later.
history_75 = model_75.fit(
    train_ind_2,
    train_lab,
    epochs=75,  # was `_75`, an undefined name that raised NameError on a fresh kernel
    shuffle=True,
    batch_size=256,
    verbose=0,  # built-in Keras log is off; the tqdm callback is passed instead
    callbacks=[ktqdm(metric_format="{name}: {value:e}")],
    validation_data=(test_ind_2, test_lab)
)

In [None]:
# Train the fourth model for 100 epochs; the returned history is plotted later.
history_100 = model_100.fit(
    x=train_ind_2,
    y=train_lab,
    batch_size=256,
    epochs=100,
    verbose=0,  # built-in Keras log is off; the tqdm callback is passed instead
    callbacks=[ktqdm(metric_format="{name}: {value:e}")],
    validation_data=(test_ind_2, test_lab),
    shuffle=True,
)

In [None]:
%matplotlib notebook
# MAE curves of the 20-epoch run.
plot_history_graph(
    history=history_20,
    index='mean_absolute_error',
    label='mae',
    name="1_mae_uniform.png",
)

In [None]:
# Loss curves of the 20-epoch run.
plot_history_graph(
    history=history_20,
    index='loss',
    label='loss',
    name="1_mse_uniform",
)

In [None]:
# MAE curves of the 50-epoch run.
plot_history_graph(
    history=history_50,
    index='mean_absolute_error',
    label='mae',
    name="2_mae_uniform.png",
)

In [None]:
# Loss curves of the 50-epoch run.
plot_history_graph(
    history=history_50,
    index='loss',
    label='loss',
    name="2_mse_uniform",
)

In [None]:
# MAE curves of the 75-epoch run.
plot_history_graph(
    history=history_75,
    index='mean_absolute_error',
    label='mae',
    name="3_mae_uniform.png",
)

In [None]:
# Loss curves of the 75-epoch run.
plot_history_graph(
    history=history_75,
    index='loss',
    label='loss',
    name="3_mse_uniform",
)

In [None]:
# MAE curves of the 100-epoch run.
plot_history_graph(
    history=history_100,
    index='mean_absolute_error',
    label='mae',
    name="4_mae_uniform.png",
)

In [None]:
# Loss curves of the 100-epoch run.
plot_history_graph(
    history=history_100,
    index='loss',
    label='loss',
    name="4_mse_uniform",
)

## Boxplot error

In [None]:
def prediction(num, width, model, size):
    """Return the index the model predicts for the integer ``num``.

    Parameters
    ----------
    num : int
        Value to look up.
    width : int
        Number of bits used to encode ``num``.
    model : object
        Anything exposing ``predict(array)``; it is called with a single-row
        batch and must return exactly one value.
    size : int
        Scale factor that turns the model output into an index.

    Returns
    -------
    int
        ``ceil(model output * size)`` as a Python int.
    """
    # Encode as a (1, width) row of 0/1 integers. The cast matters: without it
    # the model would be handed the characters '0'/'1', unlike the
    # integer-encoded arrays it was trained on.
    bits = np.array([list(np.binary_repr(num, width))]).astype(int)
    scaled = np.ceil(model.predict(bits) * size)
    # reshape(1) both flattens the output and enforces that it is one value.
    return int(scaled.reshape(1)[0])

In [None]:
def mae_errors(model, size, width, predecessors):
    """Collect the absolute index error of the model for every value in every bucket.

    For each position ``x`` in ``range(size)`` the expected index is
    ``rnd_ind[x]``. The model is then queried for the ``step`` consecutive
    values starting at ``predecessors[expected_index]``, and the distance
    between each predicted index and the expected one is recorded.

    Depends on the notebook-level names ``rnd_ind``, ``step`` and
    ``prediction`` in addition to its arguments.

    Parameters
    ----------
    model : object
        Model forwarded to ``prediction``.
    size : int
        Number of buckets to evaluate; also the scale factor used by
        ``prediction``.
    width : int
        Bit width forwarded to ``prediction``.
    predecessors : sequence of int
        First value of each bucket, indexed by bucket position.

    Returns
    -------
    list
        ``size * step`` absolute errors, in evaluation order.
    """
    errors = []
    # The operation is repeated `size` times; a progress bar replaces the old
    # per-iteration print, which flooded the cell output with `size` lines.
    for x in tqdm(range(size), desc="mae_errors"):
        # The index we expect is the one stored at position x of rnd_ind.
        expected_index = rnd_ind[x]
        # First value of the bucket; it is the same for the whole inner loop.
        bucket_start = predecessors[expected_index]
        # Repeat the prediction `step` times, once per value in the bucket.
        for i in range(step):
            predicted_index = prediction(bucket_start + i, width, model, size)
            # Distance between the predicted index and the expected one.
            errors.append(abs(expected_index - predicted_index))
    # The raw errors are returned; plotting is left to the caller.
    return errors