In [None]:
import pandas as pd
import numpy as np
import ADP
import data_manipulation as dm
from datetime import datetime
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import normalize
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
from scipy.spatial.distance import cdist

from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import cross_val_score
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.utils import plot_model
import tensorflow as tf

In [None]:
import matplotlib.backends.backend_pdf
from scipy.stats import gaussian_kde

# Initiation Variables

In [None]:
if __name__ == '__main__':
    # ======================================================== #
    #                       INITIATION                         #
    # ======================================================== #
    # User-tunable settings; edit the values below to configure a run.

    # Granularities to evaluate: the integers 1 through 10.
    gra_list = list(range(1, 11))

    # How many times the sampling loop is repeated.
    iterations = 33

    # Total number of events drawn per iteration (background + signal).
    total = 100000

    # Number of divisions (windows) of the data-set.
    windows = 100

    # Fraction (0-1) of `total` drawn from the background sample;
    # the remainder is drawn from the signal sample.
    background_percent = 0.99

    # Split fraction. The original note describes it as the share of samples
    # for the training phase -- TODO confirm against the cell that uses it.
    test_size = 0.3

# Load Database

In [None]:
if __name__ == '__main__':
    # ======================================================== #
    #                         LOADING                          #
    # ======================================================== #
    # Read the background and signal CSV files as comma-separated
    # numeric arrays. The first parsed row of each file is discarded:
    # per the original note it is the header of column names, which
    # would otherwise sit in the array as a row of NaN values.

    print('         ==== Commencing Initiation ====\n')

    # --- Background sample ---
    b_name = 'Input_Background_1.csv'
    background = np.genfromtxt(b_name, delimiter=',')[1:, :]
    # Lb = number of background rows, W = number of columns.
    Lb, W = background.shape
    print("     .Background Loaded..." )
    print("     .Background shape: {}".format((Lb, W)))

    # --- Signal sample ---
    s_name = 'Input_Signal_1.csv'
    signal = np.genfromtxt(s_name, delimiter=',')[1:, :]
    # Ls = number of signal rows; the column count is not kept.
    Ls, _ = signal.shape
    print("     .Signal Loaded...")
    print("     .Signal shape: {}\n".format(signal.shape))

    print('\n          ==== Initiation Complete ====\n')
    print('=*=' * 17 )
    print('      ==== Commencing Data Processing ====')

# Plot Distribution

In [None]:
def draw_density(axis, values, title, grid, bandwidth=.25):
    """Shade a Gaussian kernel-density estimate of ``values`` on ``axis``.

    Parameters
    ----------
    axis : object
        Target to draw on; must provide ``set_title`` and ``fill_between``
        (a matplotlib Axes in this notebook).
    values : array-like, 1-D
        Samples whose density is estimated.
    title : str
        Title given to ``axis``.
    grid : array-like, 1-D
        x positions at which the density is evaluated and drawn.
    bandwidth : float, optional
        KDE bandwidth factor, forwarded as ``bw_method`` (default 0.25).
    """
    # A scalar bw_method is used directly as the KDE factor. This replaces
    # overriding `covariance_factor` and calling the private
    # `_compute_covariance()` by hand, with the same resulting estimate.
    density = gaussian_kde(values, bw_method=bandwidth)
    axis.set_title(title, fontsize=15)
    axis.fill_between(grid, density(grid), color='tab:blue', alpha=.2)


if __name__ == '__main__':
    # Not used in this cell; retained in case later cells read it.
    # TODO confirm and remove if unused.
    threshold_list = []

    # Attribute names, in the column order of the loaded arrays.
    columns = ['px1','py1','pz1','E1','eta1','phi1','pt1',
               'px2','py2','pz2','E2','eta2','phi2','pt2',
               'Delta_R','M12','MET','S','C','HT','A']
    n_columns = len(columns)

    # One column of panels per attribute: background on top, signal below.
    f, ax = plt.subplots(2, n_columns, figsize=(n_columns*16, 2*7))

    # Max-normalised data lie in [-1, 1]; densities are evaluated on this grid.
    xs = np.linspace(-1, 1, 200)

    # Sample counts per iteration. round() avoids the off-by-one that int()
    # truncation gives when the product lands just below a whole number.
    b_samples = round(total*background_percent)
    s_samples = total - b_samples

    # Every iteration draws onto the same axes, so the shaded curves of all
    # resamplings are overlaid in the final figure.
    for n_i in range(iterations):
        # Not used in this cell; retained in case later cells read it.
        t_dict = {}

        ##########################################################
        # ------------------------------------------------------ #
        # ------------------- Preparing Data ------------------- #
        # ------------------------------------------------------ #
        ##########################################################
        print('\n     => Iteration Number', (n_i+1) )

        # Draw random sub-sets. Integer sizes are passed as absolute sample
        # counts: a float fraction is rounded up by train_test_split and could
        # return b_samples + 1 rows, shifting the slice boundary used below.
        print('         .Dividing background and signal sub-sets')
        _, divided_background = train_test_split(background, test_size=b_samples)
        _, divided_signal = train_test_split(signal, test_size=s_samples)

        print('         .Selecting Signal on the following proportion:')
        print('             .{}% Background samples'.format(round(background_percent*100)))
        print('             .{}% Signal samples'.format(round((1-background_percent)*100)))
        print('             .{:9d} of Background samples'.format(int(b_samples)) )
        print('             .{:9d} of Signal samples'.format(int(s_samples)))

        # Stack background rows first, then signal rows.
        streaming_data_raw = np.concatenate((divided_background,divided_signal), axis=0)
        print("             .FullData shape: {}\n".format(streaming_data_raw.shape))

        # Scale every column by its maximum.
        print('         .Normalizing Data')
        streaming = normalize(streaming_data_raw,norm='max',axis=0)

        # Rows [0, b_samples) are background, the rest are signal.
        for i, column in enumerate(columns):
            draw_density(ax[0,i], streaming[:b_samples,i],
                         '{} - Background'.format(column), xs)
            draw_density(ax[1,i], streaming[b_samples:,i],
                         '{} - Signal'.format(column), xs)

    plt.savefig('dist.pdf', bbox_inches='tight')
    plt.show()
            

            
            #f = plt.figure(figsize=(16,7))
            #plt.title('{} Background - {} Signal'.format(L1, L2))
            #sns.set(font_scale=1.5)
            #sns.kdeplot(data=total_df, x=x, hue='label',
            #            fill=True,common_norm=False, palette="tab10",alpha=.5, linewidth=0)
            #fig_list.append(f)
            #plt.show()
        
        
        #pdf = matplotlib.backends.backend_pdf.PdfPages("pdf_atributes.pdf")
        #for fig in fig_list:
        #    pdf.savefig(fig)
        #pdf.close()