In [1]:
import os
import pickle
from datetime import datetime

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import normalize

import ADP
import data_manipulation as dm

In [2]:
if __name__ == '__main__':
    ##########################################################
    # ------------------------------------------------------ #
    # --------------------- INITIATION --------------------- #
    # ------------------------------------------------------ #
    ##########################################################
    ### User-tunable settings ###

    # Number of events sampled per iteration
    total = 10000

    # Number of resampling iterations
    iterations = 33

    # Granularity levels to evaluate: 1 through 10 inclusive
    gra_list = list(range(1, 11))

    # Number of data-set divisions
    windows = 100

    # Fraction of the sampled events held out for the testing phase
    # (the remaining 1 - test_size is used for the training phase)
    test_size = 0.3

    # Fraction of the testing-phase samples that are background
    background_percent = 0.99
    

In [3]:
if __name__ == '__main__':
    ##########################################################
    # ------------------------------------------------------ #
    # ----------------------- LOADING ---------------------- #
    # ------------------------------------------------------ #
    ##########################################################
    # Load the background and signal tables from CSV. The first
    # line of each file holds the column names; it is skipped at
    # read time (skip_header=1) instead of being parsed into a
    # row of NaN values and sliced off afterwards.

    print('         ==== Commencing Initiation ====\n')

    ### Background
    b_name='Input_Background_1.csv'
    # atleast_2d keeps a file with a single data row two-dimensional,
    # so the (rows, columns) unpacking below still works.
    background = np.atleast_2d(np.genfromtxt(b_name, delimiter=',', skip_header=1))
    Lb, W = background.shape  # Lb: background row count, W: column count
    print("     .Background Loaded..." )
    print("     .Background shape: {}".format(background.shape))

    ### Signal
    s_name='Input_Signal_1.csv'
    signal = np.atleast_2d(np.genfromtxt(s_name, delimiter=',', skip_header=1))
    # Only the row count is needed; indexing avoids assigning to `_`,
    # which IPython uses for the last cell output.
    Ls = signal.shape[0]
    print("     .Signal Loaded...")
    print("     .Signal shape: {}\n".format(signal.shape))

    print('\n          ==== Initiation Complete ====\n')
    print('=*='*17 )
    print('      ==== Commencing Data Processing ====')

         ==== Commencing Initiation ====

     .Background Loaded...
     .Background shape: (543500, 21)
     .Signal Loaded...
     .Signal shape: (522467, 21)


          ==== Initiation Complete ====

=*==*==*==*==*==*==*==*==*==*==*==*==*==*==*==*==*=
      ==== Commencing Data Processing ====


In [4]:
if __name__ == '__main__':
    # For each of `iterations` rounds: draw a random subset of background
    # events, split it into an offline (static) part and an online part,
    # mix a few signal events into the online part, normalize both parts,
    # run ADP for every granularity in `gra_list`, and pickle all results
    # under ADP_Iterations/.

    def _dump_pickle(obj, path):
        """Pickle `obj` to `path`; the file is closed even if dumping fails."""
        with open(path, 'wb') as handle:
            pickle.dump(obj, handle)

    def _run_adp(data, granularity):
        """Call ADP.ADP in 'Offline' mode on `data` with euclidean distance."""
        settings = {'data': data,
                    'granularity': granularity,
                    'distancetype': 'euclidean'}
        return ADP.ADP(settings, 'Offline')

    # Create the output directory up front so a long ADP run cannot be
    # lost to a FileNotFoundError at the first dump.
    os.makedirs('ADP_Iterations', exist_ok=True)

    # Sample counts depend only on the configuration, so compute them once.
    # Defining them outside the loop also keeps `b_test` available for the
    # control-variable dump below when `iterations` is 0.
    test = int(total*test_size)
    b_test = int(test*background_percent)
    # Number of signal events on the online phase.
    signal_online_samples = int(test - b_test)

    for n_i in range(iterations):
        print('\n     => Iteration Number', (n_i+1) )

        # Divide data-set into training and testing sub-sets.
        # Integer test_size values request an absolute number of samples,
        # which avoids float round-off in the equivalent ratios.
        print('         .Dividing training and testing sub-sets')
        divided_background = train_test_split(background, test_size=int(total))[1]

        static_data_raw, background_test = train_test_split(divided_background, test_size=test_size, random_state=42)
        background_test = train_test_split(background_test, test_size=b_test)[1]

        reduced_signal = train_test_split(signal, test_size=signal_online_samples)[1]

        # round() instead of int(): int() truncates float error, e.g.
        # (1 - 0.9) * 100 would be reported as 9 rather than 10.
        print('         .Selecting Signal on the following porpotion:')
        print('             .{}% Background samples'.format(round(background_percent*100)))
        print('             .{}% Signal samples'.format(round((1-background_percent)*100)))
        print('             .{:9d} of Background samples (Offline)'.format(int(total*(1-test_size))))
        print('             .{:9d} of Background samples (Online)'.format(int(b_test)) )
        print('             .{:9d} of Signal samples (Online)'.format(int(signal_online_samples)))

        # Concatenating Signal and the Test Background sub-set
        streaming_data_raw = np.concatenate((background_test,reduced_signal), axis=0)
        print("             .Offline shape: {}".format(static_data_raw.shape))
        print("             .Online shape: {}\n".format(streaming_data_raw.shape))

        # Normalize Data
        # NOTE(review): static and streaming are normalized by separate
        # calls, so each is scaled from its own values - confirm intended.
        print('         .Normalizing Data')
        static = normalize(static_data_raw,norm='max',axis=0)
        streaming = normalize(streaming_data_raw,norm='max',axis=0)

        print('             .Executing for granularities', gra_list)
        for gra in gra_list:
            print('                 .Iter {} - Gra {}'.format(n_i+1, gra))
            print('                 .', datetime.now())

            # ADP on the offline set, the online set, and both together.
            static_output = _run_adp(static, gra)
            streaming_output = _run_adp(streaming, gra)
            output = _run_adp(np.concatenate((static,streaming), axis=0), gra)

            # Dump the ADP outputs.
            # NOTE(review): file names use the zero-based n_i while the log
            # lines above print n_i+1 - kept as-is for existing readers.
            _dump_pickle(static_output, r'ADP_Iterations/Static_output_Iter{}_Gra{}.pkl'.format(n_i,gra))
            _dump_pickle(streaming_output, r'ADP_Iterations/Streaming_output_Iter{}_Gra{}.pkl'.format(n_i,gra))
            _dump_pickle(output, r'ADP_Iterations/Output_Iter{}_Gra{}.pkl'.format(n_i,gra))
        # Dump the normalized static and streaming arrays of this iteration.
        _dump_pickle(static, r'ADP_Iterations/Static_Iter{}.pkl'.format(n_i))
        _dump_pickle(streaming, r'ADP_Iterations/Streaming_Iter{}.pkl'.format(n_i))

    # Dump the control variables of this run.
    var = {'gra_list': gra_list,
           'iter': iterations,
           'total': total,
           'back_percent': background_percent,
           'test_size': test_size,
           'b_test': b_test}
    _dump_pickle(var, r'ADP_Iterations/var.pkl')


    print('\n        ====Data Processing Complete====\n' )
    print('=*='*17 )


     => Iteration Number 1
         .Dividing training and testing sub-sets
         .Selecting Signal on the following porpotion:
             .99% Background samples
             .1% Signal samples
             .     7000 of Background samples (Offline)
             .     2970 of Background samples (Online)
             .       30 of Signal samples (Online)
             .Offline shape: (7000, 21)
             .Online shape: (3000, 21)

         .Normalizing Data
             .Executing for granularities [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
                 .Iter 1 - Gra 1
                 . 2021-05-17 15:43:07.646976
                 .Iter 1 - Gra 2
                 . 2021-05-17 15:43:58.306835
                 .Iter 1 - Gra 3
                 . 2021-05-17 15:44:54.593938
                 .Iter 1 - Gra 4
                 . 2021-05-17 15:46:29.698663
                 .Iter 1 - Gra 5
                 . 2021-05-17 15:49:11.699857
                 .Iter 1 - Gra 6
                 . 2021-05-1

             .Executing for granularities [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
                 .Iter 7 - Gra 1
                 . 2021-05-17 17:28:39.533583
                 .Iter 7 - Gra 2
                 . 2021-05-17 17:29:19.630096
                 .Iter 7 - Gra 3
                 . 2021-05-17 17:30:07.027555
                 .Iter 7 - Gra 4
                 . 2021-05-17 17:31:18.560709
                 .Iter 7 - Gra 5
                 . 2021-05-17 17:33:29.879770
                 .Iter 7 - Gra 6
                 . 2021-05-17 17:35:37.166141
                 .Iter 7 - Gra 7
                 . 2021-05-17 17:38:30.148394
                 .Iter 7 - Gra 8
                 . 2021-05-17 17:42:07.354950
                 .Iter 7 - Gra 9
                 . 2021-05-17 17:43:53.087628
                 .Iter 7 - Gra 10
                 . 2021-05-17 17:45:36.312437

     => Iteration Number 8
         .Dividing training and testing sub-sets
         .Selecting Signal on the following porpotion:
   

                 .Iter 13 - Gra 2
                 . 2021-05-17 19:08:18.030891
                 .Iter 13 - Gra 3
                 . 2021-05-17 19:09:11.253898
                 .Iter 13 - Gra 4
                 . 2021-05-17 19:10:35.383791
                 .Iter 13 - Gra 5
                 . 2021-05-17 19:12:43.616885
                 .Iter 13 - Gra 6
                 . 2021-05-17 19:14:56.726320
                 .Iter 13 - Gra 7
                 . 2021-05-17 19:16:57.425441
                 .Iter 13 - Gra 8
                 . 2021-05-17 19:18:54.598831
                 .Iter 13 - Gra 9
                 . 2021-05-17 19:20:48.015845
                 .Iter 13 - Gra 10
                 . 2021-05-17 19:22:39.362325

     => Iteration Number 14
         .Dividing training and testing sub-sets
         .Selecting Signal on the following porpotion:
             .99% Background samples
             .1% Signal samples
             .     7000 of Background samples (Offline)
             .     29

                 .Iter 19 - Gra 4
                 . 2021-05-17 20:52:57.398243
                 .Iter 19 - Gra 5
                 . 2021-05-17 20:54:59.501519
                 .Iter 19 - Gra 6
                 . 2021-05-17 20:57:17.391366
                 .Iter 19 - Gra 7
                 . 2021-05-17 20:59:25.527333
                 .Iter 19 - Gra 8
                 . 2021-05-17 21:01:33.294913
                 .Iter 19 - Gra 9
                 . 2021-05-17 21:03:44.913534
                 .Iter 19 - Gra 10
                 . 2021-05-17 21:06:06.107052

     => Iteration Number 20
         .Dividing training and testing sub-sets
         .Selecting Signal on the following porpotion:
             .99% Background samples
             .1% Signal samples
             .     7000 of Background samples (Offline)
             .     2970 of Background samples (Online)
             .       30 of Signal samples (Online)
             .Offline shape: (7000, 21)
             .Online shape: (3000, 

                 .Iter 25 - Gra 6
                 . 2021-05-17 23:17:03.893913
                 .Iter 25 - Gra 7
                 . 2021-05-17 23:19:25.099512
                 .Iter 25 - Gra 8
                 . 2021-05-17 23:21:20.927289
                 .Iter 25 - Gra 9
                 . 2021-05-17 23:23:21.289056
                 .Iter 25 - Gra 10
                 . 2021-05-17 23:25:25.812145

     => Iteration Number 26
         .Dividing training and testing sub-sets
         .Selecting Signal on the following porpotion:
             .99% Background samples
             .1% Signal samples
             .     7000 of Background samples (Offline)
             .     2970 of Background samples (Online)
             .       30 of Signal samples (Online)
             .Offline shape: (7000, 21)
             .Online shape: (3000, 21)

         .Normalizing Data
             .Executing for granularities [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
                 .Iter 26 - Gra 1
                 . 2

                 .Iter 31 - Gra 8
                 . 2021-05-18 00:54:37.886241
                 .Iter 31 - Gra 9
                 . 2021-05-18 00:56:31.326021
                 .Iter 31 - Gra 10
                 . 2021-05-18 00:58:24.411415

     => Iteration Number 32
         .Dividing training and testing sub-sets
         .Selecting Signal on the following porpotion:
             .99% Background samples
             .1% Signal samples
             .     7000 of Background samples (Offline)
             .     2970 of Background samples (Online)
             .       30 of Signal samples (Online)
             .Offline shape: (7000, 21)
             .Online shape: (3000, 21)

         .Normalizing Data
             .Executing for granularities [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
                 .Iter 32 - Gra 1
                 . 2021-05-18 01:00:16.575402
                 .Iter 32 - Gra 2
                 . 2021-05-18 01:00:51.885495
                 .Iter 32 - Gra 3
                 . 2