In [55]:
import numpy as np
import sys
import sklearn
import pandas as pd
import matplotlib
import random
import os
import operator
import utils 
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
from tensorflow.python.client import device_lib
from tensorflow import keras
import pickle

In [56]:
# Print the installed TensorFlow version so results can be tied to an environment.
import tensorflow as tf
print(tf.__version__)

2.7.0


In [57]:
# GPU sanity check: count the physical GPU devices TensorFlow can see, then print
# the device name returned by tf.test.gpu_device_name().
import tensorflow as tf
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
print(tf.test.gpu_device_name())

Num GPUs Available:  1
/device:GPU:0


2022-03-12 19:26:04.756611: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-03-12 19:26:04.756844: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-03-12 19:26:04.757015: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-03-12 19:26:04.757227: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-03-12 19:26:04.757402: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from S

In [3]:
# Print the full description of every local device reported by TensorFlow's device_lib.
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 4007841445872777603
xla_global_id: -1
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 23925030912
locality {
  bus_id: 1
  links {
  }
}
incarnation: 7425289367400722127
physical_device_desc: "device: 0, name: Quadro RTX 6000, pci bus id: 0000:02:00.0, compute capability: 7.5"
xla_global_id: 416903419
]


2022-03-09 18:51:50.692843: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-03-09 18:51:50.693143: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-03-09 18:51:50.693453: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-03-09 18:51:50.693812: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-03-09 18:51:50.694154: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from S


# MAIN

In [16]:
def transform_labels(y_train, y_test):
    """
    Re-encode class labels as consecutive integers starting at zero.

    One encoder is fitted on the concatenation of the train and test labels,
    so both splits share the same mapping (e.g. labels [1, 3, 4] -> [0, 1, 2]).

    Returns a pair (encoded_train, encoded_test) with the same lengths as the
    inputs.
    """
    n_train = len(y_train)
    # there is no validation split: train and test are encoded together
    stacked = np.concatenate((y_train, y_test), axis=0)
    fitted_encoder = LabelEncoder().fit(stacked)
    encoded = fitted_encoder.transform(stacked)
    # cut the encoded vector back into its train / test parts
    return encoded[:n_train], encoded[n_train:]

def check_if_file_exits(file_name):
    """Return True when `file_name` refers to an existing path, False otherwise."""
    path_is_present = os.path.exists(file_name)
    return path_is_present

In [17]:
def readucr(filename, delimiter=','):
    """
    Load a delimited numeric text file whose first column holds the label.

    Parameters
    ----------
    filename : path or file-like object accepted by ``np.loadtxt``.
    delimiter : column separator (default ``','``).

    Returns
    -------
    (X, Y) where ``Y`` is the first column and ``X`` holds all remaining
    columns, one row per line of the file.
    """
    # ndmin=2 keeps the array two-dimensional even when the file contains a
    # single row; without it np.loadtxt returns a 1-D array and the column
    # slicing below raises IndexError.
    data = np.loadtxt(filename, delimiter=delimiter, ndmin=2)
    Y = data[:, 0]
    X = data[:, 1:]
    return X, Y

def read_dataset(path, dataset_name):
    """
    Read the `<dataset_name>_TRAIN` and `<dataset_name>_TEST` files under `path`.

    Both files are parsed with `readucr`. The result is the tuple
    (x_train, y_train, x_test, y_test), each element being a copy of the
    loaded array.
    """
    file_prefix = path + '/' + dataset_name
    train_series, train_labels = readucr(file_prefix + '_TRAIN')
    test_series, test_labels = readucr(file_prefix + '_TEST')
    return (train_series.copy(), train_labels.copy(),
            test_series.copy(), test_labels.copy())

def read_all_datasets(root_dir, to_read):
    """
    Load several datasets from `<root_dir>/UCR_TS_Archive_2015/<name>/`.

    For every name in `to_read` the `<name>_TRAIN` and `<name>_TEST` files are
    parsed with `readucr`. Returns a dict mapping each dataset name to the
    tuple (x_train, y_train, x_test, y_test) of array copies.
    """
    loaded = {}
    for name in to_read:
        file_prefix = root_dir + '/UCR_TS_Archive_2015/' + name + '/' + name
        train_series, train_labels = readucr(file_prefix + '_TRAIN')
        test_series, test_labels = readucr(file_prefix + '_TEST')
        loaded[name] = (train_series.copy(), train_labels.copy(),
                        test_series.copy(), test_labels.copy())
    return loaded

def prepare_data(dataset):
    """
    Turn a raw (x_train, y_train, x_test, y_test) tuple into model inputs.

    Steps:
      * count the distinct labels over train + test,
      * re-encode the labels to 0..k-1 with `transform_labels`,
      * keep integer copies of the encoded labels,
      * one-hot encode the labels with an encoder fitted on train + test,
      * give series arrays with at most two axes a trailing axis of size 1.

    Returns
    -------
    x_train, y_train, x_test, y_test, y_true, nb_classes, y_true_train, enc
        where y_train / y_test are one-hot arrays, y_true / y_true_train are
        the int64 encoded test / train labels and enc is the fitted
        OneHotEncoder.
    """
    x_train, y_train, x_test, y_test = (dataset[pos] for pos in range(4))

    distinct_labels = np.unique(np.concatenate((y_train, y_test), axis=0))
    nb_classes = len(distinct_labels)

    # labels become consecutive integers starting at zero
    y_train, y_test = transform_labels(y_train, y_test)

    # integer labels are kept because y_train / y_test turn into one-hot below
    y_true = y_test.astype(np.int64)
    y_true_train = y_train.astype(np.int64)

    # integer labels -> one-hot vectors
    enc = sklearn.preprocessing.OneHotEncoder()
    label_column = np.concatenate((y_train, y_test), axis=0).reshape(-1, 1)
    enc.fit(label_column)
    y_train = enc.transform(y_train.reshape(-1, 1)).toarray()
    y_test = enc.transform(y_test.reshape(-1, 1)).toarray()

    univariate = len(x_train.shape) <= 2
    if univariate:
        # treat the univariate series as multivariate with a single dimension
        x_train = x_train.reshape((x_train.shape[0], x_train.shape[1], 1))
        x_test = x_test.reshape((x_test.shape[0], x_test.shape[1], 1))

    return x_train, y_train, x_test, y_test, y_true, nb_classes, y_true_train, enc
  
    
    
def create_classifier(classifier_name, input_shape, nb_classes, output_directory,
                      verbose=False, build=True):
    """
    Instantiate the classifier selected by `classifier_name`.

    'nne'       -> classifiers.nne.Classifier_NNE(output_directory, input_shape,
                   nb_classes, verbose)
    'inception' -> classifiers.inception.Classifier_INCEPTION(output_directory,
                   input_shape, nb_classes, verbose, build=build)

    Any other name yields None. The `classifiers` package is imported lazily,
    only for the branch that is taken.
    """
    if classifier_name == 'nne':
        from classifiers import nne
        ensemble = nne.Classifier_NNE(output_directory, input_shape,
                                      nb_classes, verbose)
        return ensemble
    elif classifier_name == 'inception':
        from classifiers import inception
        network = inception.Classifier_INCEPTION(output_directory, input_shape,
                                                 nb_classes, verbose, build=build)
        return network
    return None
    
    
def fit_classifier():
    """
    Build and train the classifier described by the notebook-level variables.

    Reads these module-level names (they must be set before the call):
    x_train, y_train, x_test, y_test, y_true, classifier_name, nb_classes and
    output_directory. Returns nothing.
    """
    # the model input shape is the sample shape without the leading sample axis
    sample_shape = x_train.shape[1:]
    model = create_classifier(classifier_name, sample_shape, nb_classes,
                              output_directory)
    model.fit(x_train, y_train, x_test, y_test, y_true)

def create_directory(directory_path):
    """
    Create `directory_path` (including parents) if it does not exist yet.

    Returns
    -------
    The path when this call created the directory, otherwise None (the path
    already existed, or creating it failed with an OSError).
    """
    if os.path.exists(directory_path):
        return None
    try:
        os.makedirs(directory_path)
    except OSError:
        # Typically another process/machine created the path in the meantime.
        # Only filesystem errors are swallowed: a bare `except` would also hide
        # KeyboardInterrupt / SystemExit.
        return None
    return directory_path
    
    
def get_xp_val(xp):
    """
    Return the list of values to try for the hyperparameter named `xp`.

    Known names: 'batch_size', 'use_bottleneck', 'use_residual', 'nb_filters',
    'depth' and 'kernel_size'. Any other name raises Exception('wrong argument').
    A new list is returned on every call.
    """
    candidates = (
        ('batch_size', (16, 32, 128)),
        ('use_bottleneck', (False,)),
        ('use_residual', (False,)),
        ('nb_filters', (16, 64)),
        ('depth', (3, 9)),
        ('kernel_size', (8, 64)),
    )
    for known_name, values in candidates:
        if xp == known_name:
            return list(values)
    raise Exception('wrong argument')
    
def generate_results_csv(output_file_name, root_dir, clfs, to_read):
    """
    Collect the per-run `df_metrics.csv` files into a single results table.

    For every classifier in `clfs` and dataset in `to_read`, the file
    `<root_dir>/results/<classifier>/<dataset>/df_metrics.csv` is read when it
    exists (missing files are skipped) and tagged with `classifier_name`,
    `dataset_name` and `iteration` (always 0).

    The combined table is written to `<root_dir>/<output_file_name>` without the
    index. The returned DataFrame contains the rows whose `classifier_name` is
    in `clfs`.
    """
    result_columns = ['classifier_name', 'dataset_name', 'iteration',
                      'precision', 'accuracy', 'recall', 'duration']
    # The empty template goes first so the column order is stable and the
    # result still has the expected columns when no metrics file is found.
    frames = [pd.DataFrame(data=np.zeros((0, 7), dtype=float), index=[],
                           columns=result_columns)]
    for classifier_name in clfs:
        for dataset_name in to_read:
            metrics_path = root_dir + '/results/' + classifier_name + '/' \
                           + dataset_name + '/' + 'df_metrics.csv'

            if not os.path.exists(metrics_path):
                continue
            df_metrics = pd.read_csv(metrics_path)
            df_metrics['classifier_name'] = classifier_name
            df_metrics['dataset_name'] = dataset_name
            df_metrics['iteration'] = 0
            frames.append(df_metrics)

    # A single concat instead of growing the frame inside the loop. The old,
    # never-used duration accumulator is gone: it raised KeyError on a
    # header-only metrics file.
    res = pd.concat(frames, axis=0, sort=False)

    res.to_csv(root_dir + '/' + output_file_name, index=False)
    res = res.loc[res['classifier_name'].isin(clfs)]

    return res



In [18]:
def read_dataset_rischio(path, strrischio):
    """
    Load the four `*_<strrischio>_voronoi_strat.p` pickles found under `path`.

    Files are read in the order train_set, train_label, test_set, test_label
    and only the first 100 entries of each unpickled object are kept.
    Returns (x_train, y_train, x_test, y_test), each a `.copy()` of the
    truncated object.

    NOTE: pickle.load can execute arbitrary code; only use trusted files.
    """
    common_tail = '_' + strrischio + '_voronoi_strat.p'
    pieces = []
    for stem in ('/train_set', '/train_label', '/test_set', '/test_label'):
        with open(path + stem + common_tail, 'rb') as f:
            pieces.append(pickle.load(f)[:100])
    return tuple(piece.copy() for piece in pieces)

In [19]:
def read_dataset_tesi(path, strrischio, str_norm, str_overunder):
    if str_norm != '':
        str_norm = '_' + str_norm
    if str_overunder != '':
        if strrischio == '2' or strrischio == '5':
            str_overunder = '_' + str_overunder +'_40_60'  
        else:
            str_overunder = '_' + str_overunder +'_50_50'
    with open(path + '/training' + strrischio + str_norm + str_overunder +'.p', 'rb') as f:
        x_train = pickle.load(f)
    with open(path + '/label_training' + strrischio + str_norm + str_overunder +'.p', 'rb') as f:
        y_train = pickle.load(f)
    with open(path + '/test' + strrischio + str_norm + '.p', 'rb') as f:
        x_test = pickle.load(f)
    with open(path + '/label_test' + strrischio + '.p', 'rb') as f:
        y_test = pickle.load(f)
    dataset = (x_train.copy(), y_train.copy(), x_test.copy(),y_test.copy())
    return dataset

In [20]:
def read_dataset_modified(dataset_folder, rischio, scaling, overunder, perc_min = 40, perc_magg = 60):
    """
    Load the pickled train/test split for risk `rischio` from `dataset_folder`.

    Parameters
    ----------
    dataset_folder : directory containing the `.p` files.
    rischio : risk identifier inserted into every file name.
    scaling : scaling tag ('' for none); adds `_<scaling>` to the feature files.
    overunder : resampling tag ('' for none); adds
        `_<overunder>_<perc_min>_<perc_magg>` to the training files.
    perc_min, perc_magg : values formatted with str() into the resampling suffix.

    File-name suffixes (S = `_<scaling>`, R = the resampling suffix):

        scaling  overunder | training   label_training  test  label_test
        set      set       | S + R      S + R           S     ''
        ''       set       | R          R               ''    ''
        set      ''        | S          ''              S     ''
        ''       ''        | ''         ''              ''    ''

    Returns (x_train, y_train, x_test, y_test), each a `.copy()` of the
    unpickled object.

    NOTE: pickle.load can execute arbitrary code; only use trusted files.
    """
    # The previous four-way if/elif chain ended in an unreachable `else` that
    # printed a message and returned None; the suffixes are derived directly.
    scaling_suffix = '_' + scaling if scaling != '' else ''
    if overunder != '':
        resample_suffix = '_' + overunder + '_' + str(perc_min) + '_' + str(perc_magg)
        add_str_xtrain = add_str_ytrain = scaling_suffix + resample_suffix
    else:
        # without resampling the training labels carry no suffix at all
        add_str_xtrain = scaling_suffix
        add_str_ytrain = ''
    add_str_xtest = scaling_suffix
    add_str_ytest = ''

    with open(dataset_folder + '/training' + rischio + add_str_xtrain + '.p', 'rb') as f:
        x_train = pickle.load(f)
    with open(dataset_folder + '/label_training' + rischio + add_str_ytrain + '.p', 'rb') as f:
        y_train = pickle.load(f)
    with open(dataset_folder + '/test' + rischio + add_str_xtest + '.p', 'rb') as f:
        x_test = pickle.load(f)
    with open(dataset_folder + '/label_test' + rischio + add_str_ytest + '.p', 'rb') as f:
        y_test = pickle.load(f)
    dataset = (x_train.copy(), y_train.copy(), x_test.copy(), y_test.copy())
    return dataset

In [21]:
def create_reports(dataset_name, y_test, y_train, x_train=None):
    """
    Write a train/test classification report for each of the five Inception runs.

    For every run directory `InceptionTime/results/inception/<itr>/<dataset_name>`
    (itr in '', '_itr_1', ..., '_itr_4') this function:
      1. loads `best_model.hdf5` and predicts on `x_train`,
      2. saves the raw training predictions to `y_pred_train.npy`,
      3. loads the stored test predictions from `y_pred.npy`,
      4. writes both classification reports to `class_reports`.

    Parameters
    ----------
    dataset_name : name of the results sub-directory.
    y_test, y_train : true labels as integer class indices (predictions are
        reduced with argmax over axis 1 before comparison).
    x_train : training inputs passed to `model.predict`. When omitted, the
        notebook-level variable `x_train` is used, which is what this function
        silently relied on before the parameter existed.
    """
    if x_train is None:
        # backward-compatible fallback to the notebook global
        try:
            x_train = globals()['x_train']
        except KeyError:
            raise NameError("name 'x_train' is not defined") from None

    itr_list = ['', '_itr_1', '_itr_2', '_itr_3', '_itr_4']
    for itr in itr_list:
        # every file of one run lives in the same directory
        run_dir = 'InceptionTime/results/inception/' + itr + '/' + dataset_name

        reconstructed_model = keras.models.load_model(run_dir + '/best_model.hdf5')
        y_pred_train = reconstructed_model.predict(x_train)

        # keep the raw scores on disk before collapsing them to class indices
        with open(run_dir + '/y_pred_train.npy', 'wb') as f:
            np.save(f, y_pred_train)

        y_pred_train = np.argmax(y_pred_train, axis=1)

        with open(run_dir + '/y_pred.npy', 'rb') as f:
            y_pred = np.load(f)

        y_pred = np.argmax(y_pred, axis=1)

        with open(run_dir + '/class_reports', 'w') as f:
            f.write('TRAINING ' + dataset_name + '\n' +
                    str(classification_report(y_train, y_pred_train)) + '\n')
            f.write('TEST ' + dataset_name + '\n' + str(classification_report(y_test, y_pred)) + '\n')
    return
    
    
def nne_reports(dataset_name, y_test, y_train):
    """
    Write the train/test classification report for the five-network ensemble.

    Reads `y_pred_train.npy` and `y_pred.npy` from
    `InceptionTime/results/nne/inception-0-1-2-3-4-/<dataset_name>/`, reduces
    both to class indices with argmax over axis 1 and writes the two
    classification reports to `class_reports` in that same directory.

    Parameters
    ----------
    dataset_name : name of the results sub-directory.
    y_test, y_train : true labels as integer class indices.
    """
    # One directory for reads and the write. Previously the report went to the
    # absolute '/InceptionTime/...' path while the predictions were read from
    # the relative 'InceptionTime/...' path.
    ensemble_dir = 'InceptionTime/results/nne/inception-0-1-2-3-4-/' + dataset_name

    with open(ensemble_dir + '/y_pred_train.npy', 'rb') as f:
        y_pred_train = np.load(f)

    y_pred_train = np.argmax(y_pred_train, axis=1)

    with open(ensemble_dir + '/y_pred.npy', 'rb') as f:
        y_pred = np.load(f)

    y_pred = np.argmax(y_pred, axis=1)

    with open(ensemble_dir + '/class_reports', 'w') as f:
        f.write('TRAINING ' + dataset_name + '\n' +
                str(classification_report(y_train, y_pred_train)) + '\n')
        f.write('TEST ' + dataset_name + '\n' + str(classification_report(y_test, y_pred)) + '\n')
    return

In [22]:
def dataset_name_f(strrischio, overunder, normst):
    """
    Build the dataset name used for the results directories.

    The name is 'rischio<strrischio>' followed by `_<normst>` when `normst` is
    non-empty and, when `overunder` is non-empty, by `_<overunder>_40_60` for
    strrischio '2' or '5' and `_<overunder>_50_50` for any other value.
    """
    pieces = ['rischio' + strrischio]
    if normst != '':
        pieces.append('_' + normst)
    if overunder != '':
        ratio = '_40_60' if (strrischio == '2' or strrischio == '5') else '_50_50'
        pieces.append('_' + overunder + ratio)
    return ''.join(pieces)

In [28]:
# Reload the first trained Inception run for the configuration below and
# compute its predictions on the training set.
risk_dataset = 'rischio2'
strrischio = '2'
normst = 'norm'
overunder = 'randover'
# Derive the results sub-directory from THIS cell's settings. Previously
# `dataset_name` was taken from whatever an earlier cell had left in the kernel,
# so the loaded model could belong to a different dataset than the data below.
dataset_name = dataset_name_f(strrischio, overunder, normst)
dataset_folder = 'datasets_tesi/rischio' + strrischio
dataset = read_dataset_modified(dataset_folder, strrischio, normst, overunder, perc_min = '40', perc_magg = '60')
x_train, y_train, x_test, y_test, y_true, nb_classes, y_true_train, enc = prepare_data(dataset)
itr_list = ['', '_itr_1', '_itr_2', '_itr_3', '_itr_4']
# '' is the directory suffix of the first run (iteration 0)
itr = itr_list[0]
reconstructed_model = keras.models.load_model('InceptionTime/results/inception/'+ itr + '/' + dataset_name +'/best_model.hdf5')
y_pred_train = reconstructed_model.predict(x_train)

In [29]:
y_pred_train

array([[1.0000000e+00, 4.3167083e-09],
       [9.9490273e-01, 5.0972179e-03],
       [9.9999988e-01, 8.5445194e-08],
       ...,
       [4.8049332e-11, 1.0000000e+00],
       [9.8188599e-16, 1.0000000e+00],
       [1.2940050e-11, 1.0000000e+00]], dtype=float32)

In [None]:
# Experiment driver. sys.argv[1] selects the mode:
#   'InceptionTime'        train 5 Inception runs, write reports, then ensemble them
#   'InceptionTime_xp'     hyperparameter study (one value changed at a time) + ensembles
#   'generate_results_csv' collect the stored metrics into results.csv
root_dir = '/InceptionTime/'
xps = ['use_bottleneck', 'use_residual', 'nb_filters', 'depth', 'kernel_size', 'batch_size']
# sys.argv is overwritten so the script-style dispatch below can be driven from the notebook
sys.argv = ['self.py','InceptionTime','']

risk_dataset = 'rischio2'
strrischio = '2'
normst = 'norm'
overunder = 'randunder'

dataset_name = dataset_name_f(strrischio, overunder, normst)


dataset_folder = '/datasets_tesi/rischio' + strrischio

with tf.device("/device:GPU:0"):
    print("tf.keras code in this scope will run on GPU")
    if sys.argv[1] == 'InceptionTime':
        classifier_name = 'inception'
        nb_iter_ = 5
        dataset = read_dataset_modified(dataset_folder, strrischio, normst, overunder, perc_min = '40', perc_magg = '60')
        # `iteration` instead of `iter`, so the builtin iter() is not shadowed
        for iteration in range(nb_iter_):
            print('\t\titer', iteration)

            # the first run has no suffix, later runs live under '_itr_<k>'
            trr = ''
            if iteration != 0:
                trr = '_itr_' + str(iteration)

            tmp_output_directory = root_dir + '/results/' + classifier_name + '/' + trr + '/'

            x_train, y_train, x_test, y_test, y_true, nb_classes, y_true_train, enc = prepare_data(dataset)

            output_directory = tmp_output_directory + dataset_name + '/'

            temp_output_directory = create_directory(output_directory)

            # create_directory returns None when the directory already exists,
            # which is taken as "this run was already done"
            if temp_output_directory is None:
                print('Already_done', tmp_output_directory, dataset_name)
                continue

            # fit_classifier reads x_train, y_train, ..., output_directory from the globals set above
            fit_classifier()

            print('\t\t\t\tDONE')

            create_directory(output_directory + '/DONE')

        # y_train / y_test are one-hot here; the reports expect class indices
        create_reports(dataset_name, np.argmax(y_test, axis=1),np.argmax(y_train, axis=1))

        # run the ensembling of these iterations of Inception
        classifier_name = 'nne'

        dataset = read_dataset_modified(dataset_folder, strrischio, normst, overunder, perc_min = '40', perc_magg = '60')

        tmp_output_directory = root_dir + '/results/' + classifier_name + '/'

        x_train, y_train, x_test, y_test, y_true, nb_classes, y_true_train, enc = prepare_data(dataset)

        output_directory = tmp_output_directory + dataset_name + '/'

        fit_classifier()

        print('\t\t\t\tDONE')


        nne_reports(dataset_name, np.argmax(y_test, axis=1),np.argmax(y_train, axis=1))

    elif sys.argv[1] == 'InceptionTime_xp':
        # this part is for running inception with the different hyperparameters
        # listed in the paper
        from classifiers import inception
        from classifiers import nne

        classifier_name = 'inception'
        max_iterations = 5

        dataset = read_dataset('/UCR_TS_Archive_2015/' + dataset_name, dataset_name)

        for xp in xps:

            xp_arr = get_xp_val(xp)

            print('xp', xp)

            for xp_val in xp_arr:
                print('\txp_val', xp_val)

                # only the hyperparameter under study is overridden
                kwargs = {xp: xp_val}

                for iteration in range(max_iterations):

                    trr = ''
                    if iteration != 0:
                        trr = '_itr_' + str(iteration)
                    print('\t\titer', iteration)

                    output_directory = root_dir + 'results/' + classifier_name + '/' + xp + '/' + str(
                        xp_val) + '/' + trr + '/' + dataset_name + '/'


                    print('\t\t\tdataset_name', dataset_name)
                    x_train, y_train, x_test, y_test, y_true, nb_classes, y_true_train, enc = prepare_data(dataset)

                    temp_output_directory = create_directory(output_directory)

                    if temp_output_directory is None:
                        print('\t\t\t\t', 'Already_done')
                        continue

                    input_shape = x_train.shape[1:]
                    print(input_shape)

                    classifier = inception.Classifier_INCEPTION(output_directory, input_shape, nb_classes,
                                                                    verbose=False, build=True, **kwargs)

                    classifier.fit(x_train, y_train, x_test, y_test, y_true)

                    # the DONE directory is created once the fit call has returned
                    create_directory(output_directory + '/DONE')

                    print('\t\t\t\t', 'DONE')

        # we now need to ensemble each iteration of inception (aka InceptionTime)
        classifier_name = 'nne'

        dataset = read_dataset('/UCR_TS_Archive_2015/' + dataset_name, dataset_name)

        tmp_output_directory = root_dir + '/results/' + classifier_name + '/'

        for xp in xps:
            xp_arr = get_xp_val(xp)
            for xp_val in xp_arr:

                clf_name = 'inception/' + xp + '/' + str(xp_val)

                x_train, y_train, x_test, y_test, y_true, nb_classes, y_true_train, enc = prepare_data(dataset)

                output_directory = tmp_output_directory + dataset_name + '/'

                classifier = nne.Classifier_NNE(output_directory, x_train.shape[1:],
                                                    nb_classes, clf_name=clf_name)

                classifier.fit(x_train, y_train, x_test, y_test, y_true)

    elif sys.argv[1] == 'generate_results_csv':
        clfs = []
        itr = '-0-1-2-3-4-'
        inceptionTime = 'nne/inception'
        # add InceptionTime: an ensemble of 5 Inception networks
        clfs.append(inceptionTime + itr)
        # add InceptionTime for each hyperparameter study
        for xp in xps:
            xp_arr = get_xp_val(xp)
            for xp_val in xp_arr:
                clfs.append(inceptionTime + '/' + xp + '/' + str(xp_val) + itr)
        # `to_read` was never defined in this notebook (NameError); this
        # notebook handles the single dataset selected above
        to_read = [dataset_name]
        df = generate_results_csv('results.csv', root_dir, clfs, to_read)
       


tf.keras code in this scope will run on GPU
		iter 0


2022-03-09 18:51:55.284369: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-03-09 18:51:55.284645: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-03-09 18:51:55.284848: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-03-09 18:51:55.285097: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-03-09 18:51:55.285301: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from S

input_tensor.shape (None, 1800, 1)
input_tensor.shape (None, 1800, 128)
input_tensor.shape (None, 1800, 128)
input_tensor.shape (None, 1800, 128)
input_tensor.shape (None, 1800, 128)
input_tensor.shape (None, 1800, 128)


2022-03-09 18:51:57.920044: I tensorflow/stream_executor/cuda/cuda_dnn.cc:366] Loaded cuDNN version 8201

You may not need to update to CUDA 11.1; cherry-picking the ptxas binary is often sufficient.
  layer_config = serialize_layer_fn(layer)


In [15]:
# Print the installed TensorFlow version (relies on `tf` imported in an earlier cell).
print(tf.__version__)

2.7.0


In [18]:
# Configuration of the dataset variant to load
risk_dataset = 'rischio2'
strrischio = '2'
normst = 'norm'
overunder = 'randunder'

# Suffixes of the dataset name: normalisation tag first, resampling tag second
str_norm_dataset = '_' + normst if normst != '' else ''
if overunder == '':
    str_overunder_dataset = ''
elif strrischio == '2' or strrischio == '5':
    # risks 2 and 5 use the 40/60 resampling ratio
    str_overunder_dataset = '_' + overunder +'_40_60'
else:
    str_overunder_dataset = '_' + overunder +'_50_50'
dataset_name = 'rischio' + strrischio + str_norm_dataset + str_overunder_dataset

# Load the pickled split and turn it into model-ready arrays
dataset = read_dataset_tesi('/datasets_tesi/rischio' + strrischio , strrischio, normst, overunder)
x_train, y_train, x_test, y_test, y_true, nb_classes, y_true_train, enc = prepare_data(dataset)

# Collapse the one-hot label matrices back to integer class indices
y_train, y_test = np.argmax(y_train, axis=1), np.argmax(y_test, axis=1)