In [1]:
import tensorflow.keras as keras
import tensorflow as tf
import numpy as np
import time

import matplotlib
from utils.utils import save_test_duration
import matplotlib.pyplot as plt

from utils.utils import save_logs
from utils.utils import calculate_metrics
class RESNET:

    def __init__(self, output_directory, input_shape, nb_classes, verbose=False, build=True, load_weights=False):
        self.output_directory = output_directory
        if build == True:
            self.model = self.build_model(input_shape, nb_classes)
            if (verbose == True):
                self.model.summary()
            self.verbose = verbose
            if load_weights == True:
                self.model.load_weights(self.output_directory
                                        .replace('resnet_augment', 'resnet')
                                        .replace('TSC_itr_augment_x_10', 'TSC_itr_10')
                                        + '/model_init.hdf5')
            else:
                self.model.save_weights(self.output_directory + 'model_init.hdf5')
        return

    def build_model(self, input_shape, nb_classes):
        n_feature_maps = 64

        input_layer = keras.layers.Input(shape = (5,1,))

        # BLOCK 1

        conv_x = keras.layers.Conv1D(filters=n_feature_maps, kernel_size=8, padding='same')(input_layer)
        conv_x = keras.layers.BatchNormalization()(conv_x)
        conv_x = keras.layers.Activation('relu')(conv_x)

        conv_y = keras.layers.Conv1D(filters=n_feature_maps, kernel_size=5, padding='same')(conv_x)
        conv_y = keras.layers.BatchNormalization()(conv_y)
        conv_y = keras.layers.Activation('relu')(conv_y)

        conv_z = keras.layers.Conv1D(filters=n_feature_maps, kernel_size=3, padding='same')(conv_y)
        conv_z = keras.layers.BatchNormalization()(conv_z)

        # expand channels for the sum
        shortcut_y = keras.layers.Conv1D(filters=n_feature_maps, kernel_size=1, padding='same')(input_layer)
        shortcut_y = keras.layers.BatchNormalization()(shortcut_y)

        output_block_1 = keras.layers.add([shortcut_y, conv_z])
        output_block_1 = keras.layers.Activation('relu')(output_block_1)

        # BLOCK 2

        conv_x = keras.layers.Conv1D(filters=n_feature_maps * 2, kernel_size=8, padding='same')(output_block_1)
        conv_x = keras.layers.BatchNormalization()(conv_x)
        conv_x = keras.layers.Activation('relu')(conv_x)

        conv_y = keras.layers.Conv1D(filters=n_feature_maps * 2, kernel_size=5, padding='same')(conv_x)
        conv_y = keras.layers.BatchNormalization()(conv_y)
        conv_y = keras.layers.Activation('relu')(conv_y)

        conv_z = keras.layers.Conv1D(filters=n_feature_maps * 2, kernel_size=3, padding='same')(conv_y)
        conv_z = keras.layers.BatchNormalization()(conv_z)

        # expand channels for the sum
        shortcut_y = keras.layers.Conv1D(filters=n_feature_maps * 2, kernel_size=1, padding='same')(output_block_1)
        shortcut_y = keras.layers.BatchNormalization()(shortcut_y)

        output_block_2 = keras.layers.add([shortcut_y, conv_z])
        output_block_2 = keras.layers.Activation('relu')(output_block_2)

        # BLOCK 3

        conv_x = keras.layers.Conv1D(filters=n_feature_maps * 2, kernel_size=8, padding='same')(output_block_2)
        conv_x = keras.layers.BatchNormalization()(conv_x)
        conv_x = keras.layers.Activation('relu')(conv_x)

        conv_y = keras.layers.Conv1D(filters=n_feature_maps * 2, kernel_size=5, padding='same')(conv_x)
        conv_y = keras.layers.BatchNormalization()(conv_y)
        conv_y = keras.layers.Activation('relu')(conv_y)

        conv_z = keras.layers.Conv1D(filters=n_feature_maps * 2, kernel_size=3, padding='same')(conv_y)
        conv_z = keras.layers.BatchNormalization()(conv_z)

        # no need to expand channels because they are equal
        shortcut_y = keras.layers.BatchNormalization()(output_block_2)

        output_block_3 = keras.layers.add([shortcut_y, conv_z])
        output_block_3 = keras.layers.Activation('relu')(output_block_3)

        # FINAL

        gap_layer = keras.layers.GlobalAveragePooling1D()(output_block_3)

        output_layer = keras.layers.Dense(nb_classes, activation='softmax')(gap_layer)

        model = keras.models.Model(inputs=input_layer, outputs=output_layer)

        model.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adam(),
                      metrics=['accuracy'])

        reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='loss', factor=0.5, patience=50, min_lr=0.0001)

        file_path = self.output_directory + 'best_model.hdf5'

        model_checkpoint = keras.callbacks.ModelCheckpoint(filepath=file_path, monitor='loss',
                                                           save_best_only=True)

        self.callbacks = [reduce_lr, model_checkpoint]

        return model

    def fit(self, x_train, y_train, x_val, y_val, y_true):
        if not tf.test.is_gpu_available:
            print('error')
            exit()
        # x_val and y_val are only used to monitor the test loss and NOT for training
        batch_size = 64
        nb_epochs = 1500

        mini_batch_size = int(min(x_train.shape[0] / 10, batch_size))

        start_time = time.time()

        hist = self.model.fit(x_train, y_train, batch_size=mini_batch_size, epochs=nb_epochs,
                              verbose=self.verbose, validation_data=(x_val, y_val), callbacks=self.callbacks)

        duration = time.time() - start_time

        self.model.save(self.output_directory + 'last_model.hdf5')

        y_pred = self.predict(x_val, y_true, x_train, y_train, y_val,
                              return_df_metrics=False)

        # save predictions
        np.save(self.output_directory + 'y_pred.npy', y_pred)

        # convert the predicted from binary to integer
        y_pred = np.argmax(y_pred, axis=1)

        df_metrics = save_logs(self.output_directory, hist, y_pred, y_true, duration)

        keras.backend.clear_session()

        return df_metrics

    def predict(self, x_test, y_true, x_train, y_train, y_test, return_df_metrics=True):
        start_time = time.time()
        model_path = self.output_directory + 'best_model.hdf5'
        model = keras.models.load_model(model_path)
        y_pred = model.predict(x_test)
        if return_df_metrics:
            y_pred = np.argmax(y_pred, axis=1)
            df_metrics = calculate_metrics(y_true, y_pred, 0.0)
            return df_metrics
        else:
            test_duration = time.time() - start_time
            save_test_duration(self.output_directory + 'test_duration.csv', test_duration)
            return y_pred


In [2]:
import pandas as pd
import numpy as np
dataset = pd.read_csv (r'C:/Users/Boaz/Documents/new_data_label.csv')
del dataset['Unnamed: 0']
del dataset['event_date']
# dataset.loc[dataset['labels'] != 0, 'labels'] = 1
# dataset.loc[dataset['labels'] == 0, 'labels'] = 0
print(dataset)


data = pd.read_csv(r'C:/Users/Boaz/Documents/new_data_label.csv')
del data['Unnamed: 0']
del data['event_date']
del data['labels']
print(data)

       response_time  response_size  session_duration  response_errorcode_num  \
0             718444      750197670                 2                       0   
1            1051741      794574705                 1                       0   
2             666003      445975743                 0                       0   
3             939968      342481634                 0                       0   
4            1059742      316304423                 0                       0   
...              ...            ...               ...                     ...   
61339        1654815      986044166                 2                       0   
61340        1080735      942590486                 0                       0   
61341        1266569      887658828                 4                       0   
61342         555966      850409314                 0                       0   
61343        1603774     1005933723                 2                       0   

       response_failure_num

In [3]:
import pandas as pd
import numpy as np
datasets = pd.read_csv (r'C:/Users/Boaz/Documents/new_data_label.csv')
del datasets['Unnamed: 0']
del datasets['event_date']
print(datasets)


datas = pd.read_csv(r'C:/Users/Boaz/Documents/new_data_label.csv')
del datas['Unnamed: 0']
del datas['event_date']
del datas['labels']
print(datas)

       response_time  response_size  session_duration  response_errorcode_num  \
0             718444      750197670                 2                       0   
1            1051741      794574705                 1                       0   
2             666003      445975743                 0                       0   
3             939968      342481634                 0                       0   
4            1059742      316304423                 0                       0   
...              ...            ...               ...                     ...   
61339        1654815      986044166                 2                       0   
61340        1080735      942590486                 0                       0   
61341        1266569      887658828                 4                       0   
61342         555966      850409314                 0                       0   
61343        1603774     1005933723                 2                       0   

       response_failure_num

In [4]:
Y= datasets['labels']
from keras.utils import np_utils
from sklearn.preprocessing import LabelEncoder
encoder = LabelEncoder()
encoder.fit(Y)
encoded_Y = encoder.transform(Y)
# convert integers to dummy variables (i.e. one hot encoded)
dummy_y = np_utils.to_categorical(encoded_Y)

dummy_y

array([[1., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0.],
       ...,
       [1., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0.]], dtype=float32)

In [5]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(datas, dummy_y, test_size=0.3,random_state=109,stratify=dataset['labels']) # 70% training and 30% test

In [6]:
import tensorflow.keras as keras
import tensorflow as tf
import numpy as np
import time 
t = time.time()
model = RESNET(r'C:/Users/Boaz/Documents/', 5, 6)
model = model.build_model(5, 6)
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 5, 1)]       0                                            
__________________________________________________________________________________________________
conv1d_11 (Conv1D)              (None, 5, 64)        576         input_2[0][0]                    
__________________________________________________________________________________________________
batch_normalization_12 (BatchNo (None, 5, 64)        256         conv1d_11[0][0]                  
__________________________________________________________________________________________________
activation_9 (Activation)       (None, 5, 64)        0           batch_normalization_12[0][0]     
____________________________________________________________________________________________

In [7]:

model.fit(X_train, y_train, batch_size=16, epochs=30,
            verbose=1)
elapsed = time.time() - t

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [8]:
y_pred = model.predict(X_test)
y_pred_new=  np.argmax(y_pred,axis=1)
y_pred_new

array([0, 0, 0, ..., 0, 0, 0], dtype=int64)

In [9]:
count = 0
for x in y_pred_new:
    if x != 0:
        #print(x)
        count += 1
print(count)

325


In [10]:
y_test
a = y_test
y_test_new = [np.where(r==1)[0][0] for r in a]
for i in range(len(y_test_new)):
    if y_test_new[i] == 2:
        y_test_new[i] = 3
        
    elif y_test_new[i] == 3:
        y_test_new[i] = 5
        
    elif y_test_new[i] == 4:
        y_test_new[i] = 6
        
    elif y_test_new[i] == 5:
        y_test_new[i] = 7

#y_test_new

In [11]:
for i in range(len(y_pred_new)):
    if y_pred_new[i] == 0:
        y_pred_new[i] = 0
    elif y_pred_new[i] == 1:
        y_pred_new[i] = 1
    elif y_pred_new[i] == 2:
        y_pred_new[i] = 3
        
    elif y_pred_new[i] == 3:
        y_pred_new[i] = 5
        
    elif y_pred_new[i] == 4:
        y_pred_new[i] = 6
        
    elif y_pred_new[i] == 5:
        y_pred_new[i] = 7


In [12]:
count = 0
for x in y_pred_new:
    if x != 0:
        count += 1
print(count)

325


In [13]:
from sklearn import metrics
print("F1:",metrics.f1_score(y_test_new,y_pred_new,labels=[0,1,3,5,6,7],average=None))
print('\nDuration in seconds:')
print(elapsed)

F1: [0.87698277 0.66465257 0.         0.         0.         0.12987013]

Duration in seconds:
560.0729398727417


In [14]:
metrics.multilabel_confusion_matrix(y_pred_new, y_test_new, labels =[0,1,3,5,6,7])

array([[[  315,    10],
        [ 3953, 14126]],

       [[17962,   125],
        [   97,   220]],

       [[16696,  1708],
        [    0,     0]],

       [[18343,    61],
        [    0,     0]],

       [[16319,  2085],
        [    0,     0]],

       [[18332,    64],
        [    3,     5]]], dtype=int64)

In [15]:
from sklearn.preprocessing import label_binarize

y_test_roc = label_binarize(y_test_new, classes=[0,1,3,5,6,7])
y_result_roc = label_binarize(y_pred_new, classes=[0,1,3,5,6,7])
metrics.roc_auc_score(y_test_roc, y_result_roc, average=None, multi_class='ovr', labels=[0,1,3,5,6,7])

array([0.53654882, 0.81615494, 0.5       , 0.5       , 0.5       ,
       0.53615007])

In [16]:
from sklearn.metrics import roc_curve, auc
import seaborn as sns
from sklearn.preprocessing import label_binarize
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC
from matplotlib import pyplot as plt
matplotlib.use('agg')
def plot_multiclass_roc(y_result, y_test, n_classes, figsize=(17, 6)):
    
    # structures
    fpr = dict()
    tpr = dict()
    roc_auc = dict()

    # calculate dummies once
    y_test_roc = label_binarize(y_test, classes=[0, 1, 3, 5, 6, 7])
    y_result_roc = label_binarize(y_result, classes=[0, 1, 3, 5, 6, 7])
    
    for i in range(n_classes):
        fpr[i], tpr[i], _ = roc_curve(y_test_roc[:, i], y_result_roc[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    # roc for each class
    fig, ax = plt.subplots(figsize=figsize)
    ax.plot([0, 1], [0, 1], 'k--')
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title('Receiver operating characteristic example')
    for i in range(n_classes):
        ax.plot(fpr[i], tpr[i], label='ROC curve (area = %0.2f) for label %i' % (roc_auc[i], i))
    ax.legend(loc="best")
    ax.grid(alpha=.4)
    sns.despine()
    plt.show()

plot_multiclass_roc(y_pred_new, y_test_new, n_classes=6, figsize=(16, 10))



In [17]:
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    import itertools
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()

In [18]:
cnf_matrix = metrics.confusion_matrix(y_test_new, y_pred_new,labels=[0, 1, 3, 5, 6, 7])
np.set_printoptions(precision=2)

# Plot non-normalized confusion matrix
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=[0, 1, 3, 5, 6, 7],
                      title='Confusion matrix, without normalization')

Confusion matrix, without normalization
[[14126    10     0     0     0     0]
 [  123   220     0     0     0     2]
 [ 1708     0     0     0     0     0]
 [   22    38     0     0     0     1]
 [ 2083     2     0     0     0     0]
 [   17    47     0     0     0     5]]
