In [None]:
import datetime as dt
import math
import os
import talos as ta

import holoviews as hv
import keras_metrics as km
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from influxdb import DataFrameClient
from keras import Sequential
from keras import backend as K
from keras import optimizers
from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
from keras.layers import (LSTM, BatchNormalization, Dense, Dropout, Flatten,
                          Input, RepeatVector, TimeDistributed)
from keras.layers.convolutional import Conv1D, MaxPooling1D
from keras.layers.merge import Concatenate, concatenate
from keras.models import Model
from numpy.random import seed


from pylab import rcParams
from scipy import stats
from sklearn.externals import joblib
from sklearn.metrics import (auc, classification_report, confusion_matrix,
                             f1_score, precision_recall_curve,
                             precision_recall_fscore_support, recall_score,
                             roc_curve)
from sklearn.model_selection import KFold, train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.utils import class_weight
from tensorflow import set_random_seed


import ricercando as ric

# Connection settings for the InfluxDB server that holds the 'monroe' database.
# Every value can be overridden through an environment variable so that hosts
# and credentials do not have to be edited in (or committed with) the notebook;
# the fallbacks are the values this notebook has always used.
# NOTE(review): the fallback credentials are still stored here in plain text —
# consider dropping them once the environment variables are set everywhere.
database_ip = os.environ.get('MONROE_DB_HOST', '46.101.250.119')
database_port = int(os.environ.get('MONROE_DB_PORT', '8086'))
database_user = os.environ.get('MONROE_DB_USER', 'monroe')
database_password = os.environ.get('MONROE_DB_PASSWORD', 'secure')
database_name = os.environ.get('MONROE_DB_NAME', 'monroe')

# Both data sources used below (the ricercando helper and the raw InfluxDB
# client) are pointed at the same host.
ric.set_connection_params(host=database_ip)
cli = DataFrameClient(database_ip, database_port, database_user, database_password, database_name)
cli.switch_database(database_name)

# Fixed seeds for NumPy and TensorFlow.
seed(7)
set_random_seed(11)

# Default matplotlib figure size.
rcParams['figure.figsize'] = 8, 6

LABELS = ["False","True"]
# test_size values handed to train_test_split: DATA_SPLIT_PCT carves the test
# set out of each node's data, DATA_SPLIT_PCT_VALID then carves the validation
# set out of what remains.
DATA_SPLIT_PCT = 0.2
DATA_SPLIT_PCT_VALID = 0.4

In [None]:
# Select nodes
# Each row is (node_id, ICCID, start_time, end_time); the rows are expanded
# into one dict per node so the rest of the notebook can look values up by key.
train_nodes = [
    dict(zip(("node_id", "ICCID", "start_time", "end_time"), row))
    for row in (
        ('601', '89390100001965067610', '2018-01-01', '2018-01-30'),
        ('608', '8946071512360089522', '2018-01-01', '2018-01-30'),
        ('609', '89460850007007786482', '2018-01-01', '2018-01-30'),
        ('610', '8939104160000392272', '2018-01-01', '2018-01-30'),
        ('612', '8939104160000392231', '2018-01-01', '2018-01-29'),
        ('613', '89390100001965068626', '2018-01-01', '2018-01-29'),
    )
]

In [None]:
# Build the train / validation / test tensors, once per moving-average width.
#
# window_sizes: widths of the rolling mean applied to the RTT series.
# lookback:     number of RTT columns per example (lookback/2 shifted towards
#               earlier rows, including the row itself, plus lookback/2 shifted
#               towards later rows).
# n_features:   size of the last axis after the reshape below.
window_sizes = [1,5,10,20,40]
lookback = 240
n_features = 1

# One scaled 3-D array per entry of window_sizes, appended in the same order.
# These lists are what the ta.Scan cell receives as x / x_val.
X_train_scaled_windows= []
X_valid_scaled_windows= []
X_test_scaled_windows= []

for window_size in window_sizes:


    # Per-node pieces collected for this window size.
    # NOTE(review): X_array and y_array are never used below.
    X_array = []
    X_train_array = []
    X_valid_array = []
    X_test_array = []

    y_array = []
    y_train_array = []
    y_valid_array = []
    y_test_array = []

    for node in train_nodes:
        node_id = node["node_id"]
        ICCID = node["ICCID"]
        start_time = node["start_time"]
        end_time = node["end_time"]

        # Labels come from the 'class_1m' measurement, RTT values from the
        # 'ping' table (requested with freq="1m"), restricted to this node's ICCID.
        # NOTE(review): neither query depends on window_size, yet both are
        # re-issued on every pass of the outer loop.
        datasets = cli.query("select * from class_1m where NodeId='{}' and time >= '{}' and time <= '{}' ".format(node_id,start_time,end_time))
        df = ric.getdf(tables="ping", nodeid=node_id,  start_time= start_time, end_time=end_time, freq="1m")
        df = df[df['Iccid'] == ICCID]

        # Merge labels and measurements: inner join on (Iccid, time).
        class_feature = datasets['class_1m'].copy()

        class_feature = class_feature.drop(columns=['NodeId'])
        # Drop the timezone from the label index before it is copied into the
        # 'time' join key (presumably so it matches df's index — TODO confirm).
        class_feature.index = class_feature.index.tz_localize(None)
        class_feature['time'] = class_feature.index
        df['time'] = df.index
        df.index.name = None
        df = pd.merge(df, class_feature,  how='inner', left_on=['Iccid','time'], right_on = ['Iccid','time'])
        # Restore a time index on the merged frame.
        df.index = df['time']
        df = df.drop(columns=['time'])
        df.index.name = 'time'
        # Snapshot of the merged, time-indexed frame before feature building.
        df_analise = df.copy()

        # Build the lookback window: every row gets the RTT values of the
        # surrounding rows as extra columns and keeps its own Class as label.

        df = df_analise.copy()
        # Rows without an RTT value are removed, then the index is replaced by
        # 0..n-1 so the positional lookup of the first element below works.
        df = df.dropna(subset=['RTT'])
        df.index = list(range(len(df.index)))
        df = df[["RTT","Class"]]
        # Multiply by 1 (presumably turns boolean labels into integers — TODO confirm dtype).
        df['Class'] = df['Class'].values * 1


        # Any remaining missing value becomes 0; RTT is then replaced by its
        # trailing rolling mean over window_size rows (shorter at the start
        # because min_periods=1).
        df = df.fillna(0)
        df['RTT'] = df['RTT'].rolling(window_size, min_periods=1).mean()


        # Edge values used to pad the shifted columns at both ends of the series.
        first_RTT = df['RTT'][0]
        last_RTT = df['RTT'].values[-1]


        # Columns RTT_-(lookback/2 - 1) ... RTT_-0: RTT shifted i rows down
        # (values from earlier rows), padded with the first RTT.
        for i in range((int(lookback/2))-1,-1,-1):
            df['RTT_-{}'.format(i)] = df['RTT'].shift(periods=i).fillna(first_RTT)

        # Columns RTT_1 ... RTT_(lookback/2): RTT shifted i rows up (values
        # from later rows), padded with the last RTT.
        for i in range(1,(int(lookback/2)+1),1):
            df['RTT_{}'.format(i)] = df['RTT'].shift(periods=-i).fillna(last_RTT)

        # Keep only the shifted columns (names containing "RTT_"); the plain
        # 'RTT' and 'Class' columns are excluded from the features.
        columns_list = list(df.columns.values)
        features_names = list(filter(lambda x : "RTT_" in x, columns_list))


        X = df[features_names].values
        y = df["Class"].values 

        # Unshuffled splits per node: first the test part (DATA_SPLIT_PCT),
        # then the validation part (DATA_SPLIT_PCT_VALID) of what is left.
        # NOTE(review): the window columns are built before splitting, so rows
        # next to a split boundary contain RTT values from the neighbouring set.
        X_train, X_test, y_train, y_test = train_test_split(np.array(X), np.array(y), test_size=DATA_SPLIT_PCT, shuffle=False)
        X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=DATA_SPLIT_PCT_VALID, shuffle=False)

        X_train_array.append(X_train)
        X_valid_array.append(X_valid)
        X_test_array.append(X_test)

        y_train_array.append(y_train)
        y_valid_array.append(y_valid)
        y_test_array.append(y_test)


    # Stack the per-node pieces in train_nodes order.
    X_train = np.concatenate(X_train_array)
    X_valid = np.concatenate(X_valid_array)
    X_test = np.concatenate(X_test_array)


    y_train = np.concatenate(y_train_array)
    y_valid = np.concatenate(y_valid_array)
    y_test = np.concatenate(y_test_array)

    # The scaler is fitted on the training rows only and re-used for the
    # validation and test rows.
    sc = MinMaxScaler()
    X_train_scaled = sc.fit_transform(X_train)
    X_valid_scaled = sc.transform(X_valid)
    X_test_scaled = sc.transform(X_test)

    # (rows, lookback) -> (rows, lookback, n_features)
    X_train_scaled = X_train_scaled.reshape(X_train.shape[0], lookback, n_features)
    X_valid_scaled = X_valid_scaled.reshape(X_valid.shape[0], lookback, n_features)
    X_test_scaled = X_test_scaled.reshape(X_test.shape[0], lookback, n_features)

    X_train_scaled_windows.append(X_train_scaled)
    X_valid_scaled_windows.append(X_valid_scaled)
    X_test_scaled_windows.append(X_test_scaled)

    # NOTE(review): recomputed on every pass although the labels do not depend
    # on window_size; only the value left after the last pass is read by the
    # hyperparameter cell. y_train / y_valid from the last pass are likewise
    # what the ta.Scan cell uses.
    class_weights = class_weight.compute_class_weight('balanced',np.unique(y_train),y_train)

In [None]:
# define custom scoring

# combining intervals
def customise_score(y_testt, y_predd, offset = 5, mark_as=1):
    """Bridge short gaps between positive samples in labels and predictions.

    For every truthy element that has another truthy element within the next
    ``offset`` positions, the falsy elements in between are overwritten with
    ``mark_as``. The same rule is applied independently to both sequences.

    Args:
        y_testt: ground-truth sequence; it is copied, never modified.
        y_predd: predicted sequence; it is copied, never modified.
        offset: how many positions ahead a following positive may be.
        mark_as: value written into the bridged positions.

    Returns:
        Tuple ``(y_t, y_p)`` of NumPy copies with the gaps filled.
    """

    def _bridge(values):
        filled = np.copy(values)
        for start in range(len(filled)):
            if not filled[start]:
                continue
            if not any(filled[start + 1:start + offset + 1]):
                continue
            # A positive exists inside the look-ahead window, so this walk
            # always stops on it before leaving the array.
            step = 1
            while step <= offset and not filled[start + step]:
                filled[start + step] = mark_as
                step += 1
        return filled

    return _bridge(y_testt), _bridge(y_predd)

# counting intervals

def customise_score_for_readable(y_testt, y_predd, offset = 8, offset_pred = 8, mark_as = 1, mark_as_inverse = 0):
    """Fill short gaps between positives, with separate reach for labels and predictions.

    In each sequence, a truthy element followed by another truthy element
    within its look-ahead reach has the falsy elements in between overwritten
    with ``mark_as``.

    Args:
        y_testt: ground-truth sequence; copied before any change.
        y_predd: predicted sequence; copied before any change.
        offset: look-ahead reach used for the ground-truth sequence.
        offset_pred: look-ahead reach used for the predicted sequence.
        mark_as: value written into the bridged positions.
        mark_as_inverse: accepted but not used by this function.

    Returns:
        Tuple ``(y_t, y_p)`` of NumPy copies with the gaps filled.
    """
    truth = np.copy(y_testt)
    predicted = np.copy(y_predd)

    # The same gap-filling pass runs on both copies, each with its own reach.
    for series, reach in ((truth, offset), (predicted, offset_pred)):
        for pos in range(len(series)):
            if series[pos] and any(series[pos + 1:pos + reach + 1]):
                for ahead in range(pos + 1, pos + reach + 1):
                    if series[ahead]:
                        # Reached the next positive: the gap is closed.
                        break
                    series[ahead] = mark_as

    return truth, predicted
            

def customise_score_readable(*args, **kwargs):
    """Turn sample-level labels/predictions into per-zone labels/predictions.

    All arguments are forwarded to ``customise_score_for_readable``; the
    gap-filled sequences it returns are then collapsed into one entry per
    contiguous run ("zone") of positives:

    * a ground-truth zone overlapping at least one positive prediction yields
      a true positive ``(1, 1)``,
    * a ground-truth zone with no positive prediction yields a false negative
      ``(1, 0)``,
    * ``1 + number of true positives`` true negatives ``(0, 0)`` are appended,
    * a predicted zone that overlaps no positive ground-truth sample yields a
      false positive ``(0, 1)``.

    Returns:
        Tuple ``(new_y_t, new_y_p)`` of equally long Python lists.

    Raises:
        ValueError: if the two gap-filled sequences differ in length.
    """
    y_t, y_p = customise_score_for_readable(*args, **kwargs)

    if len(y_t) != len(y_p):
        raise ValueError("Invalid length od y_p and y_t, should be same")

    total = len(y_t)

    def _run_end(series, start):
        # Index one past the last element of the positive run starting at
        # `start`; bounded by the array length so that a run touching the end
        # of the series no longer raises IndexError.
        end = start + 1
        while end < total and series[end]:
            end += 1
        return end

    new_y_t = []
    new_y_p = []

    num_TN = 1

    # Find TP and FN: walk over the ground-truth zones.
    i = 0
    while i < total:
        if not y_t[i]:
            i += 1
            continue
        end = _run_end(y_t, i)
        new_y_t.append(1)
        if any(y_p[i:end]):
            new_y_p.append(1)
            num_TN += 1
        else:
            new_y_p.append(0)
        i = end

    # TN entries - their positions do not matter, only their number.
    for _ in range(num_TN):
        new_y_t.append(0)
        new_y_p.append(0)

    # Find FP: walk over the predicted zones. The scan has to restart at the
    # first sample; re-using the counter of the previous loop made it begin at
    # index num_TN - 1 and skip false positives located before that.
    i = 0
    while i < total:
        if not y_p[i]:
            i += 1
            continue
        end = _run_end(y_p, i)
        if not any(y_t[i:end]):
            new_y_t.append(0)
            new_y_p.append(1)
        i = end

    return new_y_t, new_y_p

In [None]:
# define model

def build_keras_base(x_train, y_train, x_val, y_val, params):
    """Build, compile and fit the multi-branch 1-D CNN for one hyperparameter set.

    Architecture: five parallel branches (``Input(240, 1)`` -> ``Conv1D`` ->
    ``MaxPooling1D(2)``) are concatenated, followed by ``Conv1D`` ->
    ``MaxPooling1D`` -> ``Flatten`` -> ``Dense`` (+ regularisation), an
    optional second ``Dense`` (+ regularisation) and a single sigmoid output.

    Args:
        x_train: training inputs handed to ``model.fit`` (one array per branch).
        y_train: training labels.
        x_val: validation inputs.
        y_val: validation labels.
        params: mapping with the keys ``num_of_filters1``, ``kernel1``,
            ``num_of_filters2``, ``kernel2``, ``pool1``, ``num_of_dense1``,
            ``second_dense`` (size of the second dense layer; a falsy value
            skips it), ``is_dropout`` (``Dropout(0.4)`` when truthy,
            ``BatchNormalization`` otherwise) and ``class_weights``.

    Returns:
        Tuple ``(history, model)``: the object returned by ``model.fit`` and
        the fitted model.
    """
    learning_rate = 0.0001
    use_dropout = params['is_dropout']

    def _regularise(tensor):
        # Regularisation layer placed after each dense layer.
        if use_dropout:
            return Dropout(0.4)(tensor)
        return BatchNormalization()(tensor)

    input_branches = []
    output_branches = []

    # One convolution + pooling branch per input array.
    for _ in range(5):
        visible = Input(shape=(240, 1))
        conv1 = Conv1D(filters=params['num_of_filters1'], kernel_size=params['kernel1'], activation='relu')(visible)
        pool1 = MaxPooling1D(pool_size=2)(conv1)

        input_branches.append(visible)
        output_branches.append(pool1)

    merge = concatenate(output_branches)

    conv2 = Conv1D(filters=params['num_of_filters2'], kernel_size=params['kernel2'], activation='relu')(merge)
    pool2 = MaxPooling1D(pool_size=params['pool1'])(conv2)
    flat = Flatten()(pool2)

    head = _regularise(Dense(params['num_of_dense1'], activation='relu')(flat))
    if params['second_dense']:
        head = _regularise(Dense(params['second_dense'], activation='relu')(head))

    output = Dense(1, activation='sigmoid')(head)
    model = Model(inputs=input_branches, outputs=output)

    adam = optimizers.Adam(learning_rate)
    model.compile(optimizer=adam, loss='binary_crossentropy', metrics=['accuracy', km.precision(), km.recall()])

    # Keras documents `class_weight` as a dict mapping class index -> weight,
    # whereas the notebook passes the array produced by compute_class_weight.
    # Convert any non-dict sequence so the weights are in the documented form.
    # Assumes the class labels are 0..n-1 in the same order as the weights
    # (true for the 0/1 labels used with the sigmoid output) - TODO confirm.
    class_weight_map = params['class_weights']
    if class_weight_map is not None and not isinstance(class_weight_map, dict):
        class_weight_map = dict(enumerate(class_weight_map))

    es = EarlyStopping(monitor='val_loss', patience=4, verbose=0)
    history = model.fit(x_train, y_train, batch_size=128, epochs=20, verbose=0, validation_data=(x_val, y_val), class_weight=class_weight_map, callbacks=[es])

    return history, model

In [None]:
# define hyperparameters

# Search space for the scan: every key is read by build_keras_base and every
# value is the list of candidates for that key.
p = dict(
    num_of_dense1=[128, 256, 512],
    num_of_filters1=[8, 16, 32],
    num_of_filters2=[64, 128, 256],
    kernel1=[3, 5, 9],
    kernel2=[3, 5, 9],
    second_dense=[32, 64, 128],
    is_dropout=[True],
    class_weights=[class_weights],
    pool1=[2, 4],
)

In [None]:
# scan random parameters

# Run the hyperparameter scan: build_keras_base is fitted on the per-window
# training arrays and validated on the per-window validation arrays, using
# parameter sets drawn from p.
# NOTE(review): fraction_limit presumably restricts the scan to that share of
# all parameter permutations - confirm against the installed talos version.
t = ta.Scan(
    x=X_train_scaled_windows,
    y=y_train,
    x_val=X_valid_scaled_windows,
    y_val=y_valid,
    params=p,
    model=build_keras_base,
    experiment_name='hyperparam1',
    fraction_limit=0.04,
)

In [None]:
# analyze results
# Wrap the finished scan `t` in talos' Analyze helper; the following cells
# read the results table and the summary queries from this object.
analyze_object = ta.Analyze(t)

In [None]:
# access the dataframe with the results

# The models are compiled with accuracy, precision and recall only, so the
# results table has no 'val_f1' column unless something else added it, and
# sorting by it would raise KeyError. Derive it from the validation precision
# and recall when it is missing; an existing column is used untouched.
# Rounds where precision + recall is 0 get NaN and are sorted last.
# NOTE(review): assumes the logged columns are named 'val_precision' and
# 'val_recall' and hold numeric values - confirm against analyze_object.data.
results = analyze_object.data
if 'val_f1' not in results.columns:
    val_prec = results['val_precision']
    val_rec = results['val_recall']
    results = results.assign(val_f1=(2 * val_prec * val_rec) / (val_prec + val_rec))

results.sort_values(by=['val_f1'], ascending=False)

In [None]:

# Summary queries on the scan results.
# NOTE(review): these are bare expressions in a single cell, so only the value
# of the last one is rendered; move each query to its own cell or wrap it in
# display() to see all four.
# NOTE(review): the metric column names ('val_acc', 'acc', ...) depend on what
# the installed Keras version logs - confirm they exist in analyze_object.data.

# get the highest result for any metric
analyze_object.high('val_precision')

# get the round with the best result
analyze_object.rounds2high('val_acc')

# get the best parameters
analyze_object.best_params('val_acc', ['acc', 'loss', 'val_loss'])

# get correlation for hyperparameters against a metric
analyze_object.correlate('val_loss', ['acc', 'loss', 'val_loss'])