In [None]:
import time
import datetime
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import KFold
from sklearn import metrics
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend as K
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Conv1D, GlobalAveragePooling1D, BatchNormalization, SimpleRNN
from tensorflow.keras.metrics import Accuracy, Precision, Recall
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
from tensorflow.keras.optimizers import SGD

In [None]:
# Raw inputs: one entry per dataset, holding its display name, the feature
# table and the label table, each read from a CSV in the working directory.
tmp_dt = [
    dict(
        name='Mirai_dataset',
        df=pd.read_csv('Mirai_dataset.csv'),
        label=pd.read_csv('mirai_labels.csv'),
    ),
]

# Echo the dataset names as a quick check of what was loaded.
for dt in tmp_dt:
    print(dt['name'])

In [None]:
# Array-form datasets; starts empty and is appended to elsewhere in the notebook.
datasets = list()

# Column-oriented accumulator for the final report: every key maps to its own
# list, and one value per (dataset, model) run is appended to each list.
result = {
    column: []
    for column in (
        'dataset',
        'model',
        'accuracy',
        'precision',
        'recall',
        'f-score',
        'train_time',
        'test_time',
    )
}

In [None]:
# Turn every loaded feature/label table into a NumPy array and register the
# pair, together with the dataset name, in `datasets`.
for dt in tmp_dt:
    X, y = np.array(dt['df']), np.array(dt['label'])
    datasets.append(dict(name=dt['name'], X=X, y=y))

In [None]:
def recall_m(y_true, y_pred):
    """Recall metric computed with Keras backend ops.

    Both the element-wise product ``y_true * y_pred`` and ``y_true`` are
    clipped to [0, 1] and rounded before being summed, giving the count of
    true positives and of actual positives respectively.

    Returns:
        true positives / (actual positives + K.epsilon()); the epsilon keeps
        the division defined when there are no positives.
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    hit_count = K.sum(hit_mask)
    positive_mask = K.round(K.clip(y_true, 0, 1))
    positive_count = K.sum(positive_mask)
    return hit_count / (positive_count + K.epsilon())

def precision_m(y_true, y_pred):
    """Precision metric computed with Keras backend ops.

    Both the element-wise product ``y_true * y_pred`` and ``y_pred`` are
    clipped to [0, 1] and rounded before being summed, giving the count of
    true positives and of predicted positives respectively.

    Returns:
        true positives / (predicted positives + K.epsilon()); the epsilon
        keeps the division defined when nothing is predicted positive.
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    hit_count = K.sum(hit_mask)
    flagged_mask = K.round(K.clip(y_pred, 0, 1))
    flagged_count = K.sum(flagged_mask)
    return hit_count / (flagged_count + K.epsilon())

def f1_m(y_true, y_pred):
    """F1 metric: harmonic mean of ``precision_m`` and ``recall_m``.

    K.epsilon() is added to the denominator so the result stays defined when
    precision and recall are both zero.
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    half_f1 = (p * r) / (p + r + K.epsilon())
    return 2 * half_f1

In [None]:
def rnn_model(x_train):
    """Build and compile the SimpleRNN binary classifier.

    Args:
        x_train: training data; only ``x_train.shape[1]`` is read, and it is
            used as the last dimension of the ``(1, n)`` input shape.

    Returns:
        dict with the compiled model under 'mdl' and the label 'RNN' under
        'name'.
    """
    n_features = x_train.shape[1]

    layers = [
        SimpleRNN(256, dropout=0.2, input_shape=(1, n_features)),
        Dense(256, activation='relu'),
        Dropout(0.2),
        Dense(1, activation='sigmoid'),
    ]
    model = Sequential()
    for layer in layers:
        model.add(layer)

    # The metric order is significant: the evaluation results are read by index.
    tracked_metrics = ['accuracy', f1_m, precision_m, recall_m]
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=tracked_metrics)

    return {'mdl': model, 'name': 'RNN'}

In [None]:
def lstm_model(x_train):
    """Build and compile the stacked-LSTM binary classifier.

    Four LSTM layers (the first sized to the feature count, then 128, 100 and
    64 units), each followed by 20% dropout, feed a single-unit output layer.

    Args:
        x_train: training data; only ``x_train.shape[1]`` is read. It sets the
            width of the first LSTM layer and the last dimension of the
            ``(1, n)`` input shape.

    Returns:
        dict with the compiled model under 'mdl' and the label 'LSTM' under
        'name'.
    """
    n_features = x_train.shape[1]
    model = Sequential([
        LSTM(n_features, input_shape=(1, n_features), activation='relu', return_sequences=True),
        Dropout(0.2),
        LSTM(128, activation='relu', return_sequences=True),
        Dropout(0.2),
        LSTM(100, activation='relu', return_sequences=True),
        Dropout(0.2),
        # Last recurrent layer returns only its final output, not a sequence.
        LSTM(64, activation='relu'),
        Dropout(0.2),
        # Sigmoid, not softmax: softmax normalises across the units of the
        # layer, so with a single unit it always outputs exactly 1.0 and the
        # network cannot learn. Sigmoid yields the probability that
        # binary_crossentropy expects.
        Dense(1, activation='sigmoid')
    ])
    model.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        # The metric order is significant: the evaluation results are read by index.
        metrics=['accuracy', f1_m, precision_m, recall_m]
    )
    return {'mdl': model, 'name': 'LSTM'}

In [None]:
def cnn_model(x_train):
    """Build and compile the 1-D convolutional binary classifier.

    Three 'same'-padded Conv1D layers (64, 32 and 16 filters) are followed by
    global average pooling, two ReLU dense layers and a single-unit output.

    Args:
        x_train: training data; only ``x_train.shape[1]`` is read, and it is
            used as the last dimension of the ``(1, n)`` input shape.

    Returns:
        dict with the compiled model under 'mdl' and the label 'CNN' under
        'name'.
    """
    n_features = x_train.shape[1]
    model = Sequential([
        Conv1D(input_shape=(1, n_features), filters=64, kernel_size=3, activation='relu', padding='same'),
        Conv1D(filters=32, kernel_size=3, activation='relu', padding='same'),
        Conv1D(filters=16, kernel_size=2, activation='relu', padding='same'),
        GlobalAveragePooling1D(),
        Dense(52, activation='relu'),
        Dense(26, activation='relu'),
        # Sigmoid, not softmax: softmax normalises across the units of the
        # layer, so with a single unit it always outputs exactly 1.0 and the
        # network cannot learn. Sigmoid yields the probability that
        # binary_crossentropy expects.
        Dense(1, activation='sigmoid')
    ])
    model.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        # The metric order is significant: the evaluation results are read by index.
        metrics=['accuracy', f1_m, precision_m, recall_m]
    )
    return {'mdl': model, 'name': 'CNN'}

In [None]:
def intermediate_net_model(x_train):
    """Build and compile the small fully-connected binary classifier.

    Two 64-unit ReLU dense layers feed a single-unit output; the model is
    trained with SGD at a learning rate of 0.1.

    Args:
        x_train: training data; only ``x_train.shape[1]`` is read, and it is
            used as the last dimension of the ``(1, n)`` input shape.

    Returns:
        dict with the compiled model under 'mdl' and the label
        'Intermediate Net' under 'name'.
    """
    model = Sequential()
    model.add(Dense(64, activation='relu', input_shape=(1, x_train.shape[1])))
    model.add(Dense(64, activation='relu'))
    # Sigmoid, not softmax: softmax normalises across the units of the layer,
    # so with a single unit it always outputs exactly 1.0 and the network
    # cannot learn. Sigmoid yields the probability binary_crossentropy expects.
    model.add(Dense(1, activation='sigmoid'))

    model.compile(
        loss='binary_crossentropy',
        optimizer=SGD(learning_rate=0.1),
        # The metric order is significant: the evaluation results are read by index.
        metrics=['accuracy', f1_m, precision_m, recall_m]
    )
    return {'mdl': model, 'name': 'Intermediate Net'}

In [None]:
def deep_net_model(x_train):
    """Build and compile the batch-normalised fully-connected binary classifier.

    Three 64-unit ReLU dense layers, each followed by batch normalisation,
    then 20% dropout and a single-unit output layer.

    Args:
        x_train: training data; only ``x_train.shape[1]`` is read, and it is
            used as the last dimension of the ``(1, n)`` input shape.

    Returns:
        dict with the compiled model under 'mdl' and the label 'Deep Net'
        under 'name'.
    """
    model = Sequential()

    model.add(Dense(64, activation='relu', input_shape=(1, x_train.shape[1])))
    model.add(BatchNormalization())

    model.add(Dense(64, activation='relu'))
    model.add(BatchNormalization())

    model.add(Dense(64, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))

    # Sigmoid, not softmax: softmax normalises across the units of the layer,
    # so with a single unit it always outputs exactly 1.0 and the network
    # cannot learn. Sigmoid yields the probability binary_crossentropy expects.
    model.add(Dense(1, activation='sigmoid'))
    model.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        # The metric order is significant: the evaluation results are read by index.
        metrics=['accuracy', f1_m, precision_m, recall_m]
    )
    return {'mdl': model, 'name': 'Deep Net'}

In [None]:
# Train and evaluate every deep-learning model on every dataset, appending one
# row of metrics and timings per (dataset, model) pair to `result`.
for dt in datasets:
    dataset_start_time = time.time()
    dt_name = dt['name']
    # Hold out 20% of the samples for evaluation.
    X_train, X_test, y_train, y_test = train_test_split(dt['X'], dt['y'], test_size = 0.2)
    # NOTE(review): `cv` and `sc` are created but never used in this loop, so
    # no cross-validation or feature scaling is actually applied. Confirm
    # whether they were meant to be.
    cv = KFold(n_splits=4, random_state=1, shuffle=True)
    sc = MinMaxScaler(feature_range = (0, X_train.shape[1]))
    
    # Deep learning models. They are built before the reshape below because
    # each builder reads the feature count from the 2-D X_train.shape[1].
    dl_models = [
        intermediate_net_model(X_train),
        deep_net_model(X_train),
        rnn_model(X_train),
        lstm_model(X_train),
        cnn_model(X_train),
    ]

    # Add a length-1 middle axis so the data matches the models'
    # (1, n_features) input shape.
    X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
    X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])

    for model in dl_models:
        model_name = model['name']
        print(f'Starting analising dataset [{dt_name}] with model {model_name}')
        
        # Train
        print(f'Training dataset [{dt_name}] with model {model_name}')
        train_start_time = time.time()
        estimator = model['mdl'].fit(X_train, y_train, batch_size=64, epochs=32, verbose=0)
        train_elapsed_time = time.time() - train_start_time
        
        # Test
        print(f'Testing dataset [{dt_name}] with model {model_name}')
        test_start_time = time.time()
        results = model['mdl'].evaluate(X_test, y_test, verbose=0)
        # Measure from test_start_time: measuring from train_start_time would
        # fold the whole training time into the reported test time.
        test_elapsed_time = time.time() - test_start_time
        
        # Build results. Index 0 of `results` is the loss; the rest follow the
        # metric order given at compile time: accuracy, f1, precision, recall.
        accuracy = results[1]
        f_score = results[2]
        precision = results[3]
        recall = results[4]
        
        result['dataset'].append(dt_name)
        result['model'].append(model_name)
        result['accuracy'].append(accuracy)
        result['precision'].append(precision)
        result['recall'].append(recall)
        result['f-score'].append(f_score)
        result['train_time'].append(str(datetime.timedelta(seconds=train_elapsed_time)))
        result['test_time'].append(str(datetime.timedelta(seconds=test_elapsed_time)))
        
    dataset_elapsed_time = time.time() - dataset_start_time
    print(f'dataset [{dt_name}] took {str(dataset_elapsed_time)} seconds to complete\n')

In [None]:
# Render the accumulated metrics as a table; the bare last expression gives the
# notebook's rich DataFrame display.
print('Results are:\n')
pd.DataFrame(result)

In [None]:
# Write the metrics table to results.csv with a header row and without the
# positional index column.
df = pd.DataFrame(data=result)
df.to_csv('results.csv', header=True, index=False)