In [1]:
import warnings
warnings.filterwarnings( 'ignore' )
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV, TimeSeriesSplit
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.impute import SimpleImputer

In [2]:
# Locations of the pre-split train/test partitions, relative to this notebook.
trainpath = '../../CleanPartitions/trainp4.csv'
testpath = '../../CleanPartitions/testp4.csv'

traindata = pd.read_csv(trainpath, encoding='ISO-8859-1')
testdata = pd.read_csv(testpath, encoding='ISO-8859-1')

# Drop the date column when it is present; errors='ignore' makes the call a
# no-op for a partition that does not have it.
traindata = traindata.drop(columns=['attack_date'], errors='ignore')
testdata = testdata.drop(columns=['attack_date'], errors='ignore')

# Report the shapes unconditionally. These prints used to be nested under the
# test-set `if`, so they were skipped whenever 'attack_date' was absent there.
print('shape train data: ', traindata.shape)
print('shape test data: ', testdata.shape)

shape train data:  (4585, 25)
shape test data:  (1964, 25)


In [3]:
def split_data(dftrain, dftest, target='gname'):
    """Split train and test frames into feature matrices and label vectors.

    Parameters
    ----------
    dftrain, dftest : pandas.DataFrame
        Frames that each contain the label column ``target``.
    target : str, default 'gname'
        Name of the label column to separate from the features.

    Returns
    -------
    tuple
        ``(Xtrain, Ytrain, Xtest, Ytest)`` where each ``X*`` is the input
        frame without ``target`` and each ``Y*`` is the ``target`` column.

    Raises
    ------
    KeyError
        If ``target`` is missing from either frame.
    """
    # Fail early with a message that says which frame lacks the label column.
    for label, frame in (('dftrain', dftrain), ('dftest', dftest)):
        if target not in frame.columns:
            raise KeyError(f"column {target!r} not found in {label}")

    Ytrain = dftrain[target]
    Xtrain = dftrain.drop(columns=[target])
    Ytest = dftest[target]
    Xtest = dftest.drop(columns=[target])
    return Xtrain, Ytrain, Xtest, Ytest

def find_best_mlp(Xtrain, Ytrain):
    """Grid-search MLPClassifier hyperparameters and return the best model.

    Every combination in the parameter grid is scored by accuracy using a
    5-split ``TimeSeriesSplit`` over the training data.

    Parameters
    ----------
    Xtrain : array-like
        Training features.
    Ytrain : array-like
        Training labels.

    Returns
    -------
    MLPClassifier
        ``best_estimator_`` of the fitted search (with GridSearchCV's default
        ``refit=True`` this is the best configuration refit on all of
        ``Xtrain``/``Ytrain``).
    """
    params = {
        'hidden_layer_sizes': [(50,), (100,), (50, 50)],
        'activation': ['relu', 'tanh'],
        'solver': ['adam'],
        'alpha': [1e-5, 1e-4, 1e-3],
        'learning_rate_init': [0.001, 0.01, 0.1],
        'early_stopping': [True],
    }

    # A fixed random_state keeps weight initialisation reproducible across runs.
    mlp = MLPClassifier(max_iter=200, random_state=42)

    # NOTE(review): TimeSeriesSplit trains each fold on earlier rows and
    # validates on later ones, so it assumes the rows are in chronological
    # order -- TODO confirm, since 'attack_date' is dropped before this call.
    tscv = TimeSeriesSplit(n_splits=5)

    # Build the search once. Previously a fully configured GridSearchCV was
    # immediately overwritten by a bare one, which silently discarded
    # n_jobs=-1 and ran the whole grid on a single core.
    grid_search = GridSearchCV(
        estimator=mlp,
        param_grid=params,
        cv=tscv,
        scoring='accuracy',
        n_jobs=-1,
    )
    grid_search.fit(Xtrain, Ytrain)
    return grid_search.best_estimator_

In [4]:
# Separate the features from the label column for both partitions.
Xtrain, Ytrain, Xtest, Ytest = split_data(traindata, testdata)
# Hyperparameter search uses the training partition only.
best_mlp = find_best_mlp(Xtrain, Ytrain)
# Score the selected model on the test partition.
y_pred_mlp = best_mlp.predict(Xtest)
accuracy_mlp = accuracy_score(Ytest, y_pred_mlp)
# NOTE(review): the recorded accuracy is very high; worth checking for
# leakage between partitions or a feature that encodes the label -- TODO confirm.
print(f"Accuracy: {accuracy_mlp * 100:.2f}%")

Accuracy: 99.75%


In [5]:
# Show the selected estimator's repr to see which hyperparameters were chosen.
print(best_mlp)

MLPClassifier(activation='tanh', alpha=0.001, early_stopping=True,
              hidden_layer_sizes=(50,), learning_rate_init=0.01,
              random_state=42)
