# Using Multi-Layer Perceptron Classifier 

### Importing libraries

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error,confusion_matrix, precision_score, recall_score, f1_score, classification_report
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, RandomizedSearchCV
from sklearn.neural_network import MLPClassifier
from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler
from collections import Counter

### Reading and cleaning data

In [4]:
# Location of the labelled CSV.
# NOTE(review): this is a machine-specific absolute path; point it at your own
# copy of the file before running the notebook.
DATA_PATH = r"C:\Users\Aarushi Wagh\Downloads\code1\codes\isolation_forest_test_data.csv"

# Integer encoding of the string labels: 0 = 'Inlier', 1 = 'Outlier'.
LABEL_TO_INT = {'Inlier': 0, 'Outlier': 1}

# Columns used as model inputs, in the column order of X.
FEATURE_COLUMNS = [
    "app_cpu_apps.plugin_x",
    "app_cpu_tc-qos-helper_x",
    "app_cpu_ssh_x",
    "running",
    "used",
    "cached",
    "buffers",
]

data = pd.read_csv(DATA_PATH)

# Encode the labels with an explicit mapping. Unlike `replace`, `map` turns any
# value missing from the mapping into NaN, so an unexpected label is reported
# here instead of silently leaving strings in `y`.
encoded_label = data['label'].map(LABEL_TO_INT)
if encoded_label.isna().any():
    unexpected = data.loc[encoded_label.isna(), 'label'].unique().tolist()
    raise ValueError(f"Unexpected values in 'label' column: {unexpected}")
data['label'] = encoded_label.astype('int64')

# Feature matrix and target vector as plain NumPy arrays.
X = data[FEATURE_COLUMNS].values
y = data['label'].values

# Preview the encoded frame. This must be the last expression of the cell to
# be displayed; a bare `data.head()` in the middle of a cell shows nothing.
data.head()

### Splitting, oversampling and undersampling data

In [5]:
# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# proportions the same in the train and test partitions.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, stratify=y, random_state=1)

# Rebalance the TRAINING partition only; the test partition keeps its original
# class distribution. Both samplers are seeded (same seed as the split) so the
# resampled training set is identical on every run.
#   1. oversample class 1 until it is 0.5x the size of class 0
#   2. undersample class 0 until class 1 is 0.8x the size of class 0
oversample = RandomOverSampler(sampling_strategy=0.5, random_state=1)
undersample = RandomUnderSampler(sampling_strategy=0.8, random_state=1)
X_over, y_over = oversample.fit_resample(X_train, y_train)
X_both, y_both = undersample.fit_resample(X_over, y_over)

# Class counts before resampling, after oversampling, and after both steps.
print(Counter(y_train))
print(Counter(y_over))
print(Counter(y_both))

Counter({0: 3988, 1: 48})
Counter({0: 3988, 1: 1994})
Counter({0: 2492, 1: 1994})


### Grid search to select the best hyperparameters for the model

In [7]:
# Hyperparameter grid explored by the search (3 * 2 * 2 * 2 * 2 = 48
# combinations, each fitted once per CV fold).
parameter_space = {
    'hidden_layer_sizes': [(50,50,50), (50,100,50), (100,)],
    'activation': ['tanh', 'relu'],
    'solver': ['sgd', 'adam'],
    'alpha': [0.0001, 0.05],
    'learning_rate': ['constant','adaptive'],
}

# Base estimator for the search, created in this cell so it runs on a fresh
# kernel (the original referenced `mlp_clf`, which is only defined in a later
# cell). max_iter matches the value used for the final model; the seed makes
# the search repeatable.
# NOTE(review): the settings of the estimator originally passed here are not
# recoverable from the notebook - confirm max_iter=100 is what was intended.
base_mlp = MLPClassifier(max_iter=100, random_state=1)

# NOTE(review): the search is fitted on the raw, imbalanced X_train and no
# `scoring` is passed, whereas the final model is trained on the resampled
# X_both. Consider searching on the same data with an imbalance-aware metric.
clf = GridSearchCV(base_mlp, parameter_space, n_jobs=-1, cv=3)
clf.fit(X_train, y_train)

In [8]:
# Report the hyperparameter combination that scored best in the grid search.
best_params = clf.best_params_
print(best_params)

{'activation': 'relu', 'alpha': 0.0001, 'hidden_layer_sizes': (50, 100, 50), 'learning_rate': 'constant', 'solver': 'adam'}


### Training the model

In [11]:
# Final model, configured with the hyperparameters selected by the grid search.
# random_state pins weight initialisation and batch shuffling so the reported
# scores are the same on every run.
mlp_clf = MLPClassifier(
    max_iter=100,
    activation='relu',
    alpha=0.0001,
    hidden_layer_sizes=(50,100,50),
    learning_rate='constant',
    solver='adam',
    random_state=1,
)

# Train on the resampled (over- then under-sampled) training data ...
mlp_clf.fit(X_both, y_both)

# ... and evaluate on the untouched, still imbalanced, test partition.
y_predicted = mlp_clf.predict(X_test)
print(mlp_clf.score(X_test,y_test))

0.996039603960396


### Performance metrics

In [12]:
# Per-class precision / recall / F1 on the held-out test partition.
report = classification_report(y_true=y_test, y_pred=y_predicted)
print(report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00       998
           1       0.90      0.75      0.82        12

    accuracy                           1.00      1010
   macro avg       0.95      0.87      0.91      1010
weighted avg       1.00      1.00      1.00      1010



In [13]:
# Print, in order: the confusion matrix, then precision, recall and F1 for the
# positive class (label 1), all computed on the test partition.
for metric in (confusion_matrix, precision_score, recall_score, f1_score):
    print(metric(y_test, y_predicted))

[[997   1]
 [  3   9]]
0.9
0.75
0.8181818181818182
