In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import OneClassSVM
from sklearn.ensemble import IsolationForest
import collections, numpy
from sklearn.cluster import DBSCAN
from sklearn import metrics
from sklearn import tree
from sklearn.svm import SVC
import matplotlib.pyplot as plt

In [2]:
def load_data(path,target):
    """Read a CSV file and separate the label column from the feature columns.

    Parameters
    ----------
    path : str or file-like
        Passed straight to ``pd.read_csv``.
    target : str
        Name of the label column.

    Returns
    -------
    tuple
        ``(df, X, y)``: the full frame as read, every column except ``target``
        converted column by column with ``pd.to_numeric``, and the ``target``
        column itself.
    """
    frame = pd.read_csv(path)
    labels = frame[target]
    # Everything that is not the label is treated as a feature and must be
    # numeric; pd.to_numeric is applied to each remaining column.
    features = frame.drop(columns=target).apply(pd.to_numeric)
    return frame, features, labels
    
def load_mutated_data(path,target):
    """Load a CSV file exactly as ``load_data`` does.

    The previous body was a line-for-line copy of ``load_data``; delegating
    keeps the two from drifting apart while preserving this name for existing
    callers.

    Parameters
    ----------
    path : str or file-like
        Passed straight to ``pd.read_csv`` (via ``load_data``).
    target : str
        Name of the label column.

    Returns
    -------
    tuple
        ``(df, X, y)`` with the same meaning as in ``load_data``.
    """
    return load_data(path, target)

In [3]:
#Get Dataset
def get_train(i):
    """Load training file ``i`` and encode its labels as +1 / -1.

    Parameters
    ----------
    i : int
        Suffix of the file ``Data/NSL-KDD/modified/Train/train_<i>.csv``.

    Returns
    -------
    tuple
        ``(train_X, y_train)``: the numeric feature frame and a list holding
        ``1`` for every row whose ``defects`` value is ``'normal'`` and ``-1``
        for every other row.
    """
    file = 'Data/NSL-KDD/modified/Train/train_' + str(i) + '.csv'
    # The full frame is not needed here; only features and labels are used.
    _, train_X, train_y = load_data(file,'defects')
    y_train = [1 if instance == 'normal' else -1 for instance in train_y.values]
    return train_X,y_train

In [4]:
def get_test(j):
    """Load test file ``j`` and encode its labels as +1 / -1.

    Parameters
    ----------
    j : int
        Suffix of the file ``Data/NSL-KDD/modified/Test/test_<j>.csv``.

    Returns
    -------
    tuple
        ``(test_X, y_test)``: the numeric feature frame and a list holding
        ``1`` for every row whose ``defects`` value is ``'normal'`` and ``-1``
        for every other row.
    """
    file = 'Data/NSL-KDD/modified/Test/test_' + str(j) + '.csv'
    # The full frame is not needed here; only features and labels are used.
    _, test_X, test_y = load_mutated_data(file,'defects')
    y_test = [1 if instance == 'normal' else -1 for instance in test_y.values]
    return test_X,y_test

In [23]:
# Decision tree: fit an entropy-criterion tree on the mixed training file.
# The target is the `defects` column as read from the CSV (not the +1/-1
# encoding produced by get_train).
file = 'Data/NSL-KDD/modified/Train/mixed_train/train_0.csv'
train_df, train_X, train_y = load_data(path=file, target='defects')
clf_tree = tree.DecisionTreeClassifier(criterion='entropy')
clf_tree.fit(X=train_X, y=train_y)

DecisionTreeClassifier(class_weight=None, criterion='entropy', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')

In [24]:
# Evaluate the fitted tree on a separate file and print a per-class report.
# Labels are compared as they appear in the `defects` column.
file = 'Data/NSL-KDD/modified/Test/5_amonaly/train_set1.csv'
test_df, test_X, test_y = load_data(path=file, target='defects')
predicted = clf_tree.predict(X=test_X)
print(metrics.classification_report(y_true=test_y, y_pred=predicted))

              precision    recall  f1-score   support

   ftp_write       0.33      0.50      0.40         2
     ipsweep       1.00      1.00      1.00       428
      normal       0.00      0.00      0.00         0
        perl       0.00      0.00      0.00         1
         phf       1.00      0.50      0.67         2
       smurf       1.00      0.99      1.00       563
   snmpguess       1.00      0.99      1.00       174
         spy       0.00      0.00      0.00         1
        worm       1.00      1.00      1.00         1
      xsnoop       0.00      0.00      0.00         2

   micro avg       0.99      0.99      0.99      1174
   macro avg       0.53      0.50      0.51      1174
weighted avg       1.00      0.99      0.99      1174



  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


In [None]:
#### SVM (sigmoid kernel, balanced class weights)
# `y_train` only ever existed as a local inside get_train, so it is undefined at
# notebook scope. Build it here from `train_y` using the same rule get_train
# applies: 'normal' -> 1, anything else -> -1.
# NOTE(review): this assumes `train_y` still holds the raw `defects` labels
# loaded by the decision-tree cell and that the binary encoding is what was
# intended — confirm.
y_train = [1 if label == 'normal' else -1 for label in train_y]
# NOTE(review): the name `clf_tree` is reused for an SVC and replaces the
# decision tree fitted earlier; kept because the next cell predicts with it.
clf_tree = SVC(kernel = 'sigmoid',class_weight = 'balanced')
clf_tree.fit(train_X, y_train)

In [None]:
# `y_test` only ever existed as a local inside get_test, so it is undefined at
# notebook scope. Build it here from `test_y` using the same rule get_test
# applies: 'normal' -> 1, anything else -> -1.
# NOTE(review): this assumes `test_y` still holds the raw `defects` labels and
# that `clf_tree` was fitted on +1/-1 targets — confirm.
y_test = [1 if label == 'normal' else -1 for label in test_y]
predicted = clf_tree.predict(test_X)
print(metrics.classification_report(y_test, predicted))

In [None]:
# Weighted-average precision, recall and F1 computed from the `test_labels` and
# `predicted` attributes of `cluster_tree[cluster_id]`.
# NOTE(review): neither `cluster_tree` nor `cluster_id` is defined anywhere in the
# visible notebook, so this cell raises NameError on a fresh kernel. It looks
# like a leftover from other code — confirm whether it should be removed or
# preceded by a cell that builds `cluster_tree`.
precision = metrics.precision_score(cluster_tree[cluster_id].test_labels, 
                                                cluster_tree[cluster_id].predicted,average='weighted')
recall = metrics.recall_score(cluster_tree[cluster_id].test_labels, 
                                          cluster_tree[cluster_id].predicted,average='weighted')
f1_Score = metrics.f1_score(cluster_tree[cluster_id].test_labels, 
                                        cluster_tree[cluster_id].predicted,average='weighted')

In [None]:
# Baseline row counts subtracted from each file's row count to obtain the
# "anomaly" columns below.
# NOTE(review): presumably the sizes of the train/test sets before extra
# anomaly rows were added — confirm where 18143 and 17878 come from.
original_train = 18143
original_test = 17878

# The test files do not depend on the training file, so read each one once
# instead of re-reading all of them for every training run. This trades memory
# (all test frames are held at the same time) for far fewer CSV reads.
test_sets = [(j, get_test(j)) for j in range(1,36)]

perf_score = []
for i in range(1,39):
    train_X, train_y = get_train(i)
    clf_tree = tree.DecisionTreeClassifier(criterion = 'entropy')
    clf_tree.fit(train_X, train_y)
    train_anomaly = train_X.shape[0] - original_train
    for j, (test_X, test_y) in test_sets:
        test_anomaly = test_X.shape[0] - original_test
        predicted = clf_tree.predict(test_X)
        # One pass yields all three weighted-average scores; the fourth item
        # (support) is None when an average is requested, so it is dropped.
        precision, recall, f1_Score = metrics.precision_recall_fscore_support(
            test_y, predicted, average='weighted')[:3]
        # NOTE(review): the recorded values are i+1 / j+1, not the file
        # suffixes — presumably file k holds k anomaly classes plus 'normal';
        # confirm.
        perf_score.append([i+1,j+1,train_anomaly,test_anomaly,precision,recall,f1_Score])

In [None]:
# Column names for the seven values appended per (train, test) pair, listed in
# the same order in which they are appended to `perf_score`.
# NOTE(review): 'Tested_on_number _of_class' contains a space before '_of';
# it is kept verbatim because it ends up as a header in the saved CSV.
perf_score_columns = [
    'Trained_on_number_of_class',
    'Tested_on_number _of_class',
    'Anomaly_on_Trained_Data',
    'Anomaly_on_Tested_Data',
    'Precision',
    'Recall',
    'F1-Score',
]
perf_score_df = pd.DataFrame(data=perf_score, columns=perf_score_columns)

In [None]:
# Round only the three metric columns to two decimals; the count columns are
# left untouched.
perf_score_df = perf_score_df.round({'Precision':2,'Recall':2,'F1-Score':2})
# index=False: the default integer index carries no information, and writing it
# makes pd.read_csv return an extra 'Unnamed: 0' column when the file is
# loaded again.
perf_score_df.to_csv('Data/NSL-KDD/modified/Results/root_node.csv', index=False)

In [None]:
# style
# 'seaborn-darkgrid' was renamed to 'seaborn-v0_8-darkgrid' in Matplotlib 3.6;
# use whichever of the two this installation lists, otherwise keep the current
# style.
for style_name in ('seaborn-darkgrid', 'seaborn-v0_8-darkgrid'):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break

# create a color palette
palette = plt.get_cmap('Set1')

fig, ax = plt.subplots()
# Iterate over the values actually stored in the column. Matching against
# 1..nunique misses data whenever the stored values do not start at 1 (the
# scoring loop records i+1 for i in 1..38, i.e. 2..39).
trained_values = sorted(perf_score_df['Trained_on_number_of_class'].unique())
for num, n_classes in enumerate(trained_values, start=1):
    sub_df = perf_score_df[perf_score_df['Trained_on_number_of_class'] == n_classes]
    score = sub_df['F1-Score'].values
    # Set1 is a small qualitative palette; wrap the index so that lines beyond
    # its size cycle through the colours instead of all getting the last one.
    ax.plot(score, marker='', color=palette(num % palette.N), linewidth=1, alpha=0.9,
            label='Trained On ' + str(n_classes) + ' Classes')
# Add legend
#ax.legend(loc=2, ncol=2)

# Add titles
ax.set_title("Spaghetti plot", loc='left', fontsize=12, fontweight=0, color='orange')
ax.set_xlabel("Tested Data Number of classes")
ax.set_ylabel("Score")

plt.show()

In [None]:
# Forty x-axis tick positions spaced 35 apart: 0, 35, 70, ...
# NOTE(review): 35 equals the number of test files scored per training file in
# the scoring loop (range(1,36)), so each tick presumably marks where a new
# training file's block of scores begins — confirm.
ticks = list(range(0, 40 * 35, 35))

In [None]:
# Reload the saved results so this cell can be run without recomputing scores.
perf_score_df = pd.read_csv('Data/NSL-KDD/modified/Results/root_node.csv')
# style
plt.style.use('fivethirtyeight')

score = perf_score_df['F1-Score'].values
# Very wide canvas: the whole F1 series is drawn as a single line, in file order.
fig, ax = plt.subplots(figsize = (230,30))
ax.plot(score,marker='', color='black', linewidth=4, alpha=0.9, label='Trained On Classes')
ax.tick_params(direction='out', length=6, width=2, colors='r',
               grid_color='black', grid_alpha=0.5)
# `ticks` comes from the preceding cell (multiples of 35).
ax.set_xticks(ticks)
# Label the figure so it can be read on its own.
ax.set_title('F1-Score for every row of root_node.csv')
ax.set_xlabel('Row index in root_node.csv')
ax.set_ylabel('F1-Score')
plt.show()

In [None]:
# Show the style names that plt.style.use accepts on this Matplotlib installation.
print(plt.style.available)