In [None]:
# Notebook setup: silence library warnings, declare the column schema and load
# the training / test CSV files (both are read as header-less files).
import warnings
warnings.filterwarnings('ignore')

import pandas as pd

# Show every column when a frame is displayed.
pd.set_option('display.max_columns', None)
# pd.set_option('max_rows', None)

# 41 feature names followed by the target column 'label'.
col_names = [
    'duration', 'protocol_type', 'service', 'flag',
    'src_bytes', 'dst_bytes', 'land', 'wrong_fragment',
    'urgent', 'hot', 'num_failed_logins', 'logged_in',
    'num_compromised', 'root_shell', 'su_attempted', 'num_root',
    'num_file_creations', 'num_shells', 'num_access_files', 'num_outbound_cmds',
    'is_host_login', 'is_guest_login', 'count', 'srv_count',
    'serror_rate', 'srv_serror_rate', 'rerror_rate', 'srv_rerror_rate',
    'same_srv_rate', 'diff_srv_rate', 'srv_diff_host_rate', 'dst_host_count',
    'dst_host_srv_count', 'dst_host_same_srv_rate', 'dst_host_diff_srv_rate',
    'dst_host_same_src_port_rate', 'dst_host_srv_diff_host_rate',
    'dst_host_serror_rate', 'dst_host_srv_serror_rate',
    'dst_host_rerror_rate', 'dst_host_srv_rerror_rate',
    'label',
]

# Both files are parsed the same way: no header row, names taken from col_names.
df, df_test = (
    pd.read_csv(csv_path, header=None, names=col_names)
    for csv_path in ('KDDTrain.csv', 'KDDTest.csv')
)

print('Dimensions of training set:', df.shape)
print('Dimensions of test set:', df_test.shape)

In [None]:
# Preview the raw training frame.
df

In [None]:
# Summary statistics of the training frame (pandas `describe` defaults).
df.describe()

In [None]:
# Preview the raw test frame.
df_test

In [None]:
# Summary statistics of the test frame (pandas `describe` defaults).
df_test.describe()

In [None]:
# Print how many rows carry each raw label, first for the training frame and
# then for the test frame.
for heading, frame in (
    ('Training set label distribution', df),
    ('\nTest set label distribution', df_test),
):
    print(heading)
    label_counts = frame['label'].value_counts()
    print(label_counts)

In [None]:
# Report every object-dtype column of the training frame together with its
# number of distinct values, then the five most frequent 'service' values.
print('Categorical features in training set:')
train_object_columns = [name for name in df.columns if df[name].dtypes == 'object']
for col_name in train_object_columns:
    unique_cat = df[col_name].nunique(dropna=False)
    print('Feature {col_name} has {unique_cat} categories'.format(col_name=col_name, unique_cat=unique_cat))

print('\nDistribution of categories in service:')
train_service_counts = df['service'].value_counts()
print(train_service_counts.sort_values(ascending=False).head())

In [None]:
# Same report as for the training frame, computed on the test frame.
print('categorical features in test set:')
test_object_columns = [name for name in df_test.columns if df_test[name].dtypes == 'object']
for col_name in test_object_columns:
    unique_cat = df_test[col_name].nunique(dropna=False)
    print('Feature {col_name} has {unique_cat} categories'.format(col_name=col_name, unique_cat=unique_cat))

print('\nDistribution of categories in service:')
test_service_counts = df_test['service'].value_counts()
print(test_service_counts.sort_values(ascending=False).head())

In [None]:
from sklearn.preprocessing import LabelEncoder

# The three string-valued features that get label- and one-hot-encoded below.
categorical_columns = ['protocol_type', 'service', 'flag']

# Pull just those columns out of the training and the test frame.
df_categorical_values, testdf_categorical_values = (
    frame[categorical_columns] for frame in (df, df_test)
)

In [None]:
# First rows of the categorical columns taken from the training frame.
df_categorical_values.head()

In [None]:
# First rows of the categorical columns taken from the test frame.
testdf_categorical_values.head()

In [None]:
# Build the names of the one-hot columns as '<prefix><category>', with the
# categories of each feature in sorted order.

# protocol_type
string1 = 'Protocol_type_'
unique_protocol = sorted(df['protocol_type'].unique())
unique_protocol2 = [f'{string1}{value}' for value in unique_protocol]

# service
string2 = 'service_'
unique_service = sorted(df['service'].unique())
unique_service2 = [f'{string2}{value}' for value in unique_service]

# flag
string3 = 'flag_'
unique_flag = sorted(df['flag'].unique())
unique_flag2 = [f'{string3}{value}' for value in unique_flag]

# put together
dumcols = [*unique_protocol2, *unique_service2, *unique_flag2]
print(dumcols)

# Test-set variant: only the service names are recomputed from the test frame;
# the protocol and flag names are reused from the training set.
# NOTE(review): this assumes the test set has the same protocol_type and flag
# categories as the training set — confirm.
unique_service_test = sorted(df_test['service'].unique())
unique_service2_test = [f'{string2}{value}' for value in unique_service_test]
testdumcols = [*unique_protocol2, *unique_service2_test, *unique_flag2]

In [None]:
df_categorical_values_enc = df_categorical_values.apply(LabelEncoder().fit_transform)
df_categorical_values_enc.head()


In [None]:
# test set
testdf_categorical_values_enc = testdf_categorical_values.apply(LabelEncoder().fit_transform)
testdf_categorical_values_enc.head()

In [None]:
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the integer category codes produced by the LabelEncoder step.
enc = OneHotEncoder()
df_categorical_values_encenc = enc.fit_transform(df_categorical_values_enc)
# Test set.
# NOTE(review): `fit_transform` re-fits `enc` on the test codes, so the test
# matrix is encoded against the test set's own categories rather than the
# training ones — confirm this is intended (a later cell adds the service
# columns that are missing from the test set).
testdf_categorical_values_encenc = enc.fit_transform(testdf_categorical_values_enc)


In [None]:
df_cat_data = pd.DataFrame(df_categorical_values_encenc.toarray(), columns=dumcols)
df_cat_data.head()

In [None]:
testdf_cat_data = pd.DataFrame(testdf_categorical_values_encenc.toarray(), columns=testdumcols)
testdf_cat_data.head()

In [None]:
# Dummy-column names of the services that occur in the training set but not in
# the test set; a later cell adds these columns to the test frame.
trainservice = df['service'].tolist()
testservice = df_test['service'].tolist()
missing_services = set(trainservice) - set(testservice)
string = 'service_'
# Sorted on purpose: iterating a set of strings yields an order that can change
# from one interpreter run to the next, and this list determines the order in
# which columns are appended to the test frame.
difference = [string + x for x in sorted(missing_services)]
difference

In [None]:
# (rows, one-hot columns) of the training dummies.
df_cat_data.shape

In [None]:
# (rows, one-hot columns) of the test dummies, before the missing service
# columns are added.
testdf_cat_data.shape

In [None]:
# Give the test dummies exactly the training dummy columns, in the training
# order. Columns that the test set lacks (the services listed in `difference`)
# are created and filled with 0. Appending them at the end instead would leave
# the test features in a different column order than the training features,
# and the models below consume plain arrays where only the position matters.
# Any dummy column that exists only in the test set is dropped, since the
# training data has no counterpart for it.
testdf_cat_data = testdf_cat_data.reindex(columns=df_cat_data.columns, fill_value=0)

testdf_cat_data.shape

In [None]:
# Attach the one-hot columns (joined on the row index) and remove the original
# string columns they replace.
encoded_away = ['flag', 'protocol_type', 'service']

newdf = df.join(df_cat_data).drop(columns=encoded_away)

newdf_test = df_test.join(testdf_cat_data).drop(columns=encoded_away)

In [None]:
# Shape of the training frame after one-hot encoding.
newdf.shape

In [None]:
# Shape of the test frame after one-hot encoding.
newdf_test.shape

In [None]:
# Collapse the raw label names into five integer classes:
#   0 = normal, 1 = DoS, 2 = Probe, 3 = R2L, 4 = U2R
# The mapping is declared once and applied to both frames (it used to be
# written out twice, which invites the two copies drifting apart).
labels_by_class = {
    0: ['normal'],
    1: ['neptune', 'back', 'land', 'pod', 'smurf', 'teardrop', 'mailbomb', 'apache2',
        'processtable', 'udpstorm', 'worm'],
    2: ['ipsweep', 'nmap', 'portsweep', 'satan', 'mscan', 'saint'],
    3: ['ftp_write', 'guess_passwd', 'imap', 'multihop', 'phf', 'spy', 'warezclient',
        'warezmaster', 'sendmail', 'named', 'snmpgetattack', 'snmpguess', 'xlock', 'xsnoop',
        'httptunnel'],
    4: ['buffer_overflow', 'loadmodule', 'perl', 'rootkit', 'ps', 'sqlattack', 'xterm'],
}
label_to_class = {
    label_name: class_id
    for class_id, label_names in labels_by_class.items()
    for label_name in label_names
}

labeldf = newdf['label']
labeldf_test = newdf_test['label']

# `astype('int64')` makes the integer dtype explicit instead of relying on
# `replace` to downcast the object column implicitly. It also fails loudly
# (ValueError) if a label name is missing from the mapping, rather than letting
# a stray string travel into every per-category subset.
newlabeldf = labeldf.replace(label_to_class).astype('int64')
newlabeldf_test = labeldf_test.replace(label_to_class).astype('int64')

newdf['label'] = newlabeldf
newdf_test['label'] = newlabeldf_test

In [None]:
# First encoded labels of the training frame.
newdf['label'].head()

In [None]:
# First encoded labels of the test frame.
newdf_test['label'].head()

In [None]:
# One binary problem per attack category: each subset keeps the rows whose
# label is NOT in the matching to_drop_* list, i.e. class 0 plus one attack class.
to_drop_DoS = [2, 3, 4]
to_drop_Probe = [1, 3, 4]
to_drop_R2L = [1, 2, 4]
to_drop_U2R = [1, 2, 3]


def _without_classes(frame, unwanted):
    """Return the rows of `frame` whose 'label' value is not in `unwanted`."""
    keep_row = ~frame['label'].isin(unwanted)
    return frame[keep_row]


DoS_df = _without_classes(newdf, to_drop_DoS)
Probe_df = _without_classes(newdf, to_drop_Probe)
R2L_df = _without_classes(newdf, to_drop_R2L)
U2R_df = _without_classes(newdf, to_drop_U2R)

print('Train:')
print('Dimensions of DoS:', DoS_df.shape)
print('Dimensions of Probe:', Probe_df.shape)
print('Dimensions of R2L:', R2L_df.shape)
print('Dimensions of U2R:', U2R_df.shape)


DoS_df_test = _without_classes(newdf_test, to_drop_DoS)
Probe_df_test = _without_classes(newdf_test, to_drop_Probe)
R2L_df_test = _without_classes(newdf_test, to_drop_R2L)
U2R_df_test = _without_classes(newdf_test, to_drop_U2R)

print('Test:')
print('Dimensions of DoS:', DoS_df_test.shape)
print('Dimensions of Probe:', Probe_df_test.shape)
print('Dimensions of R2L:', R2L_df_test.shape)
print('Dimensions of U2R:', U2R_df_test.shape)

In [None]:
# Split every per-category frame into features (X_*) and target (Y_*).
# (The scaling itself happens in the next cell.)
#
# `drop(columns='label')` replaces the former `drop('label', 1)`: passing the
# axis positionally is no longer accepted by pandas 2.x and raises a TypeError.

X_DoS = DoS_df.drop(columns='label')
Y_DoS = DoS_df['label']
X_Probe = Probe_df.drop(columns='label')
Y_Probe = Probe_df['label']
X_R2L = R2L_df.drop(columns='label')
Y_R2L = R2L_df['label']
X_U2R = U2R_df.drop(columns='label')
Y_U2R = U2R_df['label']

X_DoS_test = DoS_df_test.drop(columns='label')
Y_DoS_test = DoS_df_test['label']
X_Probe_test = Probe_df_test.drop(columns='label')
Y_Probe_test = Probe_df_test['label']
X_R2L_test = R2L_df_test.drop(columns='label')
Y_R2L_test = R2L_df_test['label']
X_U2R_test = U2R_df_test.drop(columns='label')
Y_U2R_test = U2R_df_test['label']

# Feature names in column order; used later to translate selected column
# indices back into names.
colNames = list(X_DoS)
colNames_test = list(X_DoS_test)

In [None]:
from sklearn import preprocessing


def _standardize(train_df, test_df):
    """Standardise one train/test pair with statistics learned from the training rows.

    Parameters
    ----------
    train_df, test_df : DataFrame
        Per-category frames that contain every column listed in `colNames`
        (the 'label' column is ignored).

    Returns
    -------
    (scaler, X_train, X_test)
        The fitted StandardScaler and the two scaled feature arrays, both with
        their columns in `colNames` order.
    """
    train_features = train_df[colNames]
    scaler = preprocessing.StandardScaler().fit(train_features)
    # The test rows are transformed with the *training* mean / std. Fitting a
    # separate scaler on the test set (as was done before) puts the test
    # features on a different scale than the one the classifiers are trained on.
    # Selecting `colNames` also guarantees the same column order as in training.
    return scaler, scaler.transform(train_features), scaler.transform(test_df[colNames])


# Reading from the *_df frames (not from X_*) keeps this cell re-runnable:
# X_* become NumPy arrays once the cell has run.
# The former test-only scalers (scaler5 .. scaler8) are no longer created.
scaler1, X_DoS, X_DoS_test = _standardize(DoS_df, DoS_df_test)
scaler2, X_Probe, X_Probe_test = _standardize(Probe_df, Probe_df_test)
scaler3, X_R2L, X_R2L_test = _standardize(R2L_df, R2L_df_test)
scaler4, X_U2R, X_U2R_test = _standardize(U2R_df, U2R_df_test)

In [None]:
# Per-column standard deviation of the scaled DoS training features.
X_DoS.std(axis=0) #Check standard deviation

In [None]:
# Per-column standard deviation of the scaled Probe training features.
X_Probe.std(axis=0)

In [None]:
# Per-column standard deviation of the scaled R2L training features.
X_R2L.std(axis=0)

In [None]:
# Per-column standard deviation of the scaled U2R training features.
X_U2R.std(axis=0)

In [None]:
# Univariate Feature Selection
from sklearn.feature_selection import VarianceThreshold

import numpy as np
from sklearn.feature_selection import SelectPercentile, f_classif

# Ignore NumPy divide / invalid floating-point warnings from here on.
np.seterr(divide='ignore', invalid='ignore')

# Keep the top 10 percent of the features according to the f_classif score.
selector = SelectPercentile(score_func=f_classif, percentile=10)
selector.fit(X_DoS, Y_DoS)
X_newDoS = selector.transform(X_DoS)
print(X_newDoS.shape)

# Boolean mask of the kept columns -> their positions -> their names.
true = selector.get_support()
newcolindex_DoS = np.flatnonzero(true).tolist()
newcolname_DoS = [colNames[position] for position in newcolindex_DoS]

In [None]:
# Univariate selection for Probe (same selector object, re-fitted).
X_newProbe = selector.fit_transform(X_Probe,Y_Probe)
print(X_newProbe.shape)
# Refresh the mask after the re-fit: without this line `true` still holds the
# support of the previous fit, and the Probe indices / names would simply be a
# copy of the previous category's selection.
true = selector.get_support()
newcolindex_Probe = [i for i, x in enumerate(true) if x]
newcolname_Probe = list(colNames[i] for i in newcolindex_Probe)

In [None]:
# Univariate selection for R2L (same selector object, re-fitted).
X_newR2L = selector.fit_transform(X_R2L,Y_R2L)
print(X_newR2L.shape)

# Refresh the mask after the re-fit: without this line `true` still holds the
# support of an earlier fit, and the R2L indices / names would not describe
# the columns that were actually selected for R2L.
true = selector.get_support()
newcolindex_R2L=[i for i, x in enumerate(true) if x]
newcolname_R2L=list( colNames[i] for i in newcolindex_R2L)

In [None]:
# Univariate selection for U2R: re-fit the selector, then read back which
# columns it kept.
selector.fit(X_U2R, Y_U2R)
X_newU2R = selector.transform(X_U2R)
print(X_newU2R.shape)

true = selector.get_support()
newcolindex_U2R = np.flatnonzero(true).tolist()
newcolname_U2R = [colNames[position] for position in newcolindex_U2R]

In [None]:
# Display the feature names recorded for DoS by the univariate selection.
print('Univariate Features selected for DoS:')
newcolname_DoS

In [None]:
# Display the feature names recorded for Probe by the univariate selection.
print('Univariate Features selected for Probe:')
newcolname_Probe

In [None]:
# Display the feature names recorded for R2L by the univariate selection.
print('Univariate Features selected for R2L:')
newcolname_R2L

In [None]:
# Display the feature names recorded for U2R by the univariate selection.
print('Univariate Features selected for U2R:')
newcolname_U2R

In [None]:
# Recursive Feature Elimination, Get Importance

from sklearn.feature_selection import RFE
from sklearn.tree import DecisionTreeClassifier
clf = DecisionTreeClassifier(random_state=0)

rfe = RFE(clf, n_features_to_select=1)
rfe.fit(X_newDoS, Y_DoS)
print ('DoS Features sorted by their rank: ')
sorted(zip(map(lambda x: round(x, 4), rfe.ranking_), newcolname_DoS))

In [None]:
rfe.fit(X_newProbe, Y_Probe)
print ('Probe Features sorted by their rank:')
sorted(zip(map(lambda x: round(x, 4), rfe.ranking_), newcolname_Probe))

In [None]:
rfe.fit(X_newR2L, Y_R2L)
 
print ('R2L Features sorted by their rank:')
sorted(zip(map(lambda x: round(x, 4), rfe.ranking_), newcolname_R2L))

In [None]:
rfe.fit(X_newU2R, Y_U2R)
 
print ('U2R Features sorted by their rank:')
sorted(zip(map(lambda x: round(x, 4), rfe.ranking_), newcolname_U2R))

In [None]:
# Recursive Feature Elimination, select 13 features each of 122

# Decision-tree based RFE that removes one feature per step until 13 remain.
rfe = RFE(estimator=clf, n_features_to_select=13, step=1)
X_rfeDoS = rfe.fit(X_DoS, Y_DoS).transform(X_DoS)

# Boolean mask of the kept columns -> their positions -> their names.
true = rfe.support_
rfecolindex_DoS = np.flatnonzero(true).tolist()
rfecolname_DoS = [colNames[position] for position in rfecolindex_DoS]

In [None]:
# RFE for Probe (same RFE object, re-fitted).
rfe.fit(X_Probe, Y_Probe)
X_rfeProbe = rfe.transform(X_Probe)
# Read the support of *this* fit. Without this line `true` still refers to the
# mask of an earlier fit, so the indices below (later used to slice the Probe
# test matrix) would not match the columns kept in X_rfeProbe.
true = rfe.support_
rfecolindex_Probe = [i for i, x in enumerate(true) if x]
rfecolname_Probe = list(colNames[i] for i in rfecolindex_Probe)

In [None]:
# RFE for R2L (same RFE object, re-fitted).
rfe.fit(X_R2L, Y_R2L)
X_rfeR2L = rfe.transform(X_R2L)
# Read the support of *this* fit. Without this line `true` still refers to the
# mask of an earlier fit, so the indices below (later used to slice the R2L
# test matrix) would not match the columns kept in X_rfeR2L.
true = rfe.support_
rfecolindex_R2L = [i for i, x in enumerate(true) if x]
rfecolname_R2L = list(colNames[i] for i in rfecolindex_R2L)

In [None]:
# RFE for U2R (same RFE object, re-fitted).
rfe.fit(X_U2R, Y_U2R)
X_rfeU2R=rfe.transform(X_U2R)
# Read the support of *this* fit. Without this line `true` still refers to the
# mask of an earlier fit, so the indices below (later used to slice the U2R
# test matrix) would not match the columns kept in X_rfeU2R.
true = rfe.support_
rfecolindex_U2R = [i for i, x in enumerate(true) if x]
rfecolname_U2R = list(colNames[i] for i in rfecolindex_U2R)

In [None]:
# Display the feature names recorded for DoS by the 13-feature RFE.
print('Features selected for DoS:')
rfecolname_DoS

In [None]:
# Display the feature names recorded for Probe by the 13-feature RFE.
print('Features selected for Probe:')
rfecolname_Probe

In [None]:
# Display the feature names recorded for R2L by the 13-feature RFE.
print('Features selected for R2L:')
rfecolname_R2L

In [None]:
# Display the feature names recorded for U2R by the 13-feature RFE.
print('Features selected for U2R:')
rfecolname_U2R

In [None]:
from sklearn.svm import SVC
import time
start = time.time()
clf_DoS = SVC()
clf_DoS.fit(X_DoS, Y_DoS)
end = time.time() - start
end

In [None]:
start = time.time()
clf_Probe = SVC()
clf_Probe.fit(X_Probe, Y_Probe)
end = time.time() - start
end

In [None]:
start = time.time()
clf_R2L = SVC()
clf_R2L.fit(X_R2L, Y_R2L)
end = time.time() - start
end

In [None]:
start = time.time()
clf_U2R = SVC()
clf_U2R.fit(X_U2R, Y_U2R)
end = time.time() - start
end

In [None]:
start = time.time()
clf_rfeDoS = SVC()
clf_rfeDoS.fit(X_rfeDoS, Y_DoS)
end = time.time() - start
end

In [None]:
start = time.time()
clf_rfeProbe = SVC()
clf_rfeProbe.fit(X_rfeProbe, Y_Probe)
end = time.time() - start
end

In [None]:
start = time.time()
clf_rfeR2L = SVC()
clf_rfeR2L.fit(X_rfeR2L, Y_R2L)
end = time.time() - start
end

In [None]:
start = time.time()
clf_rfeU2R = SVC()
clf_rfeU2R.fit(X_rfeU2R, Y_U2R)
end = time.time() - start
end

In [None]:
start = time.time()
Y_DoS_pred = clf_DoS.predict(X_DoS_test)
end = time.time() - start
print(end)
pd.crosstab(Y_DoS_test, Y_DoS_pred, rownames=['Actual attacks'], colnames=['Predicted attacks'])

In [None]:
start = time.time()
Y_Probe_pred = clf_Probe.predict(X_Probe_test)
end = time.time() - start
print(end)
pd.crosstab(Y_Probe_test, Y_Probe_pred, rownames=['Actual attacks'], colnames=['Predicted attacks'])

In [None]:
start = time.time()
Y_R2L_pred = clf_R2L.predict(X_R2L_test)
end = time.time() - start
print(end)
pd.crosstab(Y_R2L_test, Y_R2L_pred, rownames=['Actual attacks'], colnames=['Predicted attacks'])

In [None]:
start = time.time()
Y_U2R_pred = clf_U2R.predict(X_U2R_test)
end = time.time() - start
print(end)
pd.crosstab(Y_U2R_test, Y_U2R_pred, rownames=['Actual attacks'], colnames=['Predicted attacks'])

In [None]:
from sklearn.model_selection import cross_val_score
from sklearn import metrics
accuracy = cross_val_score(clf_DoS, X_DoS_test, Y_DoS_test, cv=10, scoring='accuracy')
print('Accuracy DoS: %0.5f (+/- %0.5f)' % (accuracy.mean(), accuracy.std() * 2))
precision = cross_val_score(clf_DoS, X_DoS_test, Y_DoS_test, cv=10, scoring='precision')
print('Precision DoS: %0.5f (+/- %0.5f)' % (precision.mean(), precision.std() * 2))
recall = cross_val_score(clf_DoS, X_DoS_test, Y_DoS_test, cv=10, scoring='recall')
print('Recall DoS: %0.5f (+/- %0.5f)' % (recall.mean(), recall.std() * 2))
f = cross_val_score(clf_DoS, X_DoS_test, Y_DoS_test, cv=10, scoring='f1')
print('F-measure DoS: %0.5f (+/- %0.5f)' % (f.mean(), f.std() * 2))

In [None]:
accuracy = cross_val_score(clf_Probe, X_Probe_test, Y_Probe_test, cv=10, scoring='accuracy')
print('Accuracy Probe: %0.5f (+/- %0.5f)' % (accuracy.mean(), accuracy.std() * 2))
precision = cross_val_score(clf_Probe, X_Probe_test, Y_Probe_test, cv=10, scoring='precision_macro')
print('Precision Probe: %0.5f (+/- %0.5f)' % (precision.mean(), precision.std() * 2))
recall = cross_val_score(clf_Probe, X_Probe_test, Y_Probe_test, cv=10, scoring='recall_macro')
print('Recall Probe: %0.5f (+/- %0.5f)' % (recall.mean(), recall.std() * 2))
f = cross_val_score(clf_Probe, X_Probe_test, Y_Probe_test, cv=10, scoring='f1_macro')
print('F-measure Probe: %0.5f (+/- %0.5f)' % (f.mean(), f.std() * 2))

In [None]:
accuracy = cross_val_score(clf_R2L, X_R2L_test, Y_R2L_test, cv=10, scoring='accuracy')
print('Accuracy R2L: %0.5f (+/- %0.5f)' % (accuracy.mean(), accuracy.std() * 2))
precision = cross_val_score(clf_R2L, X_R2L_test, Y_R2L_test, cv=10, scoring='precision_macro')
print('Precision R2L: %0.5f (+/- %0.5f)' % (precision.mean(), precision.std() * 2))
recall = cross_val_score(clf_R2L, X_R2L_test, Y_R2L_test, cv=10, scoring='recall_macro')
print('Recall R2L: %0.5f (+/- %0.5f)' % (recall.mean(), recall.std() * 2))
f = cross_val_score(clf_R2L, X_R2L_test, Y_R2L_test, cv=10, scoring='f1_macro')
print('F-measure R2L: %0.5f (+/- %0.5f)' % (f.mean(), f.std() * 2))

In [None]:
accuracy = cross_val_score(clf_U2R, X_U2R_test, Y_U2R_test, cv=10, scoring='accuracy')
print('Accuracy U2R: %0.5f (+/- %0.5f)' % (accuracy.mean(), accuracy.std() * 2))
precision = cross_val_score(clf_U2R, X_U2R_test, Y_U2R_test, cv=10, scoring='precision_macro')
print('Precision U2R: %0.5f (+/- %0.5f)' % (precision.mean(), precision.std() * 2))
recall = cross_val_score(clf_U2R, X_U2R_test, Y_U2R_test, cv=10, scoring='recall_macro')
print('Recall U2R: %0.5f (+/- %0.5f)' % (recall.mean(), recall.std() * 2))
f = cross_val_score(clf_U2R, X_U2R_test, Y_U2R_test, cv=10, scoring='f1_macro')
print('F-measure U2R: %0.5f (+/- %0.5f)' % (f.mean(), f.std() * 2))

In [None]:
# import matplotlib.pyplot as plt
# from sklearn.feature_selection import RFECV
# from sklearn.model_selection import StratifiedKFold

# rfecv_DoS = RFECV(estimator=clf_DoS, step=1, cv=10, scoring='accuracy')
# rfecv_DoS.fit(X_DoS_test, Y_DoS_test)

# plt.figure()
# plt.xlabel('Number of features selected')
# plt.ylabel('Cross validation score')
# plt.title('RFECV DoS')
# plt.plot(range(1, len(rfecv_DoS.grid_scores_) + 1), rfecv_DoS.grid_scores_)
# plt.show()

In [None]:
# rfecv_Probe = RFECV(estimator=clf_Probe, step=1, cv=10, scoring='accuracy')
# rfecv_Probe.fit(X_Probe_test, Y_Probe_test)

# plt.figure()
# plt.xlabel('Number of features selected')
# plt.ylabel('Cross validation score')
# plt.title('RFECV Probe')
# plt.plot(range(1, len(rfecv_Probe.grid_scores_) + 1), rfecv_Probe.grid_scores_)
# plt.show()

In [None]:
# rfecv_R2L = RFECV(estimator=clf_R2L, step=1, cv=10, scoring='accuracy')
# rfecv_R2L.fit(X_R2L_test, Y_R2L_test)
# # Plot number of features VS. cross-validation scores
# plt.figure()
# plt.xlabel('Number of features selected')
# plt.ylabel('Cross validation score')
# plt.title('RFECV R2L')
# plt.plot(range(1, len(rfecv_R2L.grid_scores_) + 1), rfecv_R2L.grid_scores_)
# plt.show()

In [None]:
# rfecv_U2R = RFECV(estimator=clf_U2R, step=1, cv=10, scoring='accuracy')
# rfecv_U2R.fit(X_U2R_test, Y_U2R_test)
# plt.figure()
# plt.xlabel('Number of features selected')
# plt.ylabel('Cross validation score')
# plt.title('RFECV U2R')
# plt.plot(range(1, len(rfecv_U2R.grid_scores_) + 1), rfecv_U2R.grid_scores_)
# plt.show()

In [None]:
# Reduce every scaled test matrix to the columns recorded in the matching
# rfecolindex_* list, then show the shape of the U2R result.
X_DoS_test2, X_Probe_test2, X_R2L_test2, X_U2R_test2 = (
    test_matrix[:, kept_columns]
    for test_matrix, kept_columns in (
        (X_DoS_test, rfecolindex_DoS),
        (X_Probe_test, rfecolindex_Probe),
        (X_R2L_test, rfecolindex_R2L),
        (X_U2R_test, rfecolindex_U2R),
    )
)
X_U2R_test2.shape

In [None]:
start = time.time()
Y_DoS_pred2 = clf_rfeDoS.predict(X_DoS_test2)
end = time.time() - start
print(end)
pd.crosstab(Y_DoS_test, Y_DoS_pred2, rownames=['Actual attacks'], colnames=['Predicted attacks'])

In [None]:
start = time.time()
Y_Probe_pred2 = clf_rfeProbe.predict(X_Probe_test2)
end = time.time() - start
print(end)
pd.crosstab(Y_Probe_test, Y_Probe_pred2, rownames=['Actual attacks'], colnames=['Predicted attacks'])

In [None]:
start = time.time()
Y_R2L_pred2 = clf_rfeR2L.predict(X_R2L_test2)
end = time.time() - start
print(end)
pd.crosstab(Y_R2L_test, Y_R2L_pred2, rownames=['Actual attacks'], colnames=['Predicted attacks'])

In [None]:
start = time.time()
Y_U2R_pred2 = clf_rfeU2R.predict(X_U2R_test2)
end = time.time() - start
print(end)
pd.crosstab(Y_U2R_test, Y_U2R_pred2, rownames=['Actual attacks'], colnames=['Predicted attacks'])

In [None]:
accuracy = cross_val_score(clf_rfeDoS, X_DoS_test2, Y_DoS_test, cv=10, scoring='accuracy')
print('Accuracy Dos Rfec: %0.5f (+/- %0.5f)' % (accuracy.mean(), accuracy.std() * 2))
precision = cross_val_score(clf_rfeDoS, X_DoS_test2, Y_DoS_test, cv=10, scoring='precision')
print('Precision Dos Rfec: %0.5f (+/- %0.5f)' % (precision.mean(), precision.std() * 2))
recall = cross_val_score(clf_rfeDoS, X_DoS_test2, Y_DoS_test, cv=10, scoring='recall')
print('Recall Dos Rfec: %0.5f (+/- %0.5f)' % (recall.mean(), recall.std() * 2))
f = cross_val_score(clf_rfeDoS, X_DoS_test2, Y_DoS_test, cv=10, scoring='f1')
print('F-measure Dos Rfec: %0.5f (+/- %0.5f)' % (f.mean(), f.std() * 2))

In [None]:
accuracy = cross_val_score(clf_rfeProbe, X_Probe_test2, Y_Probe_test, cv=10, scoring='accuracy')
print('Accuracy Probe Rfec: %0.5f (+/- %0.5f)' % (accuracy.mean(), accuracy.std() * 2))
precision = cross_val_score(clf_rfeProbe, X_Probe_test2, Y_Probe_test, cv=10, scoring='precision_macro')
print('Precision Probe Rfec: %0.5f (+/- %0.5f)' % (precision.mean(), precision.std() * 2))
recall = cross_val_score(clf_rfeProbe, X_Probe_test2, Y_Probe_test, cv=10, scoring='recall_macro')
print('Recall Probe Rfec: %0.5f (+/- %0.5f)' % (recall.mean(), recall.std() * 2))
f = cross_val_score(clf_rfeProbe, X_Probe_test2, Y_Probe_test, cv=10, scoring='f1_macro')
print('F-measure Probe Rfec: %0.5f (+/- %0.5f)' % (f.mean(), f.std() * 2))

In [None]:
accuracy = cross_val_score(clf_rfeR2L, X_R2L_test2, Y_R2L_test, cv=10, scoring='accuracy')
print('Accuracy R2L Rfec: %0.5f (+/- %0.5f)' % (accuracy.mean(), accuracy.std() * 2))
precision = cross_val_score(clf_rfeR2L, X_R2L_test2, Y_R2L_test, cv=10, scoring='precision_macro')
print('Precision R2L Rfec: %0.5f (+/- %0.5f)' % (precision.mean(), precision.std() * 2))
recall = cross_val_score(clf_rfeR2L, X_R2L_test2, Y_R2L_test, cv=10, scoring='recall_macro')
print('Recall R2L Rfec: %0.5f (+/- %0.5f)' % (recall.mean(), recall.std() * 2))
f = cross_val_score(clf_rfeR2L, X_R2L_test2, Y_R2L_test, cv=10, scoring='f1_macro')
print('F-measure R2L Rfec: %0.5f (+/- %0.5f)' % (f.mean(), f.std() * 2))

In [None]:
accuracy = cross_val_score(clf_rfeU2R, X_U2R_test2, Y_U2R_test, cv=10, scoring='accuracy')
print('Accuracy U2R Rfec: %0.5f (+/- %0.5f)' % (accuracy.mean(), accuracy.std() * 2))
precision = cross_val_score(clf_rfeU2R, X_U2R_test2, Y_U2R_test, cv=10, scoring='precision_macro')
print('Precision U2R Rfec: %0.5f (+/- %0.5f)' % (precision.mean(), precision.std() * 2))
recall = cross_val_score(clf_rfeU2R, X_U2R_test2, Y_U2R_test, cv=10, scoring='recall_macro')
print('Recall U2R Rfec: %0.5f (+/- %0.5f)' % (recall.mean(), recall.std() * 2))
f = cross_val_score(clf_rfeU2R, X_U2R_test2, Y_U2R_test, cv=10, scoring='f1_macro')
print('F-measure U2R Rfec: %0.5f (+/- %0.5f)' % (f.mean(), f.std() * 2))

In [None]:
#Strafified Cross Validation
from sklearn.model_selection import StratifiedKFold
accuracy = cross_val_score(clf_rfeDoS, X_DoS_test2, Y_DoS_test, cv=StratifiedKFold(10), scoring='accuracy')
print('Accuracy DoS: %0.5f (+/- %0.5f)' % (accuracy.mean(), accuracy.std() * 2))

In [None]:
accuracy = cross_val_score(clf_rfeProbe, X_Probe_test2, Y_Probe_test, cv=StratifiedKFold(10), scoring='accuracy')
print('Accuracy Probe: %0.5f (+/- %0.5f)' % (accuracy.mean(), accuracy.std() * 2))

In [None]:
accuracy = cross_val_score(clf_rfeR2L, X_R2L_test2, Y_R2L_test, cv=StratifiedKFold(10), scoring='accuracy')
print('Accuracy R2L: %0.5f (+/- %0.5f)' % (accuracy.mean(), accuracy.std() * 2))

In [None]:
accuracy = cross_val_score(clf_rfeU2R, X_U2R_test2, Y_U2R_test, cv=StratifiedKFold(10), scoring='accuracy')
print('Accuracy U2R: %0.5f (+/- %0.5f)' % (accuracy.mean(), accuracy.std() * 2))

In [None]:
#Cross Validation 2, 5, 10, 30, 50 FOLD
accuracy = cross_val_score(clf_rfeDoS, X_DoS_test2, Y_DoS_test, cv=2, scoring='accuracy')
print("Accuracy DoS 2 FOLD: %0.5f (+/- %0.5f)" % (accuracy.mean(), accuracy.std() * 2))

accuracy = cross_val_score(clf_rfeDoS, X_DoS_test2, Y_DoS_test, cv=5, scoring='accuracy')
print("Accuracy DoS 5 FOLD: %0.5f (+/- %0.5f)" % (accuracy.mean(), accuracy.std() * 2))

accuracy = cross_val_score(clf_rfeDoS, X_DoS_test2, Y_DoS_test, cv=10, scoring='accuracy')
print("Accuracy DoS 10 FOLD: %0.5f (+/- %0.5f)" % (accuracy.mean(), accuracy.std() * 2))

In [None]:
accuracy = cross_val_score(clf_rfeProbe, X_Probe_test2, Y_Probe_test, cv=2, scoring='accuracy')
print("Accuracy Probe 2 FOLD: %0.5f (+/- %0.5f)" % (accuracy.mean(), accuracy.std() * 2))

accuracy = cross_val_score(clf_rfeProbe, X_Probe_test2, Y_Probe_test, cv=5, scoring='accuracy')
print("Accuracy Probe 5 FOLD: %0.5f (+/- %0.5f)" % (accuracy.mean(), accuracy.std() * 2))

accuracy = cross_val_score(clf_rfeProbe, X_Probe_test2, Y_Probe_test, cv=10, scoring='accuracy')
print("Accuracy Probe 10 FOLD: %0.5f (+/- %0.5f)" % (accuracy.mean(), accuracy.std() * 2))

In [None]:
accuracy = cross_val_score(clf_rfeR2L, X_R2L_test2, Y_R2L_test, cv=2, scoring='accuracy')
print("Accuracy R2L 2 FOLD: %0.5f (+/- %0.5f)" % (accuracy.mean(), accuracy.std() * 2))

accuracy = cross_val_score(clf_rfeR2L, X_R2L_test2, Y_R2L_test, cv=5, scoring='accuracy')
print("Accuracy R2L 5 FOLD: %0.5f (+/- %0.5f)" % (accuracy.mean(), accuracy.std() * 2))

accuracy = cross_val_score(clf_rfeR2L, X_R2L_test2, Y_R2L_test, cv=10, scoring='accuracy')
print("Accuracy R2L 10 FOLD: %0.5f (+/- %0.5f)" % (accuracy.mean(), accuracy.std() * 2))

In [None]:
accuracy = cross_val_score(clf_rfeU2R, X_U2R_test2, Y_U2R_test, cv=2, scoring='accuracy')
print("Accuracy U2R 2 FOLD: %0.5f (+/- %0.5f)" % (accuracy.mean(), accuracy.std() * 2))

accuracy = cross_val_score(clf_rfeU2R, X_U2R_test2, Y_U2R_test, cv=5, scoring='accuracy')
print("Accuracy U2R 5 FOLD: %0.5f (+/- %0.5f)" % (accuracy.mean(), accuracy.std() * 2))

accuracy = cross_val_score(clf_rfeU2R, X_U2R_test2, Y_U2R_test, cv=10, scoring='accuracy')
print("Accuracy U2R 10 FOLD: %0.5f (+/- %0.5f)" % (accuracy.mean(), accuracy.std() * 2))