# IMPORTING LIBRARIES

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from pandas.api.types import is_numeric_dtype
import warnings
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.tree  import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, VotingClassifier, GradientBoostingClassifier
from sklearn.svm import SVC, LinearSVC
from sklearn.naive_bayes import BernoulliNB
from lightgbm import LGBMClassifier
from sklearn.feature_selection import RFE
import itertools
from xgboost import XGBClassifier
from tabulate import tabulate

# DATA COLLECTION

In [None]:
# Load the training CSV from the Kaggle input directory into a DataFrame.
train = pd.read_csv('/kaggle/input/network-intrusion-detection/Train_data.csv')

In [None]:
# Load the test CSV from the Kaggle input directory into a DataFrame.
test = pd.read_csv('/kaggle/input/network-intrusion-detection/Test_data.csv')

In [None]:
# Preview the first rows of the training frame.
train.head()

In [None]:
# Descriptive statistics for the training frame's columns.
train.describe()

In [None]:
# (rows, columns) of the training frame.
train.shape

In [None]:
# Number of missing values in each column.
train.isnull().sum()

In [None]:
# Bar chart of how many training rows carry each 'class' value.
sns.countplot(x=train['class'])

In [None]:
# Exact row count for each 'class' value.
train['class'].value_counts()

In [None]:
def le(df):
    """Label-encode every object-dtype column of ``df`` in place.

    A fresh ``LabelEncoder`` is fitted on each such column and its integer
    codes overwrite the original values. Nothing is returned and the fitted
    encoders are discarded.

    NOTE(review): every call fits its own encoders, so codes produced by
    separate calls (e.g. one per frame) only agree when the columns hold the
    same set of values -- confirm this holds for the data being encoded.
    """
    object_columns = [name for name in df.columns if df[name].dtype == 'object']
    for name in object_columns:
        df[name] = LabelEncoder().fit_transform(df[name])

# Encode the object-dtype columns of both frames in place; each call fits
# its own encoders independently.
le(train)
le(test)

In [None]:
# Remove the 'num_outbound_cmds' column from both frames in place.
train.drop(columns=['num_outbound_cmds'], inplace=True)
test.drop(columns=['num_outbound_cmds'], inplace=True)

In [None]:
# Preview the training frame again after encoding and the column drop.
train.head()

In [None]:
# Separate the 'class' target from the remaining feature columns.
Y_train = train['class']
X_train = train.drop(columns=['class'])

In [None]:
# Recursive feature elimination driven by a random forest, keeping 10 features.
# NOTE(review): the forest has no random_state, so the selected features may
# differ between runs -- confirm whether reproducibility matters here.
rfc = RandomForestClassifier()
rfe = RFE(rfc, n_features_to_select=10)
rfe = rfe.fit(X_train, Y_train)
# get_support() yields one boolean per input column, so a plain zip pairs
# mask and column name one-to-one (zip_longest would only hide a length
# mismatch by padding with None).
feature_map = list(zip(rfe.get_support(), X_train.columns))
selected_features = [name for keep, name in feature_map if keep]
selected_features

In [None]:
# Restrict the feature matrix to the columns listed in selected_features.
X_train = X_train[selected_features]

In [None]:
# Standardize the features; the scaler's mean/variance are learned from the
# training matrix only.
scale = StandardScaler()
X_train = scale.fit_transform(X_train)
# Apply the training-fitted scaler to the same selected columns of the test
# frame. Re-fitting on the test frame would standardize it with different
# statistics and leave it with a different column set than the models see.
test = scale.transform(test[selected_features])

In [None]:
# Split the scaled training data 70/30 into fit and hold-out parts;
# random_state=2 fixes the shuffle.
x_train, x_test, y_train, y_test = train_test_split(X_train, Y_train, train_size=0.70, random_state=2)

In [None]:
# Shape of the fit partition.
x_train.shape

In [None]:
# Shape of the hold-out partition.
x_test.shape

# K-Nearest Neighbor 

In [None]:
from sklearn import metrics
from sklearn.neighbors import KNeighborsClassifier
# KNN classifier using 25 neighbours and the Minkowski metric, fitted on the
# fit partition.
knn = KNeighborsClassifier(n_neighbors = 25, metric = 'minkowski')
knn.fit(x_train,y_train)

In [None]:
# KNN predictions for the hold-out partition.
knn_y_pred = knn.predict(x_test)

In [None]:
from sklearn.metrics import accuracy_score
# Fraction of hold-out samples the KNN model labelled correctly.
accuracy_score(y_test,knn_y_pred)

In [None]:
# Error rate is the complement of the KNN hold-out accuracy.
error_rate = 1.0 - metrics.accuracy_score(y_true=y_test, y_pred=knn_y_pred)
print("Error Rate:", error_rate)

In [None]:
# Support-weighted average of the per-class recall for the KNN predictions.
sensitivity = metrics.recall_score(y_true=y_test, y_pred=knn_y_pred, average='weighted')
print("Sensitivity:", sensitivity)

In [None]:
def calculate_specificity(y_true, knn_y_pred, class_label):
    """Return the specificity (true-negative rate) relative to ``class_label``.

    ``class_label`` is treated as the positive class; every other label counts
    as negative.

    Args:
        y_true: Ground-truth labels (any array-like).
        knn_y_pred: Predicted labels, same length as ``y_true``.
        class_label: Label regarded as the positive class.

    Returns:
        TN / (TN + FP), or ``nan`` when ``y_true`` holds no negative samples.
    """
    # Coerce to arrays so plain lists/tuples are compared element-wise
    # instead of as one object.
    actual = np.asarray(y_true)
    predicted = np.asarray(knn_y_pred)
    negatives = actual != class_label
    # TN + FP is simply the number of actual negatives.
    negative_count = np.count_nonzero(negatives)
    if negative_count == 0:
        return float("nan")
    true_negative = np.count_nonzero(negatives & (predicted != class_label))
    return true_negative / negative_count
# Label 0 is passed as the positive class for the specificity computation.
# NOTE(review): which original category maps to 0 depends on the label
# encoding -- confirm.
class_label = 0  
specificity = calculate_specificity(y_test, knn_y_pred, class_label)
print("Specificity:", specificity)

In [None]:
# Support-weighted F1 for the KNN predictions.
f1_score = metrics.f1_score(y_true=y_test, y_pred=knn_y_pred, average='weighted')
print("F1-score", f1_score)

# Support Vector Machine 

In [None]:
from sklearn.svm import SVC
# Support vector classifier with a linear kernel, fitted on the fit partition.
svc = SVC(kernel="linear",random_state=0)
svc.fit(x_train,y_train)

In [None]:
# SVM predictions for the hold-out partition.
svc_y_pred = svc.predict(x_test)

In [None]:
from sklearn.metrics import accuracy_score
# Fraction of hold-out samples the SVM labelled correctly.
accuracy_score(y_test,svc_y_pred)

In [None]:
# Error rate is the complement of the SVM hold-out accuracy.
error_rate = 1.0 - metrics.accuracy_score(y_true=y_test, y_pred=svc_y_pred)
print("Error Rate:", error_rate)

In [None]:
# Support-weighted average of the per-class recall for the SVM predictions.
sensitivity = metrics.recall_score(y_true=y_test, y_pred=svc_y_pred, average='weighted')
print("Sensitivity:", sensitivity)

In [None]:
def calculate_specificity(y_true, svc_y_pred, class_label):
    """Return the specificity (true-negative rate) relative to ``class_label``.

    ``class_label`` is treated as the positive class; every other label counts
    as negative.

    Args:
        y_true: Ground-truth labels (any array-like).
        svc_y_pred: Predicted labels, same length as ``y_true``.
        class_label: Label regarded as the positive class.

    Returns:
        TN / (TN + FP), or ``nan`` when ``y_true`` holds no negative samples.
    """
    # Coerce to arrays so plain lists/tuples are compared element-wise
    # instead of as one object.
    actual = np.asarray(y_true)
    predicted = np.asarray(svc_y_pred)
    negatives = actual != class_label
    # TN + FP is simply the number of actual negatives.
    negative_count = np.count_nonzero(negatives)
    if negative_count == 0:
        return float("nan")
    true_negative = np.count_nonzero(negatives & (predicted != class_label))
    return true_negative / negative_count
# Label 0 is passed as the positive class for the specificity computation.
# NOTE(review): which original category maps to 0 depends on the label
# encoding -- confirm.
class_label = 0  
specificity = calculate_specificity(y_test, svc_y_pred, class_label)
print("Specificity:", specificity)

In [None]:
# Support-weighted F1 for the SVM section: score the SVM predictions
# (svc_y_pred), not the KNN predictions from the previous section.
f1_score = metrics.f1_score(y_test, svc_y_pred, average='weighted')
print("F1-score", f1_score)

# Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn import metrics

In [None]:
# Logistic regression with the library's default settings.
model = LogisticRegression()

In [None]:
# Fit the current `model` on the fit partition.
model.fit(x_train,y_train)

In [None]:
# Hold-out predictions from the current `model`.
y_pred = model.predict(x_test)

In [None]:
# model.score gives the mean accuracy on the hold-out partition; scale it to
# a percentage for display.
print('Accuracy of Logistic Regression is: ', model.score(x_test,y_test) * 100,'%')

In [None]:
# Error rate is the complement of the hold-out accuracy of y_pred.
error_rate = 1.0 - metrics.accuracy_score(y_true=y_test, y_pred=y_pred)
print("Error Rate:", error_rate)

In [None]:
# Support-weighted average of the per-class recall for y_pred.
sensitivity = metrics.recall_score(y_true=y_test, y_pred=y_pred, average='weighted')
print("Sensitivity:", sensitivity)

In [None]:
def calculate_specificity(y_true, y_pred, class_label):
    """Return the specificity (true-negative rate) relative to ``class_label``.

    ``class_label`` is treated as the positive class; every other label counts
    as negative.

    Args:
        y_true: Ground-truth labels (any array-like).
        y_pred: Predicted labels, same length as ``y_true``.
        class_label: Label regarded as the positive class.

    Returns:
        TN / (TN + FP), or ``nan`` when ``y_true`` holds no negative samples.
    """
    # Coerce to arrays so plain lists/tuples are compared element-wise
    # instead of as one object.
    actual = np.asarray(y_true)
    predicted = np.asarray(y_pred)
    negatives = actual != class_label
    # TN + FP is simply the number of actual negatives.
    negative_count = np.count_nonzero(negatives)
    if negative_count == 0:
        return float("nan")
    true_negative = np.count_nonzero(negatives & (predicted != class_label))
    return true_negative / negative_count
# Label 0 is passed as the positive class for the specificity computation.
# NOTE(review): which original category maps to 0 depends on the label
# encoding -- confirm.
class_label = 0  
specificity = calculate_specificity(y_test, y_pred, class_label)
print("Specificity:", specificity)

In [None]:
# Support-weighted F1 for y_pred.
f1_score = metrics.f1_score(y_true=y_test, y_pred=y_pred, average='weighted')
print("F1-score", f1_score)

# Gradient Boost

In [None]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import recall_score

In [None]:
# Gradient boosting classifier with default settings, fitted on the fit
# partition; rebinds `model`.
model = GradientBoostingClassifier()
model.fit(x_train,y_train)

In [None]:
# Hold-out predictions from the current `model`; rebinds `y_pred`.
y_pred = model.predict(x_test)

In [None]:
# model.score gives the mean accuracy on the hold-out partition; scale it to
# a percentage for display.
print('Accuracy of Gradient Boost is: ', model.score(x_test,y_test) * 100,'%')

In [None]:
# Error rate is the complement of the hold-out accuracy of y_pred.
error_rate = 1.0 - metrics.accuracy_score(y_true=y_test, y_pred=y_pred)
print("Error Rate:", error_rate)

In [None]:
# Support-weighted average of the per-class recall for y_pred.
sensitivity = metrics.recall_score(y_true=y_test, y_pred=y_pred, average='weighted')
print("Sensitivity:", sensitivity)

In [None]:
def calculate_specificity(y_true, y_pred, class_label):
    """Return the specificity (true-negative rate) relative to ``class_label``.

    ``class_label`` is treated as the positive class; every other label counts
    as negative.

    Args:
        y_true: Ground-truth labels (any array-like).
        y_pred: Predicted labels, same length as ``y_true``.
        class_label: Label regarded as the positive class.

    Returns:
        TN / (TN + FP), or ``nan`` when ``y_true`` holds no negative samples.
    """
    # Coerce to arrays so plain lists/tuples are compared element-wise
    # instead of as one object.
    actual = np.asarray(y_true)
    predicted = np.asarray(y_pred)
    negatives = actual != class_label
    # TN + FP is simply the number of actual negatives.
    negative_count = np.count_nonzero(negatives)
    if negative_count == 0:
        return float("nan")
    true_negative = np.count_nonzero(negatives & (predicted != class_label))
    return true_negative / negative_count
# Label 0 is passed as the positive class for the specificity computation.
# NOTE(review): which original category maps to 0 depends on the label
# encoding -- confirm.
class_label = 0  
specificity = calculate_specificity(y_test, y_pred, class_label)
print("Specificity:", specificity)

In [None]:
# Support-weighted F1 for y_pred.
f1_score = metrics.f1_score(y_true=y_test, y_pred=y_pred, average='weighted')
print("F1-score", f1_score)

# Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Random forest of 50 trees, fitted on the fit partition.
clf = RandomForestClassifier(n_estimators=50)
clf.fit(x_train,y_train)

In [None]:
# Random-forest predictions for the hold-out partition.
clf_y_pred = clf.predict(x_test)

In [None]:
from sklearn.metrics import accuracy_score
# Fraction of hold-out samples the random forest labelled correctly.
accuracy = accuracy_score(y_test,clf_y_pred)
print("Accuracy:", accuracy)

In [None]:
# Error rate is the complement of the random-forest hold-out accuracy.
error_rate = 1.0 - metrics.accuracy_score(y_true=y_test, y_pred=clf_y_pred)
print("Error Rate:", error_rate)

In [None]:
# Support-weighted average of the per-class recall for the random-forest predictions.
sensitivity = metrics.recall_score(y_true=y_test, y_pred=clf_y_pred, average='weighted')
print("Sensitivity:", sensitivity)

In [None]:
def calculate_specificity(y_true, clf_y_pred, class_label):
    """Return the specificity (true-negative rate) relative to ``class_label``.

    ``class_label`` is treated as the positive class; every other label counts
    as negative.

    Args:
        y_true: Ground-truth labels (any array-like).
        clf_y_pred: Predicted labels, same length as ``y_true``.
        class_label: Label regarded as the positive class.

    Returns:
        TN / (TN + FP), or ``nan`` when ``y_true`` holds no negative samples.
    """
    # Coerce to arrays so plain lists/tuples are compared element-wise
    # instead of as one object.
    actual = np.asarray(y_true)
    predicted = np.asarray(clf_y_pred)
    negatives = actual != class_label
    # TN + FP is simply the number of actual negatives.
    negative_count = np.count_nonzero(negatives)
    if negative_count == 0:
        return float("nan")
    true_negative = np.count_nonzero(negatives & (predicted != class_label))
    return true_negative / negative_count
# Label 0 is passed as the positive class for the specificity computation.
# NOTE(review): which original category maps to 0 depends on the label
# encoding -- confirm.
class_label = 0  
specificity = calculate_specificity(y_test, clf_y_pred, class_label)
print("Specificity:", specificity)

In [None]:
# Support-weighted F1 for the random-forest predictions.
f1_score = metrics.f1_score(y_true=y_test, y_pred=clf_y_pred, average='weighted')
print("F1-score", f1_score)