# **KDDCup99 Multiclass Classification Using Deep Learning**

## **Import Essential Libraries**

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

## **Mount Google Drive**

In [None]:
# Colab-only step: attach Google Drive at the path the later cells read from
# and write to.
from google.colab import drive

mount_point = '/content/drive'
drive.mount(mount_point)

## **Load The Dataset**

In [None]:
# Read the KDDCup99 CSV from the mounted Drive folder and preview its first rows.
dataset_path = '/content/drive/MyDrive/KDDCUP99 Data Set/kddcup-99.csv'
df = pd.read_csv(dataset_path)
df.head()

## **Convert Categorical Columns into Numerical Codes**

In [None]:
# Replace each listed column (three features plus the 'outcome' target) with
# integer codes. One fitted encoder is kept per column name so the codes can be
# mapped back to the original labels later.
categorical_columns = ['protocol_type', 'service', 'flag', 'outcome']
label_encoders = {name: LabelEncoder() for name in categorical_columns}
for name, encoder in label_encoders.items():
    df[name] = encoder.fit_transform(df[name])

## **Split the Data into Target and Features**

In [None]:
# Separate the label column from the predictor columns.
y = df['outcome']
X = df.drop(columns='outcome')

In [None]:
# Split the data into training and testing sets BEFORE scaling, so the scaler
# never sees the held-out rows (fitting it on the full table leaks the test
# set's min/max into training).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=100, shuffle=True)

# Scale the numerical variables.
# The min/max are learned from the training rows only and the same mapping is
# then applied to the test rows, so test values may fall slightly outside
# [0, 1]. The results are wrapped back into DataFrames to keep the original
# column names and row index.
scaler = MinMaxScaler()
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns, index=X_train.index)
X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns, index=X_test.index)

## **Deep Learning Model Creation**

In [None]:
# Build the model: three ReLU hidden layers followed by a softmax output.
#
# The output width is taken from the fitted 'outcome' encoder instead of
# y_train.nunique(). If a rare class happens to be absent from the training
# split, nunique() is smaller than the number of encoded labels, and any label
# code >= that width is out of range for sparse_categorical_crossentropy.
num_classes = len(label_encoders['outcome'].classes_)

model = keras.Sequential([
    keras.layers.Input(shape=(X_train.shape[1],)),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dense(num_classes, activation='softmax'),
])

## **Compile and Fit The Model**

In [None]:
# Configure training. The targets are integer class codes, hence the sparse
# variant of categorical cross-entropy.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit on the training split; 20% of it is held back by Keras for validation.
fit_options = dict(epochs=5, batch_size=12, validation_split=0.2)
history = model.fit(X_train, y_train, **fit_options)

## **Evaluate The Model**

In [None]:
# Score the trained network on the held-out test split and report both metrics.
evaluation = model.evaluate(X_test, y_test)
test_loss, test_accuracy = evaluation
print(f'Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}')

## **Saving The Model**

In [None]:
# Persist the trained Keras model to Drive in HDF5 format.
keras_model_path = '/content/drive/MyDrive/KDDCUP99 Data Set/kddcup.h5'
model.save(keras_model_path)

## **Plot Training and Validation Curves**

In [None]:
# Draw the per-epoch loss recorded by model.fit for the training data and for
# the validation slice, on one set of axes.
fig, ax = plt.subplots(figsize=(10, 6))
for history_key, curve_label in (('loss', 'Training Loss'), ('val_loss', 'Validation Loss')):
    ax.plot(history.history[history_key], label=curve_label)
ax.set_title('Training and Validation Loss')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()
plt.show()

In [None]:
# Draw the per-epoch accuracy recorded by model.fit for the training data and
# for the validation slice, on one set of axes.
fig, ax = plt.subplots(figsize=(10, 6))
for history_key, curve_label in (('accuracy', 'Training Accuracy'), ('val_accuracy', 'Validation Accuracy')):
    ax.plot(history.history[history_key], label=curve_label)
ax.set_title('Training and Validation Accuracy')
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.legend()
plt.show()

## **Create Confusion Matrix**

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Predict class probabilities for the test split and take the most likely
# class code for every row.
y_pred = model.predict(X_test)
y_pred_classes = y_pred.argmax(axis=1)

# Tabulate actual vs. predicted class codes and render the table as a heatmap.
conf_matrix = confusion_matrix(y_test, y_pred_classes)
plt.figure(figsize=(10, 8))
heatmap_axes = sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues')
heatmap_axes.set_title('Confusion Matrix')
heatmap_axes.set_xlabel('Predicted')
heatmap_axes.set_ylabel('Actual')
plt.show()

## **Classification Report**

In [None]:
# Classification Report
# `labels` and `target_names` must describe the same classes in the same order.
# Passing every encoder class as target_names while restricting labels to the
# predicted codes makes scikit-learn raise on a length mismatch (or attach the
# wrong names). Both are therefore derived from one label set: every class code
# that occurs in the test targets or in the predictions.
class_names = label_encoders['outcome'].classes_
report_labels = np.union1d(y_test, y_pred_classes)
classification_rep = classification_report(
    y_test,
    y_pred_classes,
    labels=report_labels,
    target_names=class_names[report_labels],
    zero_division=0,  # classes that were never predicted score 0 without a warning
)
print("Classification Report:\n", classification_rep)

## **ANN (Artificial Neural Network)**

## **Import Essential Libraries**

In [None]:
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.base import clone
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    f1_score,
    mean_absolute_error,
    mean_squared_error,
    precision_score,
    r2_score,
    recall_score,
)
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, StandardScaler

## **Load Dataset**

In [None]:
# Reload the raw KDDCup99 CSV so this section starts from un-encoded data, then
# preview its first rows.
dataset_path = '/content/drive/MyDrive/KDDCUP99 Data Set/kddcup-99.csv'
df = pd.read_csv(dataset_path)
df.head()

## **Get Unique Values of Protocol Type, Service, Flag and Outcome**

In [None]:
# Distinct values present in the protocol_type column.
pd.unique(df['protocol_type'])

In [None]:
# Distinct values present in the flag column.
pd.unique(df['flag'])

In [None]:
# Distinct values present in the service column.
pd.unique(df['service'])

In [None]:
# Distinct values present in the outcome (target) column.
pd.unique(df['outcome'])

## **Label Encoding Dataset**

In [None]:
# Integer-encode the three categorical feature columns. The 'outcome' target is
# left untouched in this section. One fitted encoder is kept per column name so
# it can be saved and reused.
categorical_features = ["protocol_type", "service", "flag"]
label_encoders = {name: LabelEncoder() for name in categorical_features}
for name, encoder in label_encoders.items():
    df[name] = encoder.fit_transform(df[name])

## **Save The Label Encoders**

In [None]:
# Write the dict of fitted encoders (keyed by column name) to Drive.
label_encoders_filename = '/content/drive/MyDrive/KDDCUP99 Data Set/label_encoders.pkl'
joblib.dump(value=label_encoders, filename=label_encoders_filename)

## **Split the data into Train and Test**

In [None]:
# Split the data into features and target
X = df.drop('outcome', axis=1)
y = df['outcome']

# Split the data into training and testing sets BEFORE scaling, so the scaler
# never sees the held-out rows (fitting it on the full table leaks the test
# set's min/max into training).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=100, shuffle=True)

# Scale the numerical variables.
# The min/max are learned from the training rows only and the same mapping is
# then applied to the test rows, so test values may fall slightly outside
# [0, 1]. The results are wrapped back into DataFrames to keep the original
# column names and row index.
scaler = MinMaxScaler()
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns, index=X_train.index)
X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns, index=X_test.index)

## **Define The ANN (MLP) Classifier**

In [None]:
from sklearn.neural_network import MLPClassifier

# Configure a scikit-learn multi-layer perceptron with two hidden layers
# (64 then 32 units); the seed is fixed so runs are repeatable.
mlp_settings = {
    'hidden_layer_sizes': (64, 32),
    'activation': 'relu',
    'solver': 'adam',
    'random_state': 42,
}
model = MLPClassifier(**mlp_settings)

## **Train The Model**

In [None]:
# Fit the MLP on the training split.
model.fit(X=X_train, y=y_train)

## **Saving The Model**

In [None]:
# Persist the fitted MLP classifier to Drive.
model_filename = '/content/drive/MyDrive/KDDCUP99 Data Set/kddcup99ann.pkl'
joblib.dump(value=model, filename=model_filename)

## **Generate Loss and Accuracy Graphs**

In [None]:
# Per-epoch training loss recorded by model.fit. A copy is taken because
# loss_curve_ is a live list owned by the estimator.
loss_history = list(model.loss_curve_)

# Record accuracy after every epoch WITHOUT touching the fitted model.
# Calling partial_fit on `model` itself would keep training the estimator that
# was already saved and is evaluated below, and the recorded accuracies would
# belong to extra epochs rather than to the run that produced loss_history.
# Instead, an unfitted copy with the same hyper-parameters is trained one epoch
# at a time for the same number of epochs.
# NOTE(review): the replay is expected to follow the original run closely
# (same settings and random_state) but is not guaranteed to be bit-identical.
tracker = clone(model)
train_accuracy_history = []
test_accuracy_history = []

for _ in range(len(loss_history)):
    # `classes` is mandatory on the first partial_fit call of a fresh estimator.
    tracker.partial_fit(X_train, y_train, classes=model.classes_)
    train_accuracy_history.append(tracker.score(X_train, y_train))
    test_accuracy_history.append(tracker.score(X_test, y_test))

# Generate loss graph
plt.figure(figsize=(6, 4))
plt.plot(loss_history, label='Training Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training Loss Curve')
plt.legend()
plt.show()

# Generate accuracy graph
plt.figure(figsize=(6, 4))
plt.plot(train_accuracy_history, label='Training Accuracy')
plt.plot(test_accuracy_history, label='Test Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Training and Test Accuracy')
plt.legend()
plt.tight_layout()
plt.show()

## **Create Confusion Matrix**

In [None]:
# Compute the confusion matrix for THIS classifier before plotting it.
# Without this, `conf_matrix` is whatever an earlier cell left behind, which
# does not match the MLP's predictions or the tick labels used below.
# `labels=model.classes_` fixes the row/column order to the same class list
# that labels the axes, even if a class is missing from the test split.
y_pred = model.predict(X_test)
conf_matrix = confusion_matrix(y_test, y_pred, labels=model.classes_)

plt.figure(figsize=(10, 8))
sns.set(font_scale=1.2)
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues",
            xticklabels=model.classes_, yticklabels=model.classes_)
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.title('Confusion Matrix')
plt.show()


## **Classification Report**

In [None]:
# Predict labels for the test split and print per-class precision, recall and F1.
y_pred = model.predict(X_test)
class_report = classification_report(y_true=y_test, y_pred=y_pred)
print("Classification Report:\n", class_report)

# Tabulate true vs. predicted labels for the same predictions.
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)