<a href="https://colab.research.google.com/github/abhijadhav14/Deep-Learning-CSE5019/blob/main/KDD_File.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.layers import BatchNormalization, Conv1D, Dense, Dropout, Flatten, Input, MaxPooling1D
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical

In [2]:
# Locations of the two NSL-KDD files used below; both live under the same
# raw.githubusercontent.com directory, so only the file name differs.
_NSL_KDD_ROOT = "https://raw.githubusercontent.com/defcom17/NSL_KDD/master"
url_train = f"{_NSL_KDD_ROOT}/KDDTrain+.txt"
url_test = f"{_NSL_KDD_ROOT}/KDDTest+.txt"

In [3]:
# Names given to the columns when the data files are read (passed as `names=`).
# Order matters: names are assigned positionally. The final two entries,
# "label" and "difficulty", are not model inputs and are dropped later,
# leaving 41 feature columns.
columns = """
    duration protocol_type service flag src_bytes dst_bytes land wrong_fragment urgent
    hot num_failed_logins logged_in num_compromised root_shell su_attempted num_root
    num_file_creations num_shells num_access_files num_outbound_cmds is_host_login
    is_guest_login count srv_count serror_rate srv_serror_rate rerror_rate
    srv_rerror_rate same_srv_rate diff_srv_rate srv_diff_host_rate dst_host_count
    dst_host_srv_count dst_host_same_srv_rate dst_host_diff_srv_rate
    dst_host_same_src_port_rate dst_host_srv_diff_host_rate dst_host_serror_rate
    dst_host_srv_serror_rate dst_host_rerror_rate dst_host_srv_rerror_rate label difficulty
""".split()

In [4]:
# Download and parse both splits (training file first, then test file).
# Supplying `names=` means the files are read without a header row and the
# names from `columns` are assigned positionally.
train_df, test_df = (
    pd.read_csv(source, names=columns) for source in (url_train, url_test)
)

In [5]:
# String-valued columns that must be turned into integer codes before scaling.
categorical = ['protocol_type', 'service', 'flag']

# One fitted LabelEncoder per categorical column, keyed by column name.
# Sharing a single encoder across the loop refits it on every pass, so only the
# mapping of the last column would survive; keeping each fitted encoder makes
# it possible to encode further data with the mapping used for training.
encoders = {}
for col in categorical:
    encoder = LabelEncoder()
    # Learn the mapping from the training split only, then apply that same
    # mapping to the test split (raises if the test split has an unseen value).
    train_df[col] = encoder.fit_transform(train_df[col])
    test_df[col] = encoder.transform(test_df[col])
    encoders[col] = encoder
# After the loop `encoder` refers to the encoder of the last column ('flag'),
# which is the state this cell has always left behind.

In [6]:
# Split each frame into model inputs and the raw target column.
# 'label' is the target and 'difficulty' is not used as an input, so both are
# removed from the feature frames.
non_feature_cols = ['label', 'difficulty']
X_train, y_train = train_df.drop(columns=non_feature_cols), train_df['label']
X_test, y_test = test_df.drop(columns=non_feature_cols), test_df['label']

In [7]:
def _to_binary(label):
    """Map the label 'normal' to 0 and every other label to 1."""
    if label == 'normal':
        return 0
    return 1


# Collapse the labels into a two-class target: 0 = 'normal', 1 = anything else.
y_train = y_train.apply(_to_binary)
y_test = y_test.apply(_to_binary)

In [8]:
# Standardise the features. The scaler's statistics are learned from the
# training split only and then applied unchanged to both splits.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [9]:
# Conv1D expects (samples, steps, channels): treat the 41 features as a
# 41-step sequence with a single channel.
n_features, n_channels = 41, 1
X_train = X_train.reshape((len(X_train), n_features, n_channels))
X_test = X_test.reshape((len(X_test), n_features, n_channels))

In [10]:
# One-hot encode the 0/1 targets for the 2-unit softmax output.
# `num_classes` is pinned to 2 rather than inferred from the largest label
# present, so the encoded width always matches the model's output layer even
# if a split happens to contain only class 0.
y_train = to_categorical(y_train, num_classes=2)
y_test = to_categorical(y_test, num_classes=2)

In [11]:
# 1D CNN over the 41-step, single-channel feature sequence.
# The input is declared with an explicit Input layer instead of passing
# `input_shape=` to the first Conv1D, which is the form Keras recommends for
# Sequential models; the layer stack itself is unchanged.
model = Sequential([
    Input(shape=(41, 1)),

    # Convolutional block 1: 32 filters, kernel size 3.
    Conv1D(32, 3, padding='same', activation='relu'),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),
    Dropout(0.3),

    # Convolutional block 2: 64 filters, kernel size 3.
    Conv1D(64, 3, padding='same', activation='relu'),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),
    Dropout(0.3),

    # Classifier head: dense layer, then a 2-way softmax matching the
    # one-hot encoded targets.
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(2, activation='softmax'),
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [12]:
# Categorical cross-entropy pairs with the one-hot targets and the softmax
# output layer; accuracy is tracked during training.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [13]:
# Training settings: 10 epochs, batches of 128 rows, and 20% of the training
# data set aside for validation; per-epoch progress is printed.
fit_options = dict(
    epochs=10,
    batch_size=128,
    validation_split=0.2,
    verbose=1,
)
history = model.fit(X_train, y_train, **fit_options)

Epoch 1/10
[1m788/788[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 33ms/step - accuracy: 0.9387 - loss: 0.1999 - val_accuracy: 0.9875 - val_loss: 0.0372
Epoch 2/10
[1m788/788[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 38ms/step - accuracy: 0.9806 - loss: 0.0555 - val_accuracy: 0.9901 - val_loss: 0.0285
Epoch 3/10
[1m788/788[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 22ms/step - accuracy: 0.9844 - loss: 0.0444 - val_accuracy: 0.9914 - val_loss: 0.0248
Epoch 4/10
[1m788/788[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 23ms/step - accuracy: 0.9858 - loss: 0.0390 - val_accuracy: 0.9909 - val_loss: 0.0250
Epoch 5/10
[1m788/788[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 22ms/step - accuracy: 0.9867 - loss: 0.0372 - val_accuracy: 0.9923 - val_loss: 0.0226
Epoch 6/10
[1m788/788[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 22ms/step - accuracy: 0.9873 - loss: 0.0359 - val_accuracy: 0.9919 - val_loss: 0.0203
Epoch 7/10
[1m7

In [14]:
# Score the test split: the predicted class is the index of the largest
# softmax output, and the true class is recovered from the one-hot targets
# in the same way.
class_scores = model.predict(X_test)
y_pred = class_scores.argmax(axis=1)
y_true = y_test.argmax(axis=1)

test_accuracy = accuracy_score(y_true, y_pred)
report = classification_report(y_true, y_pred)

print("\nTest Accuracy:", test_accuracy)
print("\nClassification Report:\n", report)

[1m705/705[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step

Test Accuracy: 0.7755500354861604

Classification Report:
               precision    recall  f1-score   support

           0       0.66      0.98      0.79      9711
           1       0.97      0.62      0.76     12833

    accuracy                           0.78     22544
   macro avg       0.82      0.80      0.77     22544
weighted avg       0.84      0.78      0.77     22544



In [None]:
# Run the trained model on a new CSV file, applying the same preprocessing
# steps that were applied to the training data.

# Load your new file
new_data = pd.read_csv("new_data.csv")   # <-- Replace with your file

# Drop unwanted columns if present
new_data = new_data.drop(columns=['label', 'difficulty'], errors='ignore')

# Encode categorical columns.
# Each column needs the mapping that was learned for THAT column. The shared
# `encoder` object was refit once per column during training and therefore
# only holds the mapping of the last column, and the training frame's
# categorical columns have already been overwritten with integer codes.
# So the raw categorical columns are re-read from the training file and one
# encoder is fitted per column; LabelEncoder assigns codes from the sorted
# unique values, which reproduces the training-time mapping exactly.
raw_categories = pd.read_csv(url_train, names=columns, usecols=categorical)
for col in categorical:
    col_encoder = LabelEncoder().fit(raw_categories[col])
    # Raises ValueError if the new file contains a value never seen in training.
    new_data[col] = col_encoder.transform(new_data[col])

# Scale with SAME scaler
new_data = scaler.transform(new_data)

# Reshape to Conv1D format
new_data = new_data.reshape(new_data.shape[0], 41, 1)

# Predict: index of the largest softmax output (0 = 'normal', 1 = other)
new_pred = np.argmax(model.predict(new_data), axis=1)

print("\nPrediction for NEW DATA:")
print(new_pred)