In [7]:
import pandas as pd
import pickle
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

train_df = pd.read_csv("C:/Users/avika/OneDrive/Documents/KDDTrain.csv.txt")
test_df = pd.read_csv("C:/Users/avika/OneDrive/Documents/KDDTest+.txt")

In [8]:
columns = [
 'duration','protocol_type','service','flag','src_bytes','dst_bytes',
 'land','wrong_fragment','urgent','hot','num_failed_logins','logged_in',
 'num_compromised','root_shell','su_attempted','num_root',
 'num_file_creations','num_shells','num_access_files','num_outbound_cmds',
 'is_host_login','is_guest_login','count','srv_count','serror_rate',
 'srv_serror_rate','rerror_rate','srv_rerror_rate','same_srv_rate',
 'diff_srv_rate','srv_diff_host_rate','dst_host_count',
 'dst_host_srv_count','dst_host_same_srv_rate','dst_host_diff_srv_rate',
 'dst_host_same_src_port_rate','dst_host_srv_diff_host_rate',
 'dst_host_serror_rate','dst_host_srv_serror_rate','dst_host_rerror_rate',
 'dst_host_srv_rerror_rate','label','difficulty'
]

train_df.columns = columns
test_df.columns  = columns

In [9]:

train_df['label'] = train_df['label'].apply(lambda x: 0 if x == 'normal' else 1)
test_df['label']  = test_df['label'].apply(lambda x: 0 if x == 'normal' else 1)

X_train = train_df.drop(['label','difficulty'], axis=1)
y_train = train_df['label']

X_test = test_df.drop(['label','difficulty'], axis=1)
y_test = test_df['label']

In [10]:
# =========================
# ENCODERS (FIT ONLY ON TRAIN)
# =========================
encoders = {}
for col in ['protocol_type','service','flag']:
    le = LabelEncoder()
    X_train[col] = le.fit_transform(X_train[col])
    X_test[col]  = le.transform(X_test[col])
    encoders[col] = le

In [11]:
# save encoders
pickle.dump(encoders, open("encoders.pkl","wb"))

# =========================
# FEATURE ORDER (SAVE)
# =========================
features = list(X_train.columns)
pickle.dump(features, open("features.pkl","wb"))


In [12]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled  = scaler.transform(X_test)

pickle.dump(scaler, open("scaler.pkl","wb"))


In [13]:
# =========================
# MODEL
# =========================
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

model.fit(
    X_train_scaled,
    y_train,
    epochs=10,
    batch_size=128,
    validation_data=(X_test_scaled, y_test)
)

model.save("nids_model.h5")

Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m985/985[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9775 - loss: 0.0704 - val_accuracy: 0.8109 - val_loss: 0.9317
Epoch 2/10
[1m985/985[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9908 - loss: 0.0274 - val_accuracy: 0.7851 - val_loss: 1.1425
Epoch 3/10
[1m985/985[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9922 - loss: 0.0230 - val_accuracy: 0.7858 - val_loss: 1.1348
Epoch 4/10
[1m985/985[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9931 - loss: 0.0209 - val_accuracy: 0.7880 - val_loss: 1.2382
Epoch 5/10
[1m985/985[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9936 - loss: 0.0189 - val_accuracy: 0.7916 - val_loss: 1.2718
Epoch 6/10
[1m985/985[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9939 - loss: 0.0179 - val_accuracy: 0.7798 - val_loss: 1.3455
Epoch 7/10
[1m985/985[0m [32m━━━━━━━



In [14]:
# =========================
# TESTING
# =========================
y_pred = (model.predict(X_test_scaled) > 0.5).astype(int)

print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

print("✅ Training + Testing + Pickle Done")

[1m705/705[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 641us/step
Accuracy: 0.8029987135696225
              precision    recall  f1-score   support

           0       0.69      0.97      0.81      9711
           1       0.97      0.67      0.80     12832

    accuracy                           0.80     22543
   macro avg       0.83      0.82      0.80     22543
weighted avg       0.85      0.80      0.80     22543

✅ Training + Testing + Pickle Done
