In [3]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Load the cleaned data and binarise the target:
# 'normal' -> 0, every other label value -> 1.
df = pd.read_csv("../data/nsl_kdd_dataset.csv")
# Vectorised comparison instead of a per-row Python lambda; the mapping is
# the same and the result is pinned to int64.
df['label'] = (df['label'] != 'normal').astype('int64')

# Split features and target
X = df.drop(columns='label')
y = df['label']

# Train-test split: hold out 25% of the rows, with a fixed seed so the split
# is reproducible.
# NOTE(review): the split is not stratified although the recorded output shows
# imbalanced classes (217 vs 891 in the test set); consider stratify=y.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

# Train the baseline model on the raw (unscaled) features
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Predict on the held-out rows
y_pred = model.predict(X_test)

# Evaluate
print(confusion_matrix(y_test, y_pred))
# zero_division=0 reports 0.00 for a class that is never predicted (the same
# value the default produces) without emitting UndefinedMetricWarning noise.
print(classification_report(y_test, y_pred, zero_division=0))


[[  0 217]
 [  0 891]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       217
           1       0.80      1.00      0.89       891

    accuracy                           0.80      1108
   macro avg       0.40      0.50      0.45      1108
weighted avg       0.65      0.80      0.72      1108



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [5]:
# Show the confusion matrix again for whatever predictions y_pred currently
# holds (rows = true class, columns = predicted class).
print(confusion_matrix(y_true=y_test, y_pred=y_pred))


[[  0 217]
 [  0 891]]


In [7]:
# Logistic regression on standardised features. StandardScaler and Pipeline
# are imported with the other dependencies at the top of the notebook.
# Wrapping both steps in a Pipeline means the scaler is fitted on the
# training rows only and then re-applied to anything passed to predict().
pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('model', LogisticRegression(max_iter=1000))
])

pipe.fit(X_train, y_train)

# NOTE: this rebinds y_pred, replacing the predictions of the unscaled model.
y_pred = pipe.predict(X_test)

print(confusion_matrix(y_test, y_pred))
# zero_division=0 reports 0.00 for a class that is never predicted (the same
# value the default produces) without emitting UndefinedMetricWarning noise.
print(classification_report(y_test, y_pred, zero_division=0))


[[  0 217]
 [  0 891]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       217
           1       0.80      1.00      0.89       891

    accuracy                           0.80      1108
   macro avg       0.40      0.50      0.45      1108
weighted avg       0.65      0.80      0.72      1108



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


### Baseline Model Observation

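The initial logistic regression collapsed into predicting a single class:
every test sample was assigned to class 1 (non-normal), so recall for class 0
(normal) is 0.00 and the 0.80 accuracy only reflects the class imbalance
(891 of the 1108 test samples belong to class 1).

Feature scale dominance was the suspected cause, so the model was refit inside
a pipeline with `StandardScaler`. The scaled pipeline produced exactly the same
confusion matrix, however, so feature scaling alone did not resolve the
collapse. The model has not yet learned to separate the two classes, and the
cause (for example class imbalance or uninformative features) still needs to
be investigated.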
