In [3]:
import sys
sys.path.append('../')

from src.models.train_models import IDSModelTrainer
import numpy as np
import joblib

In [4]:
print("Loading processed data...")

# Read the four pre-split arrays from the processed-data directory, in the
# fixed order train/test features followed by train/test labels.
processed_dir = '../data/processed'
X_train, X_test, y_train, y_test = (
    np.load(f'{processed_dir}/{split_name}.npy')
    for split_name in ('X_train', 'X_test', 'y_train', 'y_test')
)

# Echo each array's shape so a reader can sanity-check the split sizes.
for label, array in (
    ('X_train', X_train),
    ('X_test', X_test),
    ('y_train', y_train),
    ('y_test', y_test),
):
    print(f" {label}: {array.shape}")


Loading processed data...
 X_train: (178465, 78)
 X_test: (44617, 78)
 y_train: (178465,)
 y_test: (44617,)


In [5]:
import os

# Create the model output directory up front; exist_ok avoids an error when
# the directory is already there (e.g. on a re-run of this cell).
os.makedirs('../models', exist_ok=True)

trainer = IDSModelTrainer()

# Resolve to an absolute path so the printed location is unambiguous.
save_path = os.path.abspath('../models/')
print(f"Saving models to: {save_path}")

# The trainer is handed the directory with a trailing slash appended.
trained_models, results = trainer.train_all_models(
    X_train,
    y_train,
    X_test,
    y_test,
    save_path=f"{save_path}/",
)

print("\n All models trained and saved!")

# Report the on-disk size of each expected model file, or flag it as missing.
print("\nVerifying saved models:")
expected_files = ('random_forest.pkl', 'gradient_boosting.pkl', 'logistic_regression.pkl')
for model_file in expected_files:
    model_file_path = os.path.join(save_path, model_file)
    if not os.path.exists(model_file_path):
        print(f" {model_file}: NOT FOUND")
        continue
    size_mb = os.path.getsize(model_file_path) / (1024 * 1024)
    print(f" {model_file}: {size_mb:.2f} MB")

Saving models to: /Users/jakub.porada/Downloads/Users/Jakub/NetworkIDS/models

NETWORK IDS MODEL TRAINING

Training set size: (178465, 78)
Test set size: (44617, 78)
Number of features: 78
Training random_forest...


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=-1)]: Done  30 tasks      | elapsed:    1.0s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    3.1s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done 100 out of 100 | elapsed:    0.0s finished


Training completed in 3.122182846069336 seconds

Evaluating random_forest...

random_forest Results:
  Accuracy:  0.9999 (99.99%)
  Precision: 1.0000 (of detected attacks, 100.0% were real)
  Recall:    0.9999 (caught 100.0% of all attacks)
  F1-Score:  0.9999
  AUC-ROC:   0.9999
  False Positive Rate: 0.0001 (0.01%)

  Confusion Matrix:
    True Negatives:  19,013 (correctly identified benign)
    False Positives: 1 (benign flagged as attack)
    False Negatives: 3 (missed attacks)
    True Positives:  25,600 (correctly caught attacks)

  Inference time: 0.0834s for 44,617 samples
  Per-sample: 0.0019ms
Training gradient_boosting...
      Iter       Train Loss   Remaining Time 
         1           1.1745            3.18m
         2           1.0203            3.66m
         3           0.8923            3.75m
         4           0.7845            3.48m
         5           0.6925            3.44m
         6           0.6134            3.36m
         7           0.5449            3.1

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 10 concurrent workers.


Training completed in 2.5030057430267334 seconds

Evaluating logistic_regression...

logistic_regression Results:
  Accuracy:  0.9980 (99.80%)
  Precision: 0.9976 (of detected attacks, 99.8% were real)
  Recall:    0.9990 (caught 99.9% of all attacks)
  F1-Score:  0.9983
  AUC-ROC:   0.9979
  False Positive Rate: 0.0033 (0.33%)

  Confusion Matrix:
    True Negatives:  18,952 (correctly identified benign)
    False Positives: 62 (benign flagged as attack)
    False Negatives: 26 (missed attacks)
    True Positives:  25,577 (correctly caught attacks)

  Inference time: 0.0128s for 44,617 samples
  Per-sample: 0.0003ms

 Saved results to /Users/jakub.porada/Downloads/Users/Jakub/NetworkIDS/models/model_results.json

MODEL COMPARISON

Metric                    random_forest        gradient_boosting    logistic_regression  
----------------------------------------------------------------------
accuracy                  0.9999               0.9998               0.9980               
precisi

In [6]:
import joblib
import os

# Resolve the models directory relative to the notebook's working directory
# instead of hardcoding one user's absolute home path, so the cell works on
# any checkout of the project. Creating it here keeps the cell from relying
# on an earlier cell having made the directory.
base_path = os.path.abspath('../models')
os.makedirs(base_path, exist_ok=True)

# Persist every entry of trained_models as <model_name>.pkl and report the
# resulting file's size on disk.
for model_name, model in trained_models.items():
    filepath = os.path.join(base_path, f'{model_name}.pkl')
    joblib.dump(model, filepath)
    print(f" Saved {model_name} to {filepath}")
    print(f"  File size: {os.path.getsize(filepath) / (1024*1024):.2f} MB")

✓ Saved random_forest to /Users/jakub.porada/Downloads/Users/Jakub/NetworkIDS/models/random_forest.pkl
  File size: 1.38 MB
✓ Saved gradient_boosting to /Users/jakub.porada/Downloads/Users/Jakub/NetworkIDS/models/gradient_boosting.pkl
  File size: 1.27 MB
✓ Saved logistic_regression to /Users/jakub.porada/Downloads/Users/Jakub/NetworkIDS/models/logistic_regression.pkl
  File size: 0.00 MB
