In [1]:
import pandas as pd
import numpy as np
import joblib
import warnings
# Silence every warning for the rest of the session: no category, message or
# module filter is given, so the "ignore" action applies to all of them.
warnings.filterwarnings("ignore")

In [10]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

In [11]:
# Read the flow records from disk, then keep only the rows with no missing value.
flows_raw = pd.read_csv('dataset_sdn.csv')
df = flows_raw.dropna()

In [12]:
# Columns pulled out of the dataset; several of them are discarded again in
# the cells that follow.
features = [
    'src',
    'pktcount',
    'dst',
    'byteperflow',
    'pktperflow',
    'pktrate',
    'tot_kbps',
    'rx_kbps',
    'flows',
    'bytecount',
    'dt',
    'Protocol',
    'dur',
    'tot_dur',
]

In [13]:
# Target vector: the 'label' column.
y = df['label']
# Feature matrix: the selected columns without 'src', 'dst' and 'dt'.
X = df[features].drop(columns=['src', 'dst', 'dt'])

In [14]:
# Remove three more columns from the feature matrix.
X = X.drop(columns=['dur', 'pktrate', 'pktperflow'])

In [15]:
# One-hot encode the 'Protocol' column into 'Protocol_<value>' indicator columns.
X = pd.get_dummies(data=X, columns=['Protocol'])

In [16]:
# Make sure all three protocol indicator columns are present, zero-filling any
# that the encoding step did not create.
expected_protocol_cols = ('Protocol_ICMP', 'Protocol_TCP', 'Protocol_UDP')
absent_cols = [name for name in expected_protocol_cols if name not in X.columns]
for absent in absent_cols:
    X[absent] = 0

In [17]:
# Pin the final set of model inputs and the order they appear in.
model_input_columns = [
    'byteperflow',
    'tot_kbps',
    'rx_kbps',
    'flows',
    'bytecount',
    'tot_dur',
    'Protocol_ICMP',
    'Protocol_TCP',
    'Protocol_UDP',
]
X = X[model_input_columns]

In [18]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [19]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42)

In [20]:
# Hyper-parameters for the forest, gathered in one place.
forest_settings = {
    'n_estimators': 100,     # number of trees
    'max_depth': 10,         # cap on the depth of each tree
    'min_samples_leaf': 3,   # minimum samples required in a leaf
    'random_state': 1,       # fixed seed
    'n_jobs': -1,
}
model = RandomForestClassifier(**forest_settings)
# Kept as the last expression so the fitted estimator is shown as the cell output.
model.fit(X_train, y_train)

RandomForestClassifier(max_depth=10, min_samples_leaf=3, n_jobs=-1,
                       random_state=1)

In [21]:
# Score the held-out split: overall accuracy as a percentage (two decimals),
# followed by the per-class report.
y_pred = model.predict(X_test)
accuracy_pct = round(accuracy_score(y_test, y_pred) * 100, 2)
print(f"✅ Accuracy: {accuracy_pct}%")
print(classification_report(y_test, y_pred))

✅ Accuracy: 96.0%
              precision    recall  f1-score   support

           0       0.99      0.94      0.97     18986
           1       0.92      0.99      0.95     12166

    accuracy                           0.96     31152
   macro avg       0.95      0.97      0.96     31152
weighted avg       0.96      0.96      0.96     31152



In [22]:
# Write both fitted objects to disk with joblib at compression level 3.
artifacts = ((model, 'rf_model.pkl'), (scaler, 'scaler.pkl'))
for fitted_obj, target_path in artifacts:
    joblib.dump(fitted_obj, target_path, compress=3)
print("📦 Model and Scaler saved with compression!")

📦 Model and Scaler saved with compression!


In [2]:
pip show scikit-learn

Name: scikit-learn
Version: 1.0.2
Summary: A set of python modules for machine learning and data mining
Home-page: http://scikit-learn.org
Author: 
Author-email: 
License: new BSD
Location: c:\users\devesh kumar\anaconda3\lib\site-packages
Requires: joblib, scipy, numpy, threadpoolctl
Required-by: scikit-learn-intelex, mlxtend, category-encoders
Note: you may need to restart the kernel to use updated packages.


In [3]:
pip freeze

aiohttp @ file:///C:/ci/aiohttp_1646806572557/work
aiosignal @ file:///tmp/build/80754af9/aiosignal_1637843061372/work
alabaster @ file:///home/ktietz/src/ci/alabaster_1611921544520/work
anaconda-client @ file:///C:/ci/anaconda-client_1635342725944/work
anaconda-navigator==2.1.4
anaconda-project @ file:///tmp/build/80754af9/anaconda-project_1637161053845/work
anyio @ file:///C:/ci/anyio_1644481921011/work/dist
appdirs==1.4.4
argon2-cffi @ file:///opt/conda/conda-bld/argon2-cffi_1645000214183/work
argon2-cffi-bindings @ file:///C:/ci/argon2-cffi-bindings_1644551690056/work
arrow @ file:///opt/conda/conda-bld/arrow_1649166651673/work
asgiref==3.7.2
astroid @ file:///C:/ci/astroid_1628063282661/work
astropy @ file:///C:/ci/astropy_1650634291321/work
asttokens @ file:///opt/conda/conda-bld/asttokens_1646925590279/work
async-timeout @ file:///tmp/build/80754af9/async-timeout_1637851218186/work
atomicwrites==1.4.0
attrs @ file:///opt/conda/conda-bld/attrs_1642510447205/work
Automat @ file://

In [4]:
pip show joblib

Note: you may need to restart the kernel to use updated packages.
Name: joblib
Version: 1.1.0
Summary: Lightweight pipelining with Python functions
Home-page: https://joblib.readthedocs.io
Author: Gael Varoquaux
Author-email: gael.varoquaux@normalesup.org
License: BSD
Location: c:\users\devesh kumar\anaconda3\lib\site-packages
Requires: 
Required-by: scikit-learn, nltk, mlxtend



In [5]:
pip show fastapi

Note: you may need to restart the kernel to use updated packages.




In [6]:
pip show numpy

Name: numpy
Version: 1.21.5
Summary: NumPy is the fundamental package for array computing with Python.
Home-page: https://www.numpy.org
Author: Travis E. Oliphant et al.
Author-email: 
License: BSD
Location: c:\users\devesh kumar\anaconda3\lib\site-packages
Requires: 
Required-by: xgboost, xarray, tifffile, tables, statsmodels, seaborn, scipy, scikit-learn, scikit-image, PyWavelets, pyerfa, patsy, pandas, numexpr, numba, mlxtend, mkl-random, mkl-fft, matplotlib, imageio, imagecodecs, hvplot, holoviews, h5py, gensim, datashape, datashader, daal4py, category-encoders, Bottleneck, bokeh, bkcharts, astropy
Note: you may need to restart the kernel to use updated packages.
