In [51]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import seaborn as sns
# The misspelled alias below is kept in case any cell outside this view still uses it.
import seaborn as snss
from imblearn.under_sampling import RandomUnderSampler

In [None]:
# Read ./dataset/dataset_sdn.csv into `df`, then show the frame as the cell output.
df = pd.read_csv(filepath_or_buffer='./dataset/dataset_sdn.csv')
df

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# How many rows carry each value of the `label` column.
df.label.value_counts()

In [None]:
# Summary statistics of `df`, shown as the cell output.
df.describe()

In [None]:
# Pairwise correlation of the columns that remain once the address columns
# (`src`, `dst`) and `Protocol` are dropped, drawn as a heatmap.
# `sns` is the seaborn alias bound in the imports cell; the trailing `;`
# keeps the Axes repr out of the cell output.
corr = df.drop(columns=['src', 'dst', 'Protocol']).corr()
sns.heatmap(corr);

# Data Preprocessing

In [18]:
# Discard every row that has at least one missing value.
df = df.dropna(axis=0, how='any')

In [19]:
def ip_to_int(ip):
    """Convert a dotted-quad IPv4 string into its 32-bit integer value.

    Args:
        ip: Address text such as ``'10.0.0.1'``.

    Returns:
        The integer obtained by treating the four octets as big-endian bytes
        (the first octet is the most significant).

    Raises:
        ValueError: If the text does not have exactly four parts, a part is
            not an integer, or a part lies outside 0-255.
    """
    octets = [int(part) for part in ip.split('.')]
    if len(octets) != 4 or any(not 0 <= octet <= 255 for octet in octets):
        # Fail loudly instead of silently encoding a malformed address.
        raise ValueError(f"not a valid IPv4 address: {ip!r}")
    value = 0
    for octet in octets:
        value = (value << 8) | octet
    return value


# One-hot encode the protocol column.
df = pd.get_dummies(df, columns=['Protocol'])

# Replace the textual source/destination addresses with their integer form.
for address_col in ('src', 'dst'):
    df[address_col] = df[address_col].apply(ip_to_int)

# Standardise the listed numeric columns in place.
# NOTE(review): the scaler is fitted on the whole frame, before the train/test
# split performed in a later cell, so held-out rows influence the scaling
# statistics. Consider fitting on the training split only.
scaled_cols = ['bytecount', 'pktcount', 'dur', 'dur_nsec', 'tot_dur', 'tx_bytes', 'rx_bytes']
scaler = StandardScaler()
df[scaled_cols] = scaler.fit_transform(df[scaled_cols])

In [21]:
# Target column and the remaining feature columns.
y = df['label']
x = df.drop(columns='label')

# Randomly under-sample with a fixed seed; the resampled features and target
# are what the later split consumes.
rus = RandomUnderSampler(random_state=42)
x_res, y_res = rus.fit_resample(x, y)

In [22]:
# Hold out 20% of the resampled rows for evaluation; the seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x_res,
    y_res,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Row counts of the training and test partitions, one per line.
print(len(x_train), len(x_test), sep='\n')

# Model Training

## KNN

In [52]:
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# Fit a k-nearest-neighbours classifier (k = 5) on the training partition.
knn = KNeighborsClassifier(n_neighbors=5).fit(x_train, y_train)

# Score it on the held-out partition and report the accuracy to two decimals.
y_pred = knn.predict(x_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"KNN Accuracy: {accuracy:.2f}")

## Decision Tree

In [None]:
from sklearn.tree import DecisionTreeClassifier

In [36]:
# Fit a decision tree on the training partition and predict the held-out rows.
# The seed matches the one used for the split, the under-sampler and the random
# forest, so re-running the notebook yields the same tree and the same metrics.
model = DecisionTreeClassifier(random_state=42)
model.fit(x_train, y_train)
pred = model.predict(x_test)

In [None]:
# Text classification report comparing `y_test` with the current `pred`.
report = classification_report(y_test, pred)
print(report)

In [None]:
# Confusion matrix of `y_test` against the current `pred`.
matrix = confusion_matrix(y_test, pred)
print(matrix)

In [None]:
# Score the decision tree's own predictions (`pred`). `y_pred` holds the KNN
# predictions from the earlier cell and must not be used here.
accuracy = accuracy_score(y_test, pred)
print(f"Decision Tree Accuracy: {accuracy:.2f}")

## Random Forest

In [59]:
from sklearn.ensemble import RandomForestClassifier

In [60]:
# Train a seeded 100-tree random forest on the training partition, then
# predict the held-out rows. `model` and `pred` are rebound to this classifier.
model = RandomForestClassifier(random_state=42, n_estimators=100).fit(x_train, y_train)
pred = model.predict(x_test)

In [None]:
# Text classification report comparing `y_test` with the current `pred`.
report = classification_report(y_test, pred)
print(report)

In [None]:
# Confusion matrix of `y_test` against the current `pred`.
matrix = confusion_matrix(y_test, pred)
print(matrix)

In [None]:
# Unrounded accuracy of the current `pred` on the held-out rows.
raw_accuracy = accuracy_score(y_test, pred)
print(raw_accuracy)

In [None]:
# Score the random forest's own predictions (`pred`). `y_pred` holds the KNN
# predictions from the earlier cell and must not be used here.
accuracy = accuracy_score(y_test, pred)
print(f"Random Forest Accuracy: {accuracy:.2f}")

# Save the Model

In [65]:
import pickle

# Persist whatever `model` currently refers to as a pickle file.
# NOTE(review): only the classifier is written; the fitted `scaler` from the
# preprocessing cell is not saved alongside it.
model_path = './dataset/random_forest_model.pkl'
with open(model_path, mode='wb') as out_file:
    pickle.dump(model, out_file)


In [None]:
# Show the first row of the preprocessed frame as a column -> value mapping.
first_row = df.iloc[0]
row_dict = first_row.to_dict()
print(row_dict)