In [None]:
import pandas as pd
# Read Cleaned_SDN_Intrusion.csv (path is relative to the working directory)
# into a DataFrame and preview its first rows.
csv_path = r"Cleaned_SDN_Intrusion.csv"
df = pd.read_csv(csv_path)
df.head()

In [None]:
# Number of rows in the loaded frame
df.shape[0]

In [None]:
# (rows, columns) of the loaded frame
df.shape

In [None]:
# Re-verify that no nulls remain: count NaN entries per column and report
# only the columns that still contain at least one.
missing_values = df.isna().sum()
columns_with_gaps = missing_values[missing_values > 0]
print("missing values:\n", columns_with_gaps)

In [None]:
# Re-verify that no fully identical rows remain: flag every row that repeats
# an earlier one, then count the flags.
is_repeat = df.duplicated()
duplicate_rows = is_repeat.sum()
print(f"Duplicate Rows: {duplicate_rows}")

In [None]:
# Normalise header names: drop surrounding whitespace, then lower-case them
# so later column references do not depend on the CSV's exact spelling.
trimmed_names = df.columns.str.strip()
df.columns = trimmed_names.str.lower()

In [None]:
# Split columns by position: the trailing five columns are used as the
# targets (y), everything before them as the features (X).
# NOTE(review): assumes the CSV keeps its label columns last - confirm.
label_start = -5
X = df.iloc[:, :label_start]
y = df.iloc[:, label_start:]

In [None]:
# Show every (normalised) column name as a plain list
list(df.columns)

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size = 0.2,
    random_state = 42,
)

In [None]:
# Sanity-check the split sizes, printed in the order:
# X_train, X_test, y_train, y_test.
for split in (X_train, X_test, y_train, y_test):
    print(split.shape)

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score

In [None]:
# Adjusting values for RandomForestClassifier
import numpy as np
from sklearn.impute import SimpleImputer

In [None]:
# Map +/-inf to NaN so the mean imputer that follows can fill them
# (SimpleImputer treats NaN, not inf, as the missing-value marker).
# Reassign instead of using inplace=True: X_train / X_test are derived from a
# slice of df, and mutating them in place may trigger pandas'
# SettingWithCopyWarning.
infinite_values = [np.inf, -np.inf]
X_train = X_train.replace(infinite_values, np.nan)
X_test = X_test.replace(infinite_values, np.nan)

In [None]:
# Fill remaining NaNs with per-column means.
# The means are learned from the training split only and then reused for the
# test split: refitting on X_test would leak held-out statistics into the
# preprocessing and give the two splits different fill values.
# Note: both X_train and X_test become NumPy arrays after this cell.
imputer = SimpleImputer(strategy = 'mean')
X_train = imputer.fit_transform(X_train)
X_test = imputer.transform(X_test)

In [None]:
# Random forest: 50 trees, depth capped at 10, trained on all CPU cores.
# random_state is fixed (same seed as the train/test split) so the fitted
# forest, and every score derived from it, is reproducible across runs.
model1 = RandomForestClassifier(
    n_estimators = 50,
    max_depth = 10,
    n_jobs = -1,
    random_state = 42,
)
model1.fit(X_train, y_train)

In [None]:
# 5-fold cross-validation on the training split; each fold fits a fresh clone
# of model1, so this yields five scores.
train_scores = cross_val_score(model1, X_train, y_train, cv = 5)
# Score the already-fitted model1 on the held-out split (a single value).
# Cross-validating on X_test would retrain clones on test data and never
# measure the model that was actually fitted on the training split.
test_scores = model1.score(X_test, y_test)

print(f"Training Dataset Score: {train_scores}")
print(f"Testing Dataset Score: {test_scores}")

In [None]:
# Ordinary least-squares baseline fitted on the training split.
# NOTE(review): LinearRegression is a regressor, so its score is R^2 rather
# than classification accuracy - confirm this comparison is intended.
model2 = LinearRegression().fit(X_train, y_train)
model2

In [None]:
# 5-fold cross-validation on the training split; each fold fits a fresh clone
# of model2, so this yields five scores (R^2 for LinearRegression).
train_scores = cross_val_score(model2, X_train, y_train, cv = 5)
# Score the already-fitted model2 on the held-out split (a single value).
# Cross-validating on X_test would retrain clones on test data and never
# measure the model that was actually fitted on the training split.
test_scores = model2.score(X_test, y_test)

print(f"Training Dataset Score: {train_scores}")
print(f"Testing Dataset Score: {test_scores}")

In [None]:
from xgboost import XGBClassifier
from sklearn.multiclass import OneVsRestClassifier

In [None]:
# One-vs-rest wrapper around an XGBoost classifier (100 trees, depth capped
# at 10, all CPU cores); the wrapper trains one copy of the base estimator
# per class/label.
xgb_base = XGBClassifier(n_estimators = 100, max_depth = 10, n_jobs = -1)
model3 = OneVsRestClassifier(xgb_base)

In [None]:
# Fit the one-vs-rest XGBoost model on the training split
model3.fit(X_train,y_train)

In [None]:
# 5-fold cross-validation on the training split; each fold fits a fresh clone
# of model3, so this yields five scores.
train_scores = cross_val_score(model3, X_train, y_train, cv = 5)
# Score the already-fitted model3 on the held-out split (a single value).
# Cross-validating on X_test would retrain clones on test data and never
# measure the model that was actually fitted on the training split.
test_scores = model3.score(X_test, y_test)

print(f"Training Dataset Score: {train_scores}")
print(f"Testing Dataset Score: {test_scores}")

In [None]:
# Single decision tree with depth capped at 10.
# random_state is fixed (same seed as the train/test split) because the tree
# builder breaks ties between equally good splits at random; without a seed
# the fitted tree can differ between runs.
model4 = DecisionTreeClassifier(max_depth = 10, random_state = 42)
model4.fit(X_train, y_train)

In [None]:
# 5-fold cross-validation on the training split; each fold fits a fresh clone
# of model4, so this yields five scores.
train_scores = cross_val_score(model4, X_train, y_train, cv = 5)
# Score the already-fitted model4 on the held-out split (a single value).
# Cross-validating on X_test would retrain clones on test data and never
# measure the model that was actually fitted on the training split.
test_scores = model4.score(X_test, y_test)

print(f"Training Dataset Score: {train_scores}")
print(f"Testing Dataset Score: {test_scores}")

In [None]:
import joblib 

# Persist the fitted one-vs-rest XGBoost model (model3) together with the
# held-out split, presumably so they can be reloaded later without retraining.
# X_test is saved in its imputed (NumPy array) form, not as the raw frame.
joblib.dump(model3, 'xgb_model.pkl')
joblib.dump(X_test, 'X_test.pkl')
joblib.dump(y_test, 'y_test.pkl')