# 📥 Step 1: Import Libraries & Load Dataset

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve, ConfusionMatrixDisplay
from sklearn.feature_selection import SelectKBest, f_classif
from xgboost import XGBClassifier
from imblearn.over_sampling import SMOTE
import warnings
# Suppress every warning category for the remainder of the session. This also
# hides any deprecation or runtime warnings raised by the libraries imported above.
warnings.filterwarnings('ignore')

# Read the dataset CSV into a DataFrame and preview its first rows.
# The path is relative to the current working directory.
dataset_path = "data/IoTID20.csv"  # Update with correct filename
df = pd.read_csv(dataset_path)
df.head()


## 🔍 Step 2: Data Inspection

In [None]:
# Structural overview of the frame (columns, dtypes, non-null counts).
# info() prints directly to stdout.
df.info()

# Per-column count of missing values. Printed explicitly: a bare expression in
# the middle of a cell is evaluated and then discarded, so it was never shown.
print(df.isnull().sum())

# Summary statistics (numeric columns by default). Left as the final
# expression so a notebook renders it as the cell's output.
df.describe()


## ⚙️ Step 3: Encoding & Feature Preparation

In [None]:
# Drop every row that contains at least one missing value.
df = df.dropna()

# Encode the target column. `le` is reserved for the target so that
# le.classes_ / le.inverse_transform can still map predictions back to the
# original label names after this cell has run.
le = LabelEncoder()
df['Label'] = le.fit_transform(df['Label'])

# Encode the remaining string-typed feature columns, each with its own
# encoder. Re-fitting `le` on every feature column (as before) overwrote the
# target's class mapping with that of the last feature column processed.
feature_encoders = {}
for col in df.select_dtypes(include='object').columns:
    # Guard kept so the target is never re-encoded.
    if col != 'Label':
        encoder = LabelEncoder()
        df[col] = encoder.fit_transform(df[col])
        feature_encoders[col] = encoder

# Feature matrix: every column except the target. Target vector: encoded labels.
X = df.drop(columns=["Label"])
y = df["Label"]


## 📊 Step 4: Feature Selection

In [None]:
# Score every feature against the target with the ANOVA F-test (f_classif)
# and keep the 15 highest-scoring columns.
# NOTE(review): the selector is fitted on the full dataset, before the
# train/test split performed in a later step, so test rows influence which
# features are chosen. Consider fitting it on the training split only.
selector = SelectKBest(score_func=f_classif, k=15)
selector.fit(X, y)
X_selected = selector.transform(X)

# Boolean mask over X's columns -> names of the retained features.
keep_mask = selector.get_support()
selected_features = X.columns[keep_mask]
print("Selected Features:", selected_features.tolist())


## 🔁 Step 5: Balance Dataset with SMOTE & Train/Test Split

In [None]:
# Split BEFORE oversampling. Running SMOTE on the full dataset and splitting
# afterwards puts synthetic points (interpolated from rows that end up in the
# training set) into the test set, which leaks information and inflates every
# reported metric. The test set must contain only real, untouched samples.
# stratify=y keeps the original class proportions in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X[selected_features], y, test_size=0.3, random_state=42, stratify=y
)

# Oversample the minority class(es) in the training partition only.
# X_resampled / y_resampled now hold the balanced *training* data.
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_train, y_train)
X_train, y_train = X_resampled, y_resampled


## 🤖 Step 6: Train & Evaluate XGBoost Classifier

In [None]:
# Fit an XGBoost classifier on the training split and predict hard labels
# for the held-out test split.
model = XGBClassifier(n_estimators=100, learning_rate=0.1, random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Per-class precision / recall / F1 on the test split.
report = classification_report(y_test, y_pred)
print("Classification Report:\n", report)

# ROC AUC from the predicted probability in column index 1.
# NOTE(review): this assumes a binary target whose positive class is encoded
# as 1 -- confirm against the label encoding.
positive_scores = model.predict_proba(X_test)[:, 1]
print("ROC AUC Score:", roc_auc_score(y_test, positive_scores))


## 📈 Step 7: Visualize ROC Curve & Confusion Matrix

In [None]:
# --- ROC curve -------------------------------------------------------------
# Predicted probability in column index 1, used as the ranking score.
# NOTE(review): assumes a binary target -- confirm against the label encoding.
y_prob = model.predict_proba(X_test)[:, 1]
fpr, tpr, _ = roc_curve(y_test, y_prob)
auc_value = roc_auc_score(y_test, y_prob)  # computed once, reused in the legend

plt.figure(figsize=(8, 5))
plt.plot(fpr, tpr, label=f'ROC Curve (AUC = {auc_value:.2f})')
plt.plot([0, 1], [0, 1], 'k--')  # diagonal reference line
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve")
plt.legend()
plt.grid(True)
plt.show()

# --- Confusion matrix ------------------------------------------------------
# from_estimator() already draws the matrix on a new figure. The previously
# chained .plot() call drew it a second time on another figure with the
# default colormap, leaving an untitled duplicate and discarding cmap='Blues'
# on the titled one.
ConfusionMatrixDisplay.from_estimator(model, X_test, y_test, cmap='Blues')
plt.title("Confusion Matrix")
plt.grid(False)
plt.show()
