In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import xgboost as xgb
from rich.console import Console
from rich.table import Table
from rich.panel import Panel

In [None]:
def preprocess_data(df):
    """Split a raw frame into a feature matrix and an integer-encoded target.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'category', 'is_benign' and 'attack'.
        The frame itself is not modified.

    Returns
    -------
    X : pandas.DataFrame
        ``df`` without the 'is_benign', 'attack' and 'category' columns.
    y : numpy.ndarray
        The 'category' column encoded by ``LabelEncoder``.
    le : LabelEncoder
        The fitted encoder; ``le.classes_`` maps codes back to the original
        category values.

    Raises
    ------
    KeyError
        If any of the three expected columns is missing.
    """
    # Encode categorical target
    le = LabelEncoder()
    y = le.fit_transform(df['category'])
    # drop() returns a new frame, so no defensive copy of the input is needed
    # (the previous full df.copy() only doubled peak memory).
    X = df.drop(columns=['is_benign', 'attack', 'category'])
    return X, y, le

def evaluate_model(y_true, y_pred, le, model_name="Model"):
    """Print a Rich report of classification metrics and return the metrics.

    The report is a panel with two side-by-side tables: the scalar scores
    followed by the share of predictions per class, and the confusion matrix.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels, encoded as the integer codes produced by ``le``.
    y_pred : array-like
        Predicted labels, using the same integer encoding.
    le : LabelEncoder
        Fitted encoder; ``le.classes_`` supplies the display name of each code.
    model_name : str, optional
        Text used in the panel title.

    Returns
    -------
    dict
        "Accuracy", "Precision", "Recall" and "F1 Score" (the last three are
        weighted averages), plus "Class Distribution": a dict mapping every
        class name to the percentage of *predictions* assigned to it.
    """
    console = Console()

    # Arrays guarantee that `y_pred == i` below is an element-wise comparison
    # even when the caller passes plain Python lists.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    classes = le.classes_
    n_classes = len(classes)

    metrics = {
        "Accuracy": accuracy_score(y_true, y_pred),
        "Precision": precision_score(y_true, y_pred, average='weighted'),
        "Recall": recall_score(y_true, y_pred, average='weighted'),
        "F1 Score": f1_score(y_true, y_pred, average='weighted')
    }

    table = Table(show_header=True, header_style="bold white", box=None)
    table.add_column("Metric", style="cyan")
    table.add_column("Score", justify="right")

    for metric, value in metrics.items():
        color = "green" if value > 0.8 else "yellow" if value > 0.6 else "red"
        table.add_row(metric, f"[{color}]{value:.5f}[/{color}]")

    # Percentage of predictions falling into each class, computed once and
    # reused for both the display and the returned dict.
    total = len(y_true)
    class_distribution = {
        cls: np.sum(y_pred == i) / total * 100
        for i, cls in enumerate(classes)
    }

    # Render the distribution under its heading in the metrics table
    # (previously the heading was printed with no rows beneath it).
    table.add_row("", "")
    table.add_row("Class Distribution", "")
    for cls, pct in class_distribution.items():
        color = "green" if pct > 30 else "yellow" if pct > 10 else "red"
        table.add_row(f"  {cls}", f"[{color}]{pct:.2f}%[/{color}]")

    metrics["Class Distribution"] = class_distribution

    # Pin the label set so the matrix is always n_classes x n_classes and its
    # rows/columns line up with le.classes_, even if a class is absent from
    # this particular split.
    cm = confusion_matrix(y_true, y_pred, labels=np.arange(n_classes))
    cm_table = Table(show_header=True, header_style="bold white", box=None)
    # sklearn's confusion_matrix puts true labels on rows and predictions on
    # columns, so the corner header reads row-axis \ column-axis.
    cm_table.add_column("True \\ Pred", style="cyan")

    for cls in classes:
        cm_table.add_column(str(cls), justify="right")

    for i, cls in enumerate(classes):
        cm_table.add_row(str(cls), *(str(count) for count in cm[i]))

    combined_table = Table.grid(expand=True)
    combined_table.add_column(justify="center", ratio=1)
    combined_table.add_column(justify="center", ratio=1)
    combined_table.add_row(table, cm_table)

    panel = Panel(
        combined_table,
        title=f"[bold]{model_name} - Performance Metrics and Confusion Matrix[/bold]",
        border_style="white"
    )

    console.print("\n", panel, "\n")
    return metrics

def train_xgboost(X, y, le):
    """Tune an XGBoost multiclass classifier by grid search and report on it.

    The data is split 70/30 (stratified on ``y``); a 5-fold cross-validated
    grid search scored on accuracy runs on the training part, and the best
    estimator is evaluated on the held-out part via ``evaluate_model``.

    Parameters
    ----------
    X : array-like or DataFrame
        Feature matrix.
    y : array-like
        Integer-encoded class labels.
    le : LabelEncoder
        Fitted encoder, forwarded to ``evaluate_model`` for class names.

    Returns
    -------
    xgb.XGBClassifier
        The best estimator found by the grid search.
    """
    # Hyperparameter values explored exhaustively by the grid search.
    search_space = {
        'max_depth': [3, 5, 7],
        'min_child_weight': [1, 3, 5],
        'gamma': [0, 0.1, 0.2],
        'subsample': [0.8, 0.9],
        'colsample_bytree': [0.8, 0.9],
    }

    # Stratified hold-out split with a fixed seed.
    splits = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)
    X_train, X_test, y_train, y_test = splits

    n_classes = len(np.unique(y))
    base_estimator = xgb.XGBClassifier(
        objective='multi:softprob',
        num_class=n_classes,
        random_state=42,
        n_estimators=100,
        learning_rate=0.1,
    )

    search = GridSearchCV(
        estimator=base_estimator,
        param_grid=search_space,
        cv=5,
        scoring='accuracy',
        n_jobs=-1,
        verbose=1,
    )
    search.fit(X_train, y_train)

    # Report hold-out performance of the winning configuration.
    best_model = search.best_estimator_
    holdout_pred = best_model.predict(X_test)
    evaluate_model(y_test, holdout_pred, le, "XGBoost Multiclass")

    return best_model

In [None]:
# Usage: read the training CSV, fit the tuned classifier, save it to disk.
DATA_PATH = '/kaggle/input/dataset/train_sel_hclust.csv'
MODEL_PATH = 'xgboost_multiclass_model.json'

df = pd.read_csv(DATA_PATH)
X, y, le = preprocess_data(df)
best_model = train_xgboost(X, y, le)
best_model.save_model(MODEL_PATH)