# Classificazione multiclasse con tecnica ensemble Bagging
In questo notebook viene utilizzata una tecnica di ensemble (Bagging) per migliorare le prestazioni del modello K-Nearest Neighbors (KNN).
- Viene definito un classificatore KNN con k=9
- n_estimators=10: l'ensemble è composto da 10 modelli KNN
- max_features=0.7: ogni modello vede solo il 70% delle feature

Sono stati testati diversi modelli facendo variare questi parametri; i risultati ottenuti sono in ../results/classification_category.


In [1]:
import os
from sklearn.ensemble import BaggingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score,classification_report, confusion_matrix
import pandas as pd
import joblib
import sys
sys.path.append('../Scripts')
from utility import evaluate_and_save_model_multiclass


# Read the pre-split train/test CSVs for the category-classification task.
X_train = pd.read_csv("../data/splitted_category/X_train.csv")
X_test = pd.read_csv("../data/splitted_category/X_test.csv")
y_train_frame = pd.read_csv("../data/splitted_category/y_train.csv")
y_test_frame = pd.read_csv("../data/splitted_category/y_test.csv")

# Flatten the label frames into 1-D arrays before handing them to the
# estimator (presumably each label CSV holds a single column -- confirm).
y_train, y_test = y_train_frame.values.ravel(), y_test_frame.values.ravel()

# Standardize the features. The scaler is fitted on the training split only
# and then applied unchanged to the test split, so no test-set statistics
# leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Persist the fitted scaler so the same transformation can be re-applied
# later. joblib.dump does not create missing parent directories, so make sure
# the target folder exists first (e.g. on a fresh checkout).
scaler_path = "../models/scaler_knn_category.joblib"
os.makedirs(os.path.dirname(scaler_path), exist_ok=True)
joblib.dump(scaler, scaler_path)


# Hyperparameters of this run:
#   k            -- number of neighbours used by every base KNN model
#   max_features -- fraction of the feature columns each base model is given
# (`max_features` was previously called `max`, which shadowed the builtin
# max() for the rest of the session.)
k = 9
max_features = 0.7

# Base learner: KNN whose neighbours are weighted by distance.
knn = KNeighborsClassifier(n_neighbors=k, weights="distance")

# Bagging ensemble of 10 KNN models, each restricted to a subset of features.
bagging_model = BaggingClassifier(
    estimator=knn,
    n_estimators=10,
    max_features=max_features,
    random_state=42,  # fixed seed so the run is repeatable
    n_jobs=-1,  # -1 asks joblib to use all available processors
)
bagging_model.fit(X_train_scaled, y_train)

# NOTE(review): with weights="distance" a training sample that is present in
# a base model's bag is its own zero-distance neighbour, so the training
# predictions are likely optimistic -- judge the model on the test metrics.
y_pred_train = bagging_model.predict(X_train_scaled)
y_pred_test = bagging_model.predict(X_test_scaled)

# Project helper imported from `utility`; presumably it computes the
# multiclass metrics and stores the results and the model at the given
# paths -- confirm against its definition.
evaluate_and_save_model_multiclass(
    bagging_model,
    "bagging model",
    y_train,
    y_pred_train,
    y_test,
    y_pred_test,
    "../results/classification_category/knn",
    "../models/bagging_model_category.joblib",
    # The "max" key is kept unchanged so the recorded parameters keep the
    # same schema as previously saved results.
    {"k": k, "max": max_features},
)
