## **1. Setup e import**

In [1]:
from __future__ import annotations
import time, random, os, sys
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from keras.datasets import cifar10
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# --- Experiment settings: edit these to tune the run ------------------------
RANDOM_STATE = 42                # global random seed
SUBSET_SIZE: int | None = None   # e.g. 20_000 to shorten the run; None keeps the full training set
N_JOBS = -1                      # use every available core
CV_FOLDS = 3                     # 3-fold cross-validation
PCA_VARIANCE = 0.95              # fraction of variance PCA must retain

# Seed both global generators (stdlib and NumPy) with the same value.
for _seed_fn in (random.seed, np.random.seed):
    _seed_fn(RANDOM_STATE)

# The scikit-learn submodules imported above register the top-level package
# in sys.modules, which is where the version string is read from.
sklearn_module = sys.modules["sklearn"]
print("Setup completato. scikit‑learn", sklearn_module.__version__)

Setup completato. scikit‑learn 1.6.1


## **2. Caricamento del dataset**

In [None]:
print("Scarico CIFAR‑10 …")
# Fetch the train/test image and label arrays (shapes are printed below).
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
# Collapse the label arrays to one dimension.
y_train = y_train.flatten()
y_test = y_test.flatten()
# Human-readable class names, indexed by integer label.
class_names = [
    "airplane", "automobile", "bird", "cat", "deer",
    "dog", "frog", "horse", "ship", "truck",
]
print(f"Train: {x_train.shape}, Test: {x_test.shape}")

# Scale pixel values by 1/255 and flatten every image into one feature vector.
X_train = x_train.astype("float32") / 255.0
X_test  = x_test.astype("float32")  / 255.0
X_train = X_train.reshape(len(X_train), -1)
X_test  = X_test.reshape(len(X_test),  -1)

# Optional sub-sampling of the training set.
if SUBSET_SIZE is not None and SUBSET_SIZE < len(X_train):
    # Use a dedicated generator seeded from RANDOM_STATE instead of the global
    # NumPy RNG: the same subset is drawn every time this cell runs, no matter
    # how much global random state earlier executions consumed.
    # NOTE: the selected indices differ from those the legacy global RNG
    # would have produced for the same seed.
    subset_rng = np.random.default_rng(RANDOM_STATE)
    idx = subset_rng.choice(len(X_train), SUBSET_SIZE, replace=False)
    X_train, y_train = X_train[idx], y_train[idx]
    print(f"→ Training set ridotto a {SUBSET_SIZE} campioni")

Scarico CIFAR‑10 …
Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
170498071/170498071 ━━━━━━━━━━━━━━━━━━━━ 3s 0us/step
Train: (50000, 32, 32, 3), Test: (10000, 32, 32, 3)


## **3. Definizione dei modelli e delle griglie di ricerca**

In [None]:
def _build_pipeline(clf, scale=True):
    """Assemble the ``[scaler ->] pca -> clf`` pipeline shared by all models.

    Parameters
    ----------
    clf : estimator
        Final classifier; registered under the step name ``"clf"`` so that
        grid keys of the form ``clf__<param>`` resolve to it.
    scale : bool, default True
        When true, a ``StandardScaler`` step named ``"scaler"`` is placed
        before the PCA step.

    Returns
    -------
    Pipeline
        An unfitted pipeline with steps ``"scaler"`` (optional), ``"pca"``
        and ``"clf"``.
    """
    steps = []
    if scale:
        steps.append(("scaler", StandardScaler()))
    # A float n_components asks PCA to keep as many components as needed to
    # explain that fraction of the variance.
    steps.append(("pca", PCA(n_components=PCA_VARIANCE, svd_solver="full")))
    steps.append(("clf", clf))
    return Pipeline(steps)


# One entry per model: the pipeline to tune and the hyper-parameter grid to
# search over.
models = {
    "LogisticRegression": {
        # `multi_class` is deprecated since scikit-learn 1.5: for problems with
        # three or more classes the multinomial loss is already what the default
        # solver uses, so omitting it keeps the model and drops the FutureWarning.
        # `n_jobs` is omitted as well: on LogisticRegression it only
        # parallelises one-vs-rest fits, so it had no effect here.
        "pipeline": _build_pipeline(
            LogisticRegression(max_iter=1000, random_state=RANDOM_STATE)
        ),
        "param_grid": {"clf__C": [0.1, 1, 10]},
    },
    "kNN": {
        "pipeline": _build_pipeline(KNeighborsClassifier()),
        "param_grid": {
            "clf__n_neighbors": [3, 5, 7],
            "clf__weights": ["uniform", "distance"],
        },
    },
    "SVM": {
        "pipeline": _build_pipeline(SVC()),
        "param_grid": {
            "clf__kernel": ["linear", "rbf"],
            "clf__C": [1, 10],
            "clf__gamma": ["scale"],
        },
    },
    "DecisionTree": {
        # NOTE(review): this is the only pipeline without a scaler, so its PCA
        # runs on unstandardised features, unlike the other three models.
        # Confirm this is intentional.
        "pipeline": _build_pipeline(
            DecisionTreeClassifier(random_state=RANDOM_STATE), scale=False
        ),
        "param_grid": {
            "clf__max_depth": [None, 20, 40],
            "clf__min_samples_leaf": [1, 5, 10],
        },
    },
}
print("Modelli pronti:", list(models.keys()))


## **4. Grid Search + Cross‑Validation**

In [None]:
# Per-model outcome: fitted search object, test accuracy and elapsed seconds.
results = {}
for name, cfg in models.items():
    print(f"\n🔍 Grid Search per {name}")
    grid = GridSearchCV(cfg["pipeline"], cfg["param_grid"], cv=CV_FOLDS, n_jobs=N_JOBS, verbose=1)
    # perf_counter() is a monotonic, high-resolution clock, so the measured
    # duration cannot be distorted by system clock adjustments the way
    # time.time() can. The interval covers the entire fit() call.
    start = time.perf_counter()
    grid.fit(X_train, y_train)
    t = time.perf_counter() - start
    print("Migliori hyper‑parametri:", grid.best_params_)
    print(f"Score medio (CV): {grid.best_score_:.4f}  •  Tempo: {t:.1f}s")

    # Evaluate the fitted search object on the held-out test set.
    y_pred = grid.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    print(f"Accuracy test: {acc:.4f}")
    results[name] = {
        "model": grid,
        "test_accuracy": acc,
        "train_time_sec": t,
    }



🔍 Grid Search per LogisticRegression
Fitting 3 folds for each of 3 candidates, totalling 9 fits


KeyboardInterrupt: 

## **5. Stampa risultati e matrice di confusione**

In [None]:
# Integer label ids in the same order as class_names. Passing them explicitly
# keeps report rows and matrix rows/columns aligned with the names even if
# some class is missing from both y_test and the predictions.
label_ids = list(range(len(class_names)))

for name, res in results.items():
    print(f"\n### {name}")
    y_pred = res["model"].predict(X_test)
    print(classification_report(y_test, y_pred, labels=label_ids, target_names=class_names))
    cm = confusion_matrix(y_test, y_pred, labels=label_ids)
    plt.figure(figsize=(7, 6))
    # Annotate every cell with its count and label both axes with the class
    # names so the figure can be read on its own.
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
                xticklabels=class_names, yticklabels=class_names)
    plt.title(f"Confusion Matrix – {name}")
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.tight_layout()
    plt.show()


## **6. Riepilogo finale delle prestazioni**

In [None]:
print("\n**Riepilogo final accuracy sul test‑set**")
# One line per model, in the order the models were evaluated: the name padded
# to 15 characters, followed by the test accuracy with four decimals.
for model_name in results:
    test_acc = results[model_name]["test_accuracy"]
    print(f"{model_name:15} → {test_acc:.4f}")