# SVM — Kernels × Datasets (com prints e comentários)

Este notebook compara **SVC (C)** e **NuSVC (nu)** com três *kernels* (`linear`, `poly` grau 2, `rbf`)
em **três datasets**: linearmente separável, círculos concêntricos (quadrático) e moons (RBF).
Inclui comentários passo a passo, **prints** das métricas logo após cada avaliação,
e gráficos de **fronteira de decisão** e **matriz de confusão**.


In [None]:

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# 1) Importação das bibliotecas do scikit-learn
from sklearn.datasets import make_classification, make_circles, make_moons
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC, NuSVC
from sklearn.metrics import accuracy_score, balanced_accuracy_score, confusion_matrix, ConfusionMatrixDisplay

# 2) Geração dos datasets
def build_linear(n=600, random_state=7):
    """Generate the two-feature dataset used for the linear-kernel scenario.

    Both features are informative (no redundant ones), each class has a single
    cluster, and the generator is called with ``class_sep=2.0`` and
    ``flip_y=0.01``.

    Args:
        n: Number of samples requested from ``make_classification``.
        random_state: Seed forwarded to ``make_classification``.

    Returns:
        The ``(X, y)`` pair produced by ``make_classification``.
    """
    generator_options = dict(
        n_samples=n,
        n_features=2,
        n_informative=2,
        n_redundant=0,
        n_clusters_per_class=1,
        class_sep=2.0,
        flip_y=0.01,
        random_state=random_state,
    )
    features, labels = make_classification(**generator_options)
    return features, labels

def build_poly(n=600, random_state=7):
    """Generate the concentric-circles dataset used for the polynomial scenario.

    Args:
        n: Number of samples requested from ``make_circles``.
        random_state: Seed forwarded to ``make_circles``.

    Returns:
        The ``(X, y)`` pair produced by ``make_circles`` with ``factor=0.45``
        and ``noise=0.08``.
    """
    features, labels = make_circles(
        n_samples=n,
        noise=0.08,
        factor=0.45,
        random_state=random_state,
    )
    return features, labels

def build_rbf(n=600, random_state=7):
    """Generate the two-moons dataset used for the RBF scenario.

    Args:
        n: Number of samples requested from ``make_moons``.
        random_state: Seed forwarded to ``make_moons``.

    Returns:
        The ``(X, y)`` pair produced by ``make_moons`` with ``noise=0.15``.
    """
    moons = make_moons(n_samples=n, noise=0.15, random_state=random_state)
    features, labels = moons
    return features, labels

# Map each dataset label to its generated (X, y) pair. The builders are called
# in the listed order, which is also the order the datasets are processed in.
datasets = {
    label: build()
    for label, build in (
        ("linear", build_linear),
        ("polynomial (circles)", build_poly),
        ("rbf (moons)", build_rbf),
    )
}

# 3) Split treino/teste + padronização
def split_scale(X, y, test_size=0.3, seed=37):
    """Split the data into stratified train/test parts and standardize features.

    The scaler is fitted on the training features only and then applied to
    both parts, so no statistics from the test part are used for scaling.

    Args:
        X: Feature matrix.
        y: Labels; also used as the stratification target.
        test_size: Fraction of samples held out for testing.
        seed: Random state for the split.

    Returns:
        A tuple ``(X_train_scaled, X_test_scaled, y_train, y_test)``.
    """
    train_X, test_X, train_y, test_y = train_test_split(
        X,
        y,
        test_size=test_size,
        stratify=y,
        random_state=seed,
    )
    scaler = StandardScaler().fit(train_X)
    return scaler.transform(train_X), scaler.transform(test_X), train_y, test_y

# 4) Avaliação SVC (C) e NuSVC (nu)
def eval_models(X_train, X_test, y_train, y_test, C=1.0, nu=0.3, degree=2, gamma="scale"):
    """Fit and score SVC and NuSVC with linear, polynomial and RBF kernels.

    Six classifiers are trained in this order: SVC (linear, poly, rbf), then
    NuSVC (linear, poly, rbf). Each is fitted on the training data and scored
    on the test data.

    Args:
        X_train: Training features.
        X_test: Test features.
        y_train: Training labels.
        y_test: Test labels.
        C: Value of ``C`` passed to every ``SVC``.
        nu: Value of ``nu`` passed to every ``NuSVC``.
        degree: Degree passed to the ``poly`` kernel configurations.
        gamma: ``gamma`` passed to the ``poly`` and ``rbf`` configurations.

    Returns:
        A pair ``(metrics, fitted)``: ``metrics`` is a DataFrame with one row
        per configuration (hyperparameters not used by a configuration are
        ``None``), and ``fitted`` maps ``(model, kernel)`` to a tuple
        ``(classifier, confusion_matrix, test_predictions)``.
    """
    # Estimator families with the regularisation keyword each one receives.
    families = (
        ("SVC", SVC, {"C": C}),
        ("NuSVC", NuSVC, {"nu": nu}),
    )
    # Kernel-specific keywords; the linear kernel receives neither degree nor gamma.
    kernel_extras = {
        "linear": {},
        "poly": {"degree": degree, "gamma": gamma},
        "rbf": {"gamma": gamma},
    }

    rows = []
    fitted = {}
    for family, estimator_cls, regularisation in families:
        for kernel_name, extras in kernel_extras.items():
            settings = {"kernel": kernel_name, **regularisation, **extras}
            svm = estimator_cls(decision_function_shape="ovr", random_state=42, **settings)
            svm.fit(X_train, y_train)
            predicted = svm.predict(X_test)
            rows.append(
                {
                    "model": family,
                    "kernel": kernel_name,
                    "C": settings.get("C"),
                    "nu": settings.get("nu"),
                    "degree": settings.get("degree"),
                    "gamma": settings.get("gamma"),
                    "accuracy": accuracy_score(y_test, predicted),
                    "balanced_accuracy": balanced_accuracy_score(y_test, predicted),
                }
            )
            fitted[(family, kernel_name)] = (svm, confusion_matrix(y_test, predicted), predicted)
    return pd.DataFrame(rows), fitted

# 5) Helpers de plot
def plot_boundary(model, X_train, X_test, y_test, title):
    """Draw the model's predicted regions with the test points on top.

    The plotting window spans the first two columns of the combined train and
    test features, padded by 0.5 on each side. Only the test points are
    scattered, coloured by their true labels.

    Args:
        model: Fitted classifier exposing ``predict``.
        X_train: Training features (used only to size the plotting window).
        X_test: Test features to scatter.
        y_test: True labels used to colour the test points.
        title: Figure title.
    """
    step = 0.02
    margin = 0.5

    combined = np.vstack([X_train, X_test])
    lower = combined.min(axis=0) - margin
    upper = combined.max(axis=0) + margin

    xx, yy = np.meshgrid(
        np.arange(lower[0], upper[0], step),
        np.arange(lower[1], upper[1], step),
    )
    # Predict the class of every mesh point, then fold back into the mesh shape.
    mesh_points = np.column_stack((xx.ravel(), yy.ravel()))
    regions = model.predict(mesh_points).reshape(xx.shape)

    plt.figure(figsize=(6, 5))
    plt.contourf(xx, yy, regions, alpha=0.3)
    plt.scatter(X_test[:, 0], X_test[:, 1], c=y_test, s=20, edgecolor='k')
    plt.title(title)
    plt.xlabel("Feature 1 (scaled)")
    plt.ylabel("Feature 2 (scaled)")
    plt.tight_layout()
    plt.show()

def plot_cm(cm, title):
    """Render a confusion matrix with integer cell values.

    Args:
        cm: Confusion matrix to display.
        title: Figure title.
    """
    ConfusionMatrixDisplay(confusion_matrix=cm).plot(values_format='d')
    plt.title(title)
    plt.tight_layout()
    plt.show()

# 6) Execução: imprime métricas após cada dataset
# The hyperparameters are identical for every dataset, so set them once
# instead of re-assigning them on each loop iteration.
C_val, nu_val, degree_val, gamma_val = 1.0, 0.3, 2, "scale"

all_rows = []       # one metrics DataFrame per dataset
stored_models = {}  # dataset label -> (Xtr, Xte, ytr, yte, fitted models)
for dname, (X, y) in datasets.items():
    print("=" * 80)
    print(f"DATASET: {dname}")
    Xtr, Xte, ytr, yte = split_scale(X, y)
    print(f"Hiperparâmetros: C={C_val}, nu={nu_val}, degree={degree_val}, gamma={gamma_val}")
    df_metrics, models = eval_models(Xtr, Xte, ytr, yte, C=C_val, nu=nu_val, degree=degree_val, gamma=gamma_val)
    print(df_metrics.round(4).to_string(index=False))
    # Tag the rows with the dataset label only after printing, so the
    # per-dataset table stays compact.
    df_metrics.insert(0, "dataset", dname)
    all_rows.append(df_metrics)
    stored_models[dname] = (Xtr, Xte, ytr, yte, models)

metrics_all = pd.concat(all_rows, ignore_index=True)
# A bare expression is rendered only when it is the last statement of a
# notebook cell. More code follows here, so print the consolidated table
# explicitly; otherwise it is silently discarded.
print("=" * 80)
print(metrics_all.round(4).to_string(index=False))

# 7) Plots SVC (3 kernels) por dataset
# For every dataset, draw the decision boundary and the confusion matrix of
# each SVC kernel, reusing the classifiers fitted earlier.
for dname in stored_models:
    Xtr, Xte, ytr, yte, models = stored_models[dname]
    for kernel in ("linear", "poly", "rbf"):
        clf, cm, _ = models[("SVC", kernel)]
        plot_boundary(
            clf,
            Xtr,
            Xte,
            yte,
            f"{dname} — Fronteira SVC ({kernel})",
        )
        plot_cm(
            cm,
            f"{dname} — Matriz de Confusão SVC ({kernel})",
        )
