In [None]:
!pip install scikit-learn pandas numpy ucimlrepo



In [None]:
# --- Imports ---

import pandas as pd
import numpy as np
from ucimlrepo import fetch_ucirepo
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler, MinMaxScaler
from sklearn.impute import SimpleImputer
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import make_scorer, accuracy_score, precision_score, recall_score

In [None]:
# --- 1. Load the dataset ---
# Fetch dataset id=45 through ucimlrepo. The target is the 'num' column of the
# targets table; the features table is copied before being bound to X.
heart_disease = fetch_ucirepo(id=45)
y = heart_disease.data.targets['num']
X = heart_disease.data.features.copy()

In [None]:
# --- 2. Select the relevant columns ---
# The two lists below name 13 feature columns; the original heading speaks of
# 14 attributes, presumably counting the target as well (TODO confirm).

# Columns routed through the categorical branch of the preprocessing.
categorical_cols = [
    'sex',
    'cp',
    'fbs',
    'restecg',
    'exang',
    'slope',
    'thal',
]

# Columns routed through the numeric branch of the preprocessing.
numeric_cols = [
    'age',
    'trestbps',
    'chol',
    'thalach',
    'oldpeak',
    'ca',
]

# --- 3. Preprocessing ---

# Categorical branch: fill missing values with the most frequent one, then
# one-hot encode (categories unseen during fit are ignored at transform time).
categorical_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Numeric branch: fill missing values with the column mean, then rescale
# with MinMaxScaler.
numeric_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', MinMaxScaler()),
])

# Send each column group through its own branch and combine the results.
preprocessor = ColumnTransformer([
    ('num', numeric_pipeline, numeric_cols),
    ('cat', categorical_pipeline, categorical_cols),
])


In [None]:
# --- 4. Feature pipeline: preprocessing followed by feature selection ---
# SelectKBest keeps the k=6 columns with the highest chi2 score.
full_pipeline = Pipeline([
    ('preprocessing', preprocessor),
    ('feature_selection', SelectKBest(chi2, k=6)),
])

# Fit on the complete dataset and keep the transformed feature matrix.
# NOTE(review): the fit and the target-aware selection see every row, so any
# cross-validation that is run on X_preprocessed has already used its
# held-out folds when the features were chosen.
X_preprocessed = full_pipeline.fit_transform(X, y)

In [None]:
# --- 5. Classifiers under evaluation ---
# Display name -> unfitted estimator, in the order they are reported.
classifiers = dict([
    ('SVM (RBF)', SVC(kernel='rbf', random_state=42)),
    ('Decision Tree', DecisionTreeClassifier(random_state=42)),
])

In [None]:
# --- 6. Cross-validation setup (k=5) and evaluation metrics ---
# Shuffled, stratified 5-fold split with a fixed seed, so every call to the
# splitter yields the same folds.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Display label -> scorer. Precision and recall are macro-averaged and score 0
# (instead of warning) when a class has no predicted/true samples.
scoring = {'Acurácia': make_scorer(accuracy_score)}
scoring.update({
    label: make_scorer(metric_fn, average='macro', zero_division=0)
    for label, metric_fn in (
        ('Precisão', precision_score),
        ('Recall', recall_score),
    )
})


In [None]:
# --- 7. Run the evaluation ---
# Each classifier is cross-validated as the last step of a pipeline that also
# contains the preprocessing and feature-selection steps, and the pipeline is
# given the raw feature table X. This way the imputers, scaler, encoder and
# SelectKBest are re-fitted on the training part of every fold only.
# Evaluating on a matrix that was transformed with the full dataset would leak
# information from the held-out folds (SelectKBest uses the target) and
# inflate the scores.
print("=== Avaliação dos Classificadores (k-fold = 5) ===")
for name, model in classifiers.items():
    # Reuse the step definitions of `full_pipeline`; cross_val_score clones
    # the estimator for every fold, so the already fitted step objects are
    # neither reused nor modified.
    model_pipeline = Pipeline(steps=[*full_pipeline.steps, ('classifier', model)])

    print(f"\n{name}:")
    for metric, scorer in scoring.items():
        # `cv` has a fixed random_state, so every metric is computed on the
        # same folds.
        scores = cross_val_score(model_pipeline, X, y, cv=cv, scoring=scorer)
        print(f"  {metric}: {scores.mean():.4f}")

=== Avaliação dos Classificadores (k-fold = 5) ===

SVM (RBF):
  Acurácia: 0.5875
  Precisão: 0.2854
  Recall: 0.3107

Decision Tree:
  Acurácia: 0.5578
  Precisão: 0.3565
  Recall: 0.3307


```.md
=== Avaliação dos Classificadores (k-fold = 5) ===

SVM (RBF):
  Acurácia: 0.5875
  Precisão: 0.2854
  Recall: 0.3107

Decision Tree:
  Acurácia: 0.5578
  Precisão: 0.3565
  Recall: 0.3307

```

