# 5. Avaliação do Modelo

Este notebook avalia o impacto das novas features na performance do modelo.

In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Visualization settings
# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8*'
# and newer releases no longer accept the bare 'seaborn' name, so use the
# renamed sheet when this installation provides it and fall back otherwise.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)
sns.set_palette('husl')
# Show every column when a DataFrame is displayed
pd.set_option('display.max_columns', None)

In [None]:
# Read the processed heart-disease CSV into a DataFrame.
# The path is relative, so it resolves against the kernel's working directory.
data_path = Path('../data/heart_disease_processed.csv')
df = pd.read_csv(filepath_or_buffer=data_path)

## 1. Preparação dos Dados

In [None]:
# Column groups: the original measurements and the engineered features.
# The names must match the CSV header exactly (note the dataset spellings
# 'cholestoral' and 'Max_heart_rate').
original_features = ['age', 'resting_blood_pressure', 'cholestoral', 'Max_heart_rate',
                    'oldpeak']

engineered_features = ['age_risk', 'bp_risk', 'cardiac_stress_index',
                      'combined_risk_score', 'age_heartrate_interaction',
                      'bp_cholesterol_interaction']

# Fail early, with a message listing every absent column, instead of a
# partial KeyError from the first indexing operation below.
required_columns = original_features + engineered_features + ['target']
missing_columns = [col for col in required_columns if col not in df.columns]
if missing_columns:
    raise KeyError(f"Missing expected columns in the dataset: {missing_columns}")

# Feature matrices (baseline vs. baseline + engineered) and the label vector
X_original = df[original_features]
X_engineered = df[original_features + engineered_features]
y = df['target']

# Standardize each feature matrix with its OWN scaler. Re-fitting a single
# scaler would discard the statistics learned for the first matrix and leave
# a fitted object that only matches the second one.
# NOTE: these arrays are scaled with statistics from the full dataset; for
# held-out evaluation, fit the scaler on the training portion only.
scaler_original = StandardScaler()
scaler_engineered = StandardScaler()
X_original_scaled = scaler_original.fit_transform(X_original)
X_engineered_scaled = scaler_engineered.fit_transform(X_engineered)

# Backward-compatible alias: `scaler` previously ended up fitted on the
# engineered feature matrix.
scaler = scaler_engineered

## 2. Avaliação com Cross-Validation

In [None]:
# Helper to score a model with cross-validation
def evaluate_model(X, y, model, cv=5, scoring='accuracy'):
    """Cross-validate ``model`` on ``(X, y)`` and summarize the fold scores.

    Args:
        X: Feature matrix passed to ``cross_val_score``.
        y: Target values passed to ``cross_val_score``.
        model: Estimator (or pipeline) to evaluate.
        cv: Cross-validation setting forwarded to ``cross_val_score``
            (defaults to 5, the value previously hard-coded).
        scoring: Scoring metric forwarded to ``cross_val_score``
            (defaults to ``'accuracy'``, the value previously hard-coded).

    Returns:
        A ``(mean, std)`` tuple computed over the per-fold scores.
    """
    scores = cross_val_score(model, X, y, cv=cv, scoring=scoring)
    return scores.mean(), scores.std()

# Build the models (same hyperparameters and seed for a fair comparison)
rf_original = RandomForestClassifier(n_estimators=100, random_state=42)
rf_engineered = RandomForestClassifier(n_estimators=100, random_state=42)

# Put the scaler inside a pipeline so that, within each CV fold, it is fitted
# on the training part only. Scaling the whole dataset beforehand would let
# statistics from the validation fold leak into training.
pipeline_original = make_pipeline(StandardScaler(), rf_original)
pipeline_engineered = make_pipeline(StandardScaler(), rf_engineered)

# Evaluate on the UNSCALED feature matrices; the pipelines handle scaling
original_score, original_std = evaluate_model(X_original, y, pipeline_original)
engineered_score, engineered_std = evaluate_model(X_engineered, y, pipeline_engineered)

print(f"Score com features originais: {original_score:.3f} (+/- {original_std:.3f})")
print(f"Score com features engineered: {engineered_score:.3f} (+/- {engineered_std:.3f})")

## 3. Análise Detalhada do Melhor Modelo

In [None]:
# Split the RAW engineered features first; stratify so both partitions keep
# the class proportions of `y`.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X_engineered, y, test_size=0.2, random_state=42, stratify=y)

# Fit the scaler on the training rows only and reuse it for the test rows, so
# no information from the held-out set influences preprocessing.
scaler_final = StandardScaler()
X_train = scaler_final.fit_transform(X_train_raw)
X_test = scaler_final.transform(X_test_raw)

# Train the model
rf_final = RandomForestClassifier(n_estimators=100, random_state=42)
rf_final.fit(X_train, y_train)

# Predict on the held-out set
y_pred = rf_final.predict(X_test)

# Classification report
print("Relatório de Classificação:")
print(classification_report(y_test, y_pred))

## 4. Curva ROC

In [None]:
# Predicted probabilities: column 1 of predict_proba, i.e. the second entry
# of rf_final.classes_
y_prob = rf_final.predict_proba(X_test)[:, 1]

# ROC curve points and the area under that curve
fpr, tpr, _ = roc_curve(y_test, y_prob)
roc_auc = auc(fpr, tpr)

# Draw the curve using the explicit Axes interface
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(fpr, tpr, color='darkorange', lw=2,
        label=f'ROC curve (AUC = {roc_auc:.2f})')
# Diagonal reference line from (0, 0) to (1, 1)
ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Receiver Operating Characteristic (ROC)')
ax.legend(loc="lower right")
plt.show()