In [1]:
# ===========================================
# Classificação de Câncer de Mama
# Comparando Regressão Logística e Random Forest
# ===========================================

#  1. Importar bibliotecas
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

#  2. Load the dataset
# Wrap the raw arrays in pandas objects so each feature column carries its name.
data = load_breast_cancer()
X = pd.DataFrame(data=data["data"], columns=data["feature_names"])
y = pd.Series(data=data["target"])

# Quick summary: table shape, distinct label values and their names.
print("Dimens√µes do dataset:", X.shape)
print("Classes:", np.unique(y))
print("Descri√ß√£o das classes:", data["target_names"])

#  3. Split into train and test sets
# 30% of the rows are held out; the split is stratified on the label and uses a
# fixed seed so it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

# Scale the data (needed for Logistic Regression).
# The scaler is fitted on the training rows only and then applied to both sets.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

#  4. Train the models
# Logistic Regression: fitted on the scaled features.
log_reg = LogisticRegression(random_state=42, max_iter=500).fit(
    X_train_scaled, y_train
)

# Random Forest: fitted on the original (unscaled) features.
rf = RandomForestClassifier(random_state=42, n_estimators=100).fit(
    X_train, y_train
)

#  5. Avaliar modelos
def avaliar(y_true, y_pred, modelo):
    """Print a short classification report for one model.

    Prints the model name, then accuracy, precision, recall and F1 (each
    rounded to three decimals), and finally the confusion matrix.

    Args:
        y_true: Ground-truth labels.
        y_pred: Labels predicted by the model.
        modelo: Display name used in the report header.

    Returns:
        None. The report is written to standard output.

    NOTE(review): precision, recall and F1 are called without extra arguments,
    so scikit-learn's defaults apply (binary averaging, positive label 1).
    In the dataset used by this notebook label 1 appears to be 'benign' --
    confirm that this is the class the scores are meant to describe.
    """
    print(f"\nüîπ Modelo: {modelo}")

    # (label, scorer) pairs, listed in the order they are reported.
    metricas = (
        ("Acur√°cia:", accuracy_score),
        ("Precis√£o:", precision_score),
        ("Recall:", recall_score),
        ("F1-Score:", f1_score),
    )
    for rotulo, metrica in metricas:
        print(rotulo, round(metrica(y_true, y_pred), 3))

    print("Matriz de Confus√£o:\n", confusion_matrix(y_true, y_pred))

# Predictions: each model receives the same feature representation it was
# trained on (scaled for Logistic Regression, unscaled for Random Forest).
y_pred_log = log_reg.predict(X_test_scaled)
y_pred_rf = rf.predict(X_test)

# Results: print one report per model, in a fixed order.
for nome_modelo, predicao in (
    ("Regress√£o Log√≠stica", y_pred_log),
    ("Random Forest", y_pred_rf),
):
    avaliar(y_test, predicao, nome_modelo)

#  6. Comparison table
# Model name -> test-set predictions; insertion order fixes the row order.
predicoes = {
    "Regress√£o Log√≠stica": y_pred_log,
    "Random Forest": y_pred_rf,
}

# Column title -> scoring function; insertion order fixes the column order.
metricas = {
    "Acur√°cia": accuracy_score,
    "Precis√£o": precision_score,
    "Recall": recall_score,
    "F1-Score": f1_score,
}

# One row per model and one column per metric (unrounded scores).
resultados = pd.DataFrame({
    "Modelo": list(predicoes),
    **{
        titulo: [metrica(y_test, predicao) for predicao in predicoes.values()]
        for titulo, metrica in metricas.items()
    },
})

print("\nüìä Resultados comparativos:")
display(resultados)


Dimens√µes do dataset: (569, 30)
Classes: [0 1]
Descri√ß√£o das classes: ['malignant' 'benign']

üîπ Modelo: Regress√£o Log√≠stica
Acur√°cia: 0.988
Precis√£o: 0.991
Recall: 0.991
F1-Score: 0.991
Matriz de Confus√£o:
 [[ 63   1]
 [  1 106]]

üîπ Modelo: Random Forest
Acur√°cia: 0.936
Precis√£o: 0.944
Recall: 0.953
F1-Score: 0.949
Matriz de Confus√£o:
 [[ 58   6]
 [  5 102]]

üìä Resultados comparativos:


Unnamed: 0,Modelo,Acur√°cia,Precis√£o,Recall,F1-Score
0,Regress√£o Log√≠stica,0.988304,0.990654,0.990654,0.990654
1,Random Forest,0.935673,0.944444,0.953271,0.948837
