#### **Pré-processamento + Pipeline**

In [None]:
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV, cross_validate
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.tree import DecisionTreeClassifier

# Split the feature columns by dtype so each group gets its own transformer.
# Selecting only `object` would miss `category` and `bool` columns; since
# ColumnTransformer discards every column that is not listed (remainder='drop'),
# those features would silently disappear from the model input.
colunas_num = X.select_dtypes(include=np.number).columns.tolist()
colunas_cat = X.select_dtypes(include=['object', 'category', 'bool']).columns.tolist()

# Preprocessor: standardize numeric columns and one-hot encode categorical ones.
# handle_unknown='ignore' keeps transform from failing on categories that were
# not present when the encoder was fitted.
# remainder='drop' is the library default, spelled out on purpose: columns of any
# other dtype (e.g. datetime) are still discarded.
preprocessador = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), colunas_num),
        ('cat', OneHotEncoder(handle_unknown='ignore'), colunas_cat),
    ],
    remainder='drop',
)

### **Pipeline com modelo (ex.: Regressão Logística)**


In [None]:
# Logistic-regression pipeline: the shared preprocessor followed by the classifier,
# so preprocessing is fitted together with the model on whatever data is passed in.
modelo_log = LogisticRegression(max_iter=1000, class_weight='balanced', random_state=42)
pipeline_log = Pipeline(steps=[
    ('preprocessador', preprocessador),
    ('modelo', modelo_log),
])

# 5-fold cross-validation on the training split; train-fold scores are kept as
# well so they can be compared against the validation scores.
metricas_log = ['accuracy', 'precision', 'recall', 'f1']
resultados_log = cross_validate(
    estimator=pipeline_log,
    X=X_train,
    y=y_train,
    scoring=metricas_log,
    cv=5,
    return_train_score=True,
)

# Average each result column (timings and scores) across the folds.
pd.DataFrame(resultados_log).mean()

### **Pipeline com Árvore de Decisão**

In [None]:
# Decision-tree pipeline: the shared preprocessor followed by a depth-limited,
# class-weighted tree with a fixed seed.
modelo_arvore = DecisionTreeClassifier(class_weight='balanced', max_depth=5, random_state=42)
pipeline_arvore = Pipeline(steps=[
    ('preprocessador', preprocessador),
    ('modelo', modelo_arvore),
])

# 5-fold cross-validation on the training split, keeping the train-fold scores too.
metricas_arvore = ['accuracy', 'precision', 'recall', 'f1']
resultados_arvore = cross_validate(
    estimator=pipeline_arvore,
    X=X_train,
    y=y_train,
    scoring=metricas_arvore,
    cv=5,
    return_train_score=True,
)

# Average each result column (timings and scores) across the folds.
pd.DataFrame(resultados_arvore).mean()

### **(Opcional) Ajuste com GridSearchCV dentro do pipeline**


In [None]:

# Hyperparameter grid for the decision tree. The 'modelo__' prefix routes each
# parameter to the pipeline step named 'modelo'.
param_grid = {
    'modelo__max_depth': [3, 5, 10],
    'modelo__min_samples_split': [2, 5, 10],
}

# Same preprocessing + tree pipeline, but without a fixed max_depth so the
# search is free to choose it.
pipeline_gs = Pipeline([
    ('preprocessador', preprocessador),
    ('modelo', DecisionTreeClassifier(class_weight='balanced', random_state=42)),
])

# Exhaustive search over the 3 x 3 grid with 5-fold CV, ranked by F1;
# n_jobs=-1 runs the candidate fits in parallel on all available cores.
grid = GridSearchCV(pipeline_gs, param_grid, cv=5, scoring='f1', n_jobs=-1)
grid.fit(X_train, y_train)

print("Melhores parâmetros:", grid.best_params_)