# Regressão Logística

In [None]:

import joblib
from multiprocessing import cpu_count
from sklearn.model_selection import RandomizedSearchCV
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
import os
from scipy.stats import uniform, loguniform
from numpy import linspace

from sklearn.metrics import PrecisionRecallDisplay
from sklearn.metrics import f1_score

# I CONFIGURAÇÕES

In [None]:
# Known locations of the project data folder, one per collaborator machine.
path_jorge = r"G:\.shortcut-targets-by-id\1UEbcW3gnD82DQPOVUL7QL1Eitp5Qp_rx\Correções TCC\Tcc_aplicação\data"
path_elaine = "C:/Users/Elaine/Desktop/TCC_acidentes/data/"

# Candidates are probed in order; the first one that exists on disk wins.
_known_environments = (
    (path_jorge, "Ambiente detectado: Orientador (G:)"),
    (path_elaine, "Ambiente detectado: Orientanda"),
)

for _candidate_dir, _detected_msg in _known_environments:
    if os.path.exists(_candidate_dir):
        data_dir = _candidate_dir
        print(_detected_msg)
        break
else:
    # No absolute path matched: fall back to a folder relative to the cwd.
    data_dir = "./dados"
    print("Nenhum caminho absoluto encontrado. Tentando pasta local './dados'")

print(f"Lendo dados de: {data_dir}")

# 1 ABERTURA DOS DADOS

In [None]:
# Load the preprocessing object and the train/test splits saved with joblib.
#
# The original cell always read both files from the current working
# directory, even though `data_dir` had just been detected and announced.
# The cwd is still tried first (so existing setups behave exactly as before);
# `data_dir` is used as a fallback when the file is not next to the notebook.
#
# NOTE(security): joblib.load unpickles arbitrary Python objects. Only load
# artifacts that were produced by this project / a trusted source.
def _resolve_artifact(filename):
    """Return the first existing path for *filename* (cwd, then `data_dir`).

    Raises:
        FileNotFoundError: if the file is in neither location.
    """
    candidates = [filename, os.path.join(data_dir, filename)]
    for candidate in candidates:
        if os.path.exists(candidate):
            return candidate
    raise FileNotFoundError(
        f"Arquivo '{filename}' não encontrado em: {', '.join(candidates)}"
    )


# Preprocessing object (used as the first pipeline step further down).
preprocess = joblib.load(_resolve_artifact('preprocess.pkl'))

# Train/test splits, stored as a 4-tuple in this exact order.
X_train, X_test, y_train, y_test = joblib.load(_resolve_artifact('data_splits.pkl'))

# Quick sanity check of the training features.
X_train.info()
X_train.head()

# 2 OTIMIZAÇÃO DO MODELO

In [None]:
# Pipeline: preprocessing followed by a logistic-regression classifier.
# NOTE(review): the 'saga' solver shuffles the data and the estimator has no
# random_state, so repeated runs may differ slightly even though the search
# itself is seeded below — consider seeding the classifier as well.
pipeline_lr = Pipeline(steps=[
    ('preprocess', preprocess),
    ('clf', LogisticRegression(max_iter=300)),
])

# Search space for the randomized search. Lists are sampled uniformly;
# scipy distributions are sampled through their .rvs() method.
param_grid_lr = {
    'clf__solver': ['saga'],                 # the solver that supports elastic-net
    'clf__penalty': ['elasticnet'],
    'clf__C': loguniform(1e-2, 1e2),         # inverse regularization strength
    'clf__l1_ratio': uniform(0, 1),          # 0 = pure L2, 1 = pure L1
    'clf__class_weight': [None, 'balanced'],
}

# Use half of the available cores, but never fewer than one:
# cpu_count() // 2 is 0 on a single-core machine and joblib rejects n_jobs=0.
n_search_jobs = max(1, cpu_count() // 2)

# Randomized search: 5 sampled configurations x 5-fold CV, optimizing F1.
random_lr = RandomizedSearchCV(
    estimator=pipeline_lr,
    param_distributions=param_grid_lr,
    n_iter=5,
    scoring='f1',
    cv=5,
    n_jobs=n_search_jobs,
    random_state=42,
    verbose=1,
)

# Fit the search on the training split only.
random_lr.fit(X_train, y_train)

# 3 DIAGNÓSTICO

## 3.1 Métricas Básicas

In [None]:
# Predict on the held-out test split with the refitted best estimator.
y_pred_lr = random_lr.predict(X_test)

# Best mean cross-validated F1 found by the search.
print(f"Melhor F1 na Validação Cruzada: {random_lr.best_score_:.4f}")

# Per-class report first, then the confusion matrix, both on the test split.
for _summarize in (classification_report, confusion_matrix):
    print(_summarize(y_test, y_pred_lr))

## 3.2 Mudança de Limiar do Predict

In [None]:
# 1. Predicted probabilities: column 1 of predict_proba (second class).
y_proba = random_lr.predict_proba(X_test)[:, 1]

# 2. Try several decision thresholds: 25 evenly spaced values in [0, 0.9].
thresholds = linspace(0, 0.9, 25)

# Loop-invariant: mask and count of the actual positives in the test split.
# assumes y_test is a numpy array or pandas Series of 0/1 labels — TODO confirm
actual_pos = (y_test == 1)
n_actual_pos = actual_pos.sum()

print("Limiar | F1-Score | Recall | Precision")
print("---------------------------------------")
for th in thresholds:
    pred_pos = (y_proba >= th)
    y_pred_th = pred_pos.astype(int)

    # zero_division=0 keeps the score at 0.0 (without a warning) when a
    # threshold produces no positive predictions.
    f1 = f1_score(y_test, y_pred_th, zero_division=0)

    # Recall and precision computed by hand for the printed table. Vectorized
    # .sum() replaces the builtin sum(), which iterated element by element.
    true_pos = (pred_pos & actual_pos).sum()
    n_pred_pos = pred_pos.sum()

    # Guard the denominators: a high threshold can yield zero predicted
    # positives, which previously printed "nan" and raised a RuntimeWarning.
    rec = true_pos / n_actual_pos if n_actual_pos else 0.0
    prec = true_pos / n_pred_pos if n_pred_pos else 0.0

    print(f" {th:.2f}  |  {f1:.4f}  |  {rec:.4f} |  {prec:.4f}")