In [1]:
!git clone https://github.com/gabrielagcam/Detec-o-precoce-de-AVCs.git

Cloning into 'Detec-o-precoce-de-AVCs'...
remote: Enumerating objects: 124, done.[K
remote: Counting objects: 100% (124/124), done.[K
remote: Compressing objects: 100% (97/97), done.[K
remote: Total 124 (delta 48), reused 42 (delta 10), pack-reused 0 (from 0)[K
Receiving objects: 100% (124/124), 731.31 KiB | 6.77 MiB/s, done.
Resolving deltas: 100% (48/48), done.


In [2]:
# Instalação necessária
!pip install xgboost
!pip install tensorflow_decision_forests
!pip install tqdm tqdm_joblib

import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow_decision_forests as tfdf

from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.feature_selection import SelectKBest, f_classif
from xgboost import XGBClassifier
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

from tqdm import tqdm
from tqdm_joblib import tqdm_joblib

Collecting tqdm_joblib
  Downloading tqdm_joblib-0.0.4-py3-none-any.whl.metadata (269 bytes)
Downloading tqdm_joblib-0.0.4-py3-none-any.whl (1.7 kB)
Installing collected packages: tqdm_joblib
Successfully installed tqdm_joblib-0.0.4


  from tqdm.autonotebook import tqdm


In [3]:
# Load the preprocessed dataset from the cloned repository
df = pd.read_csv("Detec-o-precoce-de-AVCs/Data/Stroke.csv")

# The 'stroke' column is the target; every remaining column is a feature
y = df['stroke']
X = df.drop(columns=['stroke'])

In [4]:
# Hold out 20% of the rows as the test set; the fixed seed keeps the split reproducible.
# NOTE(review): the split is not stratified — consider stratify=y if the class
# proportions must match exactly between train and test.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts


In [5]:
# Pipeline: univariate feature selection (SelectKBest with the f_classif score)
# followed by an XGBoost classifier.
# `use_label_encoder` is deliberately not passed: the installed XGBoost does not
# use it and only emits a "Parameters: { "use_label_encoder" } are not used." warning.
pipeline = Pipeline([
    ('kbest', SelectKBest(score_func=f_classif)),
    ('xgb', XGBClassifier(eval_metric='logloss', random_state=42))
])

In [6]:
# Hyperparameter grid; keys follow the "<pipeline step>__<parameter>" convention.
# k ranges over every possible number of selected features (1 .. all columns).
n_features = X_train.shape[1]
param_grid = {
    'kbest__k': list(range(1, n_features + 1)),
    'xgb__n_estimators': [50],
    'xgb__max_depth': [3, 5, 10],
    'xgb__learning_rate': [0.01, 0.1, 0.3],
}

# Exhaustive search over param_grid with 5-fold cross-validation, scored by
# accuracy and run in parallel on all available cores (n_jobs=-1).
grid_search = GridSearchCV(
    pipeline,
    param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
    verbose=0,
)

# Estimated number of fits: CV folds x number of hyperparameter combinations
total_iter = grid_search.cv * np.prod([len(v) for v in param_grid.values()])
print(f"Total de iterações estimadas: {total_iter}")

# Run the search with a progress bar.
# tqdm_joblib creates its own tqdm bar from the arguments it receives, so the bar
# options are passed directly. Wrapping them in tqdm(...) first produced two bars,
# one of which never advanced and was never closed.
with tqdm_joblib(desc="GridSearchCV", total=total_iter):
    grid_search.fit(X_train, y_train)

Total de iterações estimadas: 855


GridSearchCV:   0%|          | 0/855 [00:00<?, ?it/s]

  0%|          | 0/855 [00:00<?, ?it/s]

Parameters: { "use_label_encoder" } are not used.



In [7]:
# Report the winning hyperparameter combination and its mean cross-validated accuracy
print("\nMelhores hiperparâmetros encontrados:", grid_search.best_params_, sep="\n")

print(f"Melhor acurácia média na validação cruzada: {grid_search.best_score_:.4f}")


Melhores hiperparâmetros encontrados:
{'kbest__k': 19, 'xgb__learning_rate': 0.3, 'xgb__max_depth': 10, 'xgb__n_estimators': 50}
Melhor acurácia média na validação cruzada: 0.9715


In [8]:
# Final evaluation on the held-out test set.
# best_estimator_ is the pipeline that the grid search selected and exposes
# after fitting; it is used here to predict labels for X_test.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

In [9]:
# Per-class precision/recall/F1 and the confusion matrix for the test predictions
report = classification_report(y_test, y_pred)
matrix = confusion_matrix(y_test, y_pred)

print("\nRelatório de Classificação:\n", report)
print("Matriz de Confusão:\n", matrix)


Relatório de Classificação:
               precision    recall  f1-score   support

           0       1.00      0.95      0.97       951
           1       0.95      1.00      0.98       943

    accuracy                           0.97      1894
   macro avg       0.98      0.97      0.97      1894
weighted avg       0.98      0.97      0.97      1894

Matriz de Confusão:
 [[903  48]
 [  0 943]]
