In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score

# Load the preprocessed dataset
df = pd.read_csv("../Data/Stroke.csv")

# Separate the feature matrix X from the target column y
X = df.drop('stroke', axis=1)
y = df['stroke']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
# NOTE(review): the split is not stratified — consider stratify=y if the classes
# turn out to be imbalanced.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# KNN classifier with fixed hyperparameters (described by the author as the best
# ones found; the tuning itself is not part of this cell)
knn_model = KNeighborsClassifier(n_neighbors=5, weights='distance', algorithm='kd_tree', p=2)

# Sweep k from 1 up to the total number of available features
n_features = X_train.shape[1]

# One record per k so the sweep can be compared in a single table afterwards
results = []

for k in range(1, n_features + 1):
    print(f"\nAvaliação com k = {k} (Seleção das {k} melhores features):")

    # Fit the selector on the training split only, then apply the same
    # column mask to the test split (no information from the test set is used)
    skb = SelectKBest(score_func=f_classif, k=k)
    X_train_k = skb.fit_transform(X_train, y_train)
    X_test_k = skb.transform(X_test)

    # Re-fit the classifier on the k selected features
    knn_model.fit(X_train_k, y_train)

    # Predict on the held-out split
    y_pred = knn_model.predict(X_test_k)

    # Per-class precision / recall / f1
    print(classification_report(y_test, y_pred))

    # Overall accuracy
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Acurácia: {accuracy:.4f}")

    # Keep the score and the names of the selected columns for the summary
    results.append({
        "k": k,
        "accuracy": accuracy,
        "features": list(X_train.columns[skb.get_support()]),
    })

# Summary of the whole sweep, best accuracy first.
# NOTE(review): these scores come from the test split; picking k from this table
# gives an optimistic estimate — prefer cross-validation on the training split.
results_df = (
    pd.DataFrame(results)
    .sort_values("accuracy", ascending=False)
    .reset_index(drop=True)
)
results_df



Avaliação com k = 1 (Seleção das 1 melhores features):
              precision    recall  f1-score   support

           0       0.77      0.73      0.75       951
           1       0.74      0.79      0.76       943

    accuracy                           0.76      1894
   macro avg       0.76      0.76      0.76      1894
weighted avg       0.76      0.76      0.76      1894

Acurácia: 0.7571

Avaliação com k = 2 (Seleção das 2 melhores features):
              precision    recall  f1-score   support

           0       0.83      0.71      0.77       951
           1       0.75      0.86      0.80       943

    accuracy                           0.78      1894
   macro avg       0.79      0.78      0.78      1894
weighted avg       0.79      0.78      0.78      1894

Acurácia: 0.7825

Avaliação com k = 3 (Seleção das 3 melhores features):
              precision    recall  f1-score   support

           0       0.83      0.71      0.77       951
           1       0.75      0.86  