In [392]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, RobustScaler, OrdinalEncoder
from sklearn.compose import ColumnTransformer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

#Obtendo dataset do repositório
url = 'https://raw.githubusercontent.com/professortiagoinfnet/inteligencia_artificial/refs/heads/main/heart.csv'
heart_df = pd.read_csv(url).dropna()

#Separando as features e o target
columns = list(heart_df.head())
target = columns.pop()
features = columns

#Separando os dados de treino dos de validação
x_train, x_val, y_train, y_val = train_test_split(heart_df[features], heart_df[target], train_size=0.80)

#Transformando variáveis categóricas com BinaryEncoding e OneHotEncoding,
#enquanto as contínuas tem suas escalas modificadas com StandardScaler.
f_continuous = ['Age', 'RestingBP', 'Cholesterol', 'MaxHR', 'Oldpeak']
f_binary = ['Sex', 'ExerciseAngina']
f_multiclass = ['ChestPainType', 'RestingECG', 'ST_Slope']

preprocessor = ColumnTransformer(
    transformers = [
        ('continuous', RobustScaler(), f_continuous),
        ('binary', OrdinalEncoder(), f_binary),
        ('multiclass', OneHotEncoder(handle_unknown='ignore'), f_multiclass)
    ],
    remainder='passthrough'
)

x_train_transformed = preprocessor.fit_transform(x_train)
x_val_transformed = preprocessor.transform(x_val)

#Aplicando KNN como modelo de previsão e verificando
#a acurácia com diferentes k vizinhos
for k in range(10, 511, 50):
  knn = KNeighborsClassifier(n_neighbors=k)
  knn.fit(x_train_transformed, y_train)
  predicted = knn.predict(x_val_transformed)
  print(f'k: {k} accuracy: {accuracy_score(y_val, predicted):.2f}')


k: 10 accuracy: 0.90
k: 60 accuracy: 0.88
k: 110 accuracy: 0.88
k: 160 accuracy: 0.88
k: 210 accuracy: 0.87
k: 260 accuracy: 0.86
k: 310 accuracy: 0.85
k: 360 accuracy: 0.86
k: 410 accuracy: 0.85
k: 460 accuracy: 0.84
k: 510 accuracy: 0.84
