# In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Load the dataset from 'heart.csv' in the current working directory.
data = pd.read_csv('heart.csv')

# Warn if any column of the dataset contains at least one null value.
null_counts_per_column = data.isnull().sum()
if null_counts_per_column.any():
    print("Missing values in the dataset")

# Separate the target (the 'target' column) from the features
# (every remaining column).
Y = data['target']
X = data.drop(columns=['target'])

# Randomly split the data into training (80%) and test (20%) sets.
# Stratifying on Y keeps the class proportions alike in both sets, and the
# fixed random_state makes the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    stratify=Y,
    random_state=2,
)


# Feature standardization.
# KNN is a distance-based algorithm, so the features must share a common scale.
standard_X = StandardScaler()
# Learn the per-feature mean and standard deviation from the training data only.
X_train = standard_X.fit_transform(X_train)
# Reuse the statistics learned on the training data for the test data.
# Re-fitting the scaler on the test set would scale it differently from the
# training set and leak test-set information into preprocessing.
X_test = standard_X.transform(X_test)

# Find the best value of k (number of neighbors).
# NOTE(review): every candidate k is scored on the test split, so K is tuned
# on the same data that is later used to report test accuracy, which makes
# that figure optimistic. Consider choosing k with cross-validation on the
# training split instead.
k_values = range(1, 31)  # candidate k's: 1 to 30 inclusive
error = []
for k in k_values:
    knn_model = KNeighborsClassifier(n_neighbors=k)
    knn_model.fit(X_train, Y_train)
    pred_i = knn_model.predict(X_test)
    # Fraction of test samples misclassified with this k.
    error.append(np.mean(pred_i != Y_test))

# Plot the error rate against k; the same k_values drive the loop and the
# x-axis so the two can never get out of sync.
plt.figure(figsize=(12, 6))
plt.plot(k_values, error, color='red', linestyle='dashed', marker='o',
         markerfacecolor='blue', markersize=10)
plt.title('Error Rate (K Value)')
plt.xlabel('K Value')
plt.ylabel('Mean Error')
plt.show()

# Select the smallest k that reaches the minimum error
# (list.index returns the first matching position).
min_error = min(error)
K = k_values[error.index(min_error)]
print("Minimum error:-", min_error, "at K =", K)

# K-Nearest Neighbors (KNN): build the classifier with the selected K and the
# Euclidean distance metric, then train it on the training data.
# fit() returns the estimator itself, so construction and training are chained.
model = KNeighborsClassifier(metric='euclidean', n_neighbors=K).fit(X_train, Y_train)

# Model evaluation
# accuracy_score expects (y_true, y_pred): ground-truth labels first,
# predictions second.

# Accuracy on the training data
X_train_prediction = model.predict(X_train)
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)
print('Accuracy on Training Data: ', training_data_accuracy)

# Accuracy on the test data
X_test_prediction = model.predict(X_test)
test_data_accuracy = accuracy_score(Y_test, X_test_prediction)
print('Accuracy on Test Data: ', test_data_accuracy)