#Francis Maga

# K-Nearest Neighbors (KNN)

In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sn
import matplotlib.pyplot as plt

In [None]:
# Read the CSV at sample_data/heart.csv into a DataFrame. Later cells expect
# it to contain a `target` column, which they use as the class label.
data = pd.read_csv("sample_data/heart.csv")

In [None]:
# Print a summary of the DataFrame (columns, non-null counts, dtypes) as a
# quick sanity check of what was loaded.
data.info()

In [None]:
# Class balance of the label column: a bar chart of how many rows fall in each
# `target` class, followed by the counts themselves.
# The counts are computed once and reused for both the chart and the table.
target_counts = data['target'].value_counts()
target_counts.plot(kind='bar')
# Left as the final expression so that a notebook cell displays the counts.
target_counts

In [None]:
# Draw one count plot per feature column, split by `target` class, on a grid
# that is three plots wide.

# Every column except the label, selected by name so the result does not
# depend on where `target` sits in the file.
x_columns = data.columns.drop('target')

n_cols = 3
n_plots = len(x_columns)
# Ceiling division gives exactly enough rows for all plots (no spare empty
# row when the count is a multiple of n_cols); always keep at least one row.
n_rows = max(1, -(-n_plots // n_cols))

# Set up subplots: 5 inches of height per row. squeeze=False keeps `axes`
# two-dimensional even when there is only a single row.
fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(15, 5 * n_rows), squeeze=False)
fig.subplots_adjust(hspace=0.5)
flat_axes = axes.flatten()  # row-major order, matching the order of x_columns

# Iterate over columns and create count plots
for ax, column in zip(flat_axes, x_columns):
    sn.countplot(x=column, data=data, hue='target', ax=ax)
    ax.set_title(f'Count Plot for {column}')
    ax.set_xlabel(column)
    ax.set_ylabel('Count')

# Remove empty subplots: every grid cell past the last plotted column.
for ax in flat_axes[n_plots:]:
    fig.delaxes(ax)

plt.show()

In [None]:
# Count the missing values in each column.
# Left as the final expression so that a notebook cell displays the result.
data.isna().sum()

# Training

In [None]:
# Split the table into the label vector (the `target` column) and the feature
# matrix (every other column).
y = data['target']
X = data.drop(columns='target')

In [None]:
# Hold out 20% of the rows as a test set; random_state is fixed so the split
# is the same on every run.
# NOTE(review): the split is not stratified on y — consider stratify=y if the
# class proportions should be preserved in both parts; confirm intent.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Configure a k-nearest-neighbours classifier that votes over the 50 closest
# training samples; every other setting is left at its default.
# NOTE(review): the features are fed to the model unscaled in the cells below,
# and KNN is distance-based — consider standardising the features and tuning
# n_neighbors; confirm intent.
knn_classifier = KNeighborsClassifier(n_neighbors=50)

In [None]:
# Fit the classifier on the training split.
knn_classifier.fit(X_train, y_train)

In [None]:
# Predict a class label for every row of the held-out test split.
y_pred = knn_classifier.predict(X_test)

In [None]:
# Fraction of test rows whose predicted label matches the true label,
# reported as a percentage with two decimals.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

In [None]:
# Compute the evaluation artefacts once; later cells print and plot them.
# The text report and the confusion matrix are independent of each other.
classification_rep = classification_report(y_true=y_test, y_pred=y_pred)
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)

In [None]:
# Print the heading and the raw confusion matrix, each on its own line.
print('\nConfusion Matrix:', conf_matrix, sep='\n')

In [None]:
# Select two features for visualization (you can change these).
# These are column names; presumably they are meant to be passed to
# plot_knn_decision_boundary as its feature1 / feature2 arguments.
feature1, feature2 = 'age', 'thalach'

# Plotting function to show decision boundaries and data points
def plot_knn_decision_boundary(X_train, y_train, knn_classifier, feature1, feature2):
    """Draw KNN decision regions over two features, with the training points on top.

    The classifier passed in is not used for prediction directly. A fresh
    estimator of the same class and hyper-parameters is fitted on only the two
    selected columns, because a model trained on more columns cannot score a
    two-dimensional grid.

    Args:
        X_train: Training features. Assumed to be a pandas DataFrame holding
            ``feature1`` and ``feature2`` as numeric columns.
        y_train: Training labels aligned with ``X_train``. Assumed to be
            numeric class codes, since they are used directly as colours.
        knn_classifier: Estimator whose class and ``get_params()`` are reused
            to build the two-feature model. It is not modified.
        feature1: Name of the column plotted on the x-axis.
        feature2: Name of the column plotted on the y-axis.

    Returns:
        None. The plot is drawn on a new 10x8 figure, which is left as the
        current pyplot figure so the caller can adjust or show it.
    """
    points = X_train[[feature1, feature2]].to_numpy(dtype=float)
    labels = np.asarray(y_train)

    # Same estimator type and settings, trained on the two plotted features only.
    model_2d = type(knn_classifier)(**knn_classifier.get_params())
    model_2d.fit(points, labels)

    def _padded_axis(values, steps=200):
        # Evenly spaced coordinates covering the data range plus a 5% margin
        # (a fixed margin of 1 when every value is identical).
        lo, hi = values.min(), values.max()
        pad = 0.05 * (hi - lo) or 1.0
        return np.linspace(lo - pad, hi + pad, steps)

    # Classify every point of a regular grid to obtain the decision regions.
    grid_x, grid_y = np.meshgrid(_padded_axis(points[:, 0]), _padded_axis(points[:, 1]))
    grid_pred = model_2d.predict(np.column_stack([grid_x.ravel(), grid_y.ravel()]))
    grid_pred = grid_pred.reshape(grid_x.shape)

    plt.figure(figsize=(10, 8))
    plt.contourf(grid_x, grid_y, grid_pred, alpha=0.3, cmap='coolwarm')
    scatter = plt.scatter(points[:, 0], points[:, 1], c=labels, cmap='coolwarm', edgecolors='k')
    plt.legend(*scatter.legend_elements(), title='target')
    plt.xlabel(feature1)
    plt.ylabel(feature2)
    plt.title(f'KNN decision boundary: {feature1} vs {feature2}')

In [None]:
# Render the confusion matrix as an annotated heatmap: rows are the actual
# classes, columns the predicted ones, and both axes share the same tick labels.
class_names = ['Less Likely', 'Likely']

plt.figure(figsize=(8, 6))
sn.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()

In [None]:
# Print the heading and the per-class text report, each on its own line.
print('\nClassification Report:', classification_rep, sep='\n')