In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris

# Load the Iris dataset and keep only the first two feature columns (indices 0
# and 1) so every sample is a 2-D point; `y` holds the class label per sample.
iris = load_iris()
x, y = iris.data[:, [0, 1]], iris.target

def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between two points.

    Both operands are converted to floating-point arrays before subtracting,
    so plain Python sequences are accepted and unsigned-integer inputs cannot
    wrap around during the subtraction or squaring.

    Args:
        x1: First point; a number or array-like of numbers.
        x2: Second point; must be broadcastable against ``x1``.

    Returns:
        The square root of the sum of squared element-wise differences,
        as a NumPy floating-point scalar.
    """
    a = np.asarray(x1, dtype=float)
    b = np.asarray(x2, dtype=float)
    return np.sqrt(np.sum((a - b) ** 2))

def knn_predict(x, x_train, y_train, k):
    """Predict the label of one sample by majority vote of its k nearest neighbours.

    Distances are Euclidean and are computed against all training samples in a
    single vectorised expression.

    Args:
        x: Query sample; array-like with the same number of features as one
            training sample.
        x_train: Training samples; array-like whose first axis indexes samples.
        y_train: Training labels; array-like aligned with ``x_train``.
        k: Number of neighbours that vote. Must be at least 1. If ``k`` exceeds
            the number of training samples, every training sample votes.

    Returns:
        The most frequent label among the k nearest neighbours. When several
        labels share the top count, the smallest label wins, because
        ``np.unique`` returns labels sorted and ``np.argmax`` returns the first
        maximum.

    Raises:
        ValueError: If ``k`` is smaller than 1 or ``x_train`` has no samples.
    """
    # A negative k would otherwise slice "all but the last |k|" neighbours.
    if k < 1:
        raise ValueError(f"k must be at least 1, got {k!r}")

    train = np.asarray(x_train, dtype=float)
    train_labels = np.asarray(y_train)
    if train.size == 0:
        raise ValueError("x_train must contain at least one sample")

    # Flatten each sample to a feature vector so the reduction below always
    # runs over axis 1, whatever the per-sample shape is.
    train = train.reshape(train.shape[0], -1)
    query = np.asarray(x, dtype=float).reshape(-1)
    distances = np.sqrt(np.sum((train - query) ** 2, axis=1))

    # A stable sort keeps equidistant neighbours in training order, so the
    # selected neighbours do not depend on the sort implementation.
    k_indices = np.argsort(distances, kind="stable")[:k]

    labels, counts = np.unique(train_labels[k_indices], return_counts=True)
    return labels[np.argmax(counts)]

def compute_accuracy(x_val, y_val, x_train, y_train, k):
    """Return the fraction of validation samples that k-NN labels correctly.

    Each validation sample is classified with ``knn_predict`` against the
    training set, and the prediction is compared with its true label.

    Args:
        x_val: Validation samples, iterated in step with ``y_val``.
        y_val: True labels of the validation samples.
        x_train: Training samples passed through to ``knn_predict``.
        y_train: Training labels passed through to ``knn_predict``.
        k: Number of neighbours passed through to ``knn_predict``.

    Returns:
        Number of correct predictions divided by ``len(y_val)``.

    Raises:
        ZeroDivisionError: If ``y_val`` is empty.
    """
    hits = sum(
        1
        for sample, label in zip(x_val, y_val)
        if knn_predict(sample, x_train, y_train, k) == label
    )
    return hits / len(y_val)

# Shuffle the sample indices with a fixed seed so the split is repeatable,
# then use the first 70% for training and the remainder for validation.
np.random.seed(45)
indices = np.random.permutation(len(x))
split = int(0.7 * len(x))
train_idx, val_idx = indices[:split], indices[split:]

x_train = x[train_idx]
y_train = y[train_idx]
X_val = x[val_idx]
y_val = y[val_idx]

# Evaluate k = 1..10 on the validation set. The strict ">" comparison below
# means that among equally accurate candidates the smallest k is kept.
k_candidates = range(1, 11)
best_k, best_acc = None, 0.0

print("k값별 정확도:")
for k in k_candidates:
    acc = compute_accuracy(X_val, y_val, x_train, y_train, k)
    print(f"k = {k} -> 정확도: {acc :.2f}")
    if acc > best_acc:
        best_k, best_acc = k, acc

print(f"\n최적의 k값은: {best_k} (정확도: {best_acc :.2f})")

k값별 정확도:
k = 1 -> 정확도: 0.71
k = 2 -> 정확도: 0.64
k = 3 -> 정확도: 0.69
k = 4 -> 정확도: 0.62
k = 5 -> 정확도: 0.69
k = 6 -> 정확도: 0.73
k = 7 -> 정확도: 0.80
k = 8 -> 정확도: 0.71
k = 9 -> 정확도: 0.78
k = 10 -> 정확도: 0.76

최적의 k값은: 7 (정확도: 0.80)
