In [46]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, root_mean_squared_error, log_loss
from sklearn.preprocessing import StandardScaler

df = pd.read_csv("../data/The_Cancer_data_1500_V2.csv")

X = df.iloc[:, :8]  # Features (first 8 columns)
y = df.iloc[:, 8]   # Target (9th column)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

for neighbors in range (1, 21):
    knn = KNeighborsClassifier(n_neighbors=neighbors, leaf_size=40)
    knn.fit(X_train_scaled, y_train)
    y_pred = knn.predict(X_test_scaled)
    y_pred_proba = knn.predict_proba(X_test_scaled)

    accuracy = accuracy_score(y_test, y_pred)
    mse = root_mean_squared_error(y_test, y_pred)
    nll = log_loss(y_test, y_pred)
    
    print(f"Neighbors: {neighbors}  |  Accuracy: {accuracy:.3f}  |  MSE: {mse:.3f}  |  NLL: {nll:.3f}")


Neighbors: 1  |  Accuracy: 0.863  |  MSE: 0.370  |  NLL: 4.926
Neighbors: 2  |  Accuracy: 0.860  |  MSE: 0.374  |  NLL: 5.046
Neighbors: 3  |  Accuracy: 0.883  |  MSE: 0.342  |  NLL: 4.205
Neighbors: 4  |  Accuracy: 0.897  |  MSE: 0.321  |  NLL: 3.725
Neighbors: 5  |  Accuracy: 0.900  |  MSE: 0.316  |  NLL: 3.604
Neighbors: 6  |  Accuracy: 0.890  |  MSE: 0.332  |  NLL: 3.965
Neighbors: 7  |  Accuracy: 0.897  |  MSE: 0.321  |  NLL: 3.725
Neighbors: 8  |  Accuracy: 0.887  |  MSE: 0.337  |  NLL: 4.085
Neighbors: 9  |  Accuracy: 0.890  |  MSE: 0.332  |  NLL: 3.965
Neighbors: 10  |  Accuracy: 0.877  |  MSE: 0.351  |  NLL: 4.445
Neighbors: 11  |  Accuracy: 0.893  |  MSE: 0.327  |  NLL: 3.845
Neighbors: 12  |  Accuracy: 0.877  |  MSE: 0.351  |  NLL: 4.445
Neighbors: 13  |  Accuracy: 0.867  |  MSE: 0.365  |  NLL: 4.806
Neighbors: 14  |  Accuracy: 0.857  |  MSE: 0.379  |  NLL: 5.166
Neighbors: 15  |  Accuracy: 0.863  |  MSE: 0.370  |  NLL: 4.926
Neighbors: 16  |  Accuracy: 0.857  |  MSE: 0.379 