In [13]:
from sklearn import svm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [14]:
# Load the iris data as a (features, labels) pair instead of a Bunch object.
X, y = load_iris(return_X_y=True)
def split_data_and_normalize(X, y, *, test_size=0.2, random_state=42, stratify=False):
    """Split the data into train/test sets and standardize the features.

    The scaler is fitted on the training split only and then applied to the
    test split, so no statistics from the test set influence the scaling.

    Args:
        X: Feature matrix, passed straight to ``train_test_split``.
        y: Targets aligned with ``X``.
        test_size: Forwarded to ``train_test_split``. Defaults to 0.2.
        random_state: Seed forwarded to ``train_test_split``. Defaults to 42.
        stratify: If True, split with ``stratify=y`` so that each split keeps
            the class proportions of ``y``. Only meaningful for classification
            targets; leave it False for continuous (regression) targets.
            The default False reproduces the plain, unstratified split.

    Returns:
        Tuple ``(X_train_scaled, X_test_scaled, y_train, y_test)``.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=test_size,
        random_state=random_state,
        # None is train_test_split's own default, i.e. no stratification.
        stratify=y if stratify else None,
    )

    print(f"Training set size: {X_train.shape[0]}")
    print(f"Test set size: {X_test.shape[0]}")

    # Fit on the training split only; reuse those statistics for the test split.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    return X_train_scaled, X_test_scaled, y_train, y_test


# Split and scale once; the kNN cells below fit and score on these arrays.
X_train_scaled, X_test_scaled, y_train, y_test = split_data_and_normalize(X, y)

Training set size: 120
Test set size: 30


In [None]:
# Neighbourhood sizes to compare.
ks = [1, 3, 5, 11, 50]

# One classifier per k, each fitted on the scaled training split.
neighs = []
for k in ks:
    model = KNeighborsClassifier(n_neighbors=k)
    model.fit(X_train_scaled, y_train)
    neighs.append(model)

In [16]:
for k, model in zip(ks, neighs):
    # Training split: predict, then score against the true labels.
    y_train_pred = model.predict(X_train_scaled)
    train_accuracy = accuracy_score(y_train, y_train_pred)

    # Held-out split, scored the same way.
    y_test_pred = model.predict(X_test_scaled)
    test_accuracy = accuracy_score(y_test, y_test_pred)

    # One print call; the trailing empty string produces the blank line
    # that separates consecutive models in the output.
    print(
        f"kNN model with k={k}",
        f"Training Accuracy: {train_accuracy:.4f} ({train_accuracy * 100:.2f}%)",
        f"Test Accuracy: {test_accuracy:.4f} ({test_accuracy * 100:.2f}%)",
        "",
        sep="\n",
    )

kNN model with k=1
Training Accuracy: 1.0000 (100.00%)
Test Accuracy: 1.0000 (100.00%)

kNN model with k=3
Training Accuracy: 0.9417 (94.17%)
Test Accuracy: 1.0000 (100.00%)

kNN model with k=4
Training Accuracy: 0.9583 (95.83%)
Test Accuracy: 1.0000 (100.00%)

kNN model with k=5
Training Accuracy: 0.9583 (95.83%)
Test Accuracy: 1.0000 (100.00%)

kNN model with k=11
Training Accuracy: 0.9583 (95.83%)
Test Accuracy: 1.0000 (100.00%)

