In [166]:
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [167]:
#Load CSV, remove non-ordinal features, and normalize ordinal features
def load_and_prepare_data(filepath):
    df = pd.read_csv(filepath)
    df = df[['Volume', 'Doors', 'Style']]
    scaler = MinMaxScaler()
    df[['Volume', 'Doors']] = scaler.fit_transform(df[['Volume', 'Doors']])
    return df

In [168]:
#Split into 80% training and 20% testing
def split_data(df, test_size=0.2):
    train, test = train_test_split(df, test_size=test_size)
    return train, test

In [169]:
#KNN and make predictions
def train_knn(X_train, y_train, X_test, k):
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)
    return knn.predict(X_test), knn

In [170]:
#Iterate over K values to find the best accuracy
def calculate_accuracies(X_train, y_train, X_test, y_test):
    accuracies = []

    for k in range(1, 31):
        knn = KNeighborsClassifier(n_neighbors=k)
        knn.fit(X_train, y_train)
        y_pred = knn.predict(X_test)
        accuracies.append(accuracy_score(y_test, y_pred))

    return range(1, 31), accuracies

In [171]:
#Add the prediction and confidence columns
def add_predictions_and_confidence(test_df, X_train, y_train, X_test, best_k):
    _, best_knn = train_knn(X_train, y_train, X_test, best_k)

    predictions = best_knn.predict(X_test)
    probabilities = best_knn.predict_proba(X_test)
    confidence = np.max(probabilities, axis=1)

    test_df['Prediction'] = predictions
    test_df['Confidence'] = confidence

    return test_df

In [172]:
#load and prepare data
df = load_and_prepare_data('/content/drive/MyDrive/Colab Notebooks/Labtask2/AllCars.csv')
train, test = split_data(df)
#save CSV files
train.to_csv('Training.csv', index=False)
test.to_csv('Testing.csv', index=False)

In [173]:
# Prepare features and target
X_train = train[['Volume', 'Doors']]
y_train = train['Style']
X_test = test[['Volume', 'Doors']]
y_test = test['Style']

In [174]:
# Iterate over K to find highest accuracy
k_values, accuracies = calculate_accuracies(X_train, y_train, X_test, y_test)

# Save
accuracy_df = pd.DataFrame({'K': k_values, 'Accuracy': accuracies})
accuracy_df.to_csv('Accuracy.csv', index=False)

# Print results
for i in range(len(k_values)):
    k = k_values[i]
    acc = accuracies[i]
    print(f"K={k}: Accuracy={acc:.4f}")

# Find best K
best_k = k_values[np.argmax(accuracies)]
best_accuracy = max(accuracies)
print(f"\n Best K = {best_k} with Accuracy = {best_accuracy:.4f}")

K=1: Accuracy=0.5806
K=2: Accuracy=0.3871
K=3: Accuracy=0.7097
K=4: Accuracy=0.7097
K=5: Accuracy=0.6774
K=6: Accuracy=0.6774
K=7: Accuracy=0.7419
K=8: Accuracy=0.7419
K=9: Accuracy=0.7097
K=10: Accuracy=0.6452
K=11: Accuracy=0.6774
K=12: Accuracy=0.6774
K=13: Accuracy=0.6774
K=14: Accuracy=0.6774
K=15: Accuracy=0.6774
K=16: Accuracy=0.6774
K=17: Accuracy=0.6774
K=18: Accuracy=0.6774
K=19: Accuracy=0.6774
K=20: Accuracy=0.6774
K=21: Accuracy=0.6452
K=22: Accuracy=0.6774
K=23: Accuracy=0.6774
K=24: Accuracy=0.6452
K=25: Accuracy=0.6774
K=26: Accuracy=0.6452
K=27: Accuracy=0.6129
K=28: Accuracy=0.6129
K=29: Accuracy=0.6129
K=30: Accuracy=0.6129

 Best K = 7 with Accuracy = 0.7419


In [175]:
# Add predictions and confidence to the test set
test = add_predictions_and_confidence(test, X_train, y_train, X_test, best_k)
test.to_csv('Testing.csv', index=False)

In [176]:
from google.colab import files
files.download('Training.csv')
files.download('Testing.csv')
files.download('Accuracy.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>