In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score


In [5]:
# Read the raw car table from Allcars.csv into a DataFrame
data = pd.read_csv(filepath_or_buffer='Allcars.csv')


In [6]:
# Features: keep the Volume and Doors columns; Make is left out because it is
# non-ordinal. The Style column is the label to predict.
feature_columns = ['Volume', 'Doors']
X = data.loc[:, feature_columns].values
y = data.loc[:, 'Style'].values

In [7]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# shuffle (and therefore the split) repeatable across runs.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [8]:
# Persist both splits as CSV files: the two feature columns followed by the
# Style label, without the DataFrame index.

# Training.csv
train_df = pd.DataFrame(X_train, columns=['Volume', 'Doors']).assign(Style=y_train)
train_df.to_csv('Training.csv', index=False)

# Testing.csv
test_df = pd.DataFrame(X_test, columns=['Volume', 'Doors']).assign(Style=y_test)
test_df.to_csv('Testing.csv', index=False)

In [9]:
# Standardize the features. The scaler is fitted on the training rows only
# and then applied unchanged to both splits, so the test rows do not
# influence the fitted mean and variance.
scaler = StandardScaler().fit(X_train)
X_train_norm = scaler.transform(X_train)
X_test_norm = scaler.transform(X_test)

In [10]:
# Find the K with the highest accuracy.
# Each candidate K is fitted on the normalized training rows and scored on the
# normalized test rows; every (K, accuracy) pair is recorded in
# accuracy_results.
# NOTE(review): K is selected on the same test split the accuracy is reported
# on, so the printed accuracy is likely optimistic - confirm this is intended.
accuracy_results = []
best_k = 1
best_accuracy = 0
best_model = None

# Try K = 1..50, capped below the number of training rows. The lower clamp to 1
# guarantees at least one candidate is evaluated even with a single training
# row (K=1 is still a valid neighbor count there), so best_model is never None.
max_k = max(1, min(50, len(X_train) - 1))

for k in range(1, max_k + 1):
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train_norm, y_train)
    y_pred = knn.predict(X_test_norm)
    accuracy = accuracy_score(y_test, y_pred)

    accuracy_results.append({'K': k, 'Accuracy': accuracy})

    # Strict '>' keeps the smallest K on ties. The `is None` check accepts the
    # first candidate unconditionally so a model is selected even when every K
    # scores 0.0 (otherwise best_model would stay None and later cells fail).
    if best_model is None or accuracy > best_accuracy:
        best_accuracy = accuracy
        best_k = k
        best_model = knn

print(f"Best K: {best_k}, Accuracy: {best_accuracy}")

Best K: 3, Accuracy: 0.6875


In [11]:
# Tabulate the per-K accuracy records and save them to Accuracy.csv
# (one row per K tried, no index column).
accuracy_df = pd.DataFrame(data=accuracy_results)
accuracy_df.to_csv(path_or_buf='Accuracy.csv', index=False)

In [12]:
# Predictions
# Score the normalized test rows with the selected model. The confidence of a
# prediction is the largest class probability the model reports for that row.
probabilities = best_model.predict_proba(X_test_norm)
predictions = best_model.predict(X_test_norm)
confidence = probabilities.max(axis=1)

# Update Testing.csv: reload the saved test split, attach the two new columns,
# and write the file back in place.
testing_df = pd.read_csv('Testing.csv').assign(
    Prediction=predictions,
    Confidence=confidence,
)
testing_df.to_csv('Testing.csv', index=False)