In [1]:
from google.colab import drive
drive.mount('/content/drive')

import pandas as pd
df = pd.read_csv("/content/drive/MyDrive/ColabNotebooks/AllCars.csv")
df.head()

Mounted at /content/drive


Unnamed: 0,Make,Volume,Doors,Style
0,Toyota,102,4,Sedan
1,Kia,121,5,SUV
2,Mazda,113,4,Sedan
3,Porshe,134,5,SUV
4,Chevrolet,134,5,SUV


In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

RANDOM_SEED = 42

required = {"Make", "Volume", "Doors", "Style"}
missing = required - set(df.columns)
if missing:
    raise ValueError(f"AllCars.csv missing columns: {missing}")

X = df[["Volume", "Doors"]].copy()
y = df["Style"].copy()

scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)
X_scaled = pd.DataFrame(X_scaled, columns=["Volume", "Doors"])

resultant_df = pd.concat([y.reset_index(drop=True), X_scaled], axis=1)
resultant_df.columns = ["Style", "Volume", "Doors"]

train_df, test_df = train_test_split(
    resultant_df,
    test_size=0.2,
    random_state=RANDOM_SEED,
    shuffle=True
)

train_df.to_csv("Training.csv", index=False)
test_df.to_csv("Testing.csv", index=False)

X_train = train_df[["Volume", "Doors"]].to_numpy()
y_train = train_df["Style"].to_numpy()
X_test  = test_df[["Volume", "Doors"]].to_numpy()
y_test  = test_df["Style"].to_numpy()

max_k = min(50, len(train_df))
results = []

best_k = None
best_acc = -1.0
best_model = None

for k in range(1, max_k + 1):
    model = KNeighborsClassifier(n_neighbors=k)
    model.fit(X_train, y_train)

    preds = model.predict(X_test)
    acc = accuracy_score(y_test, preds)

    results.append({"K": k, "Accuracy": acc})

    if acc > best_acc or (acc == best_acc and (best_k is None or k < best_k)):
        best_acc = acc
        best_k = k
        best_model = model

pd.DataFrame(results).to_csv("Accuracy.csv", index=False)

probas = best_model.predict_proba(X_test)
classes = best_model.classes_

pred_idx = probas.argmax(axis=1)
predictions = classes[pred_idx]
confidences = probas.max(axis=1)

testing_out = pd.read_csv("Testing.csv")
testing_out["Prediction"] = predictions
testing_out["Confidence"] = confidences
testing_out.to_csv("Testing.csv", index=False)

print(f"Best K = {best_k}, Best Accuracy = {best_acc:.4f}")
print("Wrote: Training.csv, Testing.csv (with Prediction/Confidence), Accuracy.csv")


Best K = 42, Best Accuracy = 0.6875
Wrote: Training.csv, Testing.csv (with Prediction/Confidence), Accuracy.csv


In [3]:
from google.colab import files
files.download("Training.csv")
files.download("Testing.csv")
files.download("Accuracy.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>