In [None]:
import pandas as pd
import numpy as np

# The CSV has no header row: the labels pandas previously inferred
# ('vhigh', 'vhigh.1', '2', '2.1', 'small', 'low', 'unacc') are the values of
# the first data record. Reading with header=None keeps that record as data
# and assigns the descriptive column names directly.
data = pd.read_csv(
    'car_evaluation.csv',
    header=None,
    names=[
        'buyprice',
        'maintainprice',
        'doors',
        'persons',
        'luggagesize',
        'safety',
        'rating',
    ],
)

# Lift pandas' display truncation so the whole frame is rendered below.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
data

In [None]:
# Summarise every column: object-dtype columns list their distinct values,
# all other columns report their minimum and maximum.
for column in data.columns:
    series = data[column]

    if series.dtype != 'object':
        min_value = series.min()
        max_value = series.max()
        print(f"Column '{column}' has values in range from {min_value} to {max_value}")
        print("\n")
        continue

    unique_values = series.unique()
    print(f"Column '{column}' has {len(unique_values)} unique values: " )
    print(unique_values)
    print("\n")

In [None]:
# Print the frequency of each distinct value, one column at a time.
for _, column_values in data.items():
    frequencies = column_values.value_counts()
    print(frequencies)
    print("\n")

In [12]:
def hamming_distance(instance1, instance2):
    """Count the positions at which two sequences hold different values.

    The sequences are walked in parallel with ``zip``, so when their lengths
    differ only the leading positions of the longer one are compared.

    Returns the number of mismatching positions as an integer.
    """
    mismatches = 0
    for left, right in zip(instance1, instance2):
        if left != right:
            mismatches += 1
    return mismatches

def knn(train_data, test_instance, k):
    """Predict a label for ``test_instance`` by majority vote of its k nearest rows.

    Distance is the Hamming distance (number of positions whose values
    differ) between the query and each training row's feature values. It is
    computed for all rows at once instead of building one pandas Series per
    row, which gives the same distances with far less overhead.

    Parameters
    ----------
    train_data : pandas.DataFrame
        Training rows. Every column except the last is treated as a feature
        (by position); the labels are read from the 'rating' column.
    test_instance : sequence
        Feature values in the same order as the feature columns. Values are
        compared position by position up to the shorter length, so trailing
        extras (for example a label at the end of a full row) are ignored.
    k : int
        Number of nearest neighbours that vote. Must be at least 1.

    Returns
    -------
    The label with the most votes among the k nearest rows. Rows at equal
    distance keep their original order, and a tied vote goes to the label
    that was encountered first among the neighbours.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1 or ``train_data`` has no rows.
    """
    if k < 1:
        # A negative k would silently slice "all but the last |k|" neighbours.
        raise ValueError(f"k must be a positive integer, got {k!r}")
    if len(train_data) == 0:
        raise ValueError("train_data must contain at least one row")

    feature_matrix = train_data.iloc[:, :-1].to_numpy(dtype=object)
    query = np.asarray(list(test_instance), dtype=object)

    # Compare only the positions both sides have, mirroring zip() truncation.
    width = min(feature_matrix.shape[1], len(query))
    distances = (feature_matrix[:, :width] != query[:width]).sum(axis=1)

    # A stable sort keeps equally distant rows in their original order.
    nearest = np.argsort(distances, kind="stable")[:k]

    class_counts = {}
    for label in train_data['rating'].to_numpy()[nearest]:
        class_counts[label] = class_counts.get(label, 0) + 1

    # max() returns the first label reaching the top count, so ties resolve
    # in favour of the label seen earliest among the neighbours.
    return max(class_counts, key=class_counts.get)


In [None]:
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score

# Try k = 1..10 and score each candidate by its mean accuracy over a shuffled
# 5-fold split with a fixed seed, so every k is evaluated on the same folds.
num_folds = 5
kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)

best_k = 0
best_accuracy = 0.0
accuracies = []  # mean accuracy per candidate k, in the order tried

for k in range(1, 11):
    fold_accuracies = []

    for train_index, test_index in kf.split(data):
        train_data = data.iloc[train_index]
        test_data = data.iloc[test_index]

        predictions = [
            knn(train_data, test_instance, k)
            for _, test_instance in test_data.iterrows()
        ]

        true_labels = test_data['rating']
        accuracy = accuracy_score(true_labels, predictions)
        fold_accuracies.append(accuracy)

    avg_accuracy = np.mean(fold_accuracies)
    accuracies.append(avg_accuracy)

    # Strict '>' means the smallest k wins when several candidates tie.
    if avg_accuracy > best_accuracy:
        best_k, best_accuracy = k, avg_accuracy

print("Best accuracy:", best_accuracy)
print(f"Best k for the model is: {best_k}")
print(f"Best accuracy of the model for k={best_k} is: {best_accuracy * 100:.2f}%")


In [None]:
# Evaluate the classifier at one fixed k over a shuffled, seeded 5-fold split
# and report the mean and standard deviation of the per-fold accuracy.
k = 1
num_folds = 5
kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)

accuracy_scores = []

for train_index, test_index in kf.split(data):
    train_data = data.iloc[train_index]
    test_data = data.iloc[test_index]

    predictions = [
        knn(train_data, test_instance, k)
        for _, test_instance in test_data.iterrows()
    ]

    true_labels = test_data['rating']
    accuracy = accuracy_score(true_labels, predictions)
    accuracy_scores.append(accuracy)

mean_accuracy, std_accuracy = np.mean(accuracy_scores), np.std(accuracy_scores)

print(f"Mean Accuracy: {mean_accuracy*100} %")
print(f"Standard Deviation of Accuracy: {std_accuracy}")


In [None]:
# NOTE: this repeats the hamming_distance definition from an earlier cell;
# once this cell runs, this later definition is the one in effect.
def hamming_distance(instance1, instance2):
    """Count the positions at which two sequences hold different values.

    The sequences are walked in parallel with ``zip``, so when their lengths
    differ only the leading positions of the longer one are compared.

    Returns the number of mismatching positions as an integer.
    """
    mismatches = 0
    for left, right in zip(instance1, instance2):
        if left != right:
            mismatches += 1
    return mismatches

# NOTE: this repeats the knn definition from an earlier cell; once this cell
# runs, this later definition is the one in effect.
def knn(train_data, test_instance, k):
    """Predict a label for ``test_instance`` by majority vote of its k nearest rows.

    Distance is the Hamming distance (number of positions whose values
    differ) between the query and each training row's feature values. It is
    computed for all rows at once instead of building one pandas Series per
    row, which gives the same distances with far less overhead.

    Parameters
    ----------
    train_data : pandas.DataFrame
        Training rows. Every column except the last is treated as a feature
        (by position); the labels are read from the 'rating' column.
    test_instance : sequence
        Feature values in the same order as the feature columns. Values are
        compared position by position up to the shorter length, so trailing
        extras (for example a label at the end of a full row) are ignored.
    k : int
        Number of nearest neighbours that vote. Must be at least 1.

    Returns
    -------
    The label with the most votes among the k nearest rows. Rows at equal
    distance keep their original order, and a tied vote goes to the label
    that was encountered first among the neighbours.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1 or ``train_data`` has no rows.
    """
    if k < 1:
        # A negative k would silently slice "all but the last |k|" neighbours.
        raise ValueError(f"k must be a positive integer, got {k!r}")
    if len(train_data) == 0:
        raise ValueError("train_data must contain at least one row")

    feature_matrix = train_data.iloc[:, :-1].to_numpy(dtype=object)
    query = np.asarray(list(test_instance), dtype=object)

    # Compare only the positions both sides have, mirroring zip() truncation.
    width = min(feature_matrix.shape[1], len(query))
    distances = (feature_matrix[:, :width] != query[:width]).sum(axis=1)

    # A stable sort keeps equally distant rows in their original order.
    nearest = np.argsort(distances, kind="stable")[:k]

    class_counts = {}
    for label in train_data['rating'].to_numpy()[nearest]:
        class_counts[label] = class_counts.get(label, 0) + 1

    # max() returns the first label reaching the top count, so ties resolve
    # in favour of the label seen earliest among the neighbours.
    return max(class_counts, key=class_counts.get)

# Imported here because no earlier cell brings train_test_split into scope;
# without it this cell fails with a NameError on a fresh kernel.
from sklearn.model_selection import train_test_split

k = 3
num_folds = 5
validation_size = 0.10

accuracies = []

# NOTE: each repetition draws an independent random train/test split, so the
# test sets may overlap; this is repeated hold-out rather than a k-fold
# partition, even though the summary message calls it cross-validation.
for fold in range(num_folds):
    # Seeding with the repetition index makes the run reproducible while
    # still giving every repetition a different split.
    train_data, test_data = train_test_split(
        data, test_size=validation_size, random_state=fold
    )
    correct_predictions = 0

    for _, test_instance in test_data.iterrows():
        # Drop the trailing label so only feature values reach the classifier.
        predicted_class = knn(train_data, test_instance[:-1], k)
        actual_class = test_instance['rating']

        if predicted_class == actual_class:
            correct_predictions += 1

    accuracy = correct_predictions / len(test_data)
    accuracies.append(accuracy)

    print(f"Accuracy for Fold {fold + 1}: {accuracy* 100:.2f} %")

mean_accuracy = np.mean(accuracies)
max_accuracy = max(accuracies)
print(f"Mean Accuracy over {num_folds}-fold cross-validation: {mean_accuracy*100:.2f} %")
print(f"\n\nMaximum Accuracy among all folds: {max_accuracy*100:.2f} %")