# DATA PREPARATION

In [4]:
import random
from collections import Counter

import numpy as np
# --- Configuration -----------------------------------------------------------
SEED = 100
random.seed(SEED)  # makes the random train/validation/test assignment below repeatable

data_path = 'Iris.csv'

# Cumulative cut-offs applied to a uniform draw in [0, 1):
#   draw <= TRAIN_CUTOFF               -> training set
#   TRAIN_CUTOFF < draw <= VAL_CUTOFF  -> validation set
#   draw > VAL_CUTOFF                  -> test set
TRAIN_CUTOFF = 0.7
VAL_CUTOFF = 0.85

# --- Load --------------------------------------------------------------------
# The header row is skipped. Columns 1-4 are parsed as floats (the features) and
# column 5 as a string (the species name); column 0 is not read.
numeric_data = np.genfromtxt(data_path, delimiter=',', skip_header=1, usecols=[1, 2, 3, 4])
species = np.genfromtxt(data_path, delimiter=',', skip_header=1, usecols=[5], dtype=str)

# --- Encode labels -----------------------------------------------------------
# np.unique returns the distinct names sorted, so integer codes follow that order.
unique_species = np.unique(species)
label_dict = {name: i for i, name in enumerate(unique_species)}
numeric_labels = np.array([label_dict[name] for name in species])

# Each row of my_data is [feature_1, ..., feature_4, label]; the label ends up
# stored as a float because it shares an array with the float features.
my_data = np.column_stack((numeric_data, numeric_labels))

print("First 5 rows of the processed dataset:")
# Slicing (rather than indexing 0..4) also works when the file has fewer than 5 rows.
for row in my_data[:5]:
    print(row)

# --- Split -------------------------------------------------------------------
Train_set = []
Val_set = []
Test_set = []

# One uniform draw per row decides its split, so the resulting sizes are only
# approximately proportional to the cut-offs.
for sample in my_data:
    draw = random.random()
    if draw <= TRAIN_CUTOFF:
        Train_set.append(sample)
    elif draw <= VAL_CUTOFF:
        Val_set.append(sample)
    else:
        Test_set.append(sample)

# Every row must land in exactly one split.
assert len(Train_set) + len(Val_set) + len(Test_set) == len(my_data)

print(f"Training set size: {len(Train_set)}")
print(f"Validation set size: {len(Val_set)}")
print(f"Test set size: {len(Test_set)}")

First 5 rows of the processed dataset:
[5.1 3.5 1.4 0.2 0. ]
[4.9 3.  1.4 0.2 0. ]
[4.7 3.2 1.3 0.2 0. ]
[4.6 3.1 1.5 0.2 0. ]
[5.  3.6 1.4 0.2 0. ]
Training set size: 103
Validation set size: 25
Test set size: 22


# KNN Classification 

In [5]:
def knn_classify(train_set, validation_sample, k=5):
    """Classify one sample by majority vote among its k nearest training samples.

    Every sample is a 1-D sequence whose last element is the class label and
    whose preceding elements are numeric features. Distance is the Euclidean
    distance over the feature elements only.

    Parameters
    ----------
    train_set : sequence of samples
        Labelled training samples, all of the same length (for example a list
        of 1-D NumPy arrays, or a 2-D array with one sample per row).
    validation_sample : sequence
        The sample to classify. Its last element is ignored.
    k : int, default 5
        Number of nearest neighbours that vote. If ``k`` is larger than the
        number of training samples, all training samples vote.

    Returns
    -------
    The label (taken from the last column of the training data) with the most
    votes. When several labels tie, the one whose first voter is closest to
    the query wins.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1, or ``train_set`` is empty or is not a
        collection of equal-length samples.
    """
    # A non-positive k would otherwise slice the neighbour list incorrectly
    # (k=0 -> no voters, k<0 -> "all but the last |k|"), so reject it up front.
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k}")

    train = np.asarray(train_set)
    if train.ndim != 2 or train.shape[0] == 0:
        raise ValueError("train_set must be a non-empty collection of equal-length samples")

    query = np.asarray(validation_sample)[:-1]

    # Euclidean distance from the query to every training sample in one shot.
    distances = np.sqrt(np.sum((train[:, :-1] - query) ** 2, axis=1))

    # A stable sort keeps equally distant samples in their training-set order.
    nearest = np.argsort(distances, kind="stable")[:k]

    # Labels are counted nearest-first, so on a tied vote most_common() returns
    # the label that was encountered first, i.e. the closer one.
    votes = Counter(train[nearest, -1])
    return votes.most_common(1)[0][0]

def calculate_validation_accuracy(train_set, val_set, k):
    """Return the percentage of ``val_set`` samples that k-NN labels correctly.

    Each sample in ``val_set`` is classified with ``knn_classify`` against
    ``train_set`` and the prediction is compared with the sample's own last
    element (its true label).

    Parameters
    ----------
    train_set : sequence of samples
        Labelled samples used as the neighbour pool.
    val_set : sequence of samples
        Labelled samples to evaluate; must not be empty.
    k : int
        Number of neighbours passed through to ``knn_classify``.

    Returns
    -------
    float
        Accuracy in percent, between 0 and 100.

    Raises
    ------
    ValueError
        If ``val_set`` is empty, since accuracy is undefined in that case.
    """
    total = len(val_set)
    # A random split can leave an evaluation set empty; fail with a clear
    # message instead of a bare ZeroDivisionError from the division below.
    if total == 0:
        raise ValueError("val_set is empty; accuracy is undefined")

    correct = sum(
        1
        for sample in val_set
        if knn_classify(train_set, sample, k) == sample[-1]
    )
    return (correct / total) * 100

# KNN CLASSIFICATION REPORT

In [6]:
# Neighbourhood sizes to compare on the validation split.
k_values = [1, 3, 5, 10, 15]
validation_accuracies = {}

# Score each candidate K and report it as soon as it is computed.
for k in k_values:
    validation_accuracies[k] = accuracy = calculate_validation_accuracy(Train_set, Val_set, k)
    print(f"Validation Accuracy for K={k}: {accuracy:.2f}%")

# max() returns the first maximal entry, so on a tie the K listed earliest wins.
best_k = max(validation_accuracies.items(), key=lambda entry: entry[1])[0]
print(f"\nBest K value: {best_k} with accuracy: {validation_accuracies[best_k]:.2f}%")

# Summary table; the dict iterates in insertion order, i.e. the order of k_values.
print("\nK\tValidation Accuracy")
print("-" * 25)
for k in validation_accuracies:
    print(f"{k}\t{validation_accuracies[k]:.2f}%")

# The same accuracy helper is applied to the held-out test split with the chosen K.
test_accuracy = calculate_validation_accuracy(Train_set, Test_set, best_k)
print(f"\nTest Accuracy (K={best_k}): {test_accuracy:.2f}%")

Validation Accuracy for K=1: 96.00%
Validation Accuracy for K=3: 96.00%
Validation Accuracy for K=5: 100.00%
Validation Accuracy for K=10: 100.00%
Validation Accuracy for K=15: 96.00%

Best K value: 5 with accuracy: 100.00%

K	Validation Accuracy
-------------------------
1	96.00%
3	96.00%
5	100.00%
10	100.00%
15	96.00%

Test Accuracy (K=5): 100.00%
