In [6]:
import numpy as np
import os
import pandas as pd
from PIL import Image

In [7]:
# Directories holding the raw grayscale images, one folder per class.
# NOTE(review): these are absolute, machine-specific paths — consider making
# the data directory configurable.
cars_path = 'C:/Users/muham/OneDrive/Masaüstü/FIZ-437E/HAM DATA/cars-gray/'
flowers_path = 'C:/Users/muham/OneDrive/Masaüstü/FIZ-437E/HAM DATA/flower-gray/'

# os.listdir returns entries in arbitrary order; sorting fixes the row order so
# the seeded train/test split built from these lists is reproducible.
cars = sorted(os.listdir(cars_path))
flowers = sorted(os.listdir(flowers_path))

In [8]:
# Fraction of each class's rows sampled into the training set; the remaining
# rows of that class form the test set.
train_size = 0.9

In [9]:
def img2vector(path, image_name):
    """Load an image file and return its pixel data as a flat 1-D array.

    Args:
        path: Directory that contains the image.
        image_name: File name of the image inside ``path``.

    Returns:
        np.ndarray: The image's pixel array flattened to one dimension.
    """
    # The context manager guarantees the underlying file is released even if
    # decoding fails; np.array() copies the pixel data before the block exits.
    with Image.open(os.path.join(path, image_name)) as image:
        return np.array(image).flatten()

In [10]:
# One row per car image, 2500 values per row, filled from the flattened pixels.
car_vectors = np.zeros((len(cars), 2500))
for row, file_name in enumerate(cars):
    car_vectors[row] = img2vector(cars_path, file_name)

In [11]:
# One row per flower image, 2500 values per row, filled from the flattened pixels.
flowers_vectors = np.zeros((len(flowers), 2500))
for row, file_name in enumerate(flowers):
    flowers_vectors[row] = img2vector(flowers_path, file_name)

In [12]:
# Wrap each pixel matrix in a DataFrame and tag the class: +1 cars, -1 flowers.
car = pd.DataFrame(car_vectors).assign(label=1)
flowers_df = pd.DataFrame(flowers_vectors).assign(label=-1)

# Seeded per-class split: sample the training fraction, keep the rest for testing.
car_train = car.sample(frac=train_size, random_state=200)
car_test = car.drop(index=car_train.index)

flowers_train = flowers_df.sample(frac=train_size, random_state=200)
flowers_test = flowers_df.drop(index=flowers_train.index)

# Stack both classes (cars first) into the final train and test frames.
train_df = pd.concat([car_train, flowers_train], axis=0)
test_df = pd.concat([car_test, flowers_test], axis=0)

In [13]:
# Separate the pixel features from the 'label' target for both splits.
train_x, train_y = train_df.drop(columns='label'), train_df['label']
test_x, test_y = test_df.drop(columns='label'), test_df['label']

In [14]:
def euclidean_distance(x, y):
    """Return the Euclidean (L2) distance between ``x`` and ``y``.

    Computed as the square root of the summed squared element-wise differences.
    """
    delta = x - y
    squared = np.square(delta)
    return np.sqrt(np.sum(squared))

In [15]:
def knn(x, y, k):
    """Return the positions of the ``k`` rows of ``x`` closest to ``y``.

    Closeness is Euclidean distance over the columns of ``x``.

    Args:
        x: DataFrame whose rows are the candidate points.
        y: Query point. A Series is aligned on ``x``'s column labels; a plain
            1-D array-like is matched to the columns by position.
        k: Number of nearest neighbours to return.

    Returns:
        np.ndarray: Row positions into ``x`` (usable with ``.iloc``), ordered
        nearest first, with at most ``k`` entries.
    """
    # Compute every distance in one vectorized pass instead of building a
    # pandas Series per row in a Python loop, which dominates the runtime.
    squared_diffs = x.sub(y, axis='columns') ** 2
    distances = np.sqrt(squared_diffs.sum(axis=1)).to_numpy()
    # A stable sort breaks distance ties by lower row position, so the chosen
    # neighbours are deterministic across runs and NumPy versions.
    return np.argsort(distances, kind='stable')[:k]

def predict(x, y, k, labels=None):
    """Predict the label of ``y`` by majority vote among its ``k`` nearest rows of ``x``.

    Args:
        x: DataFrame of reference points, passed through to ``knn``.
        y: Query point to classify.
        k: Number of neighbours that vote.
        labels: Series of labels positionally matching the rows of ``x``.
            Defaults to the module-level ``train_y`` so existing calls keep
            working; pass it explicitly whenever ``x`` is not ``train_x``.

    Returns:
        The most frequent label among the neighbours. When several labels tie,
        the first entry of ``Series.mode()`` is returned.
    """
    if labels is None:
        # Backward-compatible default: the original always read the global.
        labels = train_y
    neighbour_positions = knn(x, y, k)
    votes = labels.iloc[neighbour_positions]
    return votes.mode()[0]

def accuracy(x, y, k):
    """Return the fraction of rows in ``x`` whose k-NN prediction matches ``y``.

    Each row of ``x`` is classified against the module-level ``train_x`` via
    ``predict`` and compared with the label at the same position in ``y``.
    """
    total = len(x)
    hits = sum(
        1
        for row in range(total)
        if predict(train_x, x.iloc[row], k) == y.iloc[row]
    )
    return hits / total


    

In [18]:
# Sweep k, record train and test accuracy for each value, and plot both curves.

import matplotlib.pyplot as plt

k_list = []
accuracy_train_list = []
accuracy_test_list = []


# Odd k only: with two classes an odd number of voters cannot tie
# (as long as k does not exceed the number of training rows).
for k in range(1, 30, 2):
    accuracy_train = accuracy(train_x, train_y, k)
    accuracy_test = accuracy(test_x, test_y, k)
    k_list.append(k)
    accuracy_train_list.append(accuracy_train)
    accuracy_test_list.append(accuracy_test)


plt.plot(k_list, accuracy_train_list, label='train')
plt.plot(k_list, accuracy_test_list, label='test')
plt.xlabel('k')
plt.ylabel('accuracy')
plt.title('k-NN accuracy vs. k')
# Without legend() the label= arguments above are never rendered, leaving the
# two curves indistinguishable.
plt.legend()
plt.show()