In [9]:
import cv2
import numpy as np
import os
import pandas as pd

In [10]:
def turn_images_to_array(dir_name, my_data):

    for i in os.listdir(dir_name):
        img_load = cv2.imread(dir_name+'/'+i)  # loading the image from directory
        img=cv2.cvtColor(img_load, cv2.COLOR_BGR2GRAY) # reduce depth to 1 from 3
        smaller_image = cv2.resize(img, (32, 32), interpolation=cv2.INTER_CUBIC)  # resize the image
        #canny_edges = cv2.Canny(smaller_image,100,200)
        img = smaller_image.astype(np.float32)  # convert resized image to float array
        data1 = [img, i[0],[]] # img->image array i[0]->type []->nearest neighbors
        my_data.append(data1)  # collect all datas in a array

    return my_data

def calculateDistance(test,train):
    # Calculates Manhattan distance between test and train matrices
    a = abs(test.flatten()-train.flatten())
    x = sum(a)
    return x

def checkTypeOfImage(index):
    #returns type of the image at given index
    return df.loc[index,"type"]


def predictedType_KNN(test_index):
    covid = 0  # number of covids in the nearest to test data
    normal = 0
    viral = 0
    for t in df.loc[test_index,"nearestNeighbors"]:
        if checkTypeOfImage(t) == 'N':
            normal+=1
        elif checkTypeOfImage(t) == 'C':
            covid+=1
        elif checkTypeOfImage(t) == 'V':
            viral+=1
    result = max(covid,viral,normal)
    # returns result according to maximum
    if result == covid:
        return 'C'
    elif result == normal:
        return 'N'
    else:
        return 'V'

def weightedKNN_predictedType(test_index):
    covid = 0  # sum of distances of covids in the nearest to test data
    normal = 0
    viral = 0
    for neighbor_index in df.loc[test_index,"nearestNeighbors"]:
        if checkTypeOfImage(neighbor_index) == 'N':
            normal += distancesMatrix[test_index,neighbor_index]
        elif checkTypeOfImage(neighbor_index) == 'C':
            covid += distancesMatrix[test_index,neighbor_index]
        elif checkTypeOfImage(neighbor_index) == 'V':
            viral += distancesMatrix[test_index,neighbor_index]
    a = []
    if covid != 0:
        a.append(covid)
    if viral != 0:
        a.append(viral)
    if normal != 0:
        a.append(normal)
    result = min(a)
    # returns result according to minimum distance
    if result == covid:
        return 'C'
    elif result == normal:
        return 'N'
    else:
        return 'V'

In [11]:
data_array = []
# converts images to 32x32 array
data_array = turn_images_to_array('dataset/COVID',data_array)
data_array = turn_images_to_array('dataset/Viral Pneumonia', data_array)
data_array = turn_images_to_array('dataset/NORMAL', data_array)

In [12]:
# collects all data into df Dataframe (array of image, its type and its nearest neighbors)
# Nearest neighbors are empty, at first
df = pd.DataFrame(data_array,columns=["image","type","nearestNeighbors"])
df = df.sample(frac=1).reset_index(drop=True)  # Shuffle the data for k-fold cross validation


In [17]:
k = 7 # this is the 5 nearest neighbor algorithm
k_fold = 10 # data will split into 10 parts (1/10 -> test)
fold_size = int(len(df)/k_fold)  # size of each fold

In [14]:

distancesMatrix = np.ones((len(df),len(df))) # matrix that keeps distance between all data
for i in range(len(df)):
    for j in range(i,len(df)):
        distance = calculateDistance(df.loc[i,"image"],df.loc[j,"image"])
        distancesMatrix[i,j] = distance
        distancesMatrix[j,i] = distance

In [18]:
sum_of_knn_accurracies = 0
sum_of_weightedKnn_accurracies = 0

for i in range(k_fold):
    knn_correct_prediction = 0
    knn_wrong_prediction = 0
    weighted_knn_correct_pred = 0
    weighted_knn_wrong_pred = 0
    for testIndex in range(len(df)):
        sortedNeighbors = np.argsort(distancesMatrix[testIndex]) # sort distances and keeps distance's index
        for neighborIndex in sortedNeighbors: # iterates index of sorted neighbors
            if testIndex == neighborIndex: # if current data and data in the neighbors list are the same, pass
                continue
            if len(df.loc[testIndex,"nearestNeighbors"]) == k: # collects just k nearest neighbors
                break
            else:
                if neighborIndex not in range(i*fold_size,(i+1)*fold_size,1): # if neighbor is not in own fold
                    df.loc[testIndex,"nearestNeighbors"].append(neighborIndex) # add index of neighbor to nearest neighbor list

        if df.loc[testIndex,"type"] == predictedType_KNN(testIndex): # compare actual type and predicted type using knn algorithm
            knn_correct_prediction+=1
        else:
            knn_wrong_prediction+=1
        if df.loc[testIndex,"type"] == weightedKNN_predictedType(testIndex): # compare actual type and predicted type using knn algorithm
            weighted_knn_correct_pred += 1
        else:
            weighted_knn_wrong_pred += 1
        df.loc[testIndex,"nearestNeighbors"].clear() # for each fold, clear nearest neighbors
    knn_accurracy = (knn_correct_prediction/(knn_correct_prediction+knn_wrong_prediction))*100
    weighted_accurracy = (weighted_knn_correct_pred/(weighted_knn_correct_pred+weighted_knn_wrong_pred))*100
    sum_of_knn_accurracies += knn_accurracy
    sum_of_weightedKnn_accurracies += weighted_accurracy

avg_knn_accurracy = sum_of_knn_accurracies/k_fold
avg_weightedKnn_accurracy = sum_of_weightedKnn_accurracies/k_fold

In [19]:
print(avg_knn_accurracy)
print(avg_weightedKnn_accurracy)


93.04921196526213
68.51399163718239
