In [141]:
import csv
import math
from random import randrange, seed, shuffle, choice


In [142]:

# Load a CSV file into a list of rows, skipping blank lines
def load_csv(filename):
    """Read a CSV file and return its non-empty rows.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list of rows, each row being a list of the raw string fields.
        Rows that the csv reader yields as empty (blank lines) are dropped.

    Raises:
        OSError: If the file cannot be opened.
    """
    # newline='' is required by the csv module: it lets the reader do its own
    # newline handling, so line breaks embedded in quoted fields are preserved
    # instead of being rewritten by universal-newline translation.
    with open(filename, 'r', newline='') as file:
        return [row for row in csv.reader(file) if row]


In [143]:

# Convert string column to float
def str_column_to_float(dataset, column):
    """Convert one column of every row from string to float, in place.

    Args:
        dataset: Sequence of mutable rows; each row is modified directly.
        column: Index of the field to convert.

    Surrounding whitespace is stripped from the field before conversion.
    Nothing is returned; the rows of ``dataset`` are updated.
    """
    for record in dataset:
        raw_text = record[column]
        record[column] = float(raw_text.strip())


In [144]:

# Split a dataset into a train and test set
def train_test_split(dataset, split=0.70):
    """Randomly partition ``dataset`` into a training list and a test list.

    Args:
        dataset: Sequence of rows. It is copied, so the caller's sequence is
            not modified.
        split: Fraction of the rows intended for the training set.

    Returns:
        A ``(train, test)`` tuple of lists. Rows are moved one at a time from
        the training copy to the test list at positions drawn with
        ``randrange``, so the result depends on the module-level random state.
    """
    remaining = list(dataset)
    held_out = []
    # The target may be fractional; drawing continues until it is met or
    # exceeded, which rounds the test-set size up.
    held_out_target = len(dataset) - split * len(dataset)
    while len(held_out) < held_out_target:
        pick = randrange(len(remaining))
        held_out.append(remaining.pop(pick))
    return remaining, held_out


In [145]:

# Calculate Euclidean distance between two vectors
def euclidean_distance(row1, row2):
    """Return the Euclidean distance between the feature parts of two rows.

    The final element of ``row1`` is excluded from the calculation, so only
    the first ``len(row1) - 1`` positions of each row are compared.

    Args:
        row1: First row; its length determines how many positions are used.
        row2: Second row; must have at least as many positions as are used.

    Returns:
        The square root of the summed squared differences, as a float.
    """
    feature_count = len(row1) - 1
    squared_total = 0.0
    for idx in range(feature_count):
        delta = row1[idx] - row2[idx]
        squared_total += delta ** 2
    return math.sqrt(squared_total)


In [146]:

# Locate the most similar neighbors
def get_neighbors(train, test_row, num_neighbors):
    """Return the ``num_neighbors`` training rows closest to ``test_row``.

    Args:
        train: Iterable of training rows.
        test_row: Row to compare against every training row.
        num_neighbors: How many of the closest rows to return.

    Returns:
        A list of training rows ordered from nearest to farthest, as measured
        by ``euclidean_distance``. Rows at equal distance keep their original
        relative order because the sort is stable.

    Raises:
        IndexError: If ``num_neighbors`` exceeds the number of training rows.
    """
    scored = [
        (candidate, euclidean_distance(test_row, candidate))
        for candidate in train
    ]
    scored.sort(key=lambda pair: pair[1])
    # Index explicitly rather than slice, so that asking for more neighbors
    # than there are training rows raises instead of silently returning fewer.
    return [scored[rank][0] for rank in range(num_neighbors)]


In [147]:

# Make a prediction with neighbors
def predict_classification(train, test_row, num_neighbors):
    """Predict the class of ``test_row`` by majority vote of its neighbors.

    Args:
        train: Training rows; the last element of each row is its class label.
        test_row: Row to classify.
        num_neighbors: Number of nearest neighbors that take part in the vote.

    Returns:
        The most frequent class label among the nearest neighbors. When
        several labels are tied, the tied label that occurs earliest in the
        neighbor list wins; ``get_neighbors`` returns rows nearest-first, so
        that is the class of the closest tied neighbor.

    Raises:
        ValueError: If no neighbors are available to vote.
    """
    neighbors = get_neighbors(train, test_row, num_neighbors)
    output_values = [row[-1] for row in neighbors]
    if not output_values:
        raise ValueError('at least one neighbor is required to make a prediction')

    # Tally the votes in a single pass instead of re-counting per label.
    votes = {}
    for label in output_values:
        votes[label] = votes.get(label, 0) + 1

    # max() returns the first maximal item it meets. Scanning the labels in
    # neighbor order (not via a set, whose iteration order for strings varies
    # with hash randomization) makes tie-breaking deterministic across runs.
    return max(output_values, key=votes.__getitem__)


In [148]:

# Calculate accuracy percentage
def accuracy_metric(actual, predicted):
    """Return the percentage of positions where the prediction is correct.

    Args:
        actual: Sequence of true labels; its length sets how many positions
            are compared.
        predicted: Sequence of predicted labels, indexed in parallel.

    Returns:
        A float in the range 0.0 to 100.0.

    Raises:
        ZeroDivisionError: If ``actual`` is empty.
        IndexError: If ``predicted`` is shorter than ``actual``.
    """
    total = len(actual)
    hits = sum(1 for pos in range(total) if actual[pos] == predicted[pos])
    return hits / float(total) * 100.0


In [149]:

# KNN Algorithm
def k_nearest_neighbors(train, test, num_neighbors):
    """Classify every row of ``test`` with k-nearest-neighbors.

    Args:
        train: Training rows handed to ``predict_classification``.
        test: Iterable of rows to classify.
        num_neighbors: Number of neighbors used for each prediction.

    Returns:
        A list with one predicted label per test row, in the same order.
    """
    return [
        predict_classification(train, candidate, num_neighbors)
        for candidate in test
    ]


In [150]:

# Load and prepare data
# NOTE(review): this is an absolute, machine-specific path, so the notebook
# only runs where this exact file exists. Consider a path relative to a
# configurable data directory.
filename = 'C:\\Users\\fhitl\\Documents\\Deep_Neural_Networks_Assignments\\datasets\\pima-indians-diabetes.csv'
dataset = load_csv(filename)

# Convert every feature column from string to float. The last column (the
# class label) is not converted and stays the string read from the CSV.
for i in range(len(dataset[0])-1):
    str_column_to_float(dataset, i)

# Seed the random module so the split and the random pick below are repeatable
seed(1)
train, test = train_test_split(dataset, 0.70)

# The true labels do not depend on k, so extract them once, outside the loop
actual = [row[-1] for row in test]

# Start below any achievable accuracy so the first k tried is always recorded,
# even if it scores 0%; otherwise best_k could be left without a usable value.
best_accuracy = -1.0
best_k = None

# Evaluate the classifier for each candidate number of neighbors
for num_neighbors in [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21]:
    predictions = k_nearest_neighbors(train, test, num_neighbors)
    accuracy = accuracy_metric(actual, predictions)
    print(f'Neighbors: {num_neighbors}, Accuracy: {accuracy:.2f}%')

    # Strict comparison: on equal accuracy the smaller, earlier k is kept
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_k = num_neighbors

# Print the best k and its accuracy
print(f'\nBest number of neighbors: {best_k}, with Accuracy: {best_accuracy:.2f}%')

# Select a random individual from the test set
random_individual = choice(test)

# Predict for the random individual using the best-scoring k
predicted_class = predict_classification(train, random_individual, best_k)
print(f"\nSelected individual's data: {random_individual[:-1]}")
print(f"\nActual Class: {random_individual[-1]}")
print(f"\nPredicted Class (0 for non-diabetic, 1 for diabetic): {predicted_class}")

Neighbors: 1, Accuracy: 68.83%
Neighbors: 3, Accuracy: 71.43%
Neighbors: 5, Accuracy: 70.13%
Neighbors: 7, Accuracy: 73.16%
Neighbors: 9, Accuracy: 72.29%
Neighbors: 11, Accuracy: 72.73%
Neighbors: 13, Accuracy: 72.73%
Neighbors: 15, Accuracy: 73.59%
Neighbors: 17, Accuracy: 72.73%
Neighbors: 19, Accuracy: 71.43%
Neighbors: 21, Accuracy: 71.43%

Best number of neighbors: 15, with Accuracy: 73.59%

Selected individual's data: [1.0, 131.0, 64.0, 14.0, 415.0, 23.7, 0.389, 21.0]

Actual Class: 0

Predicted Class (0 for non-diabetic, 1 for diabetic): 0
