In [1]:
# Import necessary packages
import os
import warnings
from collections import Counter
from csv import reader
from math import sqrt

import pandas as pd
from sklearn.metrics import accuracy_score

In [2]:
# Load a CSV file
def load_csv(filename):
    """Load a CSV file and integer-encode every remaining column.

    The header row and the first field of every row are discarded. Each
    remaining column is then replaced by integer codes produced by
    ``pd.factorize``: within a column, the first distinct value encountered
    becomes 0, the next new value 1, and so on.

    Args:
        filename: Path of the CSV file to read. If it does not name an
            existing file but ``Dataset/Play_Predictor.csv`` exists, that
            file is loaded instead and a ``UserWarning`` is issued (see the
            note in the body).

    Returns:
        list[list[int]]: One list of integer codes per data row, in file
        order. An empty list is returned when the file has no data rows.

    Raises:
        OSError: If the file cannot be opened (e.g. ``FileNotFoundError``).
        IndexError: If a data row has fewer fields than the first data row.
    """
    # This function used to ignore ``filename`` and always open the path
    # below. The argument is honoured now; the old path is kept only as a
    # fallback so existing calls that pass a non-existent path keep working.
    fallback_path = "Dataset/Play_Predictor.csv"
    path = filename
    if not (filename and os.path.isfile(filename)) and os.path.isfile(fallback_path):
        warnings.warn(
            "load_csv: %r not found, loading %r instead" % (filename, fallback_path),
            stacklevel=2,
        )
        path = fallback_path

    # newline='' is what the csv module expects for correct quoted-newline
    # handling; blank lines (parsed as empty rows) are skipped.
    with open(path, 'r', newline='') as file:
        rows = [row[1:] for row in reader(file) if row]

    dataset = rows[1:]  # drop the header row
    if not dataset:
        return []

    # Encode column by column, exactly once each. A Series is passed because
    # newer pandas versions deprecate handing plain lists to pd.factorize.
    for col in range(len(dataset[0])):
        column_values = pd.Series([row[col] for row in dataset], dtype=object)
        codes = pd.factorize(column_values)[0].tolist()  # plain Python ints
        for row, code in zip(dataset, codes):
            row[col] = code
    return dataset

In [3]:
######
# Demo: load the dataset and show its integer-encoded rows.
# The path names the file that is actually read (Play_Predictor.csv).
######
dataset = load_csv('Dataset/Play_Predictor.csv')
print(dataset)

[[0, 0, 0], [0, 0, 0], [1, 0, 1], [2, 1, 1], [2, 2, 1], [2, 2, 0], [1, 2, 1], [0, 1, 0], [0, 2, 1], [2, 1, 1], [0, 1, 1], [1, 1, 1], [1, 0, 1], [2, 1, 0], [2, 1, 1], [2, 2, 1], [2, 2, 0], [1, 2, 1], [0, 1, 0], [0, 2, 1], [2, 1, 1], [0, 1, 1], [0, 0, 0], [0, 0, 0], [1, 0, 1], [2, 1, 1], [2, 2, 1], [1, 2, 1], [0, 1, 0], [0, 2, 1], [0, 0, 0], [1, 0, 1], [2, 1, 1], [2, 2, 1], [2, 2, 0], [1, 2, 1], [0, 1, 0], [0, 2, 1], [2, 1, 1], [2, 2, 0]]


In [4]:
# Calculate the Euclidean distance between two vectors
def euclidean_distance(row1, row2):
    """Return the Euclidean distance between two rows.

    Every position of ``row1`` except its last one is compared with the
    same position of ``row2``; the last entry of ``row1`` is skipped.

    Args:
        row1: Sequence of numbers whose final entry is ignored.
        row2: Sequence of numbers with at least ``len(row1) - 1`` entries.

    Returns:
        float: Square root of the summed squared differences (``0.0`` when
        ``row1`` has fewer than two entries).
    """
    squared_sum = 0.0
    # Slicing off the last entry of row1 leaves only the compared positions.
    for idx, value in enumerate(row1[:-1]):
        delta = value - row2[idx]
        squared_sum += delta ** 2
    return sqrt(squared_sum)

In [5]:
######
# Demo: distance between a sample vector and the first dataset row.
# NOTE(review): `test` has four entries while each dataset row has three.
# euclidean_distance skips only the last entry of its first argument, so all
# three columns of dataset[0] -- including its last (label) column -- enter
# this distance. Confirm that this is intended.
######
test = [5,3,2,1]
euclidean_distance(test, dataset[0])

6.164414002968976

In [6]:
# Locate the most similar neighbors
def get_neighbors(train, test_row, num_neighbors):
    """Return the training rows closest to ``test_row``.

    Args:
        train: Iterable of training rows.
        test_row: Row to compare against; passed as the first argument of
            ``euclidean_distance``.
        num_neighbors: Maximum number of rows to return.

    Returns:
        list: Up to ``num_neighbors`` rows of ``train`` ordered by increasing
        distance. Rows at equal distance keep their order in ``train``. If
        fewer rows are available than requested, all of them are returned
        (the previous implementation raised ``IndexError``); a non-positive
        ``num_neighbors`` yields an empty list.
    """
    # sorted() is stable and evaluates the key once per row.
    ranked = sorted(
        train,
        key=lambda train_row: euclidean_distance(test_row, train_row),
    )
    # Clamp at zero so a negative count does not slice from the end.
    return ranked[:max(num_neighbors, 0)]

In [7]:
######
# Demo: the five rows of `dataset` closest to `test`.
######
get_neighbors(dataset, test, 5)

[[2, 2, 1], [2, 2, 1], [2, 2, 1], [2, 2, 1], [2, 1, 1]]

In [8]:
# Make a prediction with neighbors
def predict_classification(train, test_row, num_neighbors):
    """Predict a label for ``test_row`` by majority vote of its neighbors.

    Args:
        train: Training rows; the last entry of each row is its label.
        test_row: Row to classify.
        num_neighbors: Number of nearest neighbors that vote.

    Returns:
        The most frequent label among the neighbors returned by
        ``get_neighbors``. When several labels are equally frequent, the one
        that appears first in the neighbor list wins, so the result no
        longer depends on set iteration order.

    Raises:
        ValueError: If there are no neighbors to vote.
    """
    neighbors = get_neighbors(train, test_row, num_neighbors)
    if not neighbors:
        raise ValueError("cannot predict a label without any neighbors")
    votes = Counter(row[-1] for row in neighbors)
    # most_common lists equally frequent labels in first-seen order.
    return votes.most_common(1)[0][0]

In [9]:
######
# Demo: majority label among the five rows of `dataset` closest to `test`.
######
predict_classification(dataset, test, 5)

1

In [10]:
# Calculate accuracy
def accuracy(orignal_label, predicted_label):
    """Print the accuracy of the predictions as a percentage.

    Args:
        orignal_label: Labels passed to ``accuracy_score`` as the first
            argument.
        predicted_label: Labels passed to ``accuracy_score`` as the second
            argument.

    Returns:
        None. The percentage is only printed.
    """
    score = accuracy_score(orignal_label, predicted_label, sample_weight=None)
    percentage = score * 100
    print("\nAccuracy :\n", percentage)

In [11]:
######
# Demo: two identical label lists, so the printed accuracy is 100.0.
######
p = [0,0,0]
l = [0,0,0]
accuracy(p, l)


Accuracy :
 100.0


In [12]:
# Load the Play Predictor dataset, classify a few sample rows and report accuracy
if __name__ == "__main__":
    # Path of the CSV file that holds the training data.
    filename = 'Dataset/Play_Predictor.csv'
    dataset1 = load_csv(filename)

    # define model parameter
    num_neighbors = 5

    # define test data: each row ends with its original label, which
    # euclidean_distance skips when the row is passed as its first argument.
    # NOTE(review): these rows also appear in the printed training data, so
    # the accuracy below is measured on rows the model has already seen.
    test_data = [[0, 2, 1],
                 [2, 1, 1],
                 [2, 2, 0]]

    # predict the label, using the dataset loaded in this cell rather than a
    # variable left over from an earlier demo cell
    predict_lbl, ori_lbl = [], []
    for row in test_data:
        label = predict_classification(dataset1, row, num_neighbors)
        print('Data=%s, Predicted label: %s, Original label: %s' % (row[0:3], label, row[-1]))
        predict_lbl.append(label)
        ori_lbl.append(row[-1])

    # Display accuracy
    accuracy(ori_lbl, predict_lbl)


Data=[0, 2, 1], Predicted label: 1, Original label: 1
Data=[2, 1, 1], Predicted label: 1, Original label: 1
Data=[2, 2, 0], Predicted label: 1, Original label: 0

Accuracy :
 66.66666666666666
