In [1]:
from math import sqrt, floor, inf
from statistics import mean
from scipy.spatial.distance import cdist
from files import readFile
import random
 
def get_neighbors(train, test_row, num_neighbors):
    """Return the training rows closest to ``test_row``.

    The last element of every row is excluded from the distance computation;
    the remaining elements are compared with the Manhattan ("cityblock")
    metric.

    Args:
        train: Iterable of training rows.
        test_row: Row to find neighbours for.
        num_neighbors: Maximum number of neighbours to return.

    Returns:
        A list of at most ``num_neighbors`` ``(train_row, distance)`` tuples
        ordered from nearest to farthest. ``distance`` is the 1x1 array
        produced by ``cdist``, not a plain float.
    """
    scored = [
        (candidate, cdist([test_row[:-1]], [candidate[:-1]], 'cityblock'))
        for candidate in train
    ]
    # sorted() is stable, so equally distant rows keep their training order.
    ranked = sorted(scored, key=lambda pair: pair[1])
    return ranked[:num_neighbors]

def predict_classification(train, test_row, num_neighbors):
    """Predict a binary label for ``test_row`` by distance-weighted voting.

    Each of the nearest neighbours votes for its own label (the last element
    of the neighbour row, converted with ``int``) with weight ``1 / distance``.
    A neighbour at distance zero votes with infinite weight.

    Args:
        train: Training rows passed through to ``get_neighbors``.
        test_row: Row to classify.
        num_neighbors: Number of neighbours taking part in the vote.

    Returns:
        ``0`` when class 0 collects at least as much weight as class 1
        (ties go to 0), otherwise ``1``.
    """
    votes = [0, 0]
    for neighbor_row, distance in get_neighbors(train, test_row, num_neighbors):
        label = int(neighbor_row[-1])
        if distance:
            weight = 1 / distance
        else:
            # An exact match would divide by zero; let it dominate the vote.
            weight = inf
        votes[label] += weight
    if votes[0] >= votes[1]:
        return 0
    return 1

# Neighbour counts tried for every test row, number of random splits, and
# the share of rows used for training in each split.
K_VALUES = (10, 20, 30)
NUM_RUNS = 5
TRAIN_FRACTION = 0.8


def _min_max_normalize(rows):
    """Rescale every feature column of ``rows`` to the range [0, 1] in place.

    The last element of each row is treated as the class label and is left
    untouched. A column whose values are all equal is mapped to 0.0 so the
    scaling never divides by zero.

    Args:
        rows: List of equally long, mutable rows of numbers
            (assumed from how the rows are indexed and assigned below).
    """
    if not rows:
        return
    for j in range(len(rows[0]) - 1):
        column = [row[j] for row in rows]
        # Take the real extremes of the column; seeding them with 0 would
        # pin the minimum of an all-positive column at 0.
        low, high = min(column), max(column)
        span = high - low
        for row in rows:
            row[j] = (row[j] - low) / span if span else 0.0


dataset = readFile("Breast_cancer_data.csv")

# NOTE(review): scaling uses the extremes of the whole dataset, including the
# rows that later end up in the test split.
_min_max_normalize(dataset)

errors = []
for _ in range(NUM_RUNS):
    random.shuffle(dataset)
    split = floor(len(dataset) * TRAIN_FRACTION)
    train = dataset[:split]
    test = dataset[split:]

    # Manhattan distance from every test row to every training row (labels
    # excluded), computed in a single call instead of one call per pair.
    similarity_matrix = cdist(
        [row[:-1] for row in test],
        [row[:-1] for row in train],
        'cityblock',
    ).tolist()

    error = 0
    for t in test:
        for k in K_VALUES:
            prediction = predict_classification(train, t, k)
            print('Expected: %d, Got: %d.' % (t[-1], prediction))
            if prediction != t[-1]:
                error += 1
    # Record one error rate per split. Every test row is classified once per
    # k, so the denominator is the total number of predictions made.
    errors.append(error / (len(test) * len(K_VALUES)))
    print(similarity_matrix)
print("Errors average:", mean(errors))

Expected: 0, Got: 1.
Expected: 0, Got: 1.
Expected: 0, Got: 1.
Expected: 1, Got: 1.
Expected: 1, Got: 1.
Expected: 1, Got: 1.
Expected: 1, Got: 1.
Expected: 1, Got: 1.
Expected: 1, Got: 1.
Expected: 0, Got: 0.
Expected: 0, Got: 0.
Expected: 0, Got: 0.
Expected: 1, Got: 1.
Expected: 1, Got: 1.
Expected: 1, Got: 1.
Expected: 0, Got: 1.
Expected: 0, Got: 1.
Expected: 0, Got: 1.
Expected: 1, Got: 1.
Expected: 1, Got: 1.
Expected: 1, Got: 1.
Expected: 1, Got: 1.
Expected: 1, Got: 1.
Expected: 1, Got: 1.
Expected: 1, Got: 1.
Expected: 1, Got: 1.
Expected: 1, Got: 1.
Expected: 1, Got: 1.
Expected: 1, Got: 1.
Expected: 1, Got: 1.
Expected: 1, Got: 1.
Expected: 1, Got: 1.
Expected: 1, Got: 1.
Expected: 0, Got: 0.
Expected: 0, Got: 0.
Expected: 0, Got: 0.
Expected: 1, Got: 1.
Expected: 1, Got: 1.
Expected: 1, Got: 1.
Expected: 1, Got: 1.
Expected: 1, Got: 1.
Expected: 1, Got: 1.
Expected: 0, Got: 0.
Expected: 0, Got: 0.
Expected: 0, Got: 0.
Expected: 0, Got: 0.
Expected: 0, Got: 0.
Expected: 0, 