## TP 1 - Algoritmos II

In [1]:
# Classe utilitária que representa um Nó individual em uma k-d Tree 

class Node:
    """A single node of a k-d tree: either an internal split or a leaf."""

    def __init__(self, is_leaf, considered_coordinate, position, point):
        """
        Build a node for a k-d tree.

        is_leaf: True when this node is a leaf, False when it is internal.

        considered_coordinate: index of the coordinate that was examined to
        build this node (0..n-1, where n is the number of coordinates).

        position: value stored for the considered coordinate. The original
        notes state that, when 'point is not None', it is expected to equal
        point.positions[considered_coordinate].

        point: the point held by a leaf; internal nodes receive None.
        """
        # Children start empty; whoever builds the tree attaches them later.
        self.left = self.right = None

        self.is_leaf = is_leaf
        self.considered_coordinate = considered_coordinate
        self.position = position
        self.point = point

    def __str__(self):
        # A leaf is rendered through its point, an internal node through
        # the value stored in 'position'.
        shown = self.point if self.is_leaf else self.position
        return str(shown)

    # repr() renders a node exactly like str().
    __repr__ = __str__

In [2]:
# Classe utilitária que representa um ponto em um espaço k-dimensional qualquer

class Point:
    """A point of a k-dimensional space, optionally carrying labels."""

    def __init__(self, positions, classification = None):
        """
        positions: sequence (of size k) with the value of the point on each
        coordinate (the values in X).

        classification: the real label of the point (the value in y).

        predicted_classification: not a constructor argument; it always
        starts as None and is meant to receive the label computed by kNN
        for points of the test set.
        """
        self.positions = positions
        self.classification = classification
        self.predicted_classification = None

    def __str__(self):
        # Rendered as "[c0, c1, ...]", followed by "-(real label)" and
        # "-{predicted label}" for whichever labels are set.
        pieces = ["[" + ", ".join(map(str, self.positions)) + "]"]

        if self.classification is not None:
            pieces.append(f"({self.classification!s})")

        if self.predicted_classification is not None:
            pieces.append("{" + str(self.predicted_classification) + "}")

        return "-".join(pieces)

    # repr() renders a point exactly like str().
    __repr__ = __str__

In [3]:
# Função utilitária que recebe uma lista de pontos e uma determinada coordenada desses pontos e retorna
# uma nova lista contendo os mesmos pontos ordenados com base na coordenada passada como parâmetro

# OBS: Eu pretendia usar esse método para encontrar o ponto mediano: Uma vez que a lista estivesse ordenada,
# o ponto mediano estaria trivialmente na posição len(lista) / 2. Além disso, nessa mesma lista, todos os pontos
# à esquerda seriam menores que o mediano e todos à direita seriam maiores que o mediano, logo, seria 
# fácil fazer a separação dos pontos.
# Porém, usando algoritmos com a técnica "prune-and-search", é possível encontrar o ponto mediano e
# separações dos conjuntos de pontos igualmente satisfatórias (elas não necessariamente estariam ordenadas,
# mas isso não é necessário para o nosso caso) para nosso problema de gerar a k-d-tree em tempo linear 
# (usando merge-sort, teriamos uma complexidade desnecessária de O(n log(n)))

"""def merge(left, right, coordinate):
    l_counter = r_counter = 0
    sorted_arr = []
    
    while l_counter < len(left) and r_counter < len(right):
        val_left = left[l_counter]
        val_right = right[r_counter]
        
        if (val_left.positions[coordinate] <= val_right.positions[coordinate]):
            sorted_arr.append(val_left)
            l_counter += 1
        else:
            sorted_arr.append(val_right)
            r_counter += 1
    
    while l_counter < len(left):
        sorted_arr.append(left[l_counter])
        l_counter += 1
        
    while r_counter < len(right):
        sorted_arr.append(right[r_counter])
        r_counter += 1
        
    return sorted_arr

def merge_sort_points(points, coordinate):
    size = len(points)
    
    if size <= 1:
        return points
    
    middle = int((size - 1) / 2) + 1
    
    left = merge_sort_points(points[:middle], coordinate) # Processando a metade da esquerda
    right = merge_sort_points(points[middle:], coordinate) # Processando a metade da direita
    
    return merge(left, right, coordinate)

p1 = Point([5, 1])
p2 = Point([1, 2])
p3 = Point([2, 3])
p4 = Point([4, 4])
p5 = Point([9, 5])
p6 = Point([3, 6])

l1 = [p2, p6, p3, p4, p1, p5]
l2 = [p1, p2, p3, p4, p5, p6]
l3 = [p2, p3, p6, p4, p1, p5]

merge_sort_points(l3, 1)"""

'def merge(left, right, coordinate):\n    l_counter = r_counter = 0\n    sorted_arr = []\n    \n    while l_counter < len(left) and r_counter < len(right):\n        val_left = left[l_counter]\n        val_right = right[r_counter]\n        \n        if (val_left.positions[coordinate] <= val_right.positions[coordinate]):\n            sorted_arr.append(val_left)\n            l_counter += 1\n        else:\n            sorted_arr.append(val_right)\n            r_counter += 1\n    \n    while l_counter < len(left):\n        sorted_arr.append(left[l_counter])\n        l_counter += 1\n        \n    while r_counter < len(right):\n        sorted_arr.append(right[r_counter])\n        r_counter += 1\n        \n    return sorted_arr\n\ndef merge_sort_points(points, coordinate):\n    size = len(points)\n    \n    if size <= 1:\n        return points\n    \n    middle = int((size - 1) / 2) + 1\n    \n    left = merge_sort_points(points[:middle], coordinate) # Processando a metade da esquerda\n    rig

In [4]:
from random import randrange

# Função utilitária que recebe um conjunto de pontos e, considerando uma determinada coordenada (também
# recebida como parâmetro), encontra o k-ésimo menor ponto desse conjunto. Para encontrar o ponto mediano
# de um conjunto de tamanho n, basta executar quickSelect com k = n/2
# Goodrich e Tamassia, Cap 9.2.1
def quickSelect(points, k, coordinate):
    """
    Return the point holding the k-th smallest value (k is 1-based) on the
    given coordinate, using randomized prune-and-search.

    points: list of objects exposing a 'positions' sequence.
    k: rank of the wanted point, counted from 1.
    coordinate: index into 'positions' used for the comparisons.

    A list with a single element is returned as-is, whatever k is. An empty
    list makes randrange raise ValueError.
    """
    candidates = points
    rank = k

    while len(candidates) != 1:
        pivot = candidates[randrange(len(candidates))]
        pivot_value = pivot.positions[coordinate]

        # Three-way partition of the candidates around the random pivot.
        smaller, ties, larger = [], [], []
        for candidate in candidates:
            value = candidate.positions[coordinate]
            if value < pivot_value:
                smaller.append(candidate)
            elif value == pivot_value:
                ties.append(candidate)
            else:
                larger.append(candidate)

        if rank <= len(smaller):
            candidates = smaller
        elif rank <= len(smaller) + len(ties):
            return pivot
        else:
            # Everything discarded counts toward the rank already covered.
            rank -= len(smaller) + len(ties)
            candidates = larger

    return candidates[0]

# QuickSelect para conjuntos de inteiros
"""def quickSelect(S, k):
    if len(S) == 1:
        return S[0]
    
    random_elem = S[randrange(len(S))]
    
    lesser = []
    equal = []
    greater = []
    
    for elem in S:
        if elem < random_elem:
            lesser.append(elem)
        elif elem == random_elem:
            equal.append(elem)
        else:
            greater.append(elem)
    
    if k <= len(lesser):
        return quickSelect(lesser, k)
    elif k <= (len(lesser) + len(equal)):
        return random_elem
    else:
        return quickSelect(greater,( k-len(lesser)-len(equal) ))
        
S = [0,1,2,3,4,5,6,7,8,9]
S1 = [0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,2,2]
S2 = [5,8,0,1,7,3,6,9,4,2]

quickSelect(S2, 3)    
"""

'def quickSelect(S, k):\n    if len(S) == 1:\n        return S[0]\n    \n    random_elem = S[randrange(len(S))]\n    \n    lesser = []\n    equal = []\n    greater = []\n    \n    for elem in S:\n        if elem < random_elem:\n            lesser.append(elem)\n        elif elem == random_elem:\n            equal.append(elem)\n        else:\n            greater.append(elem)\n    \n    if k <= len(lesser):\n        return quickSelect(lesser, k)\n    elif k <= (len(lesser) + len(equal)):\n        return random_elem\n    else:\n        return quickSelect(greater,( k-len(lesser)-len(equal) ))\n        \nS = [0,1,2,3,4,5,6,7,8,9]\nS1 = [0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,2,2]\nS2 = [5,8,0,1,7,3,6,9,4,2]\n\nquickSelect(S2, 3)    \n'

In [5]:
# Demo: eight points on the diagonal, listed in three different orders, then
# the 4th smallest point on coordinate 0 is selected from 'pontos3'
# (the recorded cell output is [4, 4]).
p1 = Point([1, 1])
p2 = Point([2, 2])
p3 = Point([3, 3])
p4 = Point([4, 4])
p5 = Point([5, 5])
p6 = Point([6, 6])
p7 = Point([7, 7])
p8 = Point([8, 8])

pontos1 = [p1, p2, p3, p4, p5, p6, p7, p8]
pontos2 = [p3, p6, p1, p7, p2, p8, p5, p4]
pontos3 = [p8, p3, p1, p7, p2, p6, p4, p5]
quickSelect(pontos3, 4, 0)

[4, 4]

In [6]:
# Função utilitária que recebe um conjunto de pontos e, para uma determinada coordenada desses pontos,
# encontra o ponto mediano e separa o conjunto original em dois subconjuntos da "esquerda" e "direita" 
# de tal forma que, no subconjunto da esquerda, se encontram todos os pontos que são menores ou iguais que 
# o mediano na coordenada que está sendo considerada e, no subconjunto da direita, se encontram os 
# pontos estritamente maiores que o ponto mediano. (Esses elementos NÃO estão necessariamente ordenados)

def find_median_point_and_separate(points, coordinate):
    """
    Find the median point on 'coordinate' and split 'points' around it.

    Returns (median_point, lesser_or_equal, greater): the points whose
    value on the coordinate is <= the median's, and the remaining ones.
    Both lists keep the relative order the points had in 'points'; they
    are not sorted.
    """
    # 1-based rank handed to quickSelect.
    median_rank = int((len(points) - 1) / 2) + 1
    median_point = quickSelect(points, median_rank, coordinate)
    median_value = median_point.positions[coordinate]

    lesser_or_equal = [p for p in points if p.positions[coordinate] <= median_value]
    greater = [p for p in points if not (p.positions[coordinate] <= median_value)]

    return median_point, lesser_or_equal, greater

In [7]:
# Demo: split the eight diagonal points of 'pontos1' around their median on
# coordinate 0 and print the median and both halves (the recorded output
# shows [4, 4] as the median with four points on each side).
p1 = Point([1, 1])
p2 = Point([2, 2])
p3 = Point([3, 3])
p4 = Point([4, 4])
p5 = Point([5, 5])
p6 = Point([6, 6])
p7 = Point([7, 7])
p8 = Point([8, 8])

pontos1 = [p1, p2, p3, p4, p5, p6, p7, p8]
pontos2 = [p3]
pontos3 = [p8, p3, p1, p7, p2, p6, p4]
median_point, lteq, gt = find_median_point_and_separate(pontos1, 0)
print(median_point)
print(lteq)
print(gt)

[4, 4]
[[1, 1], [2, 2], [3, 3], [4, 4]]
[[5, 5], [6, 6], [7, 7], [8, 8]]


In [8]:
# Utility class implementing the "stack" (LIFO) data structure. Note that, despite its name, Heap is not a binary heap.

class Heap:
    """
    Last-in, first-out stack backed by a Python list.

    NOTE: despite the class name this is a stack, not a binary heap:
    'pop' hands back the most recently pushed value.
    """

    def __init__(self):
        # Bottom of the stack is index 0, top is the end of the list.
        self.values = []

    def push(self, val):
        """Put 'val' on top of the stack."""
        self.values.append(val)

    def pop(self):
        """
        Remove and return the value on top of the stack.

        Returns None when the stack is empty. The previous guard compared
        the length against '< 0', which can never be true, so an empty
        stack raised IndexError instead.
        """
        if not self.values:
            return None

        return self.values.pop()

    def is_empty(self):
        """Return True when the stack holds no values."""
        return len(self.values) == 0

    def print_values (self):
        """Print the underlying list, bottom of the stack first."""
        print(self.values)

In [9]:
# Demo: push three values, pop two of them and push another one, printing
# the stack contents after every step (pops come back in LIFO order: 42, 11).
t = Heap()
t.print_values()
t.push(15)
t.print_values()
t.push(11)
t.print_values()
t.push(42)
t.print_values()
v1 = t.pop()
print(v1)
t.print_values()
v2 = t.pop()
print(v2)
t.print_values()
t.push(1)
t.print_values()

[]
[15]
[15, 11]
[15, 11, 42]
42
[15, 11]
11
[15]
[15, 1]


In [10]:
class Auxiliary_kdtree_instruction:
    """
    Plain record describing one pending step of a k-d tree construction:
    a node, a pair of indices and the side the result must be attached to.
    """

    def __init__ (self, node, initial_index, final_index, direction):
        # 'direction' is one of 'LEFT', 'RIGHT' or 'NONE'.
        self.node, self.initial_index = node, initial_index
        self.final_index, self.direction = final_index, direction

In [11]:
# Utility function that builds a k-d tree from a set of k-dimensional points.
# The points are stored in the leaf nodes; internal nodes only store the
# values used to split the space with cuts (hyperplanes) on a given dimension of the points.
# Each depth of the tree is computed following the sequential order of the dimensions of the points.
# For example, with three-dimensional points (x = 0, y = 1 and z = 2), the root of the tree checks
# dimension 0, its children check dimension 1, its grandchildren check dimension 2, its
# great-grandchildren check dimension 0 again, and so on.

def create_k_d_tree(points):
    """Build a k-d tree for 'points' by delegating to the iterative builder."""
    # Alternative entry point (recursive builder), currently disabled:
    #return create_k_d_tree_recursive(points, 0)
    return create_k_d_tree_iterative(points)

def create_k_d_tree_recursive(points, depth):
    """
    Recursively build a k-d tree and return its root (None for no points).

    points: list of points, all with the same number of coordinates.
    depth: depth of the node being built; the coordinate examined at this
    level is 'depth % number_of_coordinates'.

    The points are split by median rank, not only by value: the left
    subtree receives the first half (every value <= the split value) and
    the right subtree the rest (every value >= the split value). Splitting
    by '<=' / '>' alone left the right side empty (a None child) whenever
    the remaining points tied with the median, and never terminated for
    points identical on every coordinate.
    """
    if (len(points) == 0):
        return None

    dimensions = len(points[0].positions)
    considered_coordinate = depth % dimensions

    if len(points) == 1:
        return Node(True, considered_coordinate, points[0].positions[considered_coordinate], points[0])

    median_point, lteq, gt = find_median_point_and_separate(points, considered_coordinate)
    split_value = median_point.positions[considered_coordinate]

    # Order the points as: strictly below the split, ties with the split,
    # strictly above it. Cutting this sequence at the median rank keeps
    # both halves non-empty and on the correct side of the split value.
    below = [p for p in lteq if p.positions[considered_coordinate] < split_value]
    ties = [p for p in lteq if not (p.positions[considered_coordinate] < split_value)]
    ordered = below + ties + gt

    # Same 1-based median rank used by find_median_point_and_separate.
    median_index = int((len(points) - 1) / 2) + 1

    internal_node = Node(False, considered_coordinate, split_value, None)
    internal_node.left = create_k_d_tree_recursive(ordered[:median_index], depth+1)
    internal_node.right = create_k_d_tree_recursive(ordered[median_index:], depth+1)

    return internal_node
    
def create_k_d_tree_iterative(points):
    """
    Build a k-d tree without recursion and return its root (None for no
    points).

    Pending sub-ranges of 'points' are kept as Auxiliary_kdtree_instruction
    records on a stack (the Heap helper). Each record names the parent
    node, the half-open index range [initial_index, final_index) to
    process and the side of the parent the new node is attached to.

    Side effect: 'points' is reordered in place while the tree is built.

    Fix over the previous version: the range was rewritten as 'lteq + gt'
    and then cut at the median rank, but 'lteq' is not ordered, so when
    several points tied with the median value, points strictly smaller than
    the split could land in the right subtree and be wrongly pruned by the
    nearest-neighbour searches. The range is now ordered as
    (below split, ties, above split) before it is cut.
    """
    if (len(points) == 0):
        return None

    dimensions = len(points[0].positions)

    pending = Heap()
    # The dummy parent uses coordinate -1 so that the root examines
    # coordinate (-1 + 1) % dimensions == 0.
    dummy_node = Node(False, -1, None, None)

    pending.push(Auxiliary_kdtree_instruction(dummy_node, 0, len(points), "NONE"))
    root_node = None

    while not pending.is_empty():
        current_instruction = pending.pop()

        initial_index = current_instruction.initial_index
        final_index = current_instruction.final_index
        parent_node = current_instruction.node
        new_considered_coordinate = (parent_node.considered_coordinate + 1) % dimensions

        if (initial_index + 1) == final_index:
            # A single point: this is a leaf.
            only_point = points[initial_index]
            new_node = Node(True, new_considered_coordinate, only_point.positions[new_considered_coordinate], only_point)
        else:
            sub_array = points[initial_index:final_index]

            median_index = int((len(sub_array) - 1) / 2) + 1
            median_point, lteq, gt = find_median_point_and_separate(sub_array, new_considered_coordinate)
            split_value = median_point.positions[new_considered_coordinate]

            # Ties with the split value go after the strictly smaller
            # points, so the cut at 'median_index' leaves only values
            # <= split on the left and only values >= split on the right.
            below = [p for p in lteq if p.positions[new_considered_coordinate] < split_value]
            ties = [p for p in lteq if not (p.positions[new_considered_coordinate] < split_value)]
            points[initial_index:final_index] = below + ties + gt

            new_node = Node(False, new_considered_coordinate, split_value, None)

            middle = initial_index + median_index
            pending.push(Auxiliary_kdtree_instruction(new_node, initial_index, middle, "LEFT"))
            pending.push(Auxiliary_kdtree_instruction(new_node, middle, final_index, "RIGHT"))

        # Attach the new node to its parent (or record it as the root).
        if current_instruction.direction == "LEFT":
            parent_node.left = new_node
        elif current_instruction.direction == "RIGHT":
            parent_node.right = new_node
        elif current_instruction.direction == "NONE":
            root_node = new_node

    return root_node

In [12]:
# Função utilitária auxiliar para fazer um caminhamento "inorder" em uma k-d-tree

def inorder(root):
    """
    Print every node reachable from 'root' in in-order: left subtree,
    the node itself, then the right subtree. A falsy root prints nothing.
    """
    waiting = []
    node = root

    while waiting or node:
        # Walk down the left spine, remembering the nodes passed by.
        while node:
            waiting.append(node)
            node = node.left

        node = waiting.pop()
        print(node)

        node = node.right

In [13]:
# Demo: a tree built from a single point is just one leaf
# (the recorded in-order output is [1, 3]).
p1 = Point([1, 3])

points = [p1]
k_d_tree = create_k_d_tree(points)
inorder(k_d_tree)

[1, 3]


In [14]:
# Demo: build a tree from four 2-D points and print it in in-order; leaves
# print as points and internal nodes as their stored split value.
p1 = Point([1, 3])
p2 = Point([2, 2])
p3 = Point([4, 4])
p4 = Point([5, 7])

points = [p1, p2, p3, p4]

k_d_tree = create_k_d_tree(points)
inorder(k_d_tree)

[2, 2]
2
[1, 3]
2
[4, 4]
4
[5, 7]


In [15]:
# Demo: build a tree from ten 2-D points (two of them share the value 6.7
# on coordinate 0) and print it in in-order.
p1 = Point([6.7, 5.3])
p2 = Point([1.8, 3.6])
p3 = Point([4.8, 1])
p4 = Point([7.6, 4.5])
p5 = Point([6.7, 4.3])
p6 = Point([3.4, 7])
p7 = Point([5.9, 3])
p8 = Point([4.6, 4.2])
p9 = Point([2, 8.9])
p10 = Point([3.7, 8.6])

points = [p1, p2, p3, p4, p5, p6, p7, p8, p9, p10]

k_d_tree = create_k_d_tree(points)
inorder(k_d_tree)

[1.8, 3.6]
3.6
[3.4, 7]
3.4
[4.6, 4.2]
7
[2, 8.9]
2
[3.7, 8.6]
4.6
[4.8, 1]
1
[5.9, 3]
5.9
[6.7, 4.3]
4.3
[6.7, 5.3]
6.7
[7.6, 4.5]


In [16]:
# Utility function that computes the Euclidean distance between two points. Note that, to save
# processing, the square root is not taken during the computation, so the returned distance is
# the squared distance.

def euclidean_distance(p1, p2):
    """
    Return the squared Euclidean distance between p1 and p2.

    The square root is deliberately not taken. Only the coordinates
    present in p1 are considered: range(len(p1.positions)) drives the sum.
    """
    origin = p1.positions
    target = p2.positions

    # return sqrt(...)
    return sum((target[axis] - origin[axis]) ** 2 for axis in range(len(origin)))

In [17]:
# Demo: squared distances between a few 2-D and 3-D points
# (the recorded output is 2, 4, 2, 100 and 89).
p1 = Point([1, 1])
p2 = Point([2, 2])
p3 = Point([1, 3])
p4 = Point([0, 0])
p5 = Point([8, 6])
p6 = Point([4, 2, 8])
p7 = Point([2, 9, 2])

print(euclidean_distance(p1, p2))
print(euclidean_distance(p1, p3))
print(euclidean_distance(p2, p3))
print(euclidean_distance(p4, p5))
print(euclidean_distance(p6, p7))

2
4
2
100
89


In [18]:
# Classe utilitária que recebe uma k-d-tree e um ponto e encontra o vizinho mais próximo desse ponto
# que esteja nessa árvore

class NearestNeighbour:
    """
    Finds, among the points stored in a k-d tree, the one closest to a
    query point. Distances come from euclidean_distance, which returns the
    squared distance.
    """

    def search_nearest_neighbour(self, k_d_tree_root, point):
        """
        Return the stored point nearest to 'point', or None when the tree
        is empty (k_d_tree_root is None).
        """
        self.current_best = None
        self.k_d_tree_root = k_d_tree_root
        self.point = point
        self.search_nearest_neighbour_recursive(self.k_d_tree_root)
        return self.current_best

    def search_nearest_neighbour_recursive(self, k_d_tree_node):
        """
        Visit the subtree rooted at 'k_d_tree_node', updating
        self.current_best whenever a closer leaf is found.

        A None node (empty tree or empty branch) is skipped; previously it
        raised AttributeError, unlike KNearestNeighbours.
        """
        if k_d_tree_node is None:
            return

        if k_d_tree_node.is_leaf:
            candidate = k_d_tree_node.point
            if self.current_best is None:
                self.current_best = candidate
            elif euclidean_distance(candidate, self.point) < euclidean_distance(self.current_best, self.point):
                self.current_best = candidate
            return

        axis = k_d_tree_node.considered_coordinate
        point_pos_val = self.point.positions[axis]
        hyperplane_split_value = k_d_tree_node.position

        # Descend first into the side of the split that contains the query.
        if point_pos_val <= hyperplane_split_value:
            near_side, far_side = k_d_tree_node.left, k_d_tree_node.right
        else:
            near_side, far_side = k_d_tree_node.right, k_d_tree_node.left

        self.search_nearest_neighbour_recursive(near_side)

        # The other side must be checked when nothing was found yet, or
        # when the hypersphere around the query (squared radius = distance
        # to the current best) intersects the splitting hyperplane, since
        # closer points could then exist there.
        # https://en.wikipedia.org/wiki/K-d_tree
        if self.current_best is None:
            self.search_nearest_neighbour_recursive(far_side)
        elif ((hyperplane_split_value - point_pos_val)**2) <= euclidean_distance(self.current_best, self.point):
            self.search_nearest_neighbour_recursive(far_side)

In [19]:
# Demo: nearest neighbour of four query points in a tree of four 2-D points
# (the recorded output is [2, 2], [5, 7], [4, 4] and [4, 4]).
p1 = Point([1, 3])
p2 = Point([2, 2])
p3 = Point([4, 4])
p4 = Point([5, 7])

points = [p1, p2, p3, p4]

k_d_tree = create_k_d_tree(points)
#inorder(k_d_tree)

px = Point([1, 1])
py = Point([5, 9])
pz = Point([3, 3])
pk = Point([5, 5])

nn = NearestNeighbour()
print(nn.search_nearest_neighbour(k_d_tree, px))
print(nn.search_nearest_neighbour(k_d_tree, py))
print(nn.search_nearest_neighbour(k_d_tree, pz))
print(nn.search_nearest_neighbour(k_d_tree, pk))

[2, 2]
[5, 7]
[4, 4]
[4, 4]


In [20]:
# Demo: nearest neighbour of [4, 8] in a tree of thirteen 2-D points
# (the recorded output is [6, 8]).
p1 = Point([1, 3])
p2 = Point([1, 8])
p3 = Point([2, 2])
p4 = Point([2, 10])
p5 = Point([3, 6])
p6 = Point([4, 1])
p7 = Point([5, 4])
p8 = Point([6, 8])
p9 = Point([7, 4])
p10 = Point([7, 7])
p11 = Point([8, 2])
p12 = Point([8, 5])
p13 = Point([9, 9])

points = [p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13]

k_d_tree = create_k_d_tree(points)
#inorder(k_d_tree)

px = Point([4, 8])

nn = NearestNeighbour()
print(nn.search_nearest_neighbour(k_d_tree, px))

[6, 8]


In [21]:
# Classe utilitária que recebe uma k-d-tree, um inteiro k e um ponto e encontra os k vizinhos 
# mais próximos desse ponto que estejam nessa árvore

class KNearestNeighbours:
    """
    Finds, among the points stored in a k-d tree, the k points closest to
    a query point. The current best candidates are kept in a plain list
    that is scanned linearly; distances come from euclidean_distance.
    """

    def search_k_nearest_neighbours(self, k, k_d_tree_root, point):
        """Return the list with the (up to) k stored points nearest to 'point'."""
        self.k = k
        self.current_best_priority_queue = []
        self.k_d_tree_root = k_d_tree_root
        self.point = point
        self.search_k_nearest_neighbours_recursive(self.k_d_tree_root)
        return self.current_best_priority_queue

    def queue_is_full(self):
        """Return True when exactly k candidates are being held."""
        return self.k == len(self.current_best_priority_queue)

    def find_farthest_best_point(self):
        """
        Scan the held candidates and return (point, index, distance) for the
        one farthest from the query. When no candidate lies at a distance
        greater than 0 the result is (None, -1, 0).
        """
        worst_point, worst_index, worst_distance = None, -1, 0

        for index, candidate in enumerate(self.current_best_priority_queue):
            candidate_distance = euclidean_distance(candidate, self.point)
            if candidate_distance > worst_distance:
                worst_point, worst_index, worst_distance = candidate, index, candidate_distance

        return worst_point, worst_index, worst_distance

    def check_value_and_update_queue(self, k_d_tree_node):
        """
        Offer the point of a leaf to the candidate list: it is appended while
        there is room, otherwise it replaces the farthest candidate when it
        is strictly closer to the query.
        """
        if not self.queue_is_full():
            self.current_best_priority_queue.append(k_d_tree_node.point)
            return

        _, worst_index, worst_distance = self.find_farthest_best_point()
        leaf_distance = euclidean_distance(k_d_tree_node.point, self.point)

        if leaf_distance < worst_distance:
            self.current_best_priority_queue.pop(worst_index)
            self.current_best_priority_queue.append(k_d_tree_node.point)

    def search_k_nearest_neighbours_recursive(self, k_d_tree_node):
        """Visit the subtree rooted at 'k_d_tree_node'; a None node is skipped."""
        if k_d_tree_node is None:
            return

        if k_d_tree_node.is_leaf:
            self.check_value_and_update_queue(k_d_tree_node)
            return

        axis = k_d_tree_node.considered_coordinate
        goes_left = self.point.positions[axis] <= k_d_tree_node.position

        # Descend first into the side of the split that contains the query.
        if goes_left:
            self.search_k_nearest_neighbours_recursive(k_d_tree_node.left)
        else:
            self.search_k_nearest_neighbours_recursive(k_d_tree_node.right)

        # Decide whether the other subtree has to be checked as well.
        # 'radius' (distance to the farthest candidate) is the radius of
        # the hypersphere compared against the splitting hyperplane.
        _, _, radius = self.find_farthest_best_point()
        gap = k_d_tree_node.position - self.point.positions[axis]

        if (not self.queue_is_full()) or ((gap**2) <= radius):
            if goes_left:
                self.search_k_nearest_neighbours_recursive(k_d_tree_node.right)
            else:
                self.search_k_nearest_neighbours_recursive(k_d_tree_node.left)

In [22]:
# Demo: the 1 and the 2 nearest neighbours of four query points in a tree
# of four 2-D points. The returned lists are not sorted by distance.
p1 = Point([1, 3])
p2 = Point([2, 2])
p3 = Point([4, 4])
p4 = Point([5, 7])

points = [p1, p2, p3, p4]

k_d_tree = create_k_d_tree(points)
#inorder(k_d_tree)

px = Point([1, 1])
py = Point([5, 9])
pz = Point([3, 3])
pk = Point([5, 5])

knn = KNearestNeighbours()
print(knn.search_k_nearest_neighbours(1, k_d_tree, px))
print(knn.search_k_nearest_neighbours(1, k_d_tree, py))
print(knn.search_k_nearest_neighbours(1, k_d_tree, pz))
print(knn.search_k_nearest_neighbours(1, k_d_tree, pk))
print("---")
print(knn.search_k_nearest_neighbours(2, k_d_tree, px))
print(knn.search_k_nearest_neighbours(2, k_d_tree, py))
print(knn.search_k_nearest_neighbours(2, k_d_tree, pz))
print(knn.search_k_nearest_neighbours(2, k_d_tree, pk))

[[2, 2]]
[[5, 7]]
[[4, 4]]
[[4, 4]]
---
[[2, 2], [1, 3]]
[[5, 7], [4, 4]]
[[4, 4], [2, 2]]
[[5, 7], [4, 4]]


In [23]:
# Demo: the 1 to 5 nearest neighbours of [4, 8] in a tree of thirteen
# 2-D points, reusing the same searcher object for every query.
p1 = Point([1, 3])
p2 = Point([1, 8])
p3 = Point([2, 2])
p4 = Point([2, 10])
p5 = Point([3, 6])
p6 = Point([4, 1])
p7 = Point([5, 4])
p8 = Point([6, 8])
p9 = Point([7, 4])
p10 = Point([7, 7])
p11 = Point([8, 2])
p12 = Point([8, 5])
p13 = Point([9, 9])

points = [p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13]

k_d_tree = create_k_d_tree(points)
#inorder(k_d_tree)

px = Point([4, 8])

knn = KNearestNeighbours()

print(knn.search_k_nearest_neighbours(1, k_d_tree, px))
print(knn.search_k_nearest_neighbours(2, k_d_tree, px))
print(knn.search_k_nearest_neighbours(3, k_d_tree, px))
print(knn.search_k_nearest_neighbours(4, k_d_tree, px))
print(knn.search_k_nearest_neighbours(5, k_d_tree, px))

[[6, 8]]
[[3, 6], [6, 8]]
[[3, 6], [2, 10], [6, 8]]
[[3, 6], [1, 8], [2, 10], [6, 8]]
[[3, 6], [1, 8], [2, 10], [6, 8], [7, 7]]


In [24]:
import pandas as pd
import random
import math

class KNNClassifierAlgorithm:
    """
    kNN classification experiment over a dataset.

    The constructor does all the work: it turns every dataset row into a
    Point (last column = label, the other columns = coordinates), shuffles
    the points and splits them 70% / 30% into Train and Test, builds a k-d
    tree from Train, classifies every Test point by majority vote among its
    k nearest Train points, and computes accuracy, precision and recall.

    Attributes set by the constructor: original_dataset, set_of_all_points,
    Train, Test, k_d_tree, hits, misses, accuracy_score and
    precision_and_recall_per_class.
    """

    def __init__(self, original_dataset):
        """original_dataset: object exposing iterrows(), e.g. a pandas DataFrame."""
        self.original_dataset = original_dataset

        #------------ Creating the set of points based on the original Dataset ------------
        self.set_of_all_points = []
        for index, row in original_dataset.iterrows():
            row_as_array = row.to_numpy()
            # Every column except the last one is a coordinate (X);
            # the last column is the label (y).
            self.set_of_all_points.append(Point(row_as_array[:-1], row_as_array[-1]))

        #------------ Separating Train and Test subsets ------------
        random.shuffle(self.set_of_all_points)
        train_proportion = 0.70
        index_separator = int(len(self.set_of_all_points) * train_proportion)

        self.Train = self.set_of_all_points[:index_separator]
        self.Test = self.set_of_all_points[index_separator:]

        #------------ Computing the k-d-tree with the Train subset ------------
        self.k_d_tree = create_k_d_tree(self.Train)

        #------------ Computing the 'k' value ------------
        # Square root of the number of points, made odd by subtracting 1
        # when it is even.
        optimal_k = int(math.sqrt(len(self.set_of_all_points)))
        if optimal_k % 2 == 0:
            optimal_k -= 1

        #------------ Predicting the classification with the Test subset ------------
        self.hits = 0
        self.misses = 0

        knn = KNearestNeighbours()

        for test_point in self.Test:
            k_nearest_neighbours = knn.search_k_nearest_neighbours(optimal_k, self.k_d_tree, test_point)

            # Count the votes of the neighbours per class.
            votes = {}
            for nearest_neighbour in k_nearest_neighbours:
                label = nearest_neighbour.classification
                votes[label] = votes.get(label, 0) + 1

            # The first class reaching the highest count wins
            # (None when there are no neighbours at all).
            majority_class = None
            majority_number = 0
            for label, count in votes.items():
                if count > majority_number:
                    majority_number = count
                    majority_class = label

            test_point.predicted_classification = majority_class

            # Comparing the predicted value with the actual real value
            if majority_class == test_point.classification:
                self.hits += 1
            else:
                self.misses += 1

        #------------ Computing the accuracy score ------------
        # An empty Test subset used to raise ZeroDivisionError here.
        self.accuracy_score = self.hits / len(self.Test) if self.Test else 0.0

        #------------ Computing the precision and recall scores ------------
        self.precision_and_recall_per_class = self._compute_precision_and_recall()

    def _compute_precision_and_recall(self):
        """
        Return one {"class", "precision", "recall"} dict per predicted class,
        in order of first appearance among the Test predictions.

        precision = true positives / (true positives + false positives)
        recall    = true positives / real occurrences of the class in Test

        When a class is predicted but never really occurs in Test, recall
        is undefined; it is reported as 0.0 instead of raising
        ZeroDivisionError as the previous inline computation did.
        """
        # predicted class -> [num of true positives, num of false positives]
        tallies = {}

        for test_point in self.Test:
            predicted = test_point.predicted_classification
            tally = tallies.setdefault(predicted, [0, 0])
            if predicted == test_point.classification:
                tally[0] += 1
            else:
                tally[1] += 1

        report = []
        for label, (true_positives, false_positives) in tallies.items():
            real_occurrences = self.num_of_real_occurrences_of_class(label)
            report.append({
                "class" : label,
                "precision" : true_positives / (true_positives + false_positives),
                "recall" : true_positives / real_occurrences if real_occurrences else 0.0
            })

        return report

    def num_of_real_occurrences_of_class(self, point_class):
        """Return how many Test points really belong to 'point_class'."""
        return sum(1 for test_point in self.Test if test_point.classification == point_class)

    def print_info(self):
        """Print both subsets, the hit/miss counts and every computed score."""
        print("----- Train subset: (", len(self.Train), " elements) -----")
        for train_point in self.Train:
            print(train_point)
        print()
        print("----- Test subset: (", len(self.Test), " elements, with predicted value) -----")
        for test_point in self.Test:
            print(test_point)
        print()
        print("----- Hits: ", self.hits, " -----")
        print("----- Misses: ", self.misses, " -----")
        print("----- Accuracy score: ", self.accuracy_score, " -----")
        print("----- Precision and Recall per class: -----")
        for precision_and_recall in self.precision_and_recall_per_class:
            print("For class (", precision_and_recall["class"], ") , the precision is ", precision_and_recall["precision"], " and the recall is ", precision_and_recall["recall"])
        

Até aqui, praticamente todas as funções utilitárias de que precisaremos já foram implementadas. Agora podemos realmente começar a trabalhar com um dataset real e fazer a previsão e a classificação com kNN.

In [25]:
def load_dataset(path, max_size = 2000, random_state = None):
    """
    Load a CSV dataset, keeping at most 'max_size' randomly chosen rows.

    NOTE: building the k-d tree for very large datasets kept killing the Jupyter
    kernel. To work around that, any dataset with more than 'max_size' rows is
    randomly down-sampled to 'max_size' rows. (For the heaviest datasets the limit
    had to be lowered to as few as 100 rows.)

    path: Path of the CSV file, forwarded to pd.read_csv

    max_size: Maximum number of rows to keep. A dataset with at most this many rows
    is returned untouched, in file order

    random_state: Optional seed forwarded to DataFrame.sample so that the
    down-sampling can be reproduced. The default (None) leaves the sample unseeded

    Returns a pandas DataFrame with at most 'max_size' rows
    """
    dataset = pd.read_csv(path)

    if len(dataset) > max_size:
        # Draw max_size distinct rows directly instead of shuffling the whole
        # frame and then discarding everything past the first max_size rows
        dataset = dataset.sample(n=max_size, random_state=random_state)

    return dataset

### banana

In [26]:
# Load the banana dataset; load_dataset keeps at most its default max_size (2000) rows.
dataset = load_dataset('datasets_tratados/banana.csv')
# NOTE(review): the constructor presumably performs the train/test split and the kNN
# evaluation — confirm against the KNNClassifierAlgorithm definition.
knnClassifier = KNNClassifierAlgorithm(dataset)
# Print both subsets followed by hits, misses, accuracy and per-class precision/recall.
knnClassifier.print_info()

----- Train subset: ( 1400  elements) -----
[-1.91, -2.24]-(1.0)
[-1.86, -1.78]-(1.0)
[-1.84, -1.64]-(1.0)
[-1.7, -1.79]-(1.0)
[-1.65, -2.05]-(1.0)
[-1.8, -1.71]-(1.0)
[-2.14, -1.59]-(1.0)
[-1.86, -1.57]-(1.0)
[-2.05, -1.4]-(1.0)
[-1.66, -1.52]-(1.0)
[-1.71, -1.27]-(1.0)
[-1.64, -1.8]-(1.0)
[-1.62, -1.85]-(1.0)
[-1.6, -1.44]-(1.0)
[-1.57, -1.74]-(1.0)
[-1.4, -1.54]-(1.0)
[-1.55, -1.47]-(1.0)
[-1.63, -1.41]-(1.0)
[-1.45, -1.29]-(1.0)
[-1.42, -1.28]-(1.0)
[-1.25, -1.28]-(-1.0)
[-1.14, -1.27]-(1.0)
[-1.92, -1.06]-(1.0)
[-1.8, -1.25]-(1.0)
[-2.1, -1.06]-(1.0)
[-1.69, -1.2]-(1.0)
[-1.53, -1.21]-(1.0)
[-1.52, -1.15]-(1.0)
[-1.94, -1.03]-(1.0)
[-1.87, -1.0]-(1.0)
[-2.04, -0.978]-(1.0)
[-1.59, -1.0]-(1.0)
[-1.52, -0.993]-(1.0)
[-1.81, -1.21]-(1.0)
[-1.35, -1.19]-(1.0)
[-1.39, -1.09]-(1.0)
[-1.31, -1.16]-(1.0)
[-1.21, -1.22]-(-1.0)
[-1.19, -1.14]-(1.0)
[-1.38, -1.08]-(1.0)
[-1.28, -1.07]-(1.0)
[-1.26, -1.01]-(1.0)
[-1.19, -1.03]-(-1.0)
[-1.2, -1.02]-(-1.0)
[-1.01, -1.47]-(-1.0)
[-0.98, -1.49]-(

### ecoli1

In [27]:
# Load the ecoli1 dataset; load_dataset keeps at most its default max_size (2000) rows.
dataset = load_dataset('datasets_tratados/ecoli1.csv')
# NOTE(review): the constructor presumably performs the train/test split and the kNN
# evaluation — confirm against the KNNClassifierAlgorithm definition.
knnClassifier = KNNClassifierAlgorithm(dataset)
# Print both subsets followed by hits, misses, accuracy and per-class precision/recall.
knnClassifier.print_info()

----- Train subset: ( 188  elements) -----
[0.38, 0.3, 0.48, 0.5, 0.43, 0.29, 0.39]-(negative)
[0.44, 0.42, 0.48, 0.5, 0.42, 0.25, 0.2]-(negative)
[0.24, 0.34, 0.48, 0.5, 0.38, 0.3, 0.4]-(negative)
[0.27, 0.42, 0.48, 0.5, 0.37, 0.38, 0.43]-(negative)
[0.35, 0.37, 0.48, 0.5, 0.3, 0.34, 0.43]-(negative)
[0.34, 0.33, 0.48, 0.5, 0.38, 0.35, 0.44]-(negative)
[0.39, 0.32, 0.48, 0.5, 0.46, 0.24, 0.35]-(negative)
[0.48, 0.42, 0.48, 0.5, 0.45, 0.25, 0.35]-(negative)
[0.17, 0.39, 0.48, 0.5, 0.53, 0.3, 0.39]-(negative)
[0.34, 0.35, 0.48, 0.5, 0.51, 0.49, 0.56]-(negative)
[0.36, 0.41, 0.48, 0.5, 0.48, 0.47, 0.54]-(negative)
[0.48, 0.41, 0.48, 0.5, 0.51, 0.9, 0.88]-(positive)
[0.35, 0.34, 0.48, 0.5, 0.46, 0.3, 0.27]-(negative)
[0.52, 0.36, 0.48, 0.5, 0.41, 0.28, 0.38]-(negative)
[0.32, 0.42, 0.48, 0.5, 0.35, 0.28, 0.38]-(negative)
[0.34, 0.42, 0.48, 0.5, 0.41, 0.34, 0.43]-(negative)
[0.4, 0.29, 0.48, 0.5, 0.42, 0.35, 0.44]-(negative)
[0.0, 0.38, 0.48, 0.5, 0.42, 0.48, 0.55]-(negative)
[0.18, 0.3, 0

### glass0

In [28]:
# Load the glass0 dataset; load_dataset keeps at most its default max_size (2000) rows.
dataset = load_dataset('datasets_tratados/glass0.csv')
# NOTE(review): the constructor presumably performs the train/test split and the kNN
# evaluation — confirm against the KNNClassifierAlgorithm definition.
knnClassifier = KNNClassifierAlgorithm(dataset)
# Print both subsets followed by hits, misses, accuracy and per-class precision/recall.
knnClassifier.print_info()

----- Train subset: ( 120  elements) -----
[1.51768786, 12.55875, 3.52016, 1.42955, 73.1476, 0.57132, 8.53964, 0.0, 0.0]-(positive)
[1.51645774, 13.03755, 3.39893, 1.25942, 73.0076, 0.52164, 8.58268, 0.0, 0.0]-(negative)
[1.51215232, 12.991, 3.47077, 1.12139, 72.9796, 0.621, 8.34596, 0.0, 0.1581]-(positive)
[1.51643496, 12.15975, 3.52016, 1.3493, 72.89, 0.57132, 8.52888, 0.0, 0.0]-(negative)
[1.5175284, 12.57205, 3.47077, 1.3814, 73.3884, 0.60237, 8.5504, 0.0, 0.0306]-(positive)
[1.51768786, 12.45235, 2.71196, 1.29152, 73.702, 0.5589, 9.05612, 0.0, 0.1224]-(positive)
[1.51743728, 12.19965, 3.25076, 1.15991, 73.5508, 0.621, 8.89472, 0.0, 0.1224]-(negative)
[1.51652608, 11.94695, 0.0, 1.1888, 75.1804, 2.70135, 8.927, 0.0, 0.0]-(negative)
[1.51591102, 12.8181, 3.52016, 1.90142, 72.862, 0.68931, 7.96936, 0.0, 0.0]-(negative)
[1.5159338, 13.09075, 3.52016, 1.55153, 72.8676, 0.6831, 8.04468, 0.0, 0.459]-(negative)
[1.51315464, 13.0176, 0.0, 3.04097, 70.482, 6.21, 6.95792, 0.0, 0.0]-(negative

### haberman

In [29]:
# Load the haberman dataset; load_dataset keeps at most its default max_size (2000) rows.
dataset = load_dataset('datasets_tratados/haberman.csv')
# NOTE(review): the constructor presumably performs the train/test split and the kNN
# evaluation — confirm against the KNNClassifierAlgorithm definition.
knnClassifier = KNNClassifierAlgorithm(dataset)
# Print both subsets followed by hits, misses, accuracy and per-class precision/recall.
knnClassifier.print_info()

----- Train subset: ( 171  elements) -----
[39.0, 58.0, 0.0]-(negative)
[42.0, 59.0, 0.0]-(positive)
[43.0, 60.0, 0.0]-(negative)
[33.0, 60.0, 0.0]-(negative)
[34.0, 60.0, 1.0]-(negative)
[37.0, 58.0, 0.0]-(negative)
[37.0, 60.0, 0.0]-(negative)
[38.0, 60.0, 0.0]-(negative)
[40.0, 58.0, 0.0]-(negative)
[38.0, 60.0, 1.0]-(negative)
[41.0, 59.0, 0.0]-(negative)
[45.0, 60.0, 0.0]-(negative)
[49.0, 61.0, 0.0]-(negative)
[50.0, 61.0, 0.0]-(negative)
[50.0, 59.0, 0.0]-(negative)
[52.0, 61.0, 0.0]-(negative)
[53.0, 60.0, 1.0]-(negative)
[44.0, 61.0, 0.0]-(negative)
[46.0, 62.0, 0.0]-(negative)
[50.0, 61.0, 0.0]-(negative)
[49.0, 62.0, 1.0]-(negative)
[52.0, 62.0, 1.0]-(negative)
[43.0, 59.0, 2.0]-(positive)
[38.0, 59.0, 2.0]-(negative)
[46.0, 58.0, 2.0]-(positive)
[31.0, 59.0, 2.0]-(negative)
[37.0, 59.0, 6.0]-(negative)
[41.0, 59.0, 8.0]-(negative)
[42.0, 61.0, 4.0]-(negative)
[30.0, 62.0, 3.0]-(negative)
[46.0, 62.0, 5.0]-(positive)
[37.0, 60.0, 15.0]-(negative)
[41.0, 60.0, 23.0]-(positive

### iris0

In [30]:
# Load the iris0 dataset; load_dataset keeps at most its default max_size (2000) rows.
dataset = load_dataset('datasets_tratados/iris0.csv')
# NOTE(review): the constructor presumably performs the train/test split and the kNN
# evaluation — confirm against the KNNClassifierAlgorithm definition.
knnClassifier = KNNClassifierAlgorithm(dataset)
# Print both subsets followed by hits, misses, accuracy and per-class precision/recall.
knnClassifier.print_info()

----- Train subset: ( 84  elements) -----
[4.3, 3.0, 1.1, 0.1]-(positive)
[4.4, 3.0, 1.3, 0.2]-(positive)
[4.4, 2.9, 1.4, 0.2]-(positive)
[4.8, 3.0, 1.4, 0.1]-(positive)
[4.9, 3.0, 1.4, 0.2]-(positive)
[4.8, 3.1, 1.6, 0.2]-(positive)
[4.9, 3.1, 1.5, 0.1]-(positive)
[4.9, 3.1, 1.5, 0.1]-(positive)
[4.6, 3.1, 1.5, 0.2]-(positive)
[5.1, 2.5, 3.0, 1.1]-(negative)
[4.9, 3.1, 1.5, 0.1]-(positive)
[5.5, 2.4, 3.7, 1.0]-(negative)
[5.5, 2.3, 4.0, 1.3]-(negative)
[5.5, 2.6, 4.4, 1.2]-(negative)
[5.6, 2.5, 3.9, 1.1]-(negative)
[5.7, 2.6, 3.5, 1.0]-(negative)
[5.6, 3.0, 4.1, 1.3]-(negative)
[5.6, 2.8, 4.9, 2.0]-(negative)
[5.4, 3.0, 4.5, 1.5]-(negative)
[5.8, 2.7, 5.1, 1.9]-(negative)
[5.7, 2.9, 4.2, 1.3]-(negative)
[4.7, 3.2, 1.3, 0.2]-(positive)
[5.1, 3.4, 1.5, 0.2]-(positive)
[4.6, 3.6, 1.0, 0.2]-(positive)
[5.8, 4.0, 1.2, 0.2]-(positive)
[5.3, 3.7, 1.5, 0.2]-(positive)
[5.2, 4.1, 1.5, 0.1]-(positive)
[5.0, 3.2, 1.2, 0.2]-(positive)
[4.8, 3.0, 1.4, 0.3]-(positive)
[4.6, 3.4, 1.4, 0.3]-(positive

### new-thyroid2

In [31]:
# Load the new-thyroid2 dataset; load_dataset keeps at most its default max_size (2000) rows.
dataset = load_dataset('datasets_tratados/new-thyroid2.csv')
# NOTE(review): the constructor presumably performs the train/test split and the kNN
# evaluation — confirm against the KNNClassifierAlgorithm definition.
knnClassifier = KNNClassifierAlgorithm(dataset)
# Print both subsets followed by hits, misses, accuracy and per-class precision/recall.
knnClassifier.print_info()

----- Train subset: ( 120  elements) -----
[103, 7.3, 1.0, 0.7, 0.5]-(negative)
[97, 7.8, 1.3, 1.2, 0.9]-(negative)
[105, 5.7, 1.0, 0.9, 0.9]-(negative)
[106, 8.9, 0.7, 1.0, 2.3]-(negative)
[103, 5.1, 1.4, 1.2, 5.0]-(negative)
[101, 6.7, 1.3, 1.0, 5.7]-(negative)
[106, 6.7, 1.5, 1.2, 3.9]-(negative)
[103, 8.1, 1.4, 0.5, 3.8]-(negative)
[108, 3.5, 0.6, 1.7, 1.4]-(negative)
[108, 7.1, 1.3, 1.6, 2.2]-(negative)
[109, 7.6, 1.3, 2.2, 1.9]-(negative)
[98, 9.1, 1.4, 1.9, -0.3]-(negative)
[102, 5.3, 1.4, 1.3, 6.7]-(negative)
[105, 7.0, 1.5, 2.7, 4.3]-(negative)
[108, 8.7, 1.2, 2.2, 2.5]-(negative)
[104, 6.1, 1.8, 0.5, 0.8]-(negative)
[101, 7.1, 2.2, 0.8, 2.2]-(negative)
[108, 6.5, 1.0, 0.9, 1.5]-(negative)
[105, 8.7, 1.5, 1.1, 1.5]-(negative)
[101, 6.3, 1.5, 0.9, 2.9]-(negative)
[93, 8.9, 1.5, 0.8, 2.7]-(negative)
[104, 6.3, 2.0, 1.2, 4.0]-(negative)
[109, 8.4, 2.1, 1.1, 3.6]-(negative)
[105, 8.1, 2.0, 1.9, -0.5]-(negative)
[90, 8.1, 1.6, 1.4, 1.1]-(negative)
[107, 8.4, 1.8, 1.5, 0.8]-(negativ

### page-blocks0

In [32]:
# Load the page-blocks0 dataset; load_dataset keeps at most its default max_size (2000) rows.
dataset = load_dataset('datasets_tratados/page-blocks0.csv')
# NOTE(review): the constructor presumably performs the train/test split and the kNN
# evaluation — confirm against the KNNClassifierAlgorithm definition.
knnClassifier = KNNClassifierAlgorithm(dataset)
# Print both subsets followed by hits, misses, accuracy and per-class precision/recall.
knnClassifier.print_info()

----- Train subset: ( 1400  elements) -----
[7, 8, 56, 1.143, 0.286, 0.732, 1.78, 16, 41, 9]-(negative)
[7, 8, 56, 1.143, 0.25, 0.768, 1.56, 14, 43, 9]-(negative)
[7, 8, 56, 1.143, 0.268, 0.768, 1.07, 15, 43, 14]-(negative)
[7, 11, 77, 1.571, 0.143, 0.558, 1.57, 11, 43, 7]-(negative)
[7, 7, 49, 1.0, 0.265, 0.796, 1.44, 13, 39, 9]-(negative)
[8, 8, 64, 1.0, 0.25, 0.672, 1.6, 16, 43, 10]-(negative)
[7, 10, 70, 1.429, 0.3, 0.771, 1.5, 21, 54, 14]-(negative)
[7, 9, 63, 1.286, 0.349, 0.746, 1.69, 22, 47, 13]-(negative)
[7, 11, 77, 1.571, 0.299, 0.727, 1.35, 23, 56, 17]-(negative)
[7, 11, 77, 1.571, 0.299, 0.779, 1.64, 23, 60, 14]-(negative)
[8, 10, 80, 1.25, 0.288, 0.813, 1.64, 23, 65, 14]-(negative)
[5, 7, 35, 1.4, 0.371, 0.743, 4.33, 13, 26, 3]-(negative)
[6, 7, 42, 1.167, 0.238, 0.69, 10.0, 10, 29, 1]-(positive)
[4, 6, 24, 1.5, 0.333, 0.792, 2.0, 8, 19, 4]-(negative)
[6, 7, 42, 1.167, 0.262, 0.786, 1.83, 11, 33, 6]-(negative)
[7, 11, 77, 1.571, 0.117, 0.494, 1.8, 9, 38, 5]-(positive)
[6,

### pimaImb

In [33]:
# Load the pimaImb dataset; load_dataset keeps at most its default max_size (2000) rows.
dataset = load_dataset('datasets_tratados/pimaImb.csv')
# NOTE(review): the constructor presumably performs the train/test split and the kNN
# evaluation — confirm against the KNNClassifierAlgorithm definition.
knnClassifier = KNNClassifierAlgorithm(dataset)
# Print both subsets followed by hits, misses, accuracy and per-class precision/recall.
knnClassifier.print_info()

----- Train subset: ( 430  elements) -----
[0.0, 101.0, 64.0, 17.0, 0.0, 21.0, 0.252, 21.0]-(negative)
[1.0, 80.0, 55.0, 0.0, 0.0, 19.1, 0.258, 21.0]-(negative)
[2.0, 99.0, 0.0, 0.0, 0.0, 22.2, 0.108, 23.0]-(negative)
[3.0, 80.0, 0.0, 0.0, 0.0, 0.0, 0.174, 22.0]-(negative)
[0.0, 101.0, 62.0, 0.0, 0.0, 21.9, 0.336, 25.0]-(negative)
[3.0, 87.0, 60.0, 18.0, 0.0, 21.8, 0.444, 21.0]-(negative)
[1.0, 71.0, 62.0, 0.0, 0.0, 21.8, 0.416, 26.0]-(negative)
[0.0, 99.0, 0.0, 0.0, 0.0, 25.0, 0.253, 22.0]-(negative)
[3.0, 113.0, 44.0, 13.0, 0.0, 22.4, 0.14, 22.0]-(negative)
[1.0, 73.0, 50.0, 10.0, 0.0, 23.0, 0.248, 21.0]-(negative)
[2.0, 90.0, 60.0, 0.0, 0.0, 23.5, 0.191, 25.0]-(negative)
[2.0, 111.0, 60.0, 0.0, 0.0, 26.2, 0.343, 23.0]-(negative)
[2.0, 91.0, 62.0, 0.0, 0.0, 27.3, 0.525, 22.0]-(negative)
[2.0, 85.0, 65.0, 0.0, 0.0, 39.6, 0.93, 27.0]-(negative)
[1.0, 82.0, 64.0, 13.0, 95.0, 21.2, 0.415, 23.0]-(negative)
[2.0, 68.0, 62.0, 13.0, 15.0, 20.1, 0.257, 23.0]-(negative)
[1.0, 95.0, 60.0, 18.0,

### shuttle-c0-vs-c4

In [34]:
# Load the shuttle-c0-vs-c4 dataset; load_dataset keeps at most its default max_size (2000) rows.
dataset = load_dataset('datasets_tratados/shuttle-c0-vs-c4.csv')
# NOTE(review): the constructor presumably performs the train/test split and the kNN
# evaluation — confirm against the KNNClassifierAlgorithm definition.
knnClassifier = KNNClassifierAlgorithm(dataset)
# Print both subsets followed by hits, misses, accuracy and per-class precision/recall.
knnClassifier.print_info()

----- Train subset: ( 1024  elements) -----
[37, 0, 76, 0, 30, -24, 40, 45, 6]-(negative)
[37, -1, 76, -4, 30, -7, 40, 45, 6]-(negative)
[37, 0, 76, -4, 28, 0, 40, 48, 8]-(negative)
[37, 0, 75, 0, 28, -10, 38, 46, 8]-(negative)
[37, 0, 78, 0, 24, -26, 41, 55, 14]-(negative)
[37, 0, 78, 0, 24, -15, 41, 55, 14]-(negative)
[37, 0, 77, -7, 26, -1, 40, 51, 10]-(negative)
[37, 0, 78, 0, 10, -2, 41, 68, 26]-(negative)
[37, 0, 77, 0, 26, -27, 40, 51, 10]-(negative)
[37, 0, 78, -4, 24, 0, 42, 55, 14]-(negative)
[37, 0, 79, 0, 10, -21, 42, 68, 26]-(negative)
[37, 0, 80, -1, 12, -3, 43, 67, 24]-(negative)
[37, 0, 79, 0, 6, -1, 43, 74, 32]-(negative)
[37, 0, 78, 0, 8, -16, 42, 70, 28]-(negative)
[37, 0, 78, -7, -6, 0, 41, 86, 44]-(negative)
[37, 0, 78, 0, -6, -22, 42, 86, 44]-(negative)
[37, 0, 76, 0, 28, 16, 39, 47, 8]-(negative)
[37, 0, 75, 0, 26, 16, 38, 49, 12]-(negative)
[37, 0, 76, 0, 18, -11, 40, 58, 18]-(negative)
[37, 0, 78, 0, 24, 29, 41, 55, 14]-(negative)
[37, 0, 78, 0, 6, 12, 41, 73, 

### vehicle3

In [35]:
# Load the vehicle3 dataset; load_dataset keeps at most its default max_size (2000) rows.
dataset = load_dataset('datasets_tratados/vehicle3.csv')
# NOTE(review): the constructor presumably performs the train/test split and the kNN
# evaluation — confirm against the KNNClassifierAlgorithm definition.
knnClassifier = KNNClassifierAlgorithm(dataset)
# Print both subsets followed by hits, misses, accuracy and per-class precision/recall.
knnClassifier.print_info()

----- Train subset: ( 473  elements) -----
[82, 36, 51, 114, 53, 4, 135, 50, 18, 126, 150, 268, 144, 86, 15, 4, 181, 182]-(negative)
[83, 39, 69, 127, 54, 5, 135, 49, 18, 131, 155, 274, 162, 69, 16, 6, 187, 190]-(positive)
[85, 33, 50, 104, 53, 4, 115, 59, 17, 118, 136, 193, 127, 83, 1, 30, 179, 185]-(negative)
[73, 37, 53, 111, 54, 6, 126, 55, 18, 128, 135, 227, 147, 82, 1, 15, 176, 184]-(positive)
[86, 42, 65, 116, 53, 6, 152, 45, 19, 141, 175, 335, 172, 85, 5, 4, 179, 183]-(negative)
[85, 40, 66, 121, 52, 4, 152, 44, 19, 133, 170, 340, 163, 87, 13, 3, 180, 183]-(positive)
[83, 40, 53, 114, 53, 6, 132, 53, 18, 140, 142, 247, 157, 86, 8, 7, 176, 183]-(negative)
[85, 42, 66, 122, 54, 6, 148, 46, 19, 141, 172, 317, 174, 88, 6, 14, 180, 182]-(negative)
[90, 39, 57, 114, 48, 7, 135, 51, 18, 139, 155, 261, 151, 85, 12, 8, 183, 182]-(negative)
[92, 41, 66, 125, 52, 7, 139, 50, 18, 143, 160, 275, 161, 81, 7, 19, 182, 184]-(negative)
[89, 37, 51, 111, 54, 5, 120, 56, 17, 127, 138, 213, 147, 8