In [248]:
import math
import pandas as pd

reference: https://www.youtube.com/watch?v=u4M5rRYwRHs

In [278]:
def euclidean(point1, point2):
    """
    Finds the euclidean distance between two points (tuples of k dimensions each)
    """
    distance = 0
    for i in range(len(point1)):
        distance += (point1[i] - point2[i]) ** 2
    return math.sqrt(distance)

In [279]:
def closestPoint(allNeighbors, point):
    """
    Brute force way to find the closest point
    """
    minDistance = None
    minPoint = None
    for current in allNeighbors:
        currentDistance = euclidean(point, current)
        if minDistance is None or currentDistance < minDistance:
            minDistance = currentDistance
            minPoint = current
    return minPoint

In [304]:
k = 11
def build_kdtree(points, depth=0):
    """
    Builds a kdtree
    """
    n = len(points)
    # if no points, can't build a kdtree
    if n <=0:
        return None
    
    axis = depth % k
    # sort the points based on the axis
    sorted_points = sorted(points, key=lambda point: point[axis])
    # the middle index is the splitting point
    mid = n//2
    # return the point, left subtree and right subtree
    return {
        'point': sorted_points[mid],
        'left': build_kdtree(sorted_points[:mid], depth + 1),
        'right': build_kdtree(sorted_points[mid + 1:], depth + 1)
    }

In [281]:
def kdTreeClosest(root, point, depth=0, nearest=None):
    """
    Finds the nearest neighbor in the kdtree
    """
    if root is None:
        return nearest
    
    axis = depth % k
    # did we find a better result so far?
    next_best = None
    # the next branch to recurse on
    next_branch = None
    # distance between the searching point and the best result (nearest)
    if nearest is None or euclidean(point, nearest) > euclidean(point, root['point']):
        next_best = root['point']
    else:
        next_best = nearest
    # find which tree to recurse on
    # if item on left, recurse on left
    if point[axis] < root['point'][axis]:
        next_branch = root['left']
    else:
        # recurse right
        next_branch = root['right']
    
    return kdTreeClosest(next_branch, point, depth+1, next_best)

In [282]:
df = pd.read_csv('data.csv')
df = df.drop('DATE', axis=1)
newDf = df.fillna(0)

In [283]:
tuples = [tuple(x) for x in newDf.values]

In [284]:
tuples

[(10.74, 0.0, 26.0, 14.0, 21.9, 28.0, 0.0, 0.0, 0.0, 0.0, 0.0),
 (11.18, 0.0, 14.0, 5.0, 23.9, 35.1, 0.0, 0.0, 0.0, 0.0, 0.0),
 (12.3, 0.0, 23.0, 4.0, 23.0, 30.0, 0.0, 0.0, 0.0, 0.0, 0.0),
 (9.4, 0.0, 25.0, 19.0, 16.1, 21.9, 0.0, 0.0, 0.0, 1.0, 0.0),
 (8.5, 0.0, 28.0, 22.0, 15.0, 21.9, 1.0, 0.0, 0.0, 1.0, 0.0),
 (7.61, 0.0, 28.0, 25.0, 14.1, 18.1, 1.0, 0.0, 0.0, 0.0, 0.0),
 (2.68, 0.22, 26.0, 20.0, 8.1, 10.1, 1.0, 1.0, 0.0, 0.0, 0.0),
 (10.29, 0.02, 23.0, 17.0, 18.1, 25.1, 1.0, 0.0, 0.0, 0.0, 0.0),
 (5.59, 0.0, 25.0, -3.0, 10.1, 15.0, 0.0, 0.0, 0.0, 0.0, 0.0),
 (8.28, 0.0, 26.0, -9.0, 21.9, 25.1, 0.0, 0.0, 0.0, 0.0, 0.0),
 (6.93, 0.01, 25.0, 13.0, 17.0, 23.0, 0.0, 0.0, 0.0, 1.0, 0.0),
 (7.61, 0.0, 25.0, 15.0, 14.1, 18.1, 0.0, 0.0, 0.0, 0.0, 0.0),
 (11.18, 0.0, 31.0, 21.0, 18.1, 23.0, 0.0, 0.0, 0.0, 0.0, 0.0),
 (9.4, 0.0, 37.0, 22.0, 16.1, 21.9, 0.0, 0.0, 0.0, 1.0, 0.0),
 (5.37, 0.0, 37.0, 31.0, 16.1, 21.9, 1.0, 0.0, 0.0, 1.0, 0.0),
 (6.93, 0.0, 33.0, 27.0, 13.0, 17.0, 1.0, 0.0, 0.0, 1.

In [285]:
listPoints =[(1,2),(2,3),(4,5)]

In [305]:
kdTree = build_kdtree(tuples)

In [319]:
searchPoint = (3.04, 0.0, 31.0, 22.0, 12.1, 15.0, 1.0, 0.0, 0.0, 1.0, 1.0)
kdClosest = kdTreeClosest(kdTree, searchPoint)
kdClosest

(5.59, 0.0, 33.0, 21.0, 12.1, 15.0, 1.0, 0.0, 0.0, 0.0, 0.0)

In [320]:
euclidean((6.04, 0.0, 31.0, 22.0, 12.1, 15.0, 1.0, 0.0, 0.0, 1.0, 0.0),(4.04, 0.0, 31.0, 22.0, 12.1, 15.0, 1.0, 0.0, 0.0, 1.0, 1.0) )

2.23606797749979

In [324]:
euclidean(nearest, searchPoint)

2.934229029915695

In [322]:
euclidean(kdClosest, searchPoint)

3.6745748053346254

In [323]:
nearest = closestPoint(tuples, searchPoint)