In [41]:
from collections import namedtuple
from operator import itemgetter
from pprint import pformat

class Node(namedtuple("Node", "location left_child right_child")):
    def __repr__(self):
        return pformat(tuple(self))

def kdtree(point_list, depth: int = 0):
    if not point_list:
        return None

    k = len(point_list[0])  # assumes all points have the same dimension
    # Select axis based on depth so that axis cycles through all valid values
    axis = depth % k

    # Sort point list by axis and choose median as pivot element
    point_list.sort(key=itemgetter(axis))
    median = len(point_list) // 2

    # Create node and construct subtrees
    return Node(
        location=point_list[median],
        left_child=kdtree(point_list[:median], depth + 1),
        right_child=kdtree(point_list[median + 1 :], depth + 1),
    )

def main(test_arr):
    """Example usage"""
    #point_list = [(7, 2), (5, 4), (9, 6), (4, 7), (8, 1), (2, 3)]
    point_list = test_arr
    tree = kdtree(point_list)
    print(tree)
    return tree

In [42]:
import pandas as pd
import numpy as np

In [43]:
data_set = pd.read_csv('diabetes.csv')
data_set

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.340,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1


In [44]:
modified_data = data_set
modified_data

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.340,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1


In [45]:
#test_data = modified_data.drop('Pregnancies',axis=1)
test_data = modified_data.drop('SkinThickness',axis=1)
test_data = test_data.drop('BloodPressure',axis=1)
test_data = test_data.drop('BMI',axis=1)
test_data = test_data.drop('Age',axis=1)
test_data = test_data.drop('DiabetesPedigreeFunction',axis=1)
test_data = test_data.drop('Insulin',axis=1)
test_data

Unnamed: 0,Pregnancies,Glucose,Outcome
0,6,148,1
1,1,85,0
2,8,183,1
3,1,89,0
4,0,137,1
...,...,...,...
763,10,101,0
764,2,122,0
765,5,121,0
766,1,126,1


In [46]:
test = test_data.drop('Outcome',axis=1)
test_arr = test.to_numpy()
test_arr

array([[  6, 148],
       [  1,  85],
       [  8, 183],
       ...,
       [  5, 121],
       [  1, 126],
       [  1,  93]], dtype=int64)

In [47]:
point_list = [(7, 2), (5, 4), (9, 6), (4, 7), (8, 1), (2, 3)]
point_list[0][1]
test_list = test_arr.tolist()
test_list[0]

[6, 148]

In [48]:
t = main(test_list)

([3, 100],
 ([1, 112],
 ([1, 100],
 ([1, 93],
 ([1, 80],
 ([0, 78],
 ([1, 0],
 ([0, 67],
 ([1, 0], ([0, 57], None, None), ([1, 0], None, None)),
 ([0, 74], ([0, 73], None, None), None)),
 ([1, 71],
 ([1, 71], ([1, 71], None, None), None),
 ([1, 77], ([1, 73], None, None), None))),
 ([0, 93],
 ([0, 86],
 ([0, 84], ([0, 84], None, None), None),
 ([0, 91], ([0, 91], None, None), None)),
 ([1, 79],
 ([1, 79], ([1, 79], None, None), None),
 ([0, 93], ([0, 93], None, None), None)))),
 ([1, 88],
 ([1, 84],
 ([1, 81],
 ([1, 81], ([1, 80], None, None), None),
 ([1, 83], ([1, 82], None, None), None)),
 ([1, 87],
 ([1, 86], ([1, 85], None, None), None),
 ([1, 87], ([1, 87], None, None), None))),
 ([1, 90],
 ([1, 89],
 ([1, 88], ([1, 88], None, None), None),
 ([1, 89], ([1, 89], None, None), None)),
 ([1, 91],
 ([1, 90], ([1, 90], None, None), None),
 ([1, 92], ([1, 91], None, None), None))))),
 ([0, 105],
 ([0, 101],
 ([0, 97],
 ([0, 95],
 ([0, 94], ([0, 94], None, None), None),
 ([0, 95], ([0, 9

In [49]:
test_list[0][1]

137

In [70]:
from collections import namedtuple
from operator import itemgetter
from pprint import pformat

class Node(namedtuple("Node", "location left_child right_child")):
    def __repr__(self):
        return pformat(tuple(self))

def kdtree(point_list, depth: int = 0):
    if not point_list:
        return None

    k = len(point_list[0])  # assumes all points have the same dimension
    # Select axis based on depth so that axis cycles through all valid values
    axis = depth % k

    # Sort point list by axis and choose median as pivot element
    point_list.sort(key=itemgetter(axis))
    median = len(point_list) // 2
    
    if (point_list[median][0] < 1 ): 
            left_child=kdtree(point_list[:median], depth + 1)
            print(left_child)
      
    mytuple = tuple(map(int, Node.left_child.split(' ')))
    print(mytuple)

    # Create node and construct subtrees
    return Node(
        location=point_list[median],
            left_child= kdtree(point_list[:median], depth + 1) if (point_list[median][0] > 1 ) else 'None', 
            right_child=kdtree( point_list[median + 1 :], depth + 1)if (point_list[median][1] < 140 ) else 'None',
    )

def main(test_arr):
    """Example usage"""
    #point_list = [(7, 2), (5, 4), (9, 6), (4, 7), (8, 1), (2, 3)]
    point_list = test_arr
    tree = kdtree(point_list)
    print(tree)
    return tree
    

In [71]:
a = main(test_list)
a

AttributeError: '_collections._tuplegetter' object has no attribute 'split'

In [35]:
a = np.array(a)
a

array([list([3, 100]), ([1, 112], 'None', 'None'), 'None'], dtype=object)

In [36]:
# initializing list
test_list = a
  
# printing original list 
print ("The original list is : " + str(test_list))
  
# using naive method 
# to remove None values in list
res = []
for val in test_list:
    if val != 'None' :
        res.append(val)
  
# printing result
print ("List after removal of None values : " +  str(res))

The original list is : [list([3, 100]) ([1, 112], 'None', 'None') 'None']
List after removal of None values : [[3, 100], ([1, 112], 'None', 'None')]
