K-nearest neighbors algorithm by attribute importance.

The goal is to calculate nearest neighbor with decreasing importance of attribute from left to right.
First attribute is the most important and last is the least important.
To achieve this I developed following halving interval algorithm.

In [1]:
import numpy as np

In [2]:
# Neighbors and Neighbor
neighbors = np.array([[0, 1, 0, 1],
                 [1, 2, 1, 2],
                 [2, 1, 1, 1],
                 [1, 0, 0, 0],
                 [1, 2, 0, 2],
                 [2, 2, 2, 2],
                 [1, 0, 0, 1]])


neighbor = np.array([[1, 1, 0, 1]])
#neighbor = np.array([[2, 1, 1, 1]])

attributes = len(neighbor[0])

In [3]:
# Define arrays for calculation
half_more = 0.6 # Always more then half (0.5)
rest_from_one = np.zeros([attributes])
cum_half = np.zeros([attributes])
half = np.zeros([attributes])

In [4]:
# Calculations for halving
half[0] = half_more
for i in range(1, attributes):
    cum_half[i] = cum_half[i-1] + half[i-1]
    rest_from_one[i] = 1 - cum_half[i]
    half[i] = rest_from_one[i] * half_more

half[i] = rest_from_one[i]

In [5]:
rest_from_one, cum_half, half

(array([0.   , 0.4  , 0.16 , 0.064]),
 array([0.   , 0.6  , 0.84 , 0.936]),
 array([0.6  , 0.24 , 0.096, 0.064]))

In [6]:
# Attributes distances
distances = neighbors - neighbor

In [7]:
# Attributes absolute distances
distances_abs = np.absolute(distances)
distances_abs

array([[1, 0, 0, 0],
       [0, 1, 1, 1],
       [1, 0, 1, 0],
       [0, 1, 0, 1],
       [0, 1, 0, 1],
       [1, 1, 2, 1],
       [0, 1, 0, 0]])

In [8]:
# Attribute distance
distance = np.matmul(distances_abs, half)
distance

array([0.6  , 0.4  , 0.696, 0.304, 0.304, 1.096, 0.24 ])

In [9]:
# Ranking distance
from scipy.stats import rankdata
neighbors_rank = rankdata(distance, method='ordinal')
neighbors_rank

array([5, 4, 6, 2, 3, 7, 1], dtype=int64)

In [10]:
# Ranking neighbor to neighbors
nearest_n = 7
print('neighbor', ' '*14, neighbor,'\n')
for rank_element in range(nearest_n):
    for ind, item in enumerate(neighbors_rank, start=0):
        if item == rank_element+1: # Rank start = 1
            print('rank', item, 'index',ind, 'neighbors', neighbors[ind], 'distance', np.round(distance[ind],3))


neighbor                [[1 1 0 1]] 

rank 1 index 6 neighbors [1 0 0 1] distance 0.24
rank 2 index 3 neighbors [1 0 0 0] distance 0.304
rank 3 index 4 neighbors [1 2 0 2] distance 0.304
rank 4 index 1 neighbors [1 2 1 2] distance 0.4
rank 5 index 0 neighbors [0 1 0 1] distance 0.6
rank 6 index 2 neighbors [2 1 1 1] distance 0.696
rank 7 index 5 neighbors [2 2 2 2] distance 1.096


In [11]:
# Nearest Neighbors KDTree algorithm

http://scikit-learn.org/stable/modules/neighbors.html

http://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KDTree.html


In [12]:
# KDTree
import numpy as np
from sklearn.neighbors import KDTree
tree = KDTree(neighbors, leaf_size=2)              
distance, ind = tree.query(neighbor, k=7)

In [13]:
# Ranking neighbor to neighbors with KDTree
print('neighbor', ' '*14, neighbor,'\n')
for i in range(len(ind[0])):
    print('rank', i+1, 'index', ind[0,i], 'neighbors', neighbors[ind[0,i]],'distanceance', np.round(distance[0,ind[0,i]],3))


neighbor                [[1 1 0 1]] 

rank 1 index 0 neighbors [0 1 0 1] distanceance 1.0
rank 2 index 6 neighbors [1 0 0 1] distanceance 2.646
rank 3 index 3 neighbors [1 0 0 0] distanceance 1.414
rank 4 index 4 neighbors [1 2 0 2] distanceance 1.414
rank 5 index 2 neighbors [2 1 1 1] distanceance 1.414
rank 6 index 1 neighbors [1 2 1 2] distanceance 1.0
rank 7 index 5 neighbors [2 2 2 2] distanceance 1.732


In [14]:
'''
# You can observ the ranking differences:

# neighbor                [[1 1 0 1]] 

# Attribute decreasing importance algorithm
# rank 1 index 6 neighbors [1 0 0 1] distance 0.24

# KDTree algorithm
# rank 1 index 0 neighbors [0 1 0 1] distanceance 1.0
'''

'\n# You can observ the ranking differences:\n\n# neighbor                [[1 1 0 1]] \n\n# Attribute decreasing importance algorithm\n# rank 1 index 6 neighbors [1 0 0 1] distance 0.24\n\n# KDTree algorithm\n# rank 1 index 0 neighbors [0 1 0 1] distanceance 1.0\n'

In [15]:
#

I want to know what solutions are provided within python libraries or tensorflow libraries for finding nearest neighbor by attribute importance.