In [1]:
import numpy as np
from scipy import spatial

def euclidian_distance(v1, v2):
    dist = np.linalg.norm(v1-v2)
    return dist

def cosine_distance(v1, v2):
    """
    Return the cosine distance between v1 and v2.

    Thin wrapper around scipy.spatial.distance.cosine, which SciPy documents
    as 1 - (v1 . v2) / (||v1|| * ||v2||).
    """
    return spatial.distance.cosine(v1, v2)

def cosine_distance2(v1, v2):
    """
    Return the cosine distance between v1 and v2, computed with NumPy only.

    The result is 1 minus the cosine similarity, i.e. one minus the dot
    product divided by the product of the two Euclidean norms.
    """
    norm_product = np.linalg.norm(v1) * np.linalg.norm(v2)
    similarity = np.dot(v1, v2) / norm_product
    return 1 - similarity

def get_neighbors(k, v, data, dist_function = 'euclidian'):
    """
    Return the rows of data that are closest to v, nearest first.

    k : the number of neighbors; k + 1 rows are returned, so when v is itself
        a row of data the first element is v and k other rows follow.
        Fewer rows are returned when data has fewer than k + 1 rows.
    v : the vector whose nearest neighbor vectors are searched for
    data : n by d matrix where n is the number of objects and d is the object's dimension
    dist_function : name of the metric, one of 'euclidian', 'cosine' or 'cosine2'

    Raises ValueError when dist_function is not one of the supported names.
    """
    # Check the metric name before doing any work so that a misspelled name
    # fails with a clear message instead of an unbound-variable error.
    if dist_function not in ('euclidian', 'cosine', 'cosine2'):
        raise ValueError(
            f"unknown dist_function {dist_function!r}; "
            "expected 'euclidian', 'cosine' or 'cosine2'"
        )
    # Resolve the metric once instead of re-testing the name for every row.
    if dist_function == 'euclidian':
        metric = euclidian_distance
    elif dist_function == 'cosine':
        metric = cosine_distance
    else:
        metric = cosine_distance2

    scored = [(row, metric(v, row)) for row in data]
    # list.sort is stable: rows at equal distance keep their order in data.
    scored.sort(key=lambda pair: pair[1])
    # Slicing (rather than indexing) tolerates k + 1 > len(data); the max()
    # keeps a negative k + 1 from turning into a "drop from the end" slice.
    count = max(k + 1, 0)
    return [row for row, _ in scored[:count]]

In [2]:
# Toy data set: ten rows of two real-valued columns followed by a 0/1 column
# (presumably a class label -- the neighbor searches below treat it as a feature).
dataset = np.array([
    [2.7810836, 2.550537003, 0],
    [1.465489372, 2.362125076, 0],
    [3.396561688, 4.400293529, 0],
    [1.38807019, 1.850220317, 0],
    [3.06407232, 3.005305973, 0],
    [7.627531214, 2.759262235, 1],
    [5.332441248, 2.088626775, 1],
    [6.922596716, 1.77106367, 1],
    [8.675418651, -0.242068655, 1],
    [7.673756466, 3.508563011, 1],
])
dataset

array([[ 2.7810836 ,  2.550537  ,  0.        ],
       [ 1.46548937,  2.36212508,  0.        ],
       [ 3.39656169,  4.40029353,  0.        ],
       [ 1.38807019,  1.85022032,  0.        ],
       [ 3.06407232,  3.00530597,  0.        ],
       [ 7.62753121,  2.75926224,  1.        ],
       [ 5.33244125,  2.08862677,  1.        ],
       [ 6.92259672,  1.77106367,  1.        ],
       [ 8.67541865, -0.24206865,  1.        ],
       [ 7.67375647,  3.50856301,  1.        ]])

In [3]:
# Query with the first row of the data set; Euclidean metric.
get_neighbors(k=3, v=dataset[0], data=dataset, dist_function='euclidian')

[array([2.7810836, 2.550537 , 0.       ]),
 array([3.06407232, 3.00530597, 0.        ]),
 array([1.46548937, 2.36212508, 0.        ]),
 array([1.38807019, 1.85022032, 0.        ])]

In [4]:
# Same query, using the SciPy-backed cosine distance.
get_neighbors(k=3, v=dataset[0], data=dataset, dist_function='cosine')

[array([2.7810836, 2.550537 , 0.       ]),
 array([3.06407232, 3.00530597, 0.        ]),
 array([3.39656169, 4.40029353, 0.        ]),
 array([1.38807019, 1.85022032, 0.        ])]

In [5]:
# Same query, using the NumPy-only cosine distance.
get_neighbors(k=3, v=dataset[0], data=dataset, dist_function='cosine2')

[array([2.7810836, 2.550537 , 0.       ]),
 array([3.06407232, 3.00530597, 0.        ]),
 array([3.39656169, 4.40029353, 0.        ]),
 array([1.38807019, 1.85022032, 0.        ])]

In [6]:
def generate_random_data(nobjects=10, ndim=3, seed=1999):
    """
    Return a reproducible nobjects by ndim matrix of normal samples shifted by +2.

    nobjects : number of rows (objects) to generate
    ndim : number of columns (dimension of each object)
    seed : seed of the private random generator; the default 1999 reproduces
           the data this function has always returned
    """
    # NOTE(review): seeding the global generator has no effect on the returned
    # data, which comes from the private RandomState below. The call is kept so
    # that any code relying on the global seed being reset behaves as before.
    np.random.seed(2)
    rng = np.random.RandomState(seed)
    return rng.randn(nobjects, ndim) + 2

In [7]:
def get_temporal_nearest_neighbors(k, v, vt, data, t, dist_function = 'euclidian'):
    """
    Find the nearest neighbors of v with the time tick treated as one more feature.

    vt is appended to v and t is appended to data as extra column(s); distances
    are then measured between these extended vectors.

    k : the number of neighbors; k + 1 entries are returned (fewer when data
        has fewer than k + 1 rows)
    v : the vector whose k neighbor vectors are searched for
    vt: time tick of v
    data : n by d matrix where n is the number of objects and d is the object's dimension
    t : column array (n by 1) holding the time tick of each row of data
    dist_function : name of the metric, one of 'euclidian', 'cosine' or 'cosine2'

    return (neighbors, timeticks), nearest first: neighbors are the extended
    rows (the data row followed by its time tick) and timeticks the matching
    entries of t. When v with tick vt is a row of data, the first element is
    v itself.

    Raises ValueError when dist_function is not one of the supported names.
    """
    # Check the metric name before doing any work so that a misspelled name
    # fails with a clear message instead of an unbound-variable error.
    if dist_function not in ('euclidian', 'cosine', 'cosine2'):
        raise ValueError(
            f"unknown dist_function {dist_function!r}; "
            "expected 'euclidian', 'cosine' or 'cosine2'"
        )
    if dist_function == 'euclidian':
        metric = euclidian_distance
    elif dist_function == 'cosine':
        metric = cosine_distance
    else:
        metric = cosine_distance2

    query = np.hstack((v, vt))
    extended = np.hstack((data, t))
    scored = [(row, metric(query, row), t[i]) for i, row in enumerate(extended)]
    # list.sort is stable: rows at equal distance keep their order in data.
    scored.sort(key=lambda item: item[1])
    # Slicing tolerates k + 1 > n; max() guards against a negative stop index.
    nearest = scored[:max(k + 1, 0)]
    neighbors = [item[0] for item in nearest]
    timeticks = [item[2] for item in nearest]
    return neighbors, timeticks

In [8]:
# Synthetic experiment: 60 random 3-D points with time ticks 1..60.
A = generate_random_data(nobjects=60, ndim=3)
data = A
t = np.arange(1, A.shape[0] + 1)[:, np.newaxis]  # ticks as an n by 1 column

# Query point: the row at index 29, whose tick is 30 because ticks start at 1.
k = 3
v = A[29]
vt = 30

neighbors, timeticks = get_temporal_nearest_neighbors(
    k, v, vt, data, t, dist_function='euclidian'
)
print(f'Nearest neighbors time tick {timeticks}')

Nearest neighbors time tick [array([30]), array([29]), array([32]), array([28])]


In [9]:
def get_weighted_temporal_nearest_neighbors(k, v, vt, data, t, dist_function = 'euclidian'):
    """
    Find the nearest neighbors of v using distances weighted by the time gap.

    The distance between v and each row is multiplied by |t[i] - vt|, so a row
    that shares v's time tick gets a weighted distance of 0 whatever its
    feature distance is.

    k : the number of neighbors; k + 1 entries are returned (fewer when data
        has fewer than k + 1 rows)
    v : the vector whose k neighbor vectors are searched for
    vt: time tick of v
    data : n by d matrix where n is the number of objects and d is the object's dimension
    t : an array holding the time tick of each row of data
    dist_function : name of the metric, one of 'euclidian', 'cosine' or 'cosine2'

    return (neighbors, timeticks), nearest first: neighbors are rows of data
    and timeticks the matching entries of t. When v with tick vt is a row of
    data, its weighted distance is 0 and it comes first unless a tied row
    precedes it in data.

    Raises ValueError when dist_function is not one of the supported names.
    """
    # Check the metric name before doing any work so that a misspelled name
    # fails with a clear message instead of an unbound-variable error.
    if dist_function not in ('euclidian', 'cosine', 'cosine2'):
        raise ValueError(
            f"unknown dist_function {dist_function!r}; "
            "expected 'euclidian', 'cosine' or 'cosine2'"
        )
    if dist_function == 'euclidian':
        metric = euclidian_distance
    elif dist_function == 'cosine':
        metric = cosine_distance
    else:
        metric = cosine_distance2

    scored = []
    for i, row in enumerate(data):
        time_gap = abs(t[i] - vt)
        scored.append((row, time_gap * metric(v, row), t[i]))
    # list.sort is stable: rows at equal weighted distance keep their order in data.
    scored.sort(key=lambda item: item[1])
    # Slicing tolerates k + 1 > n; max() guards against a negative stop index.
    nearest = scored[:max(k + 1, 0)]
    neighbors = [item[0] for item in nearest]
    timeticks = [item[2] for item in nearest]
    return neighbors, timeticks

In [10]:
# Same query (k, v, vt, data, t) as the temporal search, with time-weighted distances.
neighbors, timeticks = get_weighted_temporal_nearest_neighbors(
    k=k, v=v, vt=vt, data=data, t=t, dist_function='euclidian'
)
print(f'Nearest neighbors time tick {timeticks}')

Nearest neighbors time tick [array([30]), array([29]), array([34]), array([31])]
