In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
# Experiment 1: build the dataset.
# Create N randomly generated (m, 1) column vectors drawn from a standard normal
# distribution (normal rather than uniform so components can be negative too).
import numpy as np

m = 10     # dimensionality of each vector
N = 1000   # number of vectors in the dataset
SEED = 0   # fixed seed so "Restart Kernel -> Run All" regenerates the same dataset

# A dedicated Generator makes the dataset reproducible without reseeding NumPy's
# global random state.
rng = np.random.default_rng(SEED)
vectors = [rng.standard_normal((m, 1)) for _ in range(N)]
# Uniform alternative on [0, 1): vectors = [rng.random((m, 1)) for _ in range(N)]

In [3]:
# Exact nearest neighbours via brute force (the ground truth for the LSH experiment).
#
# Each unordered pair (i, j) is measured once -- N(N-1)/2 distance evaluations
# instead of the N(N-1) of a naive double loop (same O(N^2) complexity class) --
# and stored under both endpoints, which also makes the top-5 neighbours easy to get.
# pairwise_distances maps index -> {other index: Euclidean distance}, e.g.
# {0: {1: 0.1, 2: 0.3, 3: 0.6}, 1: {0: 0.1, 2: 0.7, 3: 0.8}}
pairwise_distances = {}
for i in range(len(vectors)):
    for j in range(i + 1, len(vectors)):
        dist = np.linalg.norm(vectors[i] - vectors[j])
        # setdefault creates the inner dict on first use; unlike a bare `except:`
        # it cannot hide unrelated errors or a KeyboardInterrupt.
        pairwise_distances.setdefault(i, {})[j] = dist
        pairwise_distances.setdefault(j, {})[i] = dist

nearest_neighbors = {}       # index -> index of its single closest vector
five_nearest_neighbors = {}  # index -> indices of its 5 closest vectors, closest first
for i, distances_from_i in pairwise_distances.items():
    # sorted() is stable, so ties resolve to the lowest-inserted index, exactly as
    # min() over the same dict would.
    ranked = sorted(distances_from_i, key=distances_from_i.get)
    nearest_neighbors[i] = ranked[0]
    five_nearest_neighbors[i] = ranked[:5]
print(nearest_neighbors)
print(five_nearest_neighbors)

{0: 286, 1: 756, 2: 650, 3: 126, 4: 934, 5: 254, 6: 371, 7: 834, 8: 734, 9: 207, 10: 201, 11: 610, 12: 697, 13: 725, 14: 483, 15: 890, 16: 566, 17: 763, 18: 928, 19: 767, 20: 13, 21: 588, 22: 488, 23: 874, 24: 33, 25: 249, 26: 840, 27: 579, 28: 989, 29: 380, 30: 282, 31: 497, 32: 438, 33: 371, 34: 720, 35: 569, 36: 757, 37: 914, 38: 981, 39: 765, 40: 25, 41: 48, 42: 899, 43: 850, 44: 483, 45: 238, 46: 132, 47: 894, 48: 41, 49: 401, 50: 681, 51: 480, 52: 534, 53: 914, 54: 138, 55: 201, 56: 236, 57: 665, 58: 570, 59: 744, 60: 301, 61: 518, 62: 218, 63: 657, 64: 242, 65: 335, 66: 682, 67: 744, 68: 640, 69: 162, 70: 138, 71: 374, 72: 392, 73: 957, 74: 830, 75: 307, 76: 362, 77: 779, 78: 319, 79: 367, 80: 641, 81: 837, 82: 895, 83: 999, 84: 243, 85: 337, 86: 444, 87: 436, 88: 128, 89: 910, 90: 865, 91: 970, 92: 515, 93: 478, 94: 200, 95: 287, 96: 909, 97: 134, 98: 649, 99: 635, 100: 385, 101: 111, 102: 111, 103: 715, 104: 310, 105: 688, 106: 365, 107: 528, 108: 195, 109: 0, 110: 17, 111: 10

In [4]:
# Approximate nearest neighbours, step 1: bucket the vectors with NaiveLSH.
from memristor.engine.model import NaiveLSH
from memristor.crossbar.model import LineResistanceCrossbar
from memristor.devices import StaticMemristor

# (An earlier single-run configuration used hash_size=10 with otherwise identical
# parameters.)
reps = 3  # adjustable hyperparameter (repetitions of the hashing)
all_bins = []  # bins from every repetition; each bin is a list of vector indices
for _ in range(reps):
    # A new NaiveLSH is constructed for every repetition -- presumably so each one
    # hashes with an independent random projection (TODO confirm in NaiveLSH).
    naive_lsh = NaiveLSH(
        hash_size=5, # adjustable hyperparameter
        crossbar_class=LineResistanceCrossbar,
        crossbar_params={'r_wl': 20, 'r_bl': 20, 'r_in':10, 'r_out':10, 'V_SOURCE_MODE':'|_|'},
        memristor_model_class=StaticMemristor,
        memristor_params={'frequency': 1e8, 'temperature': 273 + 40},
        m=m,
        r=1, # adjustable hyperparameter
    )
    bins = {}  # hash value -> indices of the vectors that produced it
    for i in range(len(vectors)):
        # Named `signature` (not `hash`) so the builtin hash() is not shadowed in
        # the notebook's global namespace.
        signature = naive_lsh.inference(vectors[i])
        bins.setdefault(signature, []).append(i)
    all_bins.extend(bins.values())
# Illustration of one repetition's bins: {010: [1, 5, 7], 111: [5, 6, 7]}

# Approximate nearest neighbours, step 2: search only within shared bins.
#
# At this point all_bins is a list like [[1, 2], [1, 3, 5], ...] where each element
# is a bin of vector indices that are likely to be close to each other. To find the
# nearest neighbour of a vector we only compare it against vectors that share a bin
# with it -- in this example vector 1 is checked against 2, 3 and 5.
max_bin_size = N / reps  # bins larger than this are ignored

# Index the usable bins by member once, instead of scanning every bin with
# `i in bin` for every vector (which costs O(N * total bin size) on its own).
# Bins are recorded in all_bins order, so candidates are visited in the same order
# as a direct scan and ties resolve identically.
bins_containing = {}
for bucket in all_bins:
    if len(bucket) > max_bin_size:
        continue
    for member in bucket:
        bins_containing.setdefault(member, []).append(bucket)

nearest_neighbors_approx = {}  # index -> closest index found, or None if no candidate
for i in range(len(vectors)):
    nearest_dist = float('inf')
    nearest_neighbors_approx[i] = None
    for bucket in bins_containing.get(i, ()):
        for j in bucket:
            if j != i:
                dist = np.linalg.norm(vectors[i] - vectors[j])
                if dist < nearest_dist:
                    nearest_dist = dist
                    nearest_neighbors_approx[i] = j
print(nearest_neighbors_approx)

{0: 286, 1: 912, 2: 650, 3: 126, 4: 934, 5: 254, 6: 371, 7: 834, 8: 734, 9: 207, 10: 201, 11: 610, 12: 697, 13: 363, 14: 483, 15: 890, 16: 566, 17: 763, 18: 928, 19: 767, 20: 136, 21: 588, 22: 488, 23: 874, 24: 33, 25: 526, 26: 840, 27: 579, 28: 989, 29: 380, 30: 282, 31: 497, 32: 438, 33: 163, 34: 720, 35: 569, 36: 757, 37: 914, 38: 652, 39: 765, 40: 25, 41: 48, 42: 808, 43: 850, 44: 483, 45: 846, 46: 132, 47: 894, 48: 41, 49: 748, 50: 681, 51: 480, 52: 534, 53: 528, 54: 138, 55: 201, 56: 51, 57: 665, 58: 570, 59: 744, 60: 301, 61: 518, 62: 126, 63: 657, 64: 360, 65: 335, 66: 682, 67: 705, 68: 640, 69: 162, 70: 138, 71: 473, 72: 392, 73: 548, 74: 830, 75: 307, 76: 362, 77: 779, 78: 921, 79: 367, 80: 641, 81: 188, 82: 895, 83: 999, 84: 602, 85: 337, 86: 444, 87: 436, 88: 128, 89: 910, 90: 865, 91: 970, 92: 470, 93: 218, 94: 200, 95: 287, 96: 909, 97: 134, 98: 43, 99: 635, 100: 385, 101: 111, 102: 111, 103: 700, 104: 169, 105: 688, 106: 365, 107: 528, 108: 195, 109: 855, 110: 638, 111: 

In [5]:
# Show the exact (brute-force) mapping followed by the approximate (LSH) mapping
# so the two can be compared by eye.
for neighbor_map in (nearest_neighbors, nearest_neighbors_approx):
    print(neighbor_map)

{0: 286, 1: 756, 2: 650, 3: 126, 4: 934, 5: 254, 6: 371, 7: 834, 8: 734, 9: 207, 10: 201, 11: 610, 12: 697, 13: 725, 14: 483, 15: 890, 16: 566, 17: 763, 18: 928, 19: 767, 20: 13, 21: 588, 22: 488, 23: 874, 24: 33, 25: 249, 26: 840, 27: 579, 28: 989, 29: 380, 30: 282, 31: 497, 32: 438, 33: 371, 34: 720, 35: 569, 36: 757, 37: 914, 38: 981, 39: 765, 40: 25, 41: 48, 42: 899, 43: 850, 44: 483, 45: 238, 46: 132, 47: 894, 48: 41, 49: 401, 50: 681, 51: 480, 52: 534, 53: 914, 54: 138, 55: 201, 56: 236, 57: 665, 58: 570, 59: 744, 60: 301, 61: 518, 62: 218, 63: 657, 64: 242, 65: 335, 66: 682, 67: 744, 68: 640, 69: 162, 70: 138, 71: 374, 72: 392, 73: 957, 74: 830, 75: 307, 76: 362, 77: 779, 78: 319, 79: 367, 80: 641, 81: 837, 82: 895, 83: 999, 84: 243, 85: 337, 86: 444, 87: 436, 88: 128, 89: 910, 90: 865, 91: 970, 92: 515, 93: 478, 94: 200, 95: 287, 96: 909, 97: 134, 98: 649, 99: 635, 100: 385, 101: 111, 102: 111, 103: 715, 104: 310, 105: 688, 106: 365, 107: 528, 108: 195, 109: 0, 110: 17, 111: 10

In [6]:
# Score the approximate search against the brute-force ground truth.
# First line printed: whether the two mappings agree for every single vector.
print(nearest_neighbors == nearest_neighbors_approx)

# count: vectors whose approximate neighbour is the true nearest neighbour.
count = sum(
    1 for i in range(N)
    if nearest_neighbors_approx[i] == nearest_neighbors[i]
)
# cnt: vectors whose approximate neighbour is among the true five nearest.
cnt = sum(
    1 for i in range(N)
    if nearest_neighbors_approx[i] in five_nearest_neighbors[i]
)

accuracy = count / N
top5_accuracy = cnt / N
print(accuracy)
print(top5_accuracy)

False
0.776
0.987


In [7]:
# Bin diagnostics: how many bins there are, their contents, and their sizes.
print(len(all_bins))
print(all_bins)
print([len(bucket) for bucket in all_bins])
# Sizes of the bins the approximate search actually uses. The cutoff is written as
# N / reps (not a literal 3) so it stays in sync with the search threshold when
# `reps` is changed.
print([len(bucket) for bucket in all_bins if len(bucket) <= N / reps])

67
[[0, 10, 55, 75, 79, 83, 91, 99, 127, 129, 154, 164, 168, 187, 194, 196, 197, 198, 201, 208, 213, 214, 220, 222, 231, 232, 239, 248, 250, 273, 277, 286, 290, 291, 303, 307, 309, 310, 323, 329, 367, 373, 374, 378, 381, 387, 407, 420, 432, 434, 467, 469, 474, 481, 487, 490, 496, 499, 507, 509, 511, 516, 521, 525, 529, 535, 537, 539, 541, 547, 562, 564, 573, 582, 594, 598, 615, 625, 627, 632, 642, 646, 655, 658, 659, 668, 670, 671, 701, 709, 714, 727, 735, 738, 739, 740, 747, 774, 778, 787, 819, 825, 828, 832, 837, 847, 851, 855, 859, 872, 873, 878, 888, 889, 898, 899, 900, 902, 904, 916, 918, 931, 940, 941, 950, 970, 973, 992, 999], [1, 23, 32, 34, 37, 44, 54, 68, 70, 86, 108, 114, 118, 138, 140, 152, 195, 212, 224, 228, 281, 301, 308, 317, 328, 338, 353, 354, 368, 371, 379, 438, 459, 479, 489, 495, 506, 515, 526, 568, 592, 593, 603, 617, 636, 672, 680, 689, 703, 704, 715, 716, 724, 741, 784, 812, 833, 874, 885, 906, 914, 923, 935, 955, 956, 964, 965, 971, 980, 984], [2, 51, 56, 71, 7

In [8]:
# TODO: when experimenting with a bigger dataset, come up with a metric to compare these two dicts.
# Also compare the runtimes: it is possible that the approximate search works so well only because something is wrong
# in the implementation, which would make its runtime effectively the same as the brute-force method.

In [9]:
# Running TODO notes for the experiment, kept as a bare string literal. Because it is
# the last expression in the cell, Jupyter echoes its value as the cell output.
# NOTE(review): a markdown cell would be a better home for these notes.
'''
things to check: 
- sizes of the bins, should not be big because otherwise there would be no runtime improvement -> get metrics for reduction of search space/runtime or space complexity
- varying the parameters like N, m, hash size, etc. (try bigger/more data)
- using lineres_memristive_vmm not naive_memristive_vmm
- experiment with varying non-idealities
- experiment 2, create visualizations
- write section 4 of the paper    (rn highest priority is finalizing the experiment and section 4 of the paper)
- is the change made in StaticMemristor fine?
'''

'\nthings to check: \n- sizes of the bins, should not be big because otherwise there would be no runtime improvement -> get metrics for reduction of search space/runtime or space complexity\n- varying the parameters like N, m, hash size, etc. (try bigger/more data)\n- using lineres_memristive_vmm not naive_memristive_vmm\n'