In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are reloaded
# before each cell executes, so edits to project .py files take effect without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# Experiment 1: build a set of N randomly generated (m, 1) column vectors.
import numpy as np

m = 10     # dimensionality of each vector
N = 1000   # number of vectors in the data set
SEED = 0   # fixed seed so a fresh "Restart & Run All" regenerates the same vectors

# Seed NumPy's global generator before sampling; without this every run draws a
# different data set and the accuracy figures further down cannot be reproduced.
np.random.seed(SEED)

# Standard-normal entries (rather than uniform on [0, 1)) so that components can
# be negative as well as positive.
vectors = [np.random.randn(m, 1) for _ in range(N)]

In [3]:
# Exact nearest neighbours by brute force (ground truth for the LSH comparison).
# Each unordered pair (i, j) is measured once and stored in both directions.
# This is still O(N^2) distance evaluations, but half as many as looping over
# every ordered pair, and it keeps all distances so the top-k neighbours are
# easy to read off afterwards.
TOP_K = 5  # number of closest neighbours kept per vector for the top-k check

# pairwise_distances[i][j] is the Euclidean distance between vectors i and j,
# e.g. {0: {1: 0.1, 2: 0.3, 3: 0.6}, 1: {0: 0.1, 2: 0.7, 3: 0.8}}
pairwise_distances = {}
for i in range(len(vectors)):
    for j in range(i + 1, len(vectors)):
        dist = np.linalg.norm(vectors[i] - vectors[j])
        # setdefault creates the inner dict on first use, so no try/except is
        # needed and unrelated errors are never silently swallowed.
        pairwise_distances.setdefault(i, {})[j] = dist
        pairwise_distances.setdefault(j, {})[i] = dist

# nearest_neighbors[i]      -> index of the single closest vector to i
# five_nearest_neighbors[i] -> indices of the TOP_K closest vectors, nearest first
nearest_neighbors = {}
five_nearest_neighbors = {}
for i, neighbor_dists in pairwise_distances.items():
    # One stable sort of the neighbour indices by distance gives both results;
    # ties keep insertion order (ascending index), matching min() on the dict.
    ranked = sorted(neighbor_dists, key=neighbor_dists.get)
    nearest_neighbors[i] = ranked[0]
    five_nearest_neighbors[i] = ranked[:TOP_K]
print(nearest_neighbors)
print(five_nearest_neighbors)

{0: 356, 1: 304, 2: 210, 3: 5, 4: 201, 5: 570, 6: 607, 7: 744, 8: 429, 9: 831, 10: 52, 11: 115, 12: 450, 13: 737, 14: 260, 15: 909, 16: 210, 17: 400, 18: 541, 19: 535, 20: 932, 21: 328, 22: 917, 23: 505, 24: 793, 25: 923, 26: 787, 27: 782, 28: 153, 29: 60, 30: 434, 31: 605, 32: 631, 33: 233, 34: 548, 35: 222, 36: 670, 37: 802, 38: 221, 39: 44, 40: 655, 41: 400, 42: 797, 43: 77, 44: 39, 45: 718, 46: 141, 47: 91, 48: 394, 49: 36, 50: 196, 51: 480, 52: 10, 53: 888, 54: 58, 55: 692, 56: 635, 57: 498, 58: 651, 59: 425, 60: 151, 61: 51, 62: 577, 63: 427, 64: 601, 65: 460, 66: 626, 67: 456, 68: 395, 69: 224, 70: 705, 71: 147, 72: 990, 73: 845, 74: 458, 75: 426, 76: 570, 77: 251, 78: 972, 79: 393, 80: 259, 81: 137, 82: 402, 83: 381, 84: 5, 85: 90, 86: 497, 87: 170, 88: 442, 89: 72, 90: 776, 91: 245, 92: 495, 93: 218, 94: 895, 95: 772, 96: 776, 97: 72, 98: 99, 99: 98, 100: 172, 101: 337, 102: 62, 103: 235, 104: 652, 105: 845, 106: 889, 107: 396, 108: 600, 109: 160, 110: 259, 111: 752, 112: 760,

In [4]:
# Approximate nearest neighbour of each vector using NaiveLSH.
from memristor.engine.model import NaiveLSH
from memristor.crossbar.model import LineResistanceCrossbar
from memristor.devices import StaticMemristor

# (An earlier variant of this cell built a single NaiveLSH with hash_size=10;
# the loop below instead builds `reps` independent instances with hash_size=5.)
reps = 3  # adjustable hyperparameter (repetitions of the hashing)

# all_bins collects the bins from every repetition, e.g. [[1, 2], [1, 3, 5], ...];
# each bin lists the indices of the vectors that received the same hash.
all_bins = []
for _ in range(reps):
    naive_lsh = NaiveLSH(
        hash_size=5,  # adjustable hyperparameter
        crossbar_class=LineResistanceCrossbar,
        crossbar_params={'r_wl': 20, 'r_bl': 20, 'r_in':10, 'r_out':10, 'V_SOURCE_MODE':'|_|'},
        memristor_model_class=StaticMemristor,
        memristor_params={'frequency': 1e8, 'temperature': 273 + 40},
        m=m,
        r=1,  # adjustable hyperparameter
    )
    # hash value -> indices of the vectors that produced it, e.g. {010: [1, 5, 7], 111: [5, 6, 7]}
    # NOTE(review): assumes inference() returns a hashable value -- confirm against NaiveLSH.
    bins = {}
    for i in range(len(vectors)):
        hash_code = naive_lsh.inference(vectors[i])  # named hash_code to avoid shadowing built-in hash()
        bins.setdefault(hash_code, []).append(i)
    all_bins.extend(bins.values())

# Bins with more than N/reps members are ignored in the search below
# (presumably because such large bins would give little reduction of the search space).
max_bin_size = N / reps

# Index the usable bins by member once, so that looking up the bins of vector i
# is a dict access instead of an `i in bin` scan over every bin (which costs as
# much as the brute-force search it is supposed to replace). Bin order and
# member order are preserved, so the neighbour found for each vector is unchanged.
bins_of_vector = {}
for members in all_bins:
    if len(members) > max_bin_size:
        continue
    for idx in members:
        bins_of_vector.setdefault(idx, []).append(members)

# For each vector, only the vectors sharing at least one bin with it are
# candidates; e.g. with the bins above, vector 1 is compared against 2, 3 and 5.
# A vector with no usable bin keeps None as its neighbour.
nearest_neighbors_approx = {}
for i in range(len(vectors)):
    nearest_dist = float('inf')
    nearest_neighbors_approx[i] = None
    for members in bins_of_vector.get(i, ()):
        for j in members:
            if j != i:
                dist = np.linalg.norm(vectors[i] - vectors[j])
                if dist < nearest_dist:
                    nearest_dist = dist
                    nearest_neighbors_approx[i] = j
print(nearest_neighbors_approx)

{0: 356, 1: 304, 2: 292, 3: 570, 4: 201, 5: 838, 6: 607, 7: 744, 8: 429, 9: 692, 10: 52, 11: 115, 12: 450, 13: 737, 14: 260, 15: 440, 16: 415, 17: 400, 18: 541, 19: 535, 20: 932, 21: 328, 22: 365, 23: 505, 24: 61, 25: 923, 26: 787, 27: 782, 28: 153, 29: 449, 30: 434, 31: 401, 32: 631, 33: 931, 34: 548, 35: 222, 36: 670, 37: 802, 38: 221, 39: 44, 40: 961, 41: 400, 42: 964, 43: 77, 44: 39, 45: 718, 46: 141, 47: 91, 48: 394, 49: 36, 50: 196, 51: 480, 52: 10, 53: 888, 54: 58, 55: 692, 56: 373, 57: 498, 58: 651, 59: 425, 60: 151, 61: 51, 62: 577, 63: 427, 64: 601, 65: 460, 66: 626, 67: 456, 68: 395, 69: 224, 70: 705, 71: 147, 72: 97, 73: 845, 74: 898, 75: 426, 76: 570, 77: 251, 78: 972, 79: 393, 80: 780, 81: 495, 82: 402, 83: 381, 84: 570, 85: 90, 86: 497, 87: 210, 88: 442, 89: 72, 90: 776, 91: 763, 92: 495, 93: 731, 94: 895, 95: 362, 96: 776, 97: 72, 98: 99, 99: 98, 100: 172, 101: 337, 102: 62, 103: 235, 104: 652, 105: 845, 106: 889, 107: 396, 108: 600, 109: 160, 110: 259, 111: 752, 112: 7

In [5]:
# Show the exact and the approximate neighbour maps one after the other.
print(nearest_neighbors, nearest_neighbors_approx, sep="\n")

{0: 356, 1: 304, 2: 210, 3: 5, 4: 201, 5: 570, 6: 607, 7: 744, 8: 429, 9: 831, 10: 52, 11: 115, 12: 450, 13: 737, 14: 260, 15: 909, 16: 210, 17: 400, 18: 541, 19: 535, 20: 932, 21: 328, 22: 917, 23: 505, 24: 793, 25: 923, 26: 787, 27: 782, 28: 153, 29: 60, 30: 434, 31: 605, 32: 631, 33: 233, 34: 548, 35: 222, 36: 670, 37: 802, 38: 221, 39: 44, 40: 655, 41: 400, 42: 797, 43: 77, 44: 39, 45: 718, 46: 141, 47: 91, 48: 394, 49: 36, 50: 196, 51: 480, 52: 10, 53: 888, 54: 58, 55: 692, 56: 635, 57: 498, 58: 651, 59: 425, 60: 151, 61: 51, 62: 577, 63: 427, 64: 601, 65: 460, 66: 626, 67: 456, 68: 395, 69: 224, 70: 705, 71: 147, 72: 990, 73: 845, 74: 458, 75: 426, 76: 570, 77: 251, 78: 972, 79: 393, 80: 259, 81: 137, 82: 402, 83: 381, 84: 5, 85: 90, 86: 497, 87: 170, 88: 442, 89: 72, 90: 776, 91: 245, 92: 495, 93: 218, 94: 895, 95: 772, 96: 776, 97: 72, 98: 99, 99: 98, 100: 172, 101: 337, 102: 62, 103: 235, 104: 652, 105: 845, 106: 889, 107: 396, 108: 600, 109: 160, 110: 259, 111: 752, 112: 760,

In [6]:
# Compare the approximate neighbours against the brute-force ground truth.
print(nearest_neighbors == nearest_neighbors_approx)

# count: vectors whose approximate neighbour is the exact nearest neighbour.
count = sum(
    1 for i in range(N)
    if nearest_neighbors_approx[i] == nearest_neighbors[i]
)
# cnt: vectors whose approximate neighbour is among the five exact nearest ones.
cnt = sum(
    1 for i in range(N)
    if nearest_neighbors_approx[i] in five_nearest_neighbors[i]
)

accuracy = count / N
top5_accuracy = cnt / N
print(accuracy)
print(top5_accuracy)

False
0.777
0.99


In [7]:
# Inspect the LSH bins: how many there are, their contents, and their sizes.
print(len(all_bins))
print(all_bins)
print([len(members) for members in all_bins])
# Sizes of the bins that pass the N/reps size cutoff applied in the neighbour
# search; uses `reps` rather than a literal 3 so it stays in sync with that cutoff.
print([len(members) for members in all_bins if len(members) <= N / reps])

56
[[0, 7, 10, 12, 17, 19, 21, 24, 26, 30, 36, 37, 48, 49, 50, 52, 61, 66, 67, 71, 72, 89, 96, 97, 98, 99, 106, 107, 109, 111, 112, 116, 118, 121, 126, 131, 132, 135, 142, 143, 147, 156, 157, 160, 162, 163, 164, 167, 168, 179, 180, 182, 183, 184, 186, 187, 190, 193, 196, 197, 203, 206, 208, 209, 212, 213, 215, 220, 233, 239, 240, 248, 264, 266, 267, 275, 276, 278, 296, 301, 305, 313, 318, 319, 323, 325, 326, 328, 330, 331, 333, 340, 341, 342, 347, 355, 359, 363, 368, 374, 376, 382, 386, 387, 388, 394, 396, 401, 403, 412, 434, 436, 450, 451, 452, 453, 466, 473, 476, 478, 479, 483, 489, 496, 500, 502, 503, 504, 514, 515, 520, 521, 523, 540, 544, 557, 561, 569, 571, 572, 580, 582, 589, 602, 605, 606, 617, 624, 625, 626, 628, 633, 634, 637, 640, 642, 643, 644, 645, 650, 654, 658, 663, 666, 667, 670, 672, 674, 675, 676, 677, 682, 683, 685, 686, 688, 693, 704, 713, 714, 719, 724, 741, 744, 747, 752, 759, 760, 766, 767, 769, 773, 775, 776, 777, 779, 787, 789, 790, 791, 792, 801, 802, 811, 812

In [8]:
# When experimenting with a bigger dataset, come up with a metric to compare these two dicts.
# Also compare the runtime complexities, because it is possible that this is working so well only because something is wrong
# in the implementation, which would make the runtime just the same as the brute-force method.

In [9]:
'''
Things to check:
- sizes of the bins: they should not be big, because otherwise there would be no runtime improvement -> get metrics for the reduction of search space/runtime or space complexity
- varying the parameters like N, m, hash size, etc. (try bigger/more data)
- using lineres_memristive_vmm instead of naive_memristive_vmm
- experimenting with varying non-idealities
- experiment 2: create visualizations
- writing section 4 of the paper (the highest priority right now is finalizing the experiment and section 4 of the paper)
- is the change made in StaticMemristor fine?
'''

'\nthings to check: \n- sizes of the bins, should not be big because otherwise there would be no runtime improvement -> get metrics for reduction of search space/runtime or space complexity\n- varying the parameters like N, m, hash size, etc. (try bigger/more data)\n- using lineres_memristive_vmm not naive_memristive_vmm\n'