In [1]:
# Enable IPython's autoreload extension in mode 2, so edits to imported project modules
# are picked up live without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# Experiment 1:
# First, create a set of N randomly generated (m, 1) vectors.
import numpy as np

m = 10     # dimensionality of each vector
N = 1000   # number of vectors in the data set
SEED = 0   # fixed seed so that Restart & Run All regenerates exactly the same vectors

# Seed NumPy's legacy global generator: np.random.randn below draws from it, so the
# vectors (and therefore every number derived from them) become reproducible.
np.random.seed(SEED)

# Standard-normal entries, so the vectors can contain negative values too
# (np.random.rand would instead give uniform values between 0 and 1).
vectors = [np.random.randn(m, 1) for _ in range(N)]

In [3]:
# Finding the exact nearest neighbor(s) of each vector by brute force.
#
# Every unordered pair (i, j) is visited once -- N(N-1)/2 distance evaluations instead of
# the N(N-1) of a double loop over all ordered pairs (same O(N^2) complexity) -- and the
# distance is stored under both indices, which also makes the top-5 neighbors easy to get.
pairwise_distances = {}  # e.g. {0: {1: 0.1, 2: 0.3, 3: 0.6}, 1: {0: 0.1, 2: 0.7, 3: 0.8}}
for i in range(len(vectors)):
    for j in range(i + 1, len(vectors)):
        dist = np.linalg.norm(vectors[i] - vectors[j])
        # setdefault creates the inner dict the first time an index is seen. A bare
        # `except:` around the assignment would also catch KeyboardInterrupt and any
        # unrelated error, and then overwrite the inner dict, losing stored distances.
        pairwise_distances.setdefault(i, {})[j] = dist
        pairwise_distances.setdefault(j, {})[i] = dist

nearest_neighbors = {}       # vector index -> index of its nearest neighbor
five_nearest_neighbors = {}  # vector index -> indices of its 5 nearest neighbors, closest first
for i, distances_from_i in pairwise_distances.items():
    nearest_neighbors[i] = min(distances_from_i, key=distances_from_i.get)
    # sorted() is stable, so ties keep the insertion order of the inner dict.
    five_nearest_neighbors[i] = sorted(distances_from_i, key=distances_from_i.get)[:5]
print (nearest_neighbors)
print (five_nearest_neighbors)

{0: 346, 1: 984, 2: 719, 3: 26, 4: 7, 5: 15, 6: 211, 7: 4, 8: 82, 9: 486, 10: 946, 11: 146, 12: 785, 13: 222, 14: 912, 15: 26, 16: 985, 17: 882, 18: 221, 19: 354, 20: 30, 21: 113, 22: 943, 23: 788, 24: 931, 25: 217, 26: 15, 27: 315, 28: 395, 29: 318, 30: 759, 31: 918, 32: 705, 33: 478, 34: 180, 35: 544, 36: 585, 37: 94, 38: 613, 39: 854, 40: 382, 41: 693, 42: 632, 43: 839, 44: 812, 45: 240, 46: 14, 47: 339, 48: 182, 49: 865, 50: 78, 51: 638, 52: 893, 53: 716, 54: 314, 55: 597, 56: 322, 57: 305, 58: 565, 59: 734, 60: 229, 61: 35, 62: 157, 63: 829, 64: 551, 65: 152, 66: 207, 67: 671, 68: 382, 69: 439, 70: 344, 71: 542, 72: 319, 73: 503, 74: 863, 75: 970, 76: 889, 77: 251, 78: 50, 79: 914, 80: 190, 81: 638, 82: 884, 83: 638, 84: 146, 85: 388, 86: 128, 87: 714, 88: 256, 89: 436, 90: 886, 91: 710, 92: 821, 93: 739, 94: 828, 95: 822, 96: 228, 97: 405, 98: 814, 99: 912, 100: 63, 101: 231, 102: 819, 103: 658, 104: 731, 105: 445, 106: 732, 107: 956, 108: 288, 109: 687, 110: 272, 111: 131, 112: 

In [8]:
# finding nearest neighbor of each vector using NaiveLSH:
from memristor.engine.model import NaiveLSH
from memristor.crossbar.model import LineResistanceCrossbar
from memristor.devices import StaticMemristor
# naive_lsh = NaiveLSH(
#     hash_size=10, # adjustable hyperparameter
#     crossbar_class=LineResistanceCrossbar,
#     crossbar_params={'r_wl': 20, 'r_bl': 20, 'r_in':10, 'r_out':10, 'V_SOURCE_MODE':'|_|'},
#     memristor_model_class=StaticMemristor,
#     memristor_params={'frequency': 1e8, 'temperature': 273 + 40},
#     m=m,
#     r=1, # adjustable hyperparameter
# )
reps = 3 # adjustable hyperparameter (repetitions of the hashing)
# Bins holding more than this many vectors are ignored during the neighbor search
# (presumably because scanning them would give little saving over brute force -- confirm).
max_bin_size = N/3

# Hash every vector `reps` times, each time with a freshly constructed NaiveLSH, and
# collect the resulting bins (lists of vector indices that share a hash) in all_bins.
all_bins = []
for _ in range(reps):
    naive_lsh = NaiveLSH(
        hash_size=5, # adjustable hyperparameter
        crossbar_class=LineResistanceCrossbar,
        crossbar_params={'r_wl': 20, 'r_bl': 20, 'r_in':10, 'r_out':10, 'V_SOURCE_MODE':'|_|'},
        memristor_model_class=StaticMemristor,
        memristor_params={'frequency': 1e8, 'temperature': 273 + 40},
        m=m,
        r=1, # adjustable hyperparameter
    )
    bins = {}  # hash value -> indices of the vectors hashed to it, e.g. {010: [1, 5, 7], 111: [2, 6]}
    for i in range(len(vectors)):
        # Named hash_code (not `hash`) so the builtin is not shadowed for later cells.
        hash_code = naive_lsh.inference(vectors[i])
        bins.setdefault(hash_code, []).append(i)
    all_bins.extend(bins.values())

# At this point all_bins is a list like [[1, 2], [1, 3, 5], ...] where each element is a
# bin containing indices of vectors that are likely to be close to each other. To find the
# nearest neighbor of a vector we only check the vectors that share a bin with it, so in
# this example for vector 1 we would check 2, 3 and 5.
#
# Inverted index: vector index -> the (small enough) bins containing it, in all_bins order.
# Built in one pass, so the search below does not rescan every bin for every vector.
candidate_bins = {i: [] for i in range(len(vectors))}
for members in all_bins:
    if len(members) > max_bin_size:
        continue
    for i in members:
        candidate_bins[i].append(members)

nearest_neighbors_approx = {}  # vector index -> approximate nearest neighbor (None if no candidate)
for i in range(len(vectors)):
    nearest_dist = float('inf')
    nearest_neighbors_approx[i] = None
    for members in candidate_bins[i]:
        for j in members:
            if j != i:
                dist = np.linalg.norm(vectors[i] - vectors[j])
                if dist < nearest_dist:
                    nearest_dist = dist
                    nearest_neighbors_approx[i] = j
print (nearest_neighbors_approx)

{0: 346, 1: 717, 2: 218, 3: 958, 4: 7, 5: 15, 6: 211, 7: 4, 8: 366, 9: 486, 10: 776, 11: 146, 12: 301, 13: 222, 14: 912, 15: 26, 16: 985, 17: 564, 18: 221, 19: 354, 20: 30, 21: 113, 22: 943, 23: 788, 24: 931, 25: 217, 26: 15, 27: 315, 28: 395, 29: 318, 30: 759, 31: 918, 32: 705, 33: 478, 34: 180, 35: 544, 36: 585, 37: 94, 38: 613, 39: 854, 40: 448, 41: 882, 42: 632, 43: 839, 44: 812, 45: 240, 46: 14, 47: 763, 48: 182, 49: 865, 50: 968, 51: 638, 52: 893, 53: 716, 54: 314, 55: 597, 56: 982, 57: 263, 58: 565, 59: 41, 60: 229, 61: 35, 62: 157, 63: 100, 64: 551, 65: 152, 66: 207, 67: 671, 68: 382, 69: 733, 70: 344, 71: 542, 72: 143, 73: 503, 74: 863, 75: 970, 76: 377, 77: 251, 78: 981, 79: 914, 80: 126, 81: 622, 82: 959, 83: 638, 84: 146, 85: 490, 86: 128, 87: 227, 88: 256, 89: 213, 90: 803, 91: 603, 92: 821, 93: 739, 94: 828, 95: 822, 96: 228, 97: 405, 98: 480, 99: 265, 100: 63, 101: 673, 102: 279, 103: 658, 104: 731, 105: 553, 106: 564, 107: 956, 108: 288, 109: 687, 110: 775, 111: 173, 11

In [9]:
# Show the exact neighbor map followed by the LSH-approximated one for a side-by-side look.
for neighbor_map in (nearest_neighbors, nearest_neighbors_approx):
    print(neighbor_map)

{0: 346, 1: 984, 2: 719, 3: 26, 4: 7, 5: 15, 6: 211, 7: 4, 8: 82, 9: 486, 10: 946, 11: 146, 12: 785, 13: 222, 14: 912, 15: 26, 16: 985, 17: 882, 18: 221, 19: 354, 20: 30, 21: 113, 22: 943, 23: 788, 24: 931, 25: 217, 26: 15, 27: 315, 28: 395, 29: 318, 30: 759, 31: 918, 32: 705, 33: 478, 34: 180, 35: 544, 36: 585, 37: 94, 38: 613, 39: 854, 40: 382, 41: 693, 42: 632, 43: 839, 44: 812, 45: 240, 46: 14, 47: 339, 48: 182, 49: 865, 50: 78, 51: 638, 52: 893, 53: 716, 54: 314, 55: 597, 56: 322, 57: 305, 58: 565, 59: 734, 60: 229, 61: 35, 62: 157, 63: 829, 64: 551, 65: 152, 66: 207, 67: 671, 68: 382, 69: 439, 70: 344, 71: 542, 72: 319, 73: 503, 74: 863, 75: 970, 76: 889, 77: 251, 78: 50, 79: 914, 80: 190, 81: 638, 82: 884, 83: 638, 84: 146, 85: 388, 86: 128, 87: 714, 88: 256, 89: 436, 90: 886, 91: 710, 92: 821, 93: 739, 94: 828, 95: 822, 96: 228, 97: 405, 98: 814, 99: 912, 100: 63, 101: 231, 102: 819, 103: 658, 104: 731, 105: 445, 106: 732, 107: 956, 108: 288, 109: 687, 110: 272, 111: 131, 112: 

In [10]:
# Are the exact and the approximate neighbor maps identical?
print(nearest_neighbors == nearest_neighbors_approx)

# count: vectors whose approximate neighbor is exactly the true nearest neighbor.
count = sum(
    1 for idx in range(N)
    if nearest_neighbors_approx[idx] == nearest_neighbors[idx]
)
# cnt: vectors whose approximate neighbor is among the true five nearest neighbors.
cnt = sum(
    1 for idx in range(N)
    if nearest_neighbors_approx[idx] in five_nearest_neighbors[idx]
)

accuracy = count / N
top5_accuracy = cnt / N
print(accuracy)
print(top5_accuracy)

False
0.712
0.968


In [12]:
# Bin diagnostics: number of bins, their contents, their sizes, and the sizes of the
# bins that are small enough (at most N/3 members) to be used in the neighbor search.
bin_sizes = [len(members) for members in all_bins]
print(len(all_bins))
print(all_bins)
print(bin_sizes)
print([size for size in bin_sizes if size <= N/3])

45
[[0, 37, 45, 96, 133, 137, 184, 197, 233, 292, 326, 370, 372, 405, 470, 509, 571, 609, 629, 640, 652, 665, 668, 674, 688, 706, 752, 808, 850, 878, 887, 892, 900, 964], [1, 3, 7, 8, 11, 13, 14, 17, 18, 21, 25, 26, 29, 31, 36, 41, 42, 46, 47, 48, 50, 55, 57, 58, 59, 60, 62, 63, 66, 69, 70, 72, 75, 77, 79, 80, 82, 84, 85, 86, 87, 89, 90, 95, 97, 98, 99, 101, 107, 108, 111, 113, 118, 121, 123, 125, 126, 128, 130, 131, 143, 146, 147, 149, 156, 158, 160, 165, 169, 170, 174, 175, 176, 177, 180, 182, 183, 185, 186, 189, 190, 191, 194, 195, 198, 199, 200, 201, 203, 204, 207, 210, 212, 214, 217, 219, 220, 221, 222, 224, 225, 226, 227, 229, 231, 234, 235, 237, 238, 239, 241, 244, 249, 253, 254, 256, 257, 258, 263, 264, 265, 266, 269, 270, 272, 273, 274, 276, 277, 278, 282, 287, 288, 290, 291, 294, 296, 298, 302, 303, 306, 307, 308, 311, 312, 317, 318, 319, 321, 323, 327, 328, 329, 330, 331, 332, 336, 338, 339, 342, 344, 350, 353, 357, 358, 359, 360, 361, 362, 364, 365, 367, 373, 379, 380, 387,

In [8]:
# When experimenting with a bigger dataset, come up with a metric to compare these two dicts.
# Also compare the runtimes: it is possible that the approach works so well only because something is wrong in the implementation,
# which would make its runtime the same as that of the brute-force method.

In [9]:
# Notes cell: a bare string literal holding the open to-do items; as the last expression
# of the cell its value is echoed as the cell output.
'''
things to check: 
- sizes of the bins, should not be big because otherwise there would be no runtime improvement -> get metrics for reduction of search space/runtime or space complexity
- varying the parameters like N, m, hash size, etc. (try bigger/more data)
- using lineres_memristive_vmm not naive_memristive_vmm
- experiment with varying non-idealities
- experiment 2, create visualizations
- write section 4 of the paper    (rn highest priority is finalizing the experiment and section 4 of the paper)
- is the change made in StaticMemristor fine?
'''

'\nthings to check: \n- sizes of the bins, should not be big because otherwise there would be no runtime improvement -> get metrics for reduction of search space/runtime or space complexity\n- varying the parameters like N, m, hash size, etc. (try bigger/more data)\n- using lineres_memristive_vmm not naive_memristive_vmm\n'