In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
# Experiment 1: build a set of N randomly generated (m, 1) column vectors.
import numpy as np

m = 10    # dimensionality of each vector
N = 1000  # number of vectors
SEED = 0  # fixed seed so that Restart & Run All regenerates the same data set

# Seed NumPy's global generator before drawing, otherwise every kernel restart
# produces different vectors and the accuracy figures cannot be reproduced.
np.random.seed(SEED)

# Alternative: uniform distribution between 0 and 1.
# vectors = [np.random.rand(m, 1) for _ in range(N)]
vectors = [np.random.randn(m, 1) for _ in range(N)]  # normal distribution, so negative values occur too

In [3]:
# Exact nearest neighbours of every vector by brute force.
#
# Each unordered pair (i, j) is measured once and stored in both directions,
# i.e. N(N-1)/2 distance computations. That is still O(N^2), but half the work
# of a plain double loop over all ordered pairs (i != j), and keeping every
# distance makes the top-5 lookup below straightforward.
pairwise_distances = {}  # e.g. {0: {1: 0.1, 2: 0.3, 3: 0.6}, 1: {0: 0.1, 2: 0.7, 3: 0.8}}
for i in range(len(vectors)):
    for j in range(i + 1, len(vectors)):
        dist = np.linalg.norm(vectors[i] - vectors[j])
        # setdefault creates the inner dict on first use; this avoids a bare
        # `except:` that would also hide unrelated errors.
        pairwise_distances.setdefault(i, {})[j] = dist
        pairwise_distances.setdefault(j, {})[i] = dist

# Keys are vector indices 0..N-1; values are the index of the closest vector
# (nearest_neighbors) and the indices of the five closest vectors, nearest first
# (five_nearest_neighbors).
nearest_neighbors = {}
five_nearest_neighbors = {}
for i, distances in pairwise_distances.items():
    nearest_neighbors[i] = min(distances, key=distances.get)
    five_nearest_neighbors[i] = sorted(distances, key=distances.get)[:5]
print(nearest_neighbors)
print(five_nearest_neighbors)

{0: 457, 1: 909, 2: 499, 3: 997, 4: 277, 5: 594, 6: 475, 7: 795, 8: 612, 9: 595, 10: 627, 11: 276, 12: 344, 13: 107, 14: 873, 15: 812, 16: 558, 17: 231, 18: 439, 19: 492, 20: 800, 21: 652, 22: 307, 23: 584, 24: 5, 25: 112, 26: 930, 27: 308, 28: 457, 29: 43, 30: 949, 31: 44, 32: 767, 33: 911, 34: 627, 35: 62, 36: 809, 37: 510, 38: 358, 39: 441, 40: 688, 41: 889, 42: 435, 43: 29, 44: 31, 45: 727, 46: 790, 47: 869, 48: 43, 49: 692, 50: 684, 51: 875, 52: 235, 53: 243, 54: 730, 55: 996, 56: 512, 57: 647, 58: 448, 59: 916, 60: 290, 61: 773, 62: 35, 63: 766, 64: 456, 65: 580, 66: 794, 67: 371, 68: 173, 69: 981, 70: 477, 71: 765, 72: 332, 73: 295, 74: 104, 75: 229, 76: 678, 77: 111, 78: 878, 79: 229, 80: 273, 81: 996, 82: 773, 83: 16, 84: 39, 85: 335, 86: 815, 87: 836, 88: 273, 89: 756, 90: 162, 91: 698, 92: 852, 93: 552, 94: 919, 95: 13, 96: 123, 97: 932, 98: 870, 99: 262, 100: 552, 101: 736, 102: 726, 103: 641, 104: 74, 105: 737, 106: 265, 107: 788, 108: 853, 109: 942, 110: 643, 111: 912, 11

In [4]:
# Approximate nearest neighbours of every vector using NaiveLSH.
from memristor.engine.model import NaiveLSH
from memristor.crossbar.model import LineResistanceCrossbar
from memristor.devices import StaticMemristor

reps = 3  # adjustable hyperparameter (repetitions of the hashing)
# (An earlier configuration of this cell used a single NaiveLSH with hash_size=10.)

# Hash every vector `reps` times, each time with a freshly constructed NaiveLSH,
# and collect the resulting bins from all repetitions in one flat list.
all_bins = []
for _ in range(reps):
    naive_lsh = NaiveLSH(
        hash_size=5,  # adjustable hyperparameter
        crossbar_class=LineResistanceCrossbar,
        crossbar_params={'r_wl': 20, 'r_bl': 20, 'r_in': 10, 'r_out': 10, 'V_SOURCE_MODE': '|_|'},
        memristor_model_class=StaticMemristor,
        memristor_params={'frequency': 1e8, 'temperature': 273 + 40},
        m=m,
        r=1,  # adjustable hyperparameter
    )
    bins = {}  # hash value -> indices of the vectors with that hash, e.g. {010: [1, 5, 7], 111: [5, 6, 7]}
    for i in range(len(vectors)):
        # Named hash_code (not `hash`) so the builtin is not shadowed in the kernel.
        hash_code = naive_lsh.inference(vectors[i])
        bins.setdefault(hash_code, []).append(i)
    all_bins.extend(bins.values())

# At this point all_bins is a list like [[1, 2], [1, 3, 5], ...] where each
# element is a bin of vector indices that are likely to be close to each other.
# To find the nearest neighbour of a vector we only compare it against vectors
# that share a bin with it; for vector 1 in the example that means 2, 3 and 5.

# Bins larger than this are ignored: searching them would give little saving
# over brute force.
max_bin_size = N / 3

# Inverted index: vector index -> the (small enough) bins that contain it, in
# all_bins order. This replaces an `i in bin` list scan over every bin for every
# vector, which on its own cost O(N * total bin size) and masked any speed-up.
bins_of_vector = {}
for bucket in all_bins:
    if len(bucket) > max_bin_size:
        continue
    for i in bucket:
        bins_of_vector.setdefault(i, []).append(bucket)

# Keys are vector indices; the value is the index of the closest candidate found
# in the shared bins, or None when the vector shares no usable bin with any
# other vector.
nearest_neighbors_approx = {}
for i in range(len(vectors)):
    nearest_dist = float('inf')
    nearest_neighbors_approx[i] = None
    for bucket in bins_of_vector.get(i, ()):
        for j in bucket:
            if j == i:
                continue
            dist = np.linalg.norm(vectors[i] - vectors[j])
            if dist < nearest_dist:
                nearest_dist = dist
                nearest_neighbors_approx[i] = j
print(nearest_neighbors_approx)

{0: 457, 1: 909, 2: 499, 3: 78, 4: 867, 5: 594, 6: 297, 7: 795, 8: 612, 9: 595, 10: 627, 11: 362, 12: 344, 13: 107, 14: 873, 15: 812, 16: 558, 17: 231, 18: 439, 19: 492, 20: 800, 21: 652, 22: 858, 23: 584, 24: 5, 25: 112, 26: 930, 27: 943, 28: 457, 29: 79, 30: 949, 31: 677, 32: 279, 33: 911, 34: 627, 35: 62, 36: 809, 37: 510, 38: 358, 39: 523, 40: 688, 41: 853, 42: 435, 43: 48, 44: 473, 45: 727, 46: 790, 47: 869, 48: 43, 49: 558, 50: 684, 51: 801, 52: 203, 53: 858, 54: 730, 55: 996, 56: 512, 57: 242, 58: 448, 59: 841, 60: 512, 61: 623, 62: 35, 63: 434, 64: 456, 65: 580, 66: 794, 67: 371, 68: 173, 69: 981, 70: 66, 71: 765, 72: 332, 73: 295, 74: 104, 75: 229, 76: 322, 77: 841, 78: 758, 79: 845, 80: 273, 81: 263, 82: 366, 83: 498, 84: 239, 85: 22, 86: 815, 87: 836, 88: 273, 89: 692, 90: 162, 91: 619, 92: 335, 93: 252, 94: 175, 95: 13, 96: 123, 97: 932, 98: 743, 99: 483, 100: 239, 101: 229, 102: 94, 103: 948, 104: 74, 105: 737, 106: 739, 107: 788, 108: 853, 109: 942, 110: 643, 111: 912, 11

In [5]:
# Show the exact and the approximate nearest-neighbour mappings, one per line.
print(nearest_neighbors, nearest_neighbors_approx, sep="\n")

{0: 457, 1: 909, 2: 499, 3: 997, 4: 277, 5: 594, 6: 475, 7: 795, 8: 612, 9: 595, 10: 627, 11: 276, 12: 344, 13: 107, 14: 873, 15: 812, 16: 558, 17: 231, 18: 439, 19: 492, 20: 800, 21: 652, 22: 307, 23: 584, 24: 5, 25: 112, 26: 930, 27: 308, 28: 457, 29: 43, 30: 949, 31: 44, 32: 767, 33: 911, 34: 627, 35: 62, 36: 809, 37: 510, 38: 358, 39: 441, 40: 688, 41: 889, 42: 435, 43: 29, 44: 31, 45: 727, 46: 790, 47: 869, 48: 43, 49: 692, 50: 684, 51: 875, 52: 235, 53: 243, 54: 730, 55: 996, 56: 512, 57: 647, 58: 448, 59: 916, 60: 290, 61: 773, 62: 35, 63: 766, 64: 456, 65: 580, 66: 794, 67: 371, 68: 173, 69: 981, 70: 477, 71: 765, 72: 332, 73: 295, 74: 104, 75: 229, 76: 678, 77: 111, 78: 878, 79: 229, 80: 273, 81: 996, 82: 773, 83: 16, 84: 39, 85: 335, 86: 815, 87: 836, 88: 273, 89: 756, 90: 162, 91: 698, 92: 852, 93: 552, 94: 919, 95: 13, 96: 123, 97: 932, 98: 870, 99: 262, 100: 552, 101: 736, 102: 726, 103: 641, 104: 74, 105: 737, 106: 265, 107: 788, 108: 853, 109: 942, 110: 643, 111: 912, 11

In [6]:
# Score the LSH result against the brute-force ground truth.
print(nearest_neighbors == nearest_neighbors_approx)  # True only if every single neighbour agrees

# count: vectors whose approximate neighbour is the true nearest neighbour.
count = sum(nearest_neighbors_approx[i] == nearest_neighbors[i] for i in range(N))
# cnt: vectors whose approximate neighbour is among the true five nearest.
cnt = sum(nearest_neighbors_approx[i] in five_nearest_neighbors[i] for i in range(N))

accuracy = count / N
top5_accuracy = cnt / N
print(accuracy)
print(top5_accuracy)

False
0.601
0.941


In [7]:
# Inspect the LSH bins: how many there are, their members, their sizes, and
# the sizes of those small enough (<= N/3) to be searched.
bin_sizes = [len(members) for members in all_bins]
print(len(all_bins))
print(all_bins)
print(bin_sizes)
print([size for size in bin_sizes if size <= N/3])

45
[[0, 42, 45, 56, 60, 74, 143, 206, 253, 257, 327, 334, 348, 389, 404, 434, 435, 438, 452, 454, 502, 506, 510, 512, 517, 563, 567, 572, 590, 608, 634, 644, 670, 678, 679, 714, 727, 790, 825, 833, 839, 879, 880, 902, 913, 960, 973], [1, 3, 4, 5, 7, 8, 10, 12, 13, 15, 16, 17, 18, 19, 21, 22, 23, 24, 27, 31, 32, 34, 35, 38, 39, 44, 46, 47, 49, 51, 52, 53, 54, 55, 57, 58, 59, 62, 64, 65, 69, 71, 72, 73, 75, 76, 77, 78, 80, 81, 83, 85, 86, 87, 89, 90, 91, 92, 93, 94, 95, 96, 98, 99, 100, 101, 102, 103, 104, 106, 107, 109, 110, 111, 114, 116, 117, 120, 122, 123, 124, 125, 126, 127, 128, 130, 132, 133, 134, 135, 138, 140, 141, 144, 146, 149, 151, 152, 154, 155, 156, 157, 158, 161, 162, 163, 164, 165, 166, 167, 170, 171, 172, 175, 176, 177, 179, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 195, 196, 197, 198, 199, 201, 202, 204, 205, 207, 208, 211, 212, 216, 218, 219, 223, 225, 228, 229, 230, 231, 233, 234, 238, 239, 240, 241, 242, 243, 246, 247, 248, 250, 251, 252, 254, 

In [8]:
# When experimenting with a bigger dataset, come up with a metric to compare these two dicts.
# Also compare the runtime complexities, because it is possible that this only works so well because of
# something wrong in the implementation which results in the runtime just being the same as the brute-force method.

In [9]:
# Open TODO list for this experiment. It is written as a bare string literal in
# the last position of the cell, so Jupyter echoes its repr as the cell output;
# it has no other effect on the notebook state.
'''
things to check: 
- sizes of the bins, should not be big because otherwise there would be no runtime improvement -> get metrics for reduction of search space/runtime or space complexity
- varying the parameters like N, m, hash size, etc. (try bigger/more data)
- using lineres_memristive_vmm not naive_memristive_vmm
- experiment with varying non-idealities
- experiment 2, create visualizations
- write section 4 of the paper    (rn highest priority is finalizing the experiment and section 4 of the paper)
- is the change made in StaticMemristor fine?
'''

'\nthings to check: \n- sizes of the bins, should not be big because otherwise there would be no runtime improvement -> get metrics for reduction of search space/runtime or space complexity\n- varying the parameters like N, m, hash size, etc. (try bigger/more data)\n- using lineres_memristive_vmm not naive_memristive_vmm\n'