In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
# Experiment 1:
# Create a set of N randomly generated (m, 1) column vectors.
import numpy as np

m = 10     # dimensionality of each vector
N = 1000   # number of vectors
SEED = 0   # fixed seed so the dataset is identical on every Restart & Run All

# A dedicated Generator keeps this data independent of the global np.random state.
rng = np.random.default_rng(SEED)

# Alternative: uniform distribution between 0 and 1
# vectors = [rng.random((m, 1)) for _ in range(N)]
# Standard normal distribution, so the components can be negative too.
vectors = [rng.standard_normal((m, 1)) for _ in range(N)]

In [3]:
# Exact nearest neighbours by brute force.
# Every unordered pair (i, j) is measured once and stored symmetrically. This is still
# O(N^2) distance evaluations (N(N-1)/2 of them), but keeping all distances makes it easy
# to read off both the single nearest neighbour and the five nearest neighbours.
pairwise_distances = {}  # e.g. {0: {1: 0.1, 2: 0.3, 3: 0.6}, 1: {0: 0.1, 2: 0.7, 3: 0.8}}
for i in range(len(vectors)):
    for j in range(i + 1, len(vectors)):
        dist = np.linalg.norm(vectors[i] - vectors[j])
        # setdefault creates the inner dict on first use, with no try/except
        # that could mask an unrelated error.
        pairwise_distances.setdefault(i, {})[j] = dist
        pairwise_distances.setdefault(j, {})[i] = dist

# nearest_neighbors:      vector index -> index of its closest other vector
# five_nearest_neighbors: vector index -> indices of its five closest vectors, closest first
nearest_neighbors = {}
five_nearest_neighbors = {}
for i, distances in pairwise_distances.items():
    # sorted() is stable, so ties keep insertion order, which is the same neighbour min() would pick.
    ranked = sorted(distances, key=distances.get)
    nearest_neighbors[i] = ranked[0]
    five_nearest_neighbors[i] = ranked[:5]
print (nearest_neighbors)
print (five_nearest_neighbors)

{0: 624, 1: 711, 2: 1, 3: 27, 4: 857, 5: 727, 6: 68, 7: 304, 8: 418, 9: 367, 10: 504, 11: 479, 12: 437, 13: 328, 14: 555, 15: 770, 16: 700, 17: 790, 18: 806, 19: 668, 20: 330, 21: 763, 22: 433, 23: 750, 24: 445, 25: 688, 26: 375, 27: 3, 28: 668, 29: 809, 30: 187, 31: 483, 32: 681, 33: 534, 34: 909, 35: 996, 36: 944, 37: 86, 38: 202, 39: 354, 40: 300, 41: 265, 42: 480, 43: 970, 44: 647, 45: 668, 46: 329, 47: 523, 48: 432, 49: 188, 50: 856, 51: 405, 52: 841, 53: 709, 54: 840, 55: 856, 56: 753, 57: 593, 58: 709, 59: 37, 60: 666, 61: 81, 62: 252, 63: 359, 64: 807, 65: 523, 66: 775, 67: 749, 68: 6, 69: 594, 70: 982, 71: 247, 72: 227, 73: 131, 74: 185, 75: 660, 76: 879, 77: 326, 78: 815, 79: 940, 80: 257, 81: 61, 82: 471, 83: 796, 84: 126, 85: 238, 86: 860, 87: 792, 88: 240, 89: 799, 90: 406, 91: 846, 92: 318, 93: 284, 94: 487, 95: 758, 96: 321, 97: 418, 98: 469, 99: 500, 100: 205, 101: 208, 102: 880, 103: 242, 104: 958, 105: 520, 106: 520, 107: 517, 108: 620, 109: 217, 110: 540, 111: 384, 1

In [4]:
# finding nearest neighbor of each vector using NaiveLSH:
from memristor.engine.model import NaiveLSH
from memristor.crossbar.model import LineResistanceCrossbar
from memristor.devices import StaticMemristor
# naive_lsh = NaiveLSH(
#     hash_size=10, # adjustable hyperparameter
#     crossbar_class=LineResistanceCrossbar,
#     crossbar_params={'r_wl': 20, 'r_bl': 20, 'r_in':10, 'r_out':10, 'V_SOURCE_MODE':'|_|'},
#     memristor_model_class=StaticMemristor,
#     memristor_params={'frequency': 1e8, 'temperature': 273 + 40},
#     m=m,
#     r=1, # adjustable hyperparameter
# )
reps = 3 # adjustable hyperparameter (repetitions of the hashing)
all_bins = []
for _ in range(reps):
    # A new NaiveLSH is constructed for every repetition
    # (presumably so each repetition hashes differently; confirm in NaiveLSH).
    naive_lsh = NaiveLSH(
        hash_size=3, # adjustable hyperparameter
        crossbar_class=LineResistanceCrossbar,
        crossbar_params={'r_wl': 20, 'r_bl': 20, 'r_in':10, 'r_out':10, 'V_SOURCE_MODE':'|_|'},
        memristor_model_class=StaticMemristor,
        memristor_params={'frequency': 1e8, 'temperature': 273 + 40},
        m=m,
        r=1, # adjustable hyperparameter
    )
    # hash value -> indices of the vectors that produced it, e.g. {010: [1, 5, 7], 111: [5, 6, 7]}
    bins = {}
    for i in range(len(vectors)):
        # named hash_value so the builtin hash() is not shadowed
        hash_value = naive_lsh.inference(vectors[i])
        bins.setdefault(hash_value, []).append(i)
    all_bins.extend(bins.values())

# At this point all_bins is a list like [[1, 2], [1, 3, 5], ...] where each element is a bin
# holding the indices of vectors that are likely to be close to each other. To find the
# approximate nearest neighbour of a vector we only compare it with the vectors that share a
# bin with it; in the example, vector 1 would be checked against 2, 3 and 5.

# Bins holding more than a third of the dataset are skipped.
max_bin_size = N / 3

# Index the usable bins by member once, so the search below does not have to scan every
# bin for every vector (a list membership test per bin costs O(reps * N^2) overall).
# The bins keep their all_bins order, so tie-breaking is the same as a direct scan.
bins_by_vector = {}
for members in all_bins:
    if len(members) > max_bin_size:
        continue
    for idx in members:
        bins_by_vector.setdefault(idx, []).append(members)

nearest_neighbors_approx = {}
for i in range(len(vectors)):
    nearest_dist = float('inf')
    # Stays None when vector i shares no usable bin with any other vector.
    nearest_neighbors_approx[i] = None
    for members in bins_by_vector.get(i, ()):
        for j in members:
            if j != i:
                dist = np.linalg.norm(vectors[i] - vectors[j])
                if dist < nearest_dist:
                    nearest_dist = dist
                    nearest_neighbors_approx[i] = j
print (nearest_neighbors_approx)

{0: 624, 1: 711, 2: None, 3: None, 4: None, 5: 670, 6: None, 7: 304, 8: None, 9: 367, 10: None, 11: 479, 12: 437, 13: 550, 14: None, 15: None, 16: 700, 17: 965, 18: 806, 19: 784, 20: 787, 21: 763, 22: 657, 23: 750, 24: 708, 25: None, 26: 375, 27: 888, 28: 19, 29: 809, 30: 187, 31: 483, 32: 681, 33: 534, 34: 909, 35: None, 36: None, 37: 86, 38: None, 39: 551, 40: 300, 41: 265, 42: 480, 43: 970, 44: None, 45: 876, 46: None, 47: 523, 48: 432, 49: None, 50: 856, 51: 405, 52: 860, 53: 758, 54: None, 55: 856, 56: 753, 57: None, 58: None, 59: None, 60: 666, 61: 81, 62: 252, 63: 359, 64: None, 65: 523, 66: 775, 67: 749, 68: None, 69: None, 70: None, 71: None, 72: None, 73: 622, 74: None, 75: 660, 76: 107, 77: 326, 78: None, 79: None, 80: 970, 81: 61, 82: 471, 83: 154, 84: 126, 85: 271, 86: 860, 87: None, 88: None, 89: None, 90: 406, 91: None, 92: 318, 93: None, 94: 487, 95: 758, 96: 321, 97: 735, 98: None, 99: 500, 100: 205, 101: None, 102: 929, 103: 62, 104: 958, 105: 520, 106: None, 107: 517

In [5]:
# Show the exact mapping first, then the LSH approximation, for side-by-side comparison.
for neighbor_map in (nearest_neighbors, nearest_neighbors_approx):
    print(neighbor_map)

{0: 624, 1: 711, 2: 1, 3: 27, 4: 857, 5: 727, 6: 68, 7: 304, 8: 418, 9: 367, 10: 504, 11: 479, 12: 437, 13: 328, 14: 555, 15: 770, 16: 700, 17: 790, 18: 806, 19: 668, 20: 330, 21: 763, 22: 433, 23: 750, 24: 445, 25: 688, 26: 375, 27: 3, 28: 668, 29: 809, 30: 187, 31: 483, 32: 681, 33: 534, 34: 909, 35: 996, 36: 944, 37: 86, 38: 202, 39: 354, 40: 300, 41: 265, 42: 480, 43: 970, 44: 647, 45: 668, 46: 329, 47: 523, 48: 432, 49: 188, 50: 856, 51: 405, 52: 841, 53: 709, 54: 840, 55: 856, 56: 753, 57: 593, 58: 709, 59: 37, 60: 666, 61: 81, 62: 252, 63: 359, 64: 807, 65: 523, 66: 775, 67: 749, 68: 6, 69: 594, 70: 982, 71: 247, 72: 227, 73: 131, 74: 185, 75: 660, 76: 879, 77: 326, 78: 815, 79: 940, 80: 257, 81: 61, 82: 471, 83: 796, 84: 126, 85: 238, 86: 860, 87: 792, 88: 240, 89: 799, 90: 406, 91: 846, 92: 318, 93: 284, 94: 487, 95: 758, 96: 321, 97: 418, 98: 469, 99: 500, 100: 205, 101: 208, 102: 880, 103: 242, 104: 958, 105: 520, 106: 520, 107: 517, 108: 620, 109: 217, 110: 540, 111: 384, 1

In [7]:
# Whole-dict equality first, then per-vector agreement rates.
print(nearest_neighbors == nearest_neighbors_approx)

# count: vectors whose approximate neighbour is exactly the true nearest neighbour.
count = sum(1 for i in range(N) if nearest_neighbors_approx[i] == nearest_neighbors[i])
# cnt: vectors whose approximate neighbour is among the true five nearest neighbours
# (a None result is never in that list, so it counts as a miss).
cnt = sum(1 for i in range(N) if nearest_neighbors_approx[i] in five_nearest_neighbors[i])

accuracy = count / N
top5_accuracy = cnt / N
print(accuracy)
print(top5_accuracy)

False
0.459
0.66


In [7]:
# Bin diagnostics: number of bins, their contents, and the size of each bin.
bin_sizes = list(map(len, all_bins))
print(len(all_bins))
print(all_bins)
print(bin_sizes)

22
[[0, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 14, 15, 16, 17, 18, 19, 20, 22, 23, 25, 26, 27, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 45, 46, 48, 50, 51, 52, 53, 54, 55, 56, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 76, 77, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 100, 101, 102, 103, 106, 107, 108, 109, 110, 111, 112, 114, 115, 116, 117, 118, 120, 121, 122, 123, 124, 125, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 155, 156, 157, 158, 159, 160, 161, 162, 163, 166, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 182, 183, 184, 185, 186, 187, 188, 189, 190, 193, 194, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206, 208, 210, 212, 213, 214, 215, 217, 218, 220, 221, 222, 223, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 2

In [8]:
# When experimenting with a bigger dataset, come up with a metric to compare these two dicts.
# Also compare the runtime complexities, because it is possible that the approximate search works this well only
# because something is wrong in the implementation, which would make its runtime the same as the brute-force method.

In [9]:
'''
things to check: 
- sizes of the bins, should not be big because otherwise there would be no runtime improvement -> get metrics for reduction of search space/runtime or space complexity
- varying the parameters like N, m, hash size, etc. (try bigger/more data)
- using lineres_memristive_vmm not naive_memristive_vmm
- experiment with varying non-idealities
- experiment 2, create visualizations
- write section 4 of the paper    (rn highest priority is finalizing the experiment and section 4 of the paper)
- is the change made in StaticMemristor fine?
'''

'\nthings to check: \n- sizes of the bins, should not be big because otherwise there would be no runtime improvement -> get metrics for reduction of search space/runtime or space complexity\n- varying the parameters like N, m, hash size, etc. (try bigger/more data)\n- using lineres_memristive_vmm not naive_memristive_vmm\n'