In [1]:
# Enable IPython's autoreload extension (mode 2) so that edits to imported
# project modules are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# Experiment 1:
# First, create a set of N randomly generated (m, 1) column vectors.
import numpy as np

m = 10     # dimensionality of each vector
N = 1000   # number of vectors in the dataset
SEED = 0   # fixed seed so that "Restart Kernel -> Run All" regenerates the same dataset

# A dedicated seeded Generator makes the dataset reproducible without relying on
# (or disturbing) NumPy's global random state.
rng = np.random.default_rng(SEED)

# Entries are drawn from a standard normal distribution so the vectors contain
# negative values too (a uniform draw on [0, 1) would only give non-negative ones).
vectors = [rng.standard_normal((m, 1)) for _ in range(N)]

In [3]:
# Ground truth: exhaustive O(N^2) search for the nearest neighbor of every vector.
# nearest_neighbors maps each vector index (0 .. N-1) to the index of its closest
# *other* vector under the Euclidean norm; the value stays None if no other
# vector beats the initial infinite distance.
nearest_neighbors = {}
for query_idx, query in enumerate(vectors):
    best_dist = float('inf')
    nearest_neighbors[query_idx] = None
    for cand_idx, candidate in enumerate(vectors):
        if cand_idx == query_idx:
            continue  # a vector is never its own neighbor
        dist = np.linalg.norm(query - candidate)
        if dist < best_dist:
            # strict '<' keeps the earliest candidate on exact ties
            best_dist, nearest_neighbors[query_idx] = dist, cand_idx
print(nearest_neighbors)

{0: 687, 1: 360, 2: 688, 3: 302, 4: 584, 5: 621, 6: 380, 7: 672, 8: 527, 9: 233, 10: 336, 11: 366, 12: 221, 13: 719, 14: 950, 15: 199, 16: 156, 17: 239, 18: 870, 19: 45, 20: 703, 21: 476, 22: 93, 23: 556, 24: 784, 25: 733, 26: 775, 27: 655, 28: 315, 29: 419, 30: 987, 31: 852, 32: 984, 33: 698, 34: 317, 35: 170, 36: 148, 37: 257, 38: 828, 39: 870, 40: 619, 41: 346, 42: 914, 43: 506, 44: 349, 45: 255, 46: 670, 47: 315, 48: 402, 49: 165, 50: 724, 51: 482, 52: 521, 53: 885, 54: 452, 55: 701, 56: 212, 57: 959, 58: 75, 59: 344, 60: 492, 61: 842, 62: 618, 63: 138, 64: 286, 65: 602, 66: 484, 67: 637, 68: 144, 69: 477, 70: 472, 71: 241, 72: 55, 73: 998, 74: 633, 75: 58, 76: 276, 77: 513, 78: 195, 79: 647, 80: 104, 81: 690, 82: 170, 83: 572, 84: 610, 85: 185, 86: 683, 87: 654, 88: 415, 89: 464, 90: 637, 91: 17, 92: 921, 93: 806, 94: 900, 95: 731, 96: 933, 97: 378, 98: 40, 99: 263, 100: 299, 101: 791, 102: 815, 103: 14, 104: 17, 105: 336, 106: 250, 107: 243, 108: 16, 109: 806, 110: 865, 111: 729,

In [4]:
# Finding the nearest neighbor of each vector using NaiveLSH (approximate search).
from memristor.engine.model import NaiveLSH
from memristor.crossbar.model import LineResistanceCrossbar
from memristor.devices import StaticMemristor

reps = 3  # adjustable hyperparameter (repetitions of the hashing)

# Step 1: hash every vector `reps` times and collect the resulting bins.
# (An earlier configuration of this experiment used hash_size=10 with otherwise
# identical arguments.)
all_bins = []
for _ in range(reps):
    # A new NaiveLSH instance is built for every repetition.
    # NOTE(review): presumably each instance realises a different random hash
    # function -- confirm in NaiveLSH.
    naive_lsh = NaiveLSH(
        hash_size=3,  # adjustable hyperparameter
        crossbar_class=LineResistanceCrossbar,
        crossbar_params={'r_wl': 20, 'r_bl': 20, 'r_in': 10, 'r_out': 10, 'V_SOURCE_MODE': '|_|'},
        memristor_model_class=StaticMemristor,
        memristor_params={'frequency': 1e8, 'temperature': 273 + 40},
        m=m,
        r=1,  # adjustable hyperparameter
    )
    # bins maps each hash code produced in this repetition to the list of vector
    # indices that received that code.
    bins = {}
    for i, vector in enumerate(vectors):
        # `hash_code` (not `hash`) so the builtin hash() is not shadowed in the kernel.
        hash_code = naive_lsh.inference(vector)
        bins.setdefault(hash_code, []).append(i)
    all_bins.extend(bins.values())

# At this point all_bins is a list like [[1, 2], [1, 3, 5], ...] where each element
# is a bin containing indices of vectors that are likely to be close to each other.
# To find the approximate nearest neighbor of a vector we only compare it against
# the vectors that share a bin with it; in the example above, vector 1 would be
# compared against 2, 3 and 5.

# Bins holding more than a third of the dataset are skipped.
# NOTE(review): presumably because searching such bins would approach the cost of
# the brute-force search -- confirm the intended threshold.
max_bin_size = len(vectors) / 3

# Every index starts with "no neighbor found"; a vector keeps None if all of its
# bins are oversized or contain no other vector.
nearest_neighbors_approx = {i: None for i in range(len(vectors))}
nearest_dists_approx = {i: float('inf') for i in range(len(vectors))}

# Iterate over the bins directly instead of testing `i in bin` for every
# (vector, bin) pair. For any given vector the candidates are still visited in
# the same order (bins in all_bins order, members in bin order), so the result,
# including tie-breaking via the strict '<', is unchanged.
for bucket in all_bins:
    if len(bucket) > max_bin_size:
        continue
    for i in bucket:
        for j in bucket:
            if j == i:
                continue  # a vector is never its own neighbor
            dist = np.linalg.norm(vectors[i] - vectors[j])
            if dist < nearest_dists_approx[i]:
                nearest_dists_approx[i] = dist
                nearest_neighbors_approx[i] = j
print(nearest_neighbors_approx)

{0: 687, 1: 360, 2: 606, 3: 302, 4: 584, 5: 621, 6: 380, 7: 14, 8: 527, 9: 682, 10: 336, 11: 366, 12: 221, 13: 863, 14: 283, 15: 381, 16: 943, 17: 239, 18: 870, 19: 45, 20: 703, 21: 476, 22: 93, 23: 322, 24: 784, 25: 733, 26: 775, 27: 655, 28: 315, 29: 419, 30: 987, 31: 852, 32: 984, 33: 933, 34: 710, 35: 170, 36: 238, 37: 74, 38: 907, 39: 870, 40: 619, 41: 346, 42: 914, 43: 966, 44: 957, 45: 255, 46: 670, 47: 315, 48: 402, 49: 165, 50: 724, 51: 482, 52: 521, 53: 885, 54: 346, 55: 701, 56: 212, 57: 959, 58: 75, 59: 344, 60: 492, 61: 790, 62: 618, 63: 138, 64: 286, 65: 602, 66: 484, 67: 71, 68: 144, 69: 477, 70: 472, 71: 241, 72: 55, 73: 998, 74: 633, 75: 58, 76: 276, 77: 513, 78: 195, 79: 722, 80: 104, 81: 690, 82: 170, 83: 572, 84: 610, 85: 185, 86: 683, 87: 654, 88: 415, 89: 464, 90: 637, 91: 340, 92: 921, 93: 806, 94: 900, 95: 731, 96: 203, 97: 16, 98: 40, 99: 263, 100: 934, 101: 148, 102: 815, 103: 14, 104: 17, 105: 787, 106: 245, 107: 243, 108: 487, 109: 348, 110: 865, 111: 534, 1

In [5]:
# Show the exact and the approximate neighbor maps one after the other for comparison.
print(nearest_neighbors, nearest_neighbors_approx, sep='\n')

{0: 687, 1: 360, 2: 688, 3: 302, 4: 584, 5: 621, 6: 380, 7: 672, 8: 527, 9: 233, 10: 336, 11: 366, 12: 221, 13: 719, 14: 950, 15: 199, 16: 156, 17: 239, 18: 870, 19: 45, 20: 703, 21: 476, 22: 93, 23: 556, 24: 784, 25: 733, 26: 775, 27: 655, 28: 315, 29: 419, 30: 987, 31: 852, 32: 984, 33: 698, 34: 317, 35: 170, 36: 148, 37: 257, 38: 828, 39: 870, 40: 619, 41: 346, 42: 914, 43: 506, 44: 349, 45: 255, 46: 670, 47: 315, 48: 402, 49: 165, 50: 724, 51: 482, 52: 521, 53: 885, 54: 452, 55: 701, 56: 212, 57: 959, 58: 75, 59: 344, 60: 492, 61: 842, 62: 618, 63: 138, 64: 286, 65: 602, 66: 484, 67: 637, 68: 144, 69: 477, 70: 472, 71: 241, 72: 55, 73: 998, 74: 633, 75: 58, 76: 276, 77: 513, 78: 195, 79: 647, 80: 104, 81: 690, 82: 170, 83: 572, 84: 610, 85: 185, 86: 683, 87: 654, 88: 415, 89: 464, 90: 637, 91: 17, 92: 921, 93: 806, 94: 900, 95: 731, 96: 933, 97: 378, 98: 40, 99: 263, 100: 299, 101: 791, 102: 815, 103: 14, 104: 17, 105: 336, 106: 250, 107: 243, 108: 16, 109: 806, 110: 865, 111: 729,

In [6]:
# Compare the approximate result against the brute-force ground truth.
# The dict comparison is True only when the two mappings are identical.
print(nearest_neighbors == nearest_neighbors_approx)

# Accuracy: fraction of the N vectors whose approximate nearest neighbor is
# exactly the true nearest neighbor.
count = sum(
    1
    for idx in range(N)
    if nearest_neighbors_approx[idx] == nearest_neighbors[idx]
)
accuracy = count / N
print(accuracy)

False
0.759


In [7]:
# Inspect the LSH bins: how many there are, their contents, and their sizes.
bin_count = len(all_bins)
bin_sizes = list(map(len, all_bins))
print(bin_count)
print(all_bins)
print(bin_sizes)

22
[[0, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 14, 15, 16, 17, 18, 19, 20, 22, 23, 25, 26, 27, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 45, 46, 48, 50, 51, 52, 53, 54, 55, 56, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 76, 77, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 100, 101, 102, 103, 106, 107, 108, 109, 110, 111, 112, 114, 115, 116, 117, 118, 120, 121, 122, 123, 124, 125, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 155, 156, 157, 158, 159, 160, 161, 162, 163, 166, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 182, 183, 184, 185, 186, 187, 188, 189, 190, 193, 194, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206, 208, 210, 212, 213, 214, 215, 217, 218, 220, 221, 222, 223, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 2

In [8]:
# When experimenting with a bigger dataset, come up with a metric to compare these two dicts.
# Also compare the runtime complexities, because it is possible that the approach only appears to work so well due to something wrong in the implementation, which results in the runtime
# being just the same as that of the brute-force method.

In [9]:
'''
things to check: 
- sizes of the bins, should not be big because otherwise there would be no runtime improvement -> get metrics for reduction of search space/runtime or space complexity
- varying the parameters like N, m, hash size, etc. (try bigger/more data)
- using lineres_memristive_vmm not naive_memristive_vmm
- experiment with varying non-idealities
- experiment 2, create visualizations
- write section 4 of the paper (right now the highest priority is finalizing the experiment and section 4 of the paper)
- is the change made in StaticMemristor fine?
'''

'\nthings to check: \n- sizes of the bins, should not be big because otherwise there would be no runtime improvement -> get metrics for reduction of search space/runtime or space complexity\n- varying the parameters like N, m, hash size, etc. (try bigger/more data)\n- using lineres_memristive_vmm not naive_memristive_vmm\n'