In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
# Experiment 1:
# Build the data set: N random column vectors of shape (m, 1).
import numpy as np

m = 10     # dimensionality of each vector
N = 1000   # number of vectors in the data set
SEED = 0   # fixed seed so that Restart & Run All regenerates the same data set

# A dedicated Generator keeps the data set reproducible without touching
# NumPy's global random state.
rng = np.random.default_rng(SEED)

# Alternative: uniform distribution between 0 and 1.
# vectors = [rng.random((m, 1)) for _ in range(N)]
# Standard normal distribution, so the components can be negative too.
vectors = [rng.standard_normal((m, 1)) for _ in range(N)]
# print (vectors)

In [3]:
# Exact nearest neighbours of every vector by brute force.
#
# Each unordered pair (i, j) is evaluated once, i.e. N(N-1)/2 norms instead of
# N^2 (same O(N^2) complexity class), and the distance is stored in both
# directions. Keeping the whole distance table also makes the top-5 lookup easy.
pairwise_distances = {} # {0: {1: 0.1, 2: 0.3, 3: 0.6}, 1: {0: 0.1, 2: 0.7, 3: 0.8}}
for i in range(len(vectors)):
    for j in range(i+1, len(vectors)):
        dist = np.linalg.norm(vectors[i] - vectors[j])
        # setdefault creates the per-vector row on first use. The previous bare
        # `except:` did the same job but also swallowed unrelated errors
        # (including KeyboardInterrupt) and then overwrote the existing row.
        pairwise_distances.setdefault(i, {})[j] = dist
        pairwise_distances.setdefault(j, {})[i] = dist

nearest_neighbors = {}       # vector index -> index of its nearest neighbour
five_nearest_neighbors = {}  # vector index -> indices of its 5 nearest neighbours, closest first
for i, dists in pairwise_distances.items():
    # One stable sort per row serves both results; ties keep insertion order,
    # which is what min() over the row would pick as well.
    ranked = sorted(dists, key=dists.get)
    nearest_neighbors[i] = ranked[0]
    five_nearest_neighbors[i] = ranked[:5]
print (nearest_neighbors)
print (five_nearest_neighbors)

{0: 953, 1: 903, 2: 705, 3: 293, 4: 730, 5: 993, 6: 419, 7: 834, 8: 432, 9: 989, 10: 102, 11: 71, 12: 228, 13: 350, 14: 980, 15: 121, 16: 948, 17: 627, 18: 436, 19: 807, 20: 281, 21: 908, 22: 343, 23: 287, 24: 743, 25: 971, 26: 717, 27: 373, 28: 442, 29: 170, 30: 885, 31: 961, 32: 873, 33: 618, 34: 671, 35: 626, 36: 194, 37: 527, 38: 86, 39: 436, 40: 221, 41: 743, 42: 328, 43: 741, 44: 227, 45: 969, 46: 6, 47: 196, 48: 233, 49: 638, 50: 83, 51: 897, 52: 233, 53: 897, 54: 424, 55: 606, 56: 426, 57: 426, 58: 317, 59: 857, 60: 330, 61: 301, 62: 174, 63: 48, 64: 795, 65: 858, 66: 621, 67: 736, 68: 871, 69: 295, 70: 529, 71: 131, 72: 311, 73: 370, 74: 24, 75: 270, 76: 506, 77: 952, 78: 19, 79: 586, 80: 910, 81: 186, 82: 375, 83: 20, 84: 890, 85: 736, 86: 345, 87: 249, 88: 482, 89: 975, 90: 327, 91: 440, 92: 510, 93: 793, 94: 552, 95: 265, 96: 128, 97: 880, 98: 197, 99: 757, 100: 819, 101: 928, 102: 642, 103: 219, 104: 939, 105: 584, 106: 115, 107: 858, 108: 393, 109: 413, 110: 476, 111: 290

In [4]:
# Approximate nearest neighbour of each vector using NaiveLSH.
from memristor.engine.model import NaiveLSH
from memristor.crossbar.model import LineResistanceCrossbar
from memristor.devices import StaticMemristor

# An earlier single-table configuration used hash_size=10 with otherwise
# identical parameters; the run below uses `reps` independent tables instead.
reps = 3 # adjustable hyperparameter (repetitions of the hashing)
all_bins = []
for _ in range(reps):
    naive_lsh = NaiveLSH(
        hash_size=5, # adjustable hyperparameter
        crossbar_class=LineResistanceCrossbar,
        crossbar_params={'r_wl': 20, 'r_bl': 20, 'r_in':10, 'r_out':10, 'V_SOURCE_MODE':'|_|'},
        memristor_model_class=StaticMemristor,
        memristor_params={'frequency': 1e8, 'temperature': 273 + 40},
        m=m,
        r=1, # adjustable hyperparameter
    )
    # hash value -> indices of the vectors that received that hash in this repetition,
    # e.g. {010: [1, 5, 7], 111: [5, 6, 7]}
    bins = {}
    for i in range(len(vectors)):
        # `hash_code` / `bucket` are used instead of `hash` / `bin` so the
        # builtins are not shadowed in the kernel namespace.
        hash_code = naive_lsh.inference(vectors[i])
        # print (hash_code)
        bins.setdefault(hash_code, []).append(i)
    all_bins.extend(bins.values())

# At this point all_bins is a list like [[1, 2], [1, 3, 5], ...] where each
# element is a bin holding the indices of vectors that are likely to be close
# to each other. To find the nearest neighbour of a vector we only check the
# vectors that share a bin with it, so in this example for 1 we would check
# 2, 3 and 5.
#
# Build the inverted index (vector index -> bins containing it) once, instead of
# scanning every bin with `i in bin` for every vector. Bins larger than N/reps
# are skipped, exactly as before. Bins are recorded in all_bins order, so the
# candidates are visited in the same order as in the original nested scan.
max_bin_size = N/reps
candidate_bins = {}
for bucket in all_bins:
    if len(bucket) > max_bin_size:
        continue
    for i in bucket:
        candidate_bins.setdefault(i, []).append(bucket)

nearest_neighbors_approx = {}  # vector index -> approximate nearest neighbour (None if no candidate)
for i in range(len(vectors)):
    nearest_dist = float('inf')
    nearest_neighbors_approx[i] = None
    for bucket in candidate_bins.get(i, ()):
        for j in bucket:
            if j != i:
                dist = np.linalg.norm(vectors[i] - vectors[j])
                if dist < nearest_dist:
                    nearest_dist = dist
                    nearest_neighbors_approx[i] = j
print (nearest_neighbors_approx)

  self.fitted_w = torch.tensor([[self.memristors[i][j].g_linfit for j in range(ideal_w.shape[1])]


{0: 953, 1: 222, 2: 705, 3: 293, 4: 730, 5: 993, 6: 419, 7: 834, 8: 432, 9: 336, 10: 102, 11: 71, 12: 113, 13: 350, 14: 980, 15: 196, 16: 948, 17: 97, 18: 39, 19: 78, 20: 281, 21: 179, 22: 343, 23: 287, 24: 743, 25: 971, 26: 717, 27: 361, 28: 442, 29: 170, 30: 885, 31: 882, 32: 961, 33: 618, 34: 671, 35: 626, 36: 73, 37: 573, 38: 56, 39: 436, 40: 924, 41: 743, 42: 114, 43: 741, 44: 227, 45: 969, 46: 6, 47: 15, 48: 63, 49: 638, 50: 157, 51: 897, 52: 233, 53: 897, 54: 440, 55: 606, 56: 186, 57: 935, 58: 21, 59: 857, 60: 330, 61: 923, 62: 95, 63: 48, 64: 795, 65: 202, 66: 210, 67: 736, 68: 218, 69: 295, 70: 322, 71: 131, 72: 197, 73: 370, 74: 24, 75: 270, 76: 550, 77: 13, 78: 19, 79: 586, 80: 910, 81: 118, 82: 375, 83: 864, 84: 890, 85: 736, 86: 293, 87: 249, 88: 482, 89: 975, 90: 327, 91: 440, 92: 166, 93: 652, 94: 327, 95: 265, 96: 128, 97: 880, 98: 197, 99: 757, 100: 819, 101: 928, 102: 10, 103: 880, 104: 939, 105: 584, 106: 915, 107: 858, 108: 161, 109: 413, 110: 476, 111: 290, 112: 1

In [5]:
# Show the exact and the LSH-approximated neighbour maps on consecutive lines
# so they can be compared by eye.
print (nearest_neighbors, nearest_neighbors_approx, sep='\n')

{0: 953, 1: 903, 2: 705, 3: 293, 4: 730, 5: 993, 6: 419, 7: 834, 8: 432, 9: 989, 10: 102, 11: 71, 12: 228, 13: 350, 14: 980, 15: 121, 16: 948, 17: 627, 18: 436, 19: 807, 20: 281, 21: 908, 22: 343, 23: 287, 24: 743, 25: 971, 26: 717, 27: 373, 28: 442, 29: 170, 30: 885, 31: 961, 32: 873, 33: 618, 34: 671, 35: 626, 36: 194, 37: 527, 38: 86, 39: 436, 40: 221, 41: 743, 42: 328, 43: 741, 44: 227, 45: 969, 46: 6, 47: 196, 48: 233, 49: 638, 50: 83, 51: 897, 52: 233, 53: 897, 54: 424, 55: 606, 56: 426, 57: 426, 58: 317, 59: 857, 60: 330, 61: 301, 62: 174, 63: 48, 64: 795, 65: 858, 66: 621, 67: 736, 68: 871, 69: 295, 70: 529, 71: 131, 72: 311, 73: 370, 74: 24, 75: 270, 76: 506, 77: 952, 78: 19, 79: 586, 80: 910, 81: 186, 82: 375, 83: 20, 84: 890, 85: 736, 86: 345, 87: 249, 88: 482, 89: 975, 90: 327, 91: 440, 92: 510, 93: 793, 94: 552, 95: 265, 96: 128, 97: 880, 98: 197, 99: 757, 100: 819, 101: 928, 102: 642, 103: 219, 104: 939, 105: 584, 106: 115, 107: 858, 108: 393, 109: 413, 110: 476, 111: 290

In [6]:
# Compare the approximate neighbours against the exact ones.
# First a strict check: are the two maps identical?
print (nearest_neighbors_approx == nearest_neighbors)

# count: vectors whose approximate neighbour is the true nearest neighbour.
count = sum(
    1 for i in range(N)
    if nearest_neighbors_approx[i] == nearest_neighbors[i]
)
# cnt: vectors whose approximate neighbour is among the true 5 nearest neighbours.
cnt = sum(
    1 for i in range(N)
    if nearest_neighbors_approx[i] in five_nearest_neighbors[i]
)

accuracy = count/N
top5_accuracy = cnt/N
print (accuracy)
print (top5_accuracy)

False
0.626
0.966


In [7]:
# Bin diagnostics: how many bins there are, their contents, the size of every
# bin, and the sizes of the bins small enough (<= N/reps) to be searched.
print (len(all_bins))
print (all_bins)
print (list(map(len, all_bins)))
print ([size for size in map(len, all_bins) if size <= N/reps])

96
[[0, 70, 87, 141, 150, 161, 162, 186, 198, 235, 249, 250, 304, 306, 310, 322, 330, 352, 418, 430, 457, 459, 465, 502, 503, 547, 552, 554, 559, 688, 717, 723, 816, 850, 851, 939], [1, 26, 39, 117, 129, 132, 164, 199, 205, 206, 231, 256, 264, 281, 386, 396, 425, 432, 436, 439, 453, 471, 486, 499, 500, 561, 602, 614, 627, 628, 671, 676, 683, 687, 699, 701, 735, 738, 768, 774, 779, 815, 833, 852, 866, 875, 945, 970], [2, 8, 43, 51, 53, 67, 72, 85, 93, 97, 98, 100, 101, 113, 114, 135, 144, 151, 160, 193, 194, 197, 211, 214, 217, 225, 229, 234, 251, 257, 260, 278, 280, 284, 299, 305, 313, 324, 326, 333, 340, 348, 351, 356, 357, 359, 360, 368, 374, 401, 440, 447, 449, 496, 501, 511, 513, 518, 519, 533, 553, 557, 571, 582, 596, 610, 624, 650, 652, 666, 667, 694, 700, 704, 719, 736, 741, 745, 746, 750, 759, 813, 824, 825, 826, 827, 830, 859, 871, 883, 891, 897, 901, 907, 928, 932], [3, 5, 7, 13, 20, 23, 31, 52, 54, 78, 80, 86, 138, 139, 140, 165, 176, 209, 213, 233, 252, 258, 271, 282, 291, 

In [8]:
# TODO: when experimenting with a bigger dataset, come up with a metric to compare these two dicts.
# Also compare the runtime complexities: it is possible that the approximation works so well only because
# something is wrong in the implementation, making its runtime the same as the brute-force method's.

In [9]:
# Open TODO list for the experiment, kept as a bare string literal; because it is
# the last expression in the cell, its repr is echoed as the cell output.
'''
things to check: 
- sizes of the bins, should not be big because otherwise there would be no runtime improvement -> get metrics for reduction of search space/runtime or space complexity
- varying the parameters like N, m, hash size, etc. (try bigger/more data)
- using lineres_memristive_vmm not naive_memristive_vmm
- experiment with varying non-idealities
- experiment 2, create visualizations
- write section 4 of the paper    (rn highest priority is finalizing the experiment and section 4 of the paper)
- is the change made in StaticMemristor fine?
'''

'\nthings to check: \n- sizes of the bins, should not be big because otherwise there would be no runtime improvement -> get metrics for reduction of search space/runtime or space complexity\n- varying the parameters like N, m, hash size, etc. (try bigger/more data)\n- using lineres_memristive_vmm not naive_memristive_vmm\n- experiment with varying non-idealities\n- experiment 2, create visualizations\n- write section 4 of the paper    (rn highest priority is finalizing the experiment and section 4 of the paper)\n- is the change made in StaticMemristor fine?\n'