In [1]:
import numpy as np
from PIL import Image
import csv
import os
from os import listdir
from os.path import isfile, join, splitext
import shutil
import random
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow

import faiss

%matplotlib inline

In [2]:
# Parameters for the tiny smoke-test dataset.
np.random.seed(1234)    # seed the global NumPy RNG so the generated data is reproducible
d, nb, nq = 2, 10, 1    # vector dimension, database size, number of queries

In [3]:
# Database vectors: nb rows of d uniform random values, stored as float32.
xb = np.random.random(size=(nb, d)).astype(np.float32)

In [4]:
# Query vectors: nq rows of d uniform random values, stored as float32.
xq = np.random.random(size=(nq, d)).astype(np.float32)

In [5]:
# Sanity check: print both array shapes and the first two database rows.
for label, value in (
    ('xb.shape', xb.shape),
    ('xq.shape', xq.shape),
    ('xb[0]', xb[0]),
    ('xb[1]', xb[1]),
):
    print(label, value)

xb.shape (10, 2)
xq.shape (1, 2)
xb[0] [0.19151945 0.62210876]
xb[1] [0.43772775 0.7853586 ]


# 4096d - 1M dataset test on CPU machine

In [6]:
# Start the 1M-vector section with `index` unset; the index is built further down.
index = None

In [7]:
# Parameters for the 1M-vector benchmark.
np.random.seed(1234)            # seed the global NumPy RNG so the generated data is reproducible
d, nb, nq = 4096, 1000000, 1    # vector dimension, database size, number of queries

In [8]:
%time xb = np.random.random((nb, d)).astype('float32')

CPU times: user 40.6 s, sys: 9.77 s, total: 50.3 s
Wall time: 50.3 s


In [9]:
%time xq = np.random.random((nq, d)).astype('float32')

CPU times: user 552 µs, sys: 0 ns, total: 552 µs
Wall time: 308 µs


In [10]:
# Sanity check: print both array shapes and the first two database rows.
for label, value in (
    ('xb.shape', xb.shape),
    ('xq.shape', xq.shape),
    ('xb[0]', xb[0]),
    ('xb[1]', xb[1]),
):
    print(label, value)


xb.shape (1000000, 4096)
xq.shape (1, 4096)
xb[0] [0.19151945 0.62210876 0.43772775 ... 0.9750933  0.9089286  0.8868944 ]
xb[1] [0.9369065  0.95599407 0.38951334 ... 0.6071783  0.3248492  0.75957   ]


In [11]:
# Create a flat L2 index for d-dimensional vectors.
index = faiss.IndexFlatL2(d)   # build the index
# The recorded output shows is_trained is already True, without any train() call.
print('index.is_trained : ', index.is_trained)

index.is_trained :  True


In [12]:
# Time the insertion of all database vectors into the flat index.
%time index.add(xb)                  # add vectors to the index
# ntotal is the number of vectors held by the index (recorded output: 1000000).
print('index.ntotal : ', index.ntotal)

CPU times: user 2.77 s, sys: 3.12 s, total: 5.9 s
Wall time: 5.89 s
index.ntotal :  1000000


In [13]:
# Timed search on the flat index: k results requested for each row of xq.
k = 100                          
%time D, I = index.search(xq, k)

CPU times: user 6.04 s, sys: 306 ms, total: 6.35 s
Wall time: 2 s


In [14]:
# Same timed search with a larger k; the recorded wall time is about the same as for k = 100.
k = 1000                          
%time D, I = index.search(xq, k)

CPU times: user 5.5 s, sys: 225 ms, total: 5.72 s
Wall time: 1.96 s


In [15]:
# Show the result ids for the single query and the shape of the id matrix.
print('I[0] : ', I[0])
print('I.shape', I.shape)

I[0] :  [288594 318967 288749 715358 626433 717708 859066  28088  17516 586587
  56920  81632 996792 530640 774878 159668 254013 631636 952097  94876
 108618 893410 308878 876131 704912 452180 892849 149782  19689 624368
  45894  47102 522086 547961 199763 999605 729942 603030 499108 924934
 204112 165712 611590 243190 230616 804612  95617 841005 544177  69768
  13269  60364 169889 912615 596901 735385 172814 747071 939262 448870
  38643 973502 456762 687696 245976 872831 780143 992709 539491 284597
 191090 988863 273677 591019 199717 409464 416125 407840 709507  86768
 886744 155440 850327  86562 616932 418898 503567 669668 563134 321301
  55910 173758 741647 952302 123849  55554 499498 764378 469128 326968
 173676 176368 194994 804002  13622 369099 560295 491733 537778 183278
  47802 823482 638972 500194 671224 607460 180233 311296 748047 740674
  22107 483437 856042  90932 763179 677189  66732 288396  51200 536942
 491706 657952 390758 954419 888594 267569 494699 225104 553111 20081

In [17]:
# Request as many results as there are database vectors (k equals nb for this section).
k = 1000000                     
%time D, I = index.search(xq, k)

CPU times: user 5.67 s, sys: 204 ms, total: 5.88 s
Wall time: 1.94 s


# Memory: this section consumes 32 GB in total for the 1M-vector dataset

# 4096d - 1M dataset - IndexIVFFlat - test on CPU machine
### https://github.com/facebookresearch/faiss/wiki/Faster-search

In [18]:
# Drop the references to the previous index and quantizer before building new ones.
index = quantizer = None

In [19]:
# nlist is passed to the IndexIVFFlat constructor in the next cell.
nlist = 100
# Flat L2 index that is handed to the IVF index as its quantizer.
%time quantizer = faiss.IndexFlatL2(d)  # the other index


CPU times: user 42 µs, sys: 8 µs, total: 50 µs
Wall time: 42.9 µs


In [20]:
# Build an IVF-Flat index on top of the quantizer; METRIC_L2 is passed explicitly
# so that the index ranks results by L2 distance.
# NOTE(review): the earlier comment here said the default metric is inner product;
# Faiss declares METRIC_L2 as the default for IndexIVFFlat - confirm against the
# installed version.
index = faiss.IndexIVFFlat(quantizer, d, nlist, faiss.METRIC_L2)
# Time the training step on the full database array, then report the trained flag.
%time index.train(xb)
print('index.is_trained', index.is_trained)


CPU times: user 18.9 s, sys: 300 ms, total: 19.2 s
Wall time: 8.73 s
index.is_trained True


In [21]:
# Print the database array's shape before adding it to the index.
print('xb.shape', xb.shape)

xb.shape (1000000, 4096)


In [22]:
# Time the insertion of all database vectors into the trained IVF-Flat index.
%time index.add(xb)                  # add may be a bit slower as well

CPU times: user 39.3 s, sys: 7.99 s, total: 47.2 s
Wall time: 16.7 s


In [23]:
# Timed search with nprobe left unchanged since the index was created.
k = 100
%time D, I = index.search(xq, k)     # actual search
# nq = 1, so I has a single row: both slices below print that same row.
print('I[:5] : ', I[:5])                  # first (up to) 5 result rows
print('I[-5:] : ', I[-5:])                  # last (up to) 5 result rows

CPU times: user 1 s, sys: 45.6 ms, total: 1.05 s
Wall time: 50.2 ms
I[:5] :  [[ 17516 996792 704912 176368 954419 494699 496710 517952 607979 979484
  105983 295458 449051 311091 526109 342930 195438 237684 582749 967920
  992917 602159 889923 254086 839513 627050 334330 198850 351587 655281
  343611 658278 176434 961115 884359  34328 440934 436925 273581 312648
  282765 744637 209712 735236 660071 210883 481372 857491 386408 436200
  472475 181153 639618 411730 570182 350001 635997  40831 417408 712217
  441586 972743 716412 161942 246741  75285 803035 447512 220153 298677
  203491 298138 836838  66162 581952 960145 590166 167672 171610 997327
  513954 335598 962371 100995 858014 216834   2967 198443 595122 999550
  961407 173950 133647 965664 851552 314198 713407 867221 250371 323723]]
I[-5:] :  [[ 17516 996792 704912 176368 954419 494699 496710 517952 607979 979484
  105983 295458 449051 311091 526109 342930 195438 237684 582749 967920
  992917 602159 889923 254086 839513 627050 334

In [24]:
# Request as many results as there are database vectors.
# The recorded output ends in -1 entries: fewer than k results came back.
k = 1000000
%time D, I = index.search(xq, k)     # actual search
# nq = 1, so I has a single row: both slices below print that same row.
print('I[:5] : ', I[:5])                  # first (up to) 5 result rows
print('I[-5:] : ', I[-5:])                  # last (up to) 5 result rows

CPU times: user 1.91 s, sys: 79.5 ms, total: 1.99 s
Wall time: 91.7 ms
I[:5] :  [[ 17516 996792 704912 ...     -1     -1     -1]]
I[-5:] :  [[ 17516 996792 704912 ...     -1     -1     -1]]


In [25]:
# Repeat the k = 100 search with a larger nprobe; the recorded wall time goes up.
k = 100
index.nprobe = 10              # default nprobe is 1; probing more makes the search slower
%time D, I = index.search(xq, k)
# nq = 1, so I has a single row: both slices below print that same row.
print('I[:5] : ', I[:5])                  # first (up to) 5 result rows
print('I[-5:] : ', I[-5:])                  # last (up to) 5 result rows

CPU times: user 4.68 s, sys: 241 ms, total: 4.92 s
Wall time: 370 ms
I[:5] :  [[288749 715358  17516 586587  81632 996792 530640 774878 952097  94876
  108618 308878 704912 924934 611590 230616 841005 245976 872831 780143
  539491 284597 988863 591019 407840  86768 850327 669668  55910 173758
  499498 176368 804002 823482 180233 311296 748047 856042 288396 657952
  390758 954419 494699 553111 496710 559531 847603 603161 121768 864780
  517952 216702 472374 607979 979484 424780 759982 575505 105983 295458
  556660 219738 449051 858834 614005  76661 471826 281163  82897 311091
  111533 522424 445932 526109 342930 254962 379530 994310 577652 462512
   87598 365283 736914 482052 370448 917392 195438 445633 683551 496163
   99154 237684  73447 112189 707413 582749 223062 552328 286381 776169]]
I[-5:] :  [[288749 715358  17516 586587  81632 996792 530640 774878 952097  94876
  108618 308878 704912 924934 611590 230616 841005 245976 872831 780143
  539491 284597 988863 591019 407840  86768 85

# 4096d - 1M dataset - IndexIVFPQ - test on CPU machine
### https://github.com/facebookresearch/faiss/wiki/Lower-memory-footprint

In [28]:
# Clear the previous index, its list count and its quantizer before the IVFPQ run.
index = nlist = quantizer = None

In [29]:
# Parameters passed to the IndexIVFPQ constructor in the next cell.
nlist = 100
m = 8    # number of subquantizers

# Flat L2 index that is handed to the IVFPQ index as its quantizer.
%time quantizer = faiss.IndexFlatL2(d)  # the other index


CPU times: user 49 µs, sys: 8 µs, total: 57 µs
Wall time: 56.3 µs


In [30]:
# Build the IVFPQ index; with d = 4096 and m = 8 each sub-vector covers d / m = 512 dimensions.
%time index = faiss.IndexIVFPQ(quantizer, d, nlist, m, 8)  # 8 specifies that each sub-vector is encoded as 8 bits

CPU times: user 1.26 ms, sys: 15 µs, total: 1.27 ms
Wall time: 634 µs


In [31]:
# Print the database array's shape before training the index on it.
print('xb.shape', xb.shape)

xb.shape (1000000, 4096)


In [32]:
# Time the training of the IVFPQ index on the full database array.
%time index.train(xb)

CPU times: user 2min 38s, sys: 3.61 s, total: 2min 42s
Wall time: 15.2 s


In [33]:
# Time the insertion of all database vectors into the trained IVFPQ index.
%time index.add(xb)

CPU times: user 3min 55s, sys: 14 s, total: 4min 9s
Wall time: 10.6 s


In [34]:
# Timed search with nprobe left unchanged since the index was created.
k = 100
%time D, I = index.search(xq, k)     # actual search
# nq = 1, so I has a single row: both slices below print that same row.
print('I[:5] : ', I[:5])                  # first (up to) 5 result rows
print('I[-5:] : ', I[-5:])                  # last (up to) 5 result rows

CPU times: user 22.6 ms, sys: 0 ns, total: 22.6 ms
Wall time: 1.38 ms
I[:5] :  [[199919 340340  91140 713512 183752 594059 975714 112048  96505 717906
   75887 488887 469044 512110 452174 471055 191368 199666 564851  65659
  132355 353112 167011 899298 158584 818025 862080 580126 538092 893713
  725392 190954 801996 189966 389895 548246 286146 539814 813684 620266
  274178 123704 502409 918559 162100  31422 438089 364434  23518 780041
  208336 808067 810240 179281  55161 530371 392873 685115 670155 572745
  753152  54222 948126 305006 882575 245977 457150  54554 685301 676330
   34225  81059 155335 637668 284496 782473 593372  86513 937290 230319
  728355 144320 424594 590320 220452 485670 783078 231253 583545 430058
  715585 596134 616401 681591 907550 498283 763835 679566 610939 288143]]
I[-5:] :  [[199919 340340  91140 713512 183752 594059 975714 112048  96505 717906
   75887 488887 469044 512110 452174 471055 191368 199666 564851  65659
  132355 353112 167011 899298 158584 818025 8

In [35]:
# Request as many results as there are database vectors.
# The recorded output ends in -1 entries: fewer than k results came back.
k = 1000000
%time D, I = index.search(xq, k)     # actual search
# nq = 1, so I has a single row: both slices below print that same row.
print('I[:5] : ', I[:5])                  # first (up to) 5 result rows
print('I[-5:] : ', I[-5:])                  # last (up to) 5 result rows

CPU times: user 1.68 s, sys: 71 ms, total: 1.75 s
Wall time: 83.3 ms
I[:5] :  [[199919 340340  91140 ...     -1     -1     -1]]
I[-5:] :  [[199919 340340  91140 ...     -1     -1     -1]]


In [36]:
# Repeat the k = 100 search with a larger nprobe; the recorded wall time goes up.
k = 100
index.nprobe = 10              # default nprobe is 1; probing more makes the search slower
%time D, I = index.search(xq, k)
# nq = 1, so I has a single row: both slices below print that same row.
print('I[:5] : ', I[:5])                  # first (up to) 5 result rows
print('I[-5:] : ', I[-5:])                  # last (up to) 5 result rows

CPU times: user 48.3 ms, sys: 3.67 ms, total: 52 ms
Wall time: 2.37 ms
I[:5] :  [[199919 285575 322689 965989 840684 340340  91140 229851 938232  82932
   49674 713512  85096 482200 119518  61531 183752 192290 598008 773300
  594059 975714 112048 607620  96505 167648  50810 200338 417405 717906
   75887 732674 609554   4111 634213 488887 522623 469044 930319 482553
  573536 619599 512110  53978 702952 355486 225490 545308 452174 618477
  292144 931499 851760  39895 446821 332741  31538 536004 471055 271920
  191368 725724 100032 199666 150974 502402  65718 705080 564851 497452
  529849  65659 132355 353112 167011 477925 741933 899298 472911 158584
  818025 104773 531868 946913 783975 347427 646562 722187 192737 862080
  534491 933672 664357 740837 234146 912336 662716 580126 538092 960134]]
I[-5:] :  [[199919 285575 322689 965989 840684 340340  91140 229851 938232  82932
   49674 713512  85096 482200 119518  61531 183752 192290 598008 773300
  594059 975714 112048 607620  96505 167648 

In [37]:
# Request as many results as there are database vectors, with nprobe = 10.
# The recorded output still ends in -1 entries: fewer than k results came back.
k = 1000000
index.nprobe = 10              # default nprobe is 1; probing more makes the search slower
%time D, I = index.search(xq, k)
# nq = 1, so I has a single row: both slices below print that same row.
print('I[:5] : ', I[:5])                  # first (up to) 5 result rows
print('I[-5:] : ', I[-5:])                  # last (up to) 5 result rows

CPU times: user 2.99 s, sys: 189 ms, total: 3.18 s
Wall time: 144 ms
I[:5] :  [[199919 285575 322689 ...     -1     -1     -1]]
I[-5:] :  [[199919 285575 322689 ...     -1     -1     -1]]


# 4096d - 4M dataset - test on CPU machine

In [38]:
# Drop the 1M-vector arrays and index before generating the 4M-vector dataset.
xb = xq = index = None

In [39]:
# Parameters for the 4M-vector benchmark.
np.random.seed(1234)            # seed the global NumPy RNG so the generated data is reproducible
d, nb, nq = 4096, 4000000, 1    # vector dimension, database size, number of queries

In [40]:
%time xb = np.random.random((nb, d)).astype('float32')

CPU times: user 2min 50s, sys: 55.4 s, total: 3min 45s
Wall time: 3min 45s


In [41]:
%time xq = np.random.random((nq, d)).astype('float32')

CPU times: user 216 µs, sys: 30 µs, total: 246 µs
Wall time: 165 µs


In [42]:
# Sanity check: print both array shapes and the first two database rows.
for label, value in (
    ('xb.shape', xb.shape),
    ('xq.shape', xq.shape),
    ('xb[0]', xb[0]),
    ('xb[1]', xb[1]),
):
    print(label, value)


xb.shape (4000000, 4096)
xq.shape (1, 4096)
xb[0] [0.19151945 0.62210876 0.43772775 ... 0.9750933  0.9089286  0.8868944 ]
xb[1] [0.9369065  0.95599407 0.38951334 ... 0.6071783  0.3248492  0.75957   ]


In [43]:
# Create a flat L2 index for d-dimensional vectors (timed).
%time index = faiss.IndexFlatL2(d)   # build the index
# The recorded output shows is_trained is already True, without any train() call.
print('index.is_trained : ', index.is_trained)

CPU times: user 89 µs, sys: 12 µs, total: 101 µs
Wall time: 125 µs
index.is_trained :  True


In [44]:
# Time the insertion of all database vectors into the flat index.
%time index.add(xb)                  # add vectors to the index
# ntotal is the number of vectors held by the index (recorded output: 4000000).
print('index.ntotal : ', index.ntotal)

CPU times: user 14.2 s, sys: 16 s, total: 30.2 s
Wall time: 30.2 s
index.ntotal :  4000000


In [45]:
# Timed search on the flat index: k results requested for each row of xq.
k = 100                          
%time D, I = index.search(xq, k)
# nq = 1, so I has a single row: both slices below print that same row.
print('I[:5] : ', I[:5])                  # first (up to) 5 result rows
print('I[-5:] : ', I[-5:])                  # last (up to) 5 result rows

CPU times: user 10.3 s, sys: 204 ms, total: 10.5 s
Wall time: 6.7 s
I[:5] :  [[3057487  716869 3084379 3505606 2185077  635044  705889 1865186  964585
  2664931 3921233   54454 2842296  571173 3434553 3112684 1710949 2534542
   180132 2162286 2933891 2188603  893567 1094423  506668 2729159  255549
  2564707 2122217 3512425 2502319 1907766  372070 1395569 2866383 3463632
  1303025 1202304  305182  596818 2626111 1066545 1834009  209481  600638
  1074454 2897013 1534513 3444934 1398511 2260664 3651408 3844585 2981741
  3954363  287530  961951 1422127 3261712  582604 2470413 2029108  611851
   392155 1777019 1397797 2436344 1837349 3209708 2906991 3613809 2610169
  2911444    6424 2112550 2589890 2388781 1641046  632738  971782 3920287
  2430397 2931081 1433168 3945308 3784734 3369130 3811010 1634299 3048104
  2537950 2728971 1037771  519337 2841816 1621513 1122071 1704052 2801067
  2402870]]
I[-5:] :  [[3057487  716869 3084379 3505606 2185077  635044  705889 1865186  964585
  2664931 392

In [46]:
# Same timed search with a larger k; the recorded wall time is about the same as for k = 100.
k = 1000                          
%time D, I = index.search(xq, k)
# nq = 1, so I has a single row: both slices below print that same row.
print('I[:5] : ', I[:5])                  # first (up to) 5 result rows
print('I[-5:] : ', I[-5:])                  # last (up to) 5 result rows

CPU times: user 10.5 s, sys: 268 ms, total: 10.7 s
Wall time: 6.71 s
I[:5] :  [[3057487  716869 3084379 3505606 2185077  635044  705889 1865186  964585
  2664931 3921233   54454 2842296  571173 3434553 3112684 1710949 2534542
   180132 2162286 2933891 2188603  893567 1094423  506668 2729159  255549
  2564707 2122217 3512425 2502319 1907766  372070 1395569 2866383 3463632
  1303025 1202304  305182  596818 2626111 1066545 1834009  209481  600638
  1074454 2897013 1534513 3444934 1398511 2260664 3651408 3844585 2981741
  3954363  287530  961951 1422127 3261712  582604 2470413 2029108  611851
   392155 1777019 1397797 2436344 1837349 3209708 2906991 3613809 2610169
  2911444    6424 2112550 2589890 2388781 1641046  632738  971782 3920287
  2430397 2931081 1433168 3945308 3784734 3369130 3811010 1634299 3048104
  2537950 2728971 1037771  519337 2841816 1621513 1122071 1704052 2801067
  2402870 3663948 2265677 3794947 2764181 3215228 2548378 2116930 1004419
  3610295 1237210 3848766 1468472 

In [47]:
# Request as many results as there are database vectors (k equals nb for this section).
k = 4000000                     
%time D, I = index.search(xq, k)
# nq = 1, so I has a single row: both slices below print that same row.
print('I[:5] : ', I[:5])                  # first (up to) 5 result rows
print('I[-5:] : ', I[-5:])                  # last (up to) 5 result rows

CPU times: user 11.8 s, sys: 242 ms, total: 12.1 s
Wall time: 7.8 s
I[:5] :  [[3057487  716869 3084379 ... 1856354 1996634 1788903]]
I[-5:] :  [[3057487  716869 3084379 ... 1856354 1996634 1788903]]


In [48]:
# Shape of the id matrix returned by the last search (recorded output: one row of k ids).
print('I.shape : ', I.shape)

I.shape :  (1, 4000000)


# Memory: this section consumes 124 GB in total for the 4M-vector dataset

# 4096d - 4M dataset - IndexIVFFlat - test on CPU machine

In [49]:
# Drop the references to the previous index and quantizer before building new ones.
index = quantizer = None

In [50]:
# nlist is passed to the IndexIVFFlat constructor in the next cell.
nlist = 100
# Flat L2 index that is handed to the IVF index as its quantizer.
%time quantizer = faiss.IndexFlatL2(d)  # the other index

CPU times: user 47 µs, sys: 7 µs, total: 54 µs
Wall time: 46.7 µs


In [51]:
# Build an IVF-Flat index on top of the quantizer; METRIC_L2 is passed explicitly
# so that the index ranks results by L2 distance.
# NOTE(review): the earlier comment here said the default metric is inner product;
# Faiss declares METRIC_L2 as the default for IndexIVFFlat - confirm against the
# installed version.
index = faiss.IndexIVFFlat(quantizer, d, nlist, faiss.METRIC_L2)
# Time the training step on the full database array, then report the trained flag.
%time index.train(xb)
print('index.is_trained', index.is_trained)

CPU times: user 49.2 s, sys: 6.69 s, total: 55.9 s
Wall time: 46.7 s
index.is_trained True


In [52]:
# Time the insertion of all database vectors into the trained IVF-Flat index.
%time index.add(xb)                  # add may be a bit slower as well

CPU times: user 3min 11s, sys: 1min 53s, total: 5min 5s
Wall time: 1min 20s


In [53]:
# Timed search with nprobe left unchanged since the index was created.
k = 100
%time D, I = index.search(xq, k)     # actual search
# nq = 1, so I has a single row: both slices below print that same row.
print('I[:5] : ', I[:5])                  # first (up to) 5 result rows
print('I[-5:] : ', I[-5:])                  # last (up to) 5 result rows

CPU times: user 2.96 s, sys: 168 ms, total: 3.13 s
Wall time: 133 ms
I[:5] :  [[2981741 2548378 2295913 1615482 2508481 2218092 3614159 1391405  383819
  3799023 1814045  385915 3375246 3259845 3278480 3163458 3505257 3954395
  2435506  991520 3727924 1816786 2006609 1181040 3564101 3176004 1985086
  3330271 1510851  390924  171154 3742376 1354284 3040453 1685494  586215
  3816614 3930961 1337360 1900960  713529  438288 3788055  432560 1268729
   549979 3074791 1730809 3048415 2234315 3860825 2794775    2736 1604327
   732467  885047  801733 1068648 1500871 3454131 3561801 2954973 3650788
  2018773 3424945 1820780  971421  877238 3339956    9699  957028 3830937
  3370128 1875925  150965 2540835 1162513  910962 2796849 3801824 1396258
  2092375 2164395 2254578 3441380  251742 3221552 2952328  740202  132821
  3814534 3705566  597476 1445075 1795773 2671895 1214598 3462817 3625924
  3513964]]
I[-5:] :  [[2981741 2548378 2295913 1615482 2508481 2218092 3614159 1391405  383819
  3799023 18

In [54]:
# Request as many results as there are database vectors.
# The recorded output ends in -1 entries: fewer than k results came back.
k = 4000000
%time D, I = index.search(xq, k)     # actual search
# nq = 1, so I has a single row: both slices below print that same row.
print('I[:5] : ', I[:5])                  # first (up to) 5 result rows
print('I[-5:] : ', I[-5:])                  # last (up to) 5 result rows

CPU times: user 4.63 s, sys: 235 ms, total: 4.87 s
Wall time: 312 ms
I[:5] :  [[2981741 2548378 2295913 ...      -1      -1      -1]]
I[-5:] :  [[2981741 2548378 2295913 ...      -1      -1      -1]]


In [55]:
# Shape of the id matrix returned by the last search (recorded output: one row of k ids).
print('I.shape : ', I.shape)

I.shape :  (1, 4000000)


In [56]:
# Repeat the k = 100 search with a larger nprobe; the recorded wall time goes up.
k = 100
index.nprobe = 10              # default nprobe is 1; probing more makes the search slower
%time D, I = index.search(xq, k)
# nq = 1, so I has a single row: both slices below print that same row.
print('I[:5] : ', I[:5])                  # first (up to) 5 result rows
print('I[-5:] : ', I[-5:])                  # last (up to) 5 result rows

CPU times: user 6.71 s, sys: 252 ms, total: 6.97 s
Wall time: 2.41 s
I[:5] :  [[3084379  635044 2664931 3112684 1710949  893567 1094423  372070 2866383
  1202304  209481 1398511 3844585 2981741  287530 2470413  611851  392155
  2906991    6424 2589890 3048104  519337 1704052 2548378 2116930 1004419
  1468472  868104 1682308 3918233  580660  852756 2422099 1210681 3133462
  2971871 3375968 2295913 1952157 1156852 1868846 1077897  315169 3739974
  3036041 2734053 2224222 1203557 2494437 1352655 2319937 1413803 2457798
  3149634 3024316 1316542 1615482 1100538 3463449 2627077 1513193 2508481
  3496922 3515392  488576  973571 2608497  972546 3084535 1205286  223328
   847236  973642  839132 1544645  926482 3421090 3681122 1937255 2823441
  3459752 3229873 3659407 3269189 1290480 1233174 3245192  906425 1216803
  2200457 2676142 2218092 3614159 1617936 2651628  876088 3199565 1598095
  2422860]]
I[-5:] :  [[3084379  635044 2664931 3112684 1710949  893567 1094423  372070 2866383
  1202304  2

# 4096d - 4M dataset - IndexIVFPQ - test on CPU machine

In [57]:
# Drop the references to the previous index and quantizer before the IVFPQ run.
index = quantizer = None

In [58]:
# Parameters passed to the IndexIVFPQ constructor in the next cell.
nlist = 100
m = 8    # number of subquantizers

# Flat L2 index that is handed to the IVFPQ index as its quantizer.
%time quantizer = faiss.IndexFlatL2(d)  # the other index

CPU times: user 42 µs, sys: 9 µs, total: 51 µs
Wall time: 50.8 µs


In [59]:
# Build the IVFPQ index; with d = 4096 and m = 8 each sub-vector covers d / m = 512 dimensions.
%time index = faiss.IndexIVFPQ(quantizer, d, nlist, m, 8)  # 8 specifies that each sub-vector is encoded as 8 bits

CPU times: user 1.43 ms, sys: 89 µs, total: 1.51 ms
Wall time: 672 µs


In [60]:
# Time the training of the IVFPQ index on the full database array.
%time index.train(xb)

CPU times: user 3min 15s, sys: 9.71 s, total: 3min 24s
Wall time: 51.7 s


In [61]:
# Time the insertion of all database vectors into the trained IVFPQ index.
%time index.add(xb)

CPU times: user 15min 45s, sys: 1min 14s, total: 16min 59s
Wall time: 43.3 s


In [62]:
# Timed search with nprobe left unchanged since the index was created.
k = 100
%time D, I = index.search(xq, k)     # actual search
# nq = 1, so I has a single row: both slices below print that same row.
print('I[:5] : ', I[:5])                  # first (up to) 5 result rows
print('I[-5:] : ', I[-5:])                  # last (up to) 5 result rows

CPU times: user 34 ms, sys: 5.01 ms, total: 39 ms
Wall time: 1.63 ms
I[:5] :  [[ 127409 2342266 1037896 1521163 1393329 2439532 2299464 1534446 3372148
  1970457 3978389  778330 1504218  835479 3486445  987769 2475158  893435
  2697521 3359094 3776263 2796849 2581001 2788703 2967623 1228360 1905467
   291850 2847726  368892 1988905 3472724 1638806 1245538 2913663 3790735
  2422512 3510363 1697489 3606340 3857401 3509099 3834708  742257  184484
  1297577 1567648 1826241 3065655 2250837 1544324 3558343 2780172  874134
  3701899  395462 3215684 1672258 3456573 2421592 3253209  406453 1027972
  2506054 3681487 3124800 3129955 1126289 2403071 2540322 2820147 3152535
  3316044 1266132 1412230 3184104  340067 3590882 2266437 3716270  146135
   725439  317242 1751432 1502555 2980895 2908823  969731  382774 3439147
  3040912  132119  356195 3322525 1745964 2531157  273240 1843714 3361213
  2204547]]
I[-5:] :  [[ 127409 2342266 1037896 1521163 1393329 2439532 2299464 1534446 3372148
  1970457 39

In [63]:
# Request as many results as there are database vectors.
# The recorded output ends in -1 entries: fewer than k results came back.
k = 4000000
%time D, I = index.search(xq, k)     # actual search
# nq = 1, so I has a single row: both slices below print that same row.
print('I[:5] : ', I[:5])                  # first (up to) 5 result rows
print('I[-5:] : ', I[-5:])                  # last (up to) 5 result rows

CPU times: user 4.09 s, sys: 177 ms, total: 4.27 s
Wall time: 178 ms
I[:5] :  [[ 127409 2342266 1037896 ...      -1      -1      -1]]
I[-5:] :  [[ 127409 2342266 1037896 ...      -1      -1      -1]]


In [64]:
# Shape of the id matrix returned by the last search (recorded output: one row of k ids).
print('I.shape : ', I.shape)

I.shape :  (1, 4000000)


In [65]:
# Repeat the k = 100 search with a larger nprobe; the recorded wall time goes up.
k = 100
index.nprobe = 10              # default nprobe is 1; probing more makes the search slower
%time D, I = index.search(xq, k)
# nq = 1, so I has a single row: both slices below print that same row.
print('I[:5] : ', I[:5])                  # first (up to) 5 result rows
print('I[-5:] : ', I[-5:])                  # last (up to) 5 result rows

CPU times: user 201 ms, sys: 10.5 ms, total: 211 ms
Wall time: 8.81 ms
I[:5] :  [[ 829986  606382 1238934  127409 3887834 3281491 3338736 3247951 3881079
  2738662 3656462 2342266 1395944 1753345 3138655 1037896 1156135 3439174
  2881988 2468678 1769993 3262899   53189 2947183 3713404 1521163 1393329
   253935 1694870 3184420 3335537 2214994  221952  362559 2128623  523339
  1105613  731038 2372959 3427204  950106 1057591 3781549 2439532 2299464
  1060821 1534446 1744762 1796497 2084371 3865922  735461 3368827 3122146
  1070554 3682704  496433 1198678 1801430 2624039  645067 3372148 1095576
   971787 1999256  770126 3390214 3329901 3142749  685005  199908  687829
   520091  377197 1248020 2520758 2193186 2824952 2625454 2056869  155162
  1659685 1970457 1879888 1491207 2124764  634829 3375757 1989306 2480441
  2369585 2610616 1597392  410207 3246157 3196901 3978389  204538 2199991
  2274433]]
I[-5:] :  [[ 829986  606382 1238934  127409 3887834 3281491 3338736 3247951 3881079
  2738662 

# Memory: this section consumes 63 GB in total for the 4M-vector dataset

In [None]:
# Release memory: the following cell clears the index, quantizer and data arrays.

In [66]:
# Clear every large object created by the benchmark: the index, its quantizer and both arrays.
index = quantizer = xb = xq = None