In [1]:
import numpy as np
from tqdm import tqdm
from faissDB import faissDB
import pickle
import faiss
from sklearn.metrics import accuracy_score

In [2]:
"""
type_name_map = {
    "0": "IndexFlatL2",
    "1": "IndexFlatIP",
    "2": "IndexHNSWFlat",
    "3": "IndexIVFFlat",
    "4": "IndexLSH",
    "5": "IndexScalarQuantizer",
    "6": "IndexPQ",
    "7": "IndexIVFScalarQuantizer",
    "8": "IndexIVFPQ",
    "9": "IndexIVFPQR"
}

"""

'\ntype_name_map = {\n    "0": "IndexFlatL2",\n    "1": "IndexFlatIP",\n    "2": "IndexHNSWFlat",\n    "3": "IndexIVFFlat",\n    "4": "IndexLSH",\n    "5": "IndexScalarQuantizer",\n    "6": "IndexPQ",\n    "7": "IndexIVFScalarQuantizer",\n    "8": "IndexIVFPQ",\n    "9": "IndexIVFPQR"\n}\n\n'

# Cosine metric

## on same vectors

In [3]:
# Dataset name and index settings used by the cells of this section.
data_name = "same_data_test"
metric = "cosine"
dimension = 2000  # forwarded as d= to every faissDB constructor below

# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only point this at trusted, locally generated test data.
with open(f'../test_data/{data_name}.pkl', 'rb') as f:
    (
        cosine_reals,
        euclidean_reals,
        train_data,
        test_data,
        train_ids,
    ) = pickle.load(f)

### type 0 IndexFlatL2

In [4]:
%%time

# Type "0" (IndexFlatL2): construct the index for the current metric and
# dimension, then add train_data together with train_ids.
index_type = "0"
db = faissDB(
    index_type,
    metric=metric,
    d=dimension,
)
db.add(train_data, train_ids)

CPU times: user 1.69 s, sys: 206 ms, total: 1.9 s
Wall time: 1.87 s


In [5]:
# Save the index, then load it back by db.index_name. The following cells
# query db2 (the loaded copy), so the save/load round trip is exercised too.
db.save()
db2 = faissDB.load(db.index_name)

In [6]:
%%time
# Query the reloaded index (second argument 1 — presumably k=1) and render
# the first entry of every row in element [1] of the result as a string.
preds = [
    f"{row[0]}"
    for row in db2.query(test_data, 1)[1]
]

CPU times: user 8.01 s, sys: 224 ms, total: 8.23 s
Wall time: 7.51 s


In [7]:
# Share of predictions that exactly match the cosine reference labels.
accuracy_score(y_true=cosine_reals, y_pred=preds)

1.0

### type 1 IndexFlatIP

In [20]:
%%time

# Type "1" (IndexFlatIP): construct the index for the current metric and
# dimension, then add train_data together with train_ids.
index_type = "1"
db = faissDB(
    index_type,
    metric=metric,
    d=dimension,
)
db.add(train_data, train_ids)

CPU times: user 1.73 s, sys: 197 ms, total: 1.92 s
Wall time: 1.9 s


In [21]:
# Save the index, then load it back by db.index_name. The following cells
# query db2 (the loaded copy), so the save/load round trip is exercised too.
db.save()
db2 = faissDB.load(db.index_name)

In [22]:
%%time
# Query the reloaded index (second argument 1 — presumably k=1) and render
# the first entry of every row in element [1] of the result as a string.
preds = [
    f"{row[0]}"
    for row in db2.query(test_data, 1)[1]
]

CPU times: user 8.29 s, sys: 165 ms, total: 8.45 s
Wall time: 7.7 s


In [23]:
# Share of predictions that exactly match the cosine reference labels.
accuracy_score(y_true=cosine_reals, y_pred=preds)

1.0

### type 2 IndexHNSWFlat

In [33]:
%%time

# Type "2" (IndexHNSWFlat) built with M=100.
# NOTE(review): the recorded accuracy for this setup is only ~0.53; check
# whether faissDB exposes efConstruction/efSearch before drawing conclusions
# about HNSW — TODO confirm.
index_type = "2"
db = faissDB(
    index_type,
    metric=metric,
    d=dimension,
    M=100,
)
db.add(train_data, train_ids)

CPU times: user 3min 33s, sys: 213 ms, total: 3min 33s
Wall time: 59.6 s


In [34]:
# Save the index, then load it back by db.index_name. The following cells
# query db2 (the loaded copy), so the save/load round trip is exercised too.
db.save()
db2 = faissDB.load(db.index_name)

In [35]:
%%time
# Query the reloaded index (second argument 1 — presumably k=1) and render
# the first entry of every row in element [1] of the result as a string.
preds = [
    f"{row[0]}"
    for row in db2.query(test_data, 1)[1]
]

CPU times: user 34.2 s, sys: 200 ms, total: 34.4 s
Wall time: 10 s


In [36]:
# Share of predictions that exactly match the cosine reference labels.
accuracy_score(y_true=cosine_reals, y_pred=preds)

0.5346

### type 3 IndexIVFFlat

In [41]:
%%time

# Type "3" (IndexIVFFlat) with nlists=10 over a flat L2 coarse quantizer.
# NOTE(review): the quantizer is L2 while metric is "cosine" — confirm this
# pairing is what faissDB expects.
index_type = "3"
quantizer = faiss.IndexFlatL2(dimension)
db = faissDB(
    index_type,
    metric=metric,
    d=dimension,
    nlists=10,
    quantizer=quantizer,
)
db.add(train_data, train_ids)

CPU times: user 2.34 s, sys: 292 ms, total: 2.64 s
Wall time: 2.25 s


In [42]:
# Save the index, then load it back by db.index_name. The following cells
# query db2 (the loaded copy), so the save/load round trip is exercised too.
db.save()
db2 = faissDB.load(db.index_name)

In [43]:
%%time
# Query the reloaded index (second argument 1 — presumably k=1) and render
# the first entry of every row in element [1] of the result as a string.
preds = [
    f"{row[0]}"
    for row in db2.query(test_data, 1)[1]
]

CPU times: user 1min 31s, sys: 256 ms, total: 1min 31s
Wall time: 24.7 s


In [44]:
# Share of predictions that exactly match the cosine reference labels.
accuracy_score(y_true=cosine_reals, y_pred=preds)

1.0

### type 4 IndexLSH

In [62]:
%%time

# Type "4" (IndexLSH) built with nbits=10.
# NOTE(review): recorded accuracy is 0.0005; if nbits is the hash length,
# 10 bits allow at most 1024 distinct codes — presumably far too few here.
index_type = "4"
db = faissDB(
    index_type,
    metric=metric,
    d=dimension,
    nbits=10,
)
db.add(train_data, train_ids)

CPU times: user 1.8 s, sys: 116 ms, total: 1.92 s
Wall time: 1.87 s


In [63]:
# Save the index, then load it back by db.index_name. The following cells
# query db2 (the loaded copy), so the save/load round trip is exercised too.
db.save()
db2 = faissDB.load(db.index_name)

In [64]:
%%time
# Query the reloaded index (second argument 1 — presumably k=1) and render
# the first entry of every row in element [1] of the result as a string.
preds = [
    f"{row[0]}"
    for row in db2.query(test_data, 1)[1]
]

CPU times: user 2.88 s, sys: 176 ms, total: 3.05 s
Wall time: 2.14 s


In [65]:
# Share of predictions that exactly match the cosine reference labels.
accuracy_score(y_true=cosine_reals, y_pred=preds)

0.0005

### type 5 IndexScalarQuantizer

In [67]:
%%time

# Type "5" (IndexScalarQuantizer) using faiss's QT_8bit quantizer type.
index_type = "5"
qtype = faiss.ScalarQuantizer.QT_8bit
db = faissDB(
    index_type,
    metric=metric,
    d=dimension,
    qtype=qtype,
)
db.add(train_data, train_ids)

CPU times: user 1.94 s, sys: 232 ms, total: 2.17 s
Wall time: 2.04 s


In [68]:
# Save the index, then load it back by db.index_name. The following cells
# query db2 (the loaded copy), so the save/load round trip is exercised too.
db.save()
db2 = faissDB.load(db.index_name)

In [69]:
%%time
# Query the reloaded index (second argument 1 — presumably k=1) and render
# the first entry of every row in element [1] of the result as a string.
preds = [
    f"{row[0]}"
    for row in db2.query(test_data, 1)[1]
]

CPU times: user 2min 26s, sys: 153 ms, total: 2min 27s
Wall time: 38.9 s


In [70]:
# Share of predictions that exactly match the cosine reference labels.
accuracy_score(y_true=cosine_reals, y_pred=preds)

0.7376

### type 6 IndexPQ

In [75]:
%%time

# Type "6" (IndexPQ) built with M=5 and nbits=10.
# NOTE(review): recorded accuracy is 0.0 — presumably 5 sub-quantizers over
# 2000 dimensions is too coarse; verify the parameters and the id mapping.
index_type = "6"
db = faissDB(
    index_type,
    metric=metric,
    d=dimension,
    M=5,
    nbits=10,
)
db.add(train_data, train_ids)



CPU times: user 28.3 s, sys: 287 ms, total: 28.6 s
Wall time: 19 s


In [76]:
# Save the index, then load it back by db.index_name. The following cells
# query db2 (the loaded copy), so the save/load round trip is exercised too.
db.save()
db2 = faissDB.load(db.index_name)

In [77]:
%%time
# Query the reloaded index (second argument 1 — presumably k=1) and render
# the first entry of every row in element [1] of the result as a string.
preds = [
    f"{row[0]}"
    for row in db2.query(test_data, 1)[1]
]

CPU times: user 8.08 s, sys: 192 ms, total: 8.27 s
Wall time: 3.84 s


In [78]:
# Share of predictions that exactly match the cosine reference labels.
accuracy_score(y_true=cosine_reals, y_pred=preds)

0.0

### type 7 IndexIVFScalarQuantizer

In [79]:
%%time

# Type "7" (IndexIVFScalarQuantizer): QT_8bit quantizer type, nlists=3,
# flat L2 coarse quantizer.
# NOTE(review): the quantizer is L2 while metric is "cosine" — confirm this
# pairing is what faissDB expects.
index_type = "7"
quantizer = faiss.IndexFlatL2(dimension)
qtype = faiss.ScalarQuantizer.QT_8bit
db = faissDB(
    index_type,
    metric=metric,
    d=dimension,
    quantizer=quantizer,
    qtype=qtype,
    nlists=3,
)
db.add(train_data, train_ids)

CPU times: user 2.32 s, sys: 324 ms, total: 2.64 s
Wall time: 2.31 s


In [80]:
# Save the index, then load it back by db.index_name. The following cells
# query db2 (the loaded copy), so the save/load round trip is exercised too.
db.save()
db2 = faissDB.load(db.index_name)

In [81]:
%%time
# Query the reloaded index (second argument 1 — presumably k=1) and render
# the first entry of every row in element [1] of the result as a string.
preds = [
    f"{row[0]}"
    for row in db2.query(test_data, 1)[1]
]

CPU times: user 2min 26s, sys: 237 ms, total: 2min 26s
Wall time: 39.9 s


In [82]:
# Share of predictions that exactly match the cosine reference labels.
accuracy_score(y_true=cosine_reals, y_pred=preds)

0.7458

### type 8 IndexIVFPQ

In [85]:
%%time

# Type "8" (IndexIVFPQ): M=1, nlists=10, nbits=10, flat L2 coarse quantizer.
# NOTE(review): recorded accuracy is 0.0 — M=1 presumably encodes each whole
# vector with a single code; verify these parameters.
index_type = "8"
quantizer = faiss.IndexFlatL2(dimension)
db = faissDB(
    index_type,
    metric=metric,
    d=dimension,
    quantizer=quantizer,
    M=1,
    nlists=10,
    nbits=10,
)
db.add(train_data, train_ids)



CPU times: user 20.7 s, sys: 432 ms, total: 21.1 s
Wall time: 18 s


In [86]:
# Save the index, then load it back by db.index_name. The following cells
# query db2 (the loaded copy), so the save/load round trip is exercised too.
db.save()
db2 = faissDB.load(db.index_name)

In [87]:
%%time
# Query the reloaded index (second argument 1 — presumably k=1) and render
# the first entry of every row in element [1] of the result as a string.
preds = [
    f"{row[0]}"
    for row in db2.query(test_data, 1)[1]
]

CPU times: user 9.25 s, sys: 184 ms, total: 9.44 s
Wall time: 3.82 s


In [88]:
# Share of predictions that exactly match the cosine reference labels.
accuracy_score(y_true=cosine_reals, y_pred=preds)

0.0

### type 9 IndexIVFPQR

In [89]:
%%time

# Type "9" (IndexIVFPQR): M=1, nlists=10, nbits=10 over a flat L2 coarse
# quantizer, plus refinement settings M_refine=8 and nbits_refine=8.
# NOTE(review): recorded accuracy is 0.0 — verify these parameters.
index_type = "9"
quantizer = faiss.IndexFlatL2(dimension)
db = faissDB(
    index_type,
    metric=metric,
    d=dimension,
    quantizer=quantizer,
    M=1,
    nlists=10,
    nbits=10,
    M_refine=8,
    nbits_refine=8,
)
db.add(train_data, train_ids)



CPU times: user 39 s, sys: 401 ms, total: 39.4 s
Wall time: 24.2 s


In [90]:
# Save the index, then load it back by db.index_name. The following cells
# query db2 (the loaded copy), so the save/load round trip is exercised too.
db.save()
db2 = faissDB.load(db.index_name)

In [91]:
%%time
# Query the reloaded index (second argument 1 — presumably k=1) and render
# the first entry of every row in element [1] of the result as a string.
preds = [
    f"{row[0]}"
    for row in db2.query(test_data, 1)[1]
]

CPU times: user 9.42 s, sys: 164 ms, total: 9.58 s
Wall time: 3.79 s


In [92]:
# Share of predictions that exactly match the cosine reference labels.
accuracy_score(y_true=cosine_reals, y_pred=preds)

0.0

## on new vectors

In [2]:
# Dataset name and index settings used by the cells of this section.
data_name = "diff_data_test"
metric = "cosine"
dimension = 2000  # forwarded as d= to every faissDB constructor below

# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only point this at trusted, locally generated test data.
with open(f'../test_data/{data_name}.pkl', 'rb') as f:
    (
        cosine_reals,
        euclidean_reals,
        train_data,
        test_data,
        train_ids,
    ) = pickle.load(f)

### type 0 IndexFlatL2

In [3]:
%%time

# Type "0" (IndexFlatL2): construct the index for the current metric and
# dimension, then add train_data together with train_ids.
index_type = "0"
db = faissDB(
    index_type,
    metric=metric,
    d=dimension,
)
db.add(train_data, train_ids)

CPU times: user 2.12 s, sys: 170 ms, total: 2.29 s
Wall time: 2.26 s


In [4]:
# Save the index, then load it back by db.index_name. The following cells
# query db2 (the loaded copy), so the save/load round trip is exercised too.
db.save()
db2 = faissDB.load(db.index_name)

In [5]:
%%time
# Query the reloaded index (second argument 1 — presumably k=1) and render
# the first entry of every row in element [1] of the result as a string.
preds = [
    f"{row[0]}"
    for row in db2.query(test_data, 1)[1]
]

CPU times: user 8.51 s, sys: 194 ms, total: 8.7 s
Wall time: 7.96 s


In [6]:
# Share of predictions that exactly match the cosine reference labels.
accuracy_score(y_true=cosine_reals, y_pred=preds)

1.0

### type 1 IndexFlatIP

In [7]:
%%time

# Type "1" (IndexFlatIP): construct the index for the current metric and
# dimension, then add train_data together with train_ids.
index_type = "1"
db = faissDB(
    index_type,
    metric=metric,
    d=dimension,
)
db.add(train_data, train_ids)

CPU times: user 2.19 s, sys: 118 ms, total: 2.31 s
Wall time: 2.27 s


In [8]:
# Save the index, then load it back by db.index_name. The following cells
# query db2 (the loaded copy), so the save/load round trip is exercised too.
db.save()
db2 = faissDB.load(db.index_name)

In [9]:
%%time
# Query the reloaded index (second argument 1 — presumably k=1) and render
# the first entry of every row in element [1] of the result as a string.
preds = [
    f"{row[0]}"
    for row in db2.query(test_data, 1)[1]
]

CPU times: user 8.44 s, sys: 143 ms, total: 8.59 s
Wall time: 7.89 s


In [10]:
# Share of predictions that exactly match the cosine reference labels.
accuracy_score(y_true=cosine_reals, y_pred=preds)

1.0

### type 2 IndexHNSWFlat

In [11]:
%%time

# Type "2" (IndexHNSWFlat) built with M=100.
# NOTE(review): the recorded accuracy for this setup is only ~0.54; check
# whether faissDB exposes efConstruction/efSearch before drawing conclusions
# about HNSW — TODO confirm.
index_type = "2"
db = faissDB(
    index_type,
    metric=metric,
    d=dimension,
    M=100,
)
db.add(train_data, train_ids)

CPU times: user 3min 31s, sys: 373 ms, total: 3min 32s
Wall time: 59.9 s


In [12]:
# Save the index, then load it back by db.index_name. The following cells
# query db2 (the loaded copy), so the save/load round trip is exercised too.
db.save()
db2 = faissDB.load(db.index_name)

In [13]:
%%time
# Query the reloaded index (second argument 1 — presumably k=1) and render
# the first entry of every row in element [1] of the result as a string.
preds = [
    f"{row[0]}"
    for row in db2.query(test_data, 1)[1]
]

CPU times: user 31 s, sys: 164 ms, total: 31.2 s
Wall time: 9.92 s


In [14]:
# Share of predictions that exactly match the cosine reference labels.
accuracy_score(y_true=cosine_reals, y_pred=preds)

0.5412

### type 3 IndexIVFFlat

In [15]:
%%time

# Type "3" (IndexIVFFlat) with nlists=10 over a flat L2 coarse quantizer.
# NOTE(review): the quantizer is L2 while metric is "cosine" — confirm this
# pairing is what faissDB expects.
index_type = "3"
quantizer = faiss.IndexFlatL2(dimension)
db = faissDB(
    index_type,
    metric=metric,
    d=dimension,
    nlists=10,
    quantizer=quantizer,
)
db.add(train_data, train_ids)

CPU times: user 2.69 s, sys: 268 ms, total: 2.96 s
Wall time: 2.59 s


In [16]:
# Save the index, then load it back by db.index_name. The following cells
# query db2 (the loaded copy), so the save/load round trip is exercised too.
db.save()
db2 = faissDB.load(db.index_name)

In [17]:
%%time
# Query the reloaded index (second argument 1 — presumably k=1) and render
# the first entry of every row in element [1] of the result as a string.
preds = [
    f"{row[0]}"
    for row in db2.query(test_data, 1)[1]
]

CPU times: user 1min 33s, sys: 292 ms, total: 1min 33s
Wall time: 25.2 s


In [18]:
# Share of predictions that exactly match the cosine reference labels.
accuracy_score(y_true=cosine_reals, y_pred=preds)

1.0

### type 4 IndexLSH

In [19]:
%%time

# Type "4" (IndexLSH) built with nbits=10.
# NOTE(review): recorded accuracy is 0.0005; if nbits is the hash length,
# 10 bits allow at most 1024 distinct codes — presumably far too few here.
index_type = "4"
db = faissDB(
    index_type,
    metric=metric,
    d=dimension,
    nbits=10,
)
db.add(train_data, train_ids)

CPU times: user 2.19 s, sys: 144 ms, total: 2.34 s
Wall time: 2.29 s


In [20]:
# Save the index, then load it back by db.index_name. The following cells
# query db2 (the loaded copy), so the save/load round trip is exercised too.
db.save()
db2 = faissDB.load(db.index_name)

In [21]:
%%time
# Query the reloaded index (second argument 1 — presumably k=1) and render
# the first entry of every row in element [1] of the result as a string.
preds = [
    f"{row[0]}"
    for row in db2.query(test_data, 1)[1]
]

CPU times: user 3.35 s, sys: 116 ms, total: 3.46 s
Wall time: 2.55 s


In [22]:
# Share of predictions that exactly match the cosine reference labels.
accuracy_score(y_true=cosine_reals, y_pred=preds)

0.0005

### type 5 IndexScalarQuantizer

In [23]:
%%time

# Type "5" (IndexScalarQuantizer) using faiss's QT_8bit quantizer type.
index_type = "5"
qtype = faiss.ScalarQuantizer.QT_8bit
db = faissDB(
    index_type,
    metric=metric,
    d=dimension,
    qtype=qtype,
)
db.add(train_data, train_ids)

CPU times: user 2.29 s, sys: 124 ms, total: 2.42 s
Wall time: 2.29 s


In [24]:
# Save the index, then load it back by db.index_name. The following cells
# query db2 (the loaded copy), so the save/load round trip is exercised too.
db.save()
db2 = faissDB.load(db.index_name)

In [25]:
%%time
# Query the reloaded index (second argument 1 — presumably k=1) and render
# the first entry of every row in element [1] of the result as a string.
preds = [
    f"{row[0]}"
    for row in db2.query(test_data, 1)[1]
]

CPU times: user 2min 26s, sys: 127 ms, total: 2min 26s
Wall time: 39.5 s


In [26]:
# Share of predictions that exactly match the cosine reference labels.
accuracy_score(y_true=cosine_reals, y_pred=preds)

0.7376

### type 6 IndexPQ

In [27]:
%%time

# Type "6" (IndexPQ) built with M=5 and nbits=10.
# NOTE(review): recorded accuracy is 0.0 — presumably 5 sub-quantizers over
# 2000 dimensions is too coarse; verify the parameters and the id mapping.
index_type = "6"
db = faissDB(
    index_type,
    metric=metric,
    d=dimension,
    M=5,
    nbits=10,
)
db.add(train_data, train_ids)



CPU times: user 29.4 s, sys: 336 ms, total: 29.7 s
Wall time: 19.5 s


In [28]:
# Save the index, then load it back by db.index_name. The following cells
# query db2 (the loaded copy), so the save/load round trip is exercised too.
db.save()
db2 = faissDB.load(db.index_name)

In [29]:
%%time
# Query the reloaded index (second argument 1 — presumably k=1) and render
# the first entry of every row in element [1] of the result as a string.
preds = [
    f"{row[0]}"
    for row in db2.query(test_data, 1)[1]
]

CPU times: user 8.41 s, sys: 256 ms, total: 8.67 s
Wall time: 4.24 s


In [30]:
# Share of predictions that exactly match the cosine reference labels.
accuracy_score(y_true=cosine_reals, y_pred=preds)

0.0

### type 7 IndexIVFScalarQuantizer

In [31]:
%%time

# Type "7" (IndexIVFScalarQuantizer): QT_8bit quantizer type, nlists=3,
# flat L2 coarse quantizer.
# NOTE(review): the quantizer is L2 while metric is "cosine" — confirm this
# pairing is what faissDB expects.
index_type = "7"
quantizer = faiss.IndexFlatL2(dimension)
qtype = faiss.ScalarQuantizer.QT_8bit
db = faissDB(
    index_type,
    metric=metric,
    d=dimension,
    quantizer=quantizer,
    qtype=qtype,
    nlists=3,
)
db.add(train_data, train_ids)

CPU times: user 2.69 s, sys: 172 ms, total: 2.87 s
Wall time: 2.54 s


In [32]:
# Save the index, then load it back by db.index_name. The following cells
# query db2 (the loaded copy), so the save/load round trip is exercised too.
db.save()
db2 = faissDB.load(db.index_name)

In [33]:
%%time
# Query the reloaded index (second argument 1 — presumably k=1) and render
# the first entry of every row in element [1] of the result as a string.
preds = [
    f"{row[0]}"
    for row in db2.query(test_data, 1)[1]
]

CPU times: user 2min 26s, sys: 255 ms, total: 2min 26s
Wall time: 39.7 s


In [34]:
# Share of predictions that exactly match the cosine reference labels.
accuracy_score(y_true=cosine_reals, y_pred=preds)

0.7458

### type 8 IndexIVFPQ

In [35]:
%%time

# Type "8" (IndexIVFPQ): M=1, nlists=10, nbits=10, flat L2 coarse quantizer.
# NOTE(review): recorded accuracy is 0.0 — M=1 presumably encodes each whole
# vector with a single code; verify these parameters.
index_type = "8"
quantizer = faiss.IndexFlatL2(dimension)
db = faissDB(
    index_type,
    metric=metric,
    d=dimension,
    quantizer=quantizer,
    M=1,
    nlists=10,
    nbits=10,
)
db.add(train_data, train_ids)



CPU times: user 21.1 s, sys: 240 ms, total: 21.4 s
Wall time: 18.2 s


In [36]:
# Save the index, then load it back by db.index_name. The following cells
# query db2 (the loaded copy), so the save/load round trip is exercised too.
db.save()
db2 = faissDB.load(db.index_name)

In [37]:
%%time
# Query the reloaded index (second argument 1 — presumably k=1) and render
# the first entry of every row in element [1] of the result as a string.
preds = [
    f"{row[0]}"
    for row in db2.query(test_data, 1)[1]
]

CPU times: user 9.28 s, sys: 184 ms, total: 9.46 s
Wall time: 4.03 s


In [38]:
# Share of predictions that exactly match the cosine reference labels.
accuracy_score(y_true=cosine_reals, y_pred=preds)

0.0

### type 9 IndexIVFPQR

In [39]:
%%time

# Type "9" (IndexIVFPQR): M=1, nlists=10, nbits=10 over a flat L2 coarse
# quantizer, plus refinement settings M_refine=8 and nbits_refine=8.
# NOTE(review): recorded accuracy is 0.0 — verify these parameters.
index_type = "9"
quantizer = faiss.IndexFlatL2(dimension)
db = faissDB(
    index_type,
    metric=metric,
    d=dimension,
    quantizer=quantizer,
    M=1,
    nlists=10,
    nbits=10,
    M_refine=8,
    nbits_refine=8,
)
db.add(train_data, train_ids)



CPU times: user 39.4 s, sys: 379 ms, total: 39.8 s
Wall time: 24.6 s


In [40]:
# Save the index, then load it back by db.index_name. The following cells
# query db2 (the loaded copy), so the save/load round trip is exercised too.
db.save()
db2 = faissDB.load(db.index_name)

In [41]:
%%time
# Query the reloaded index (second argument 1 — presumably k=1) and render
# the first entry of every row in element [1] of the result as a string.
preds = [
    f"{row[0]}"
    for row in db2.query(test_data, 1)[1]
]

CPU times: user 9.45 s, sys: 184 ms, total: 9.64 s
Wall time: 4.07 s


In [42]:
# Share of predictions that exactly match the cosine reference labels.
accuracy_score(y_true=cosine_reals, y_pred=preds)

0.0