# Faiss
## https://github.com/facebookresearch/faiss
 - A library for efficient similarity search and clustering of dense vectors.

In [1]:
import numpy as np
import os
import pandas as pd
import urllib.request
import faiss
import time
from sentence_transformers import SentenceTransformer

In [4]:
data_path = "../data/"
urllib.request.urlretrieve("https://raw.githubusercontent.com/ukairia777/tensorflow-nlp-tutorial/main/19.%20Topic%20Modeling%20(LDA%2C%20BERT-Based)/dataset/abcnews-date-text.csv", filename=data_path+"abcnews-date-text.csv")

df = pd.read_csv(data_path+"abcnews-date-text.csv")
print(len(df))
print(df.head(5))

1082168
   publish_date                                      headline_text
0      20030219  aba decides against community broadcasting lic...
1      20030219     act fire witnesses must be aware of defamation
2      20030219     a g calls for infrastructure protection summit
3      20030219           air nz staff in aust strike for pay rise
4      20030219      air nz strike to affect australian travellers


In [13]:
data = df['headline_text'].values.tolist()[:10000] # use only 10,000 data

# Tokenizing

In [14]:
model = SentenceTransformer('distilbert-base-nli-mean-tokens')
encoded = model.encode(data)
print(encoded.shape)
print(len(encoded))

(10000, 768)
10000


# Faiss Indexing

In [26]:
model_path = '../model/'
index = faiss.IndexIDMap(faiss.IndexFlatIP(encoded.shape[-1])) # encoded.shape[-1] = 768
index.add_with_ids(encoded, np.array(range(0, len(encoded))))
faiss.write_index(index, model_path+"abc_news_index_faiss")

# Querying

In [23]:
def search(query):
    t = time.time()
    query_vector = model.encode([query])
    k = 5
    top_k = index.search(query_vector, k)
    print('total time: {}'.format(time.time() - t))
    return [data[_id] for _id in top_k[1].tolist()[0]]

In [24]:
query = str(input())
results = search(query)

print('results :')
for result in results:
    print('\t', result)


total time: 0.04181694984436035
results :
	 portland centre moves closer to underwater display
	 scud powers through in miami
	 moya moves into miami quarters
	 boy drowns on hinterland property
	 tourist drowns on reef
