In [18]:
import pandas as pd
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

In [2]:
# Read the sample corpus into a DataFrame.
df = pd.read_csv("sample_text.csv")

# Print the frame's dimensions followed by its full contents, one per line.
print(df.shape, df, sep="\n")

(8, 2)
                                                text category
0      Meditation and yoga can improve mental health   Health
1  Fruits, whole grains and vegetables helps cont...   Health
2  These are the latest fashion trends for this week  Fashion
3  Vibrant color jeans for male are becoming a trend  Fashion
4                 The concert starts at 7 PM tonight    Event
5  Navaratri dandiya program at Expo center in Mu...    Event
6  Exciting vacation destinations for your next trip   Travel
7  Maldives and Srilanka are gaining popularity i...   Travel


# Step 1: Create source embeddings for the text column -> Encoder

In [3]:
# Load the "all-mpnet-base-v2" sentence-embedding model. The same encoder
# object is reused later to embed the search query.
encoder = SentenceTransformer("all-mpnet-base-v2")

In [4]:
# Embed every entry of the "text" column.
# The column is handed over as a plain Python list instead of a pandas Series:
# a list is always indexed by position, so the encoder's output order cannot be
# affected by the DataFrame's index labels (e.g. after filtering or shuffling).
# Row i of `vectors` therefore corresponds to the i-th row (by position) of `df`.
vectors = encoder.encode(df["text"].tolist())

In [7]:
# Inspect the embedding matrix: one row per input text, one column per
# embedding dimension.
vectors.shape

(8, 768)

In [8]:
# Embedding width (size of the second axis of `vectors`); used below to size
# the FAISS index.
dim = vectors.shape[1]

print(dim)

768


# Step 2: Build a FAISS index for vectors -> Vector database

In [10]:
# Create a new flat FAISS index for `dim`-dimensional vectors; per its class
# name it compares vectors by L2 distance. Nothing has been added to it yet.
index = faiss.IndexFlatL2(dim)

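# Step 3: Add the source vectors to the index
We are using L2 (Euclidean) distance to measure similarity. No explicit normalisation is applied here; the vectors are added to the index exactly as the encoder returned them.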

In [12]:
# Insert all source embeddings into the index. The ids returned by the later
# search are used as row numbers into `df`, so `vectors` must be in the same
# order as the DataFrame rows.
index.add(vectors)

In [13]:
# Display the index object's repr as a quick check of its concrete type.
index

<faiss.swigfaiss_avx2.IndexFlatL2; proxy of <Swig Object of type 'faiss::IndexFlatL2 *' at 0x12b07aee0> >

# Step 4: Encode the search query using the same encoder and reshape the output vector to (1, dim)

In [42]:
# Free-text query to match against the indexed sentences.
search_query = "I want to go travelling"

# Embed the query with the same encoder that produced `vectors`, so the query
# and the indexed texts are represented by the same model.
search_vec = encoder.encode(search_query)

In [43]:
# Encoding a single string yields a 1-D vector (no leading batch axis).
search_vec.shape

(768,)

In [44]:
# Lift the 1-D query embedding into a single-row 2-D array (1, dim), which is
# the form passed to `index.search` below.
search_vec_np = np.reshape(np.array(search_vec), (1, -1))

search_vec_np.shape

(1, 768)

# Step 5: Search for similar vector(s) in the FAISS index

In [45]:
# Retrieve the k=3 stored vectors closest to the query.
# `distances` holds one distance per hit and `I` the id of each hit; both have
# one row per query vector (a single row here).
# NOTE(review): FAISS documents IndexFlatL2 distances as *squared* L2 — confirm
# before interpreting the raw values.
distances, I = index.search(search_vec_np, k=3)

In [46]:
# Distances of the three hits to the query (smaller means closer).
distances

array([[1.0216081, 1.2387117, 1.487431 ]], dtype=float32)

In [47]:
# Ids of the three hits; the next cell uses them as row numbers into `df`.
I

array([[6, 7, 5]])

In [48]:
# Print the matched text for each hit of the (single) query.
# FAISS ids are positions in insertion order, not DataFrame index labels, so
# rows are looked up by position (`iloc`) rather than by label (`loc`); this
# stays correct even if `df` does not carry the default 0..n-1 index.
for row_index in I[0].tolist():
    # FAISS pads the result with -1 when fewer than k neighbours exist; skip
    # those slots instead of letting -1 resolve to the last row.
    if row_index < 0:
        continue
    print("Question: ", search_query)
    print("Suggested answer: ", df["text"].iloc[row_index])

Question:  I want to go travelling
Suggested answer:  Exciting vacation destinations for your next trip
Question:  I want to go travelling
Suggested answer:  Maldives and Srilanka are gaining popularity in terms of low budget vacation places
Question:  I want to go travelling
Suggested answer:  Navaratri dandiya program at Expo center in Mumbai this october
