In [1]:
import pandas as pd

In [2]:
# Toy labelled corpus, kept as two parallel lists: sentence i belongs to category i.
sentences = [
    'Mahatma Gandhi was born in Porbandar, Gujarat',
    'Natalie ate a deep-dish pizza today',
    'The AWS Serverless Architecture makes use of the AWS API Gateway and the AWS Lambda',
    'Address',
]
categories = ['location', 'random', 'networking', 'location']

# Row-wise view of the corpus: one [sentence, category] pair per entry.
data = [[sentence, category] for sentence, category in zip(sentences, categories)]

# One row per sentence, with the default 0..n-1 index.
df = pd.DataFrame(data, columns=['text', 'category'])

In [3]:
# Preview the first rows of the labelled corpus (bare last expression → rich display).
df.head()

Unnamed: 0,text,category
0,"Mahatma Gandhi was born in Porbandar, Gujarat",location
1,Natalie ate a deep-dish pizza today,random
2,The AWS Serverless Architecture makes use of t...,networking
3,Address,location


In [4]:
from sentence_transformers import SentenceTransformer

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Sentences to embed, still as a Series so `text` keeps its original type.
text = df['text']

# Load the pretrained sentence-embedding model by name.
encoder = SentenceTransformer("paraphrase-mpnet-base-v2")

# Hand encode() a plain list of strings rather than the Series itself:
# encode() is documented for str / list[str], and a Series would be indexed
# by label, which only matches row position while df keeps its default
# 0..n-1 index (e.g. it would misbehave after filtering or re-indexing).
vectors = encoder.encode(text.tolist())

In [6]:
import faiss

In [7]:
# Scale every embedding to unit length. NOTE: this mutates `vectors` in place.
# With unit vectors, ranking by L2 distance matches ranking by cosine similarity.
faiss.normalize_L2(vectors)

# Flat (exhaustive) L2 index sized to the embedding width, filled with all rows.
vector_dimension = vectors.shape[-1]
index = faiss.IndexFlatL2(vector_dimension)
index.add(vectors)

In [8]:
import numpy as np

In [9]:
# Free-text query to classify by nearest neighbour.
search_text = 'Do you like pasta?'

# Embed the query with the same encoder used for the corpus.
search_vector = encoder.encode(search_text)

# Lift the single embedding into its own (1, dim) batch; the copy keeps
# `search_vector` untouched by the in-place normalisation below.
_vector = search_vector[None, :].copy()
faiss.normalize_L2(_vector)

In [10]:
# Request as many neighbours as there are indexed vectors, i.e. a full ranking.
k = index.ntotal

# `distances` and `ann` each hold one row per query (here a single row).
distances, ann = index.search(_vector, k)

In [11]:
# Only one query was searched, so row 0 of each array holds its neighbours.
results = pd.DataFrame(dict(distances=distances[0], ann=ann[0]))

In [12]:
# Show the ranked neighbours: distance to the query and the matched row id (`ann`).
results.head()

Unnamed: 0,distances,ann
0,1.04856,1
1,1.905714,3
2,2.047615,0
3,2.068537,2


In [13]:
# Attach the original text/category to each hit by matching the neighbour id
# in `ann` against df's index.
merge_df = results.merge(df, left_on="ann", right_index=True)
merge_df.head()

Unnamed: 0,distances,ann,text,category
0,1.04856,1,Natalie ate a deep-dish pizza today,random
1,1.905714,3,Address,location
2,2.047615,0,"Mahatma Gandhi was born in Porbandar, Gujarat",location
3,2.068537,2,The AWS Serverless Architecture makes use of t...,networking


In [14]:
# Category labels in the same row order as the vectors added to the index.
labels = df['category']

# ann[0, 0] is the id of the closest indexed vector for the single query.
# FAISS ids are row positions, so look the label up positionally with .iloc;
# plain labels[...] is a label-based lookup that only agrees with position
# while df keeps its default 0..n-1 index.
category = labels.iloc[ann[0, 0]]

In [16]:
# Report the query alongside the category of its nearest neighbour.
print(f"Text: {search_text}\nCategory: {category}")

Text: Do you like pasta?
Category: random
