# Loading SBERT Model

In [None]:
from sentence_transformers import SentenceTransformer

# Instantiate the pre-trained SBERT checkpoint by name; the resulting `model`
# is what the encoding cells below call `.encode(...)` on.
model = SentenceTransformer(model_name_or_path="all-MiniLM-L6-v2")

print("Model loaded successfully!")


# Encoding Sentences into Vectors

In [9]:
# Small demo corpus to search over. Position in this list is the sentence id
# that the retrieval cells print and use to look the text back up.
dataset = [
    "The cat sits on the mat.",
    "A dog is running in the park.",
    "The sun is shining brightly.",
    "A young kitten plays with a ball of yarn.",
    "I love eating ice cream in summer.",
    "Machine learning is transforming industries.",
    "The Eiffel Tower is a popular tourist attraction.",
    "Neural networks are the backbone of deep learning.",
    "She enjoys reading science fiction books.",
    "The football match was exciting to watch.",
]

# Embed the whole corpus in one call; the result has one row per sentence.
sentence_embeddings = model.encode(sentences=dataset)

# Report corpus size and embedding matrix shape, e.g. (10, 384).
print(
    f"Dataset size: {len(dataset)} sentences",
    f"Embedding shape: {sentence_embeddings.shape}",
    sep="\n",
)


Dataset size: 10 sentences
Embedding shape: (10, 384)


# Encode a Query Sentence

In [10]:
# The query is wrapped in a one-element list so the encoder returns a 2-D
# batch of shape (1, dim) rather than a single flat vector; the similarity
# cells below index into that batch dimension.
query = "Deep learning is a key AI technology."
query_embedding = model.encode(sentences=[query])

print("Query Embedding Shape:", query_embedding.shape)  # (1, 384)


Query Embedding Shape: (1, 384)


# Find the Most Similar Sentences

In [19]:
from sklearn.metrics.pairwise import cosine_similarity

# Score the query against every corpus sentence. The result is a
# (n_queries, n_sentences) matrix; row 0 belongs to the single query.
similarities = cosine_similarity(query_embedding, sentence_embeddings)[0]

# Number of neighbours to report (the FAISS cell further down reads this too).
topk = 3

# argsort is ascending, so the best matches sit at the end: take the last
# `topk` positions and flip them so the highest score comes first.
topindices = similarities.argsort()[-topk:][::-1]

print(f" Query : {query}")
print("Most similar sentences ")
for idx in topindices:
    print(f" Similar sentence {idx} : {dataset[idx]}")

 Query : Deep learning is a key AI technology.
Most similar sentences 
 Similar sentence 7 : Neural networks are the backbone of deep learning.
 Similar sentence 5 : Machine learning is transforming industries.
 Similar sentence 8 : She enjoys reading science fiction books.


# Using FAISS for Large-Scale Retrieval

In [18]:
import faiss
import numpy as np

# FAISS consumes C-contiguous float32 matrices. `ascontiguousarray` guarantees
# both properties (and avoids the double copy of `np.array(...).astype(...)`).
# The names are rebound, as before, so the rest of the notebook is unaffected.
sentence_embeddings = np.ascontiguousarray(sentence_embeddings, dtype='float32')
query_embedding = np.ascontiguousarray(query_embedding, dtype='float32')

# Exact (brute-force) index ranked by L2 (Euclidean) distance; its
# dimensionality is the embedding width.
# NOTE(review): L2 ranking matches the cosine ranking of the previous section
# only when the embeddings are unit-length — confirm for the chosen model.
index = faiss.IndexFlatL2(sentence_embeddings.shape[1])
index.add(sentence_embeddings)  # Add dataset embeddings

# Search for the `topk` nearest sentences (`topk` is set in the cosine cell).
# D holds the distances, I the matching row indices, one row per query.
D, I = index.search(query_embedding, topk)

print (f" Query : {query}")
print ("Most similar sentences ")
for idx in I[0]:
    # FAISS pads the label array with -1 when fewer than `topk` vectors are
    # indexed; without this guard dataset[-1] would silently print the last
    # sentence as if it were a match.
    if idx < 0:
        continue
    print (f" Similar sentence {idx} : {dataset[idx]}")

 Query : Deep learning is a key AI technology.
Most similar sentences 
 Similar sentence 7 : Neural networks are the backbone of deep learning.
 Similar sentence 5 : Machine learning is transforming industries.
 Similar sentence 8 : She enjoys reading science fiction books.
