In [14]:
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

In [15]:
# ----------------------
# 1. Sample Data
# ----------------------
chunks = [
    "Applicants must be at least 21 years old.",
    "Minimum monthly salary is ₹25,000.",
    "Applicants must have a credit score above 700.",
    "Applicants must be at least 21 years old and earn ₹25,000.",
    "Minimum credit score of 700 is required.",
    "Personal loans are not available to self-employed applicants.",
    "EMI defaults attract a penalty of 2% per month.",
]
query = "What are the age, salary, and credit score requirements for a personal loan?"

In [16]:
# ----------------------
# 2. Embedding
# ----------------------
model = SentenceTransformer("all-MiniLM-L6-v2")
chunk_embeddings = model.encode(chunks)
query_embedding = model.encode([query])[0]

In [18]:
# ----------------------
# 3. Vanilla Top-k
# ----------------------
def vanilla_top_k(query_embedding, chunk_embeddings, k=3):
    similarities = cosine_similarity([query_embedding], chunk_embeddings)[0]
    top_k_idx = similarities.argsort()[-k:][::-1]
    return [chunks[i] for i in top_k_idx], [similarities[i] for i in top_k_idx]
vanilla_results, vanilla_scores = vanilla_top_k(query_embedding, chunk_embeddings)
print("\n Vanilla Top-3 Results:")
for text,score in zip(vanilla_results, vanilla_scores):
    print(f"Text: {text}\nScore: {score}\n")


 Vanilla Top-3 Results:
Text: Applicants must have a credit score above 700.
Score: 0.5747789144515991

Text: Minimum credit score of 700 is required.
Score: 0.5359175205230713

Text: Applicants must be at least 21 years old and earn ₹25,000.
Score: 0.5154130458831787



In [25]:
# ----------------------
# 4. MMR-based Retrieval
# ----------------------
def mmr(chunk_embeddings, query_embedding, k=3, lambda_param=0.7):
  """
  Returns k chunks using Maximum Marginal Relevance (MMR)
  """
  similarities = cosine_similarity([query_embedding], chunk_embeddings)[0]
  selected = []
  selected_idx=[]
  candidate_idx = list(range(len(chunks)))

  for _ in range(k):
    if len(candidate_idx) == 0:
      break # No more candidates to select from

    if not selected_idx: # Handle the first iteration where selected_idx is empty
      idx = candidate_idx[np.argmax(similarities[candidate_idx])]
    else:
      diversity = np.array([max(cosine_similarity([chunk_embeddings[i],chunk_embeddings[j]])[0][0] for j in selected_idx) for i in candidate_idx])
      mmr_scores = lambda_param * similarities[candidate_idx] + (1 - lambda_param) * diversity
      idx = candidate_idx[np.argmax(mmr_scores)]

    selected_idx.append(idx)
    selected.append(chunks[idx])
    candidate_idx.remove(idx)

  return selected

In [26]:
mmr_results = mmr(chunk_embeddings, query_embedding, k=3, lambda_param=0.7)
print("\nMMR Top-3 Results:")
for text in mmr_results:
  print(f" - {text}")


MMR Top-3 Results:
 - Applicants must have a credit score above 700.
 - Minimum credit score of 700 is required.
 - Applicants must be at least 21 years old and earn ₹25,000.
