# **Semantic Search**
---



In [None]:
import torch
import torch.nn.functional as F
from sentence_transformers import SentenceTransformer

In [None]:
# Natural-language question that the documents below are ranked against.
Query = "How can I reduce overfitting in a deep learning model?"

In [None]:
# Toy corpus: machine-learning sentences mixed with unrelated general-knowledge
# sentences. Order matters, because search results are reported by list index.
docs = [
    "Applying dropout layers during training helps prevent overfitting in neural networks.",
    "The Eiffel Tower is one of the most visited monuments in the world.",
    "Using regularization techniques such as L2 weight decay can reduce overfitting in deep learning models.",
    "Basketball is played by two teams of five players each.",
    "Artificial intelligence systems rely on large datasets to make predictions.",
    "Mount Everest is the highest mountain above sea level.",
    "Data augmentation is an effective way to improve generalization and reduce overfitting.",
    "The Amazon rainforest is home to millions of species.",
    "Neural networks are composed of layers of interconnected neurons.",
    "Cooking rice requires boiling water and letting it simmer.",
    "Deep learning models require significant computational resources for training.",
    "Soccer is the most popular sport worldwide.",
    "Gradient descent is used to minimize loss functions during training.",
    "Photosynthesis allows plants to convert sunlight into energy.",
    "Hyperparameter tuning can significantly impact model performance.",
    "The Pacific Ocean is the largest ocean on Earth.",
    "Increasing batch size may speed up training but affects convergence.",
    "Ancient Rome was one of the greatest civilizations in history.",
    "Machine learning models can suffer from bias if data is unbalanced.",
    "Shakespeare wrote many famous plays and sonnets.",
]

In [None]:
# Checkpoint identifier handed to SentenceTransformer; kept in one place so it is easy to swap.
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
st_model = SentenceTransformer(MODEL_NAME)

In [None]:
def emb_text_st(text, st_model):
    """Encode one string or a list of strings with ``st_model``.

    Args:
        text: A single string, or a collection of strings. A bare string is
            wrapped in a one-element list so the model always receives a list.
        st_model: Object exposing ``encode(sentences, convert_to_tensor=...,
            normalize_embeddings=...)``.

    Returns:
        Whatever ``st_model.encode`` returns when asked for a tensor with
        normalized embeddings (per the original author's note, shape
        ``(N, dim)`` for ``N`` input strings).
    """
    sentences = [text] if isinstance(text, str) else text
    return st_model.encode(
        sentences,
        convert_to_tensor=True,
        normalize_embeddings=True,
    )

def Semantic_Search(query_emb, doc_emb, docs, top_k=3):
    """Print the documents whose embeddings score highest against the query.

    Each document's score is the dot product between the query embedding and
    the matching row of ``doc_emb`` (for unit-length vectors this equals
    cosine similarity). Results are printed best-first; nothing is returned.

    Args:
        query_emb: Tensor holding a single query embedding, either ``(dim,)``
            or ``(1, dim)``.
        doc_emb: Tensor of document embeddings, one row per document,
            ``(N, dim)``.
        docs: Sequence of documents, indexed in the same order as the rows of
            ``doc_emb``.
        top_k: Maximum number of results to print. If it exceeds the number of
            documents, all documents are printed instead of raising.

    Returns:
        None. Output goes to stdout.
    """
    scores = torch.matmul(query_emb, doc_emb.T)
    # Drop the leading query axis only when there is one. An unconditional
    # squeeze(0) would collapse the score vector of a 1-D query scored against
    # a single document into a 0-d tensor that cannot be iterated.
    if scores.dim() > 1:
        scores = scores.squeeze(0)  # (N,)

    # torch.topk raises when k is larger than the dimension being searched,
    # so cap k at the number of available scores.
    k = min(top_k, scores.shape[-1])
    values, indices = torch.topk(scores, k=k)

    print(f"Top {k} results:\n")
    for rank, (idx, score) in enumerate(zip(indices, values), start=1):
        idx = int(idx)
        print(f"{rank}) score={float(score):.4f}")
        print(docs[idx])
        print("-" * 50)

In [None]:
# Embed the query and the corpus with the same model so the vectors are comparable.
query_emb = emb_text_st(text=Query, st_model=st_model)  # one query -> expected (1, dim)
doc_emb = emb_text_st(text=docs, st_model=st_model)  # 20 documents -> expected (20, dim)

# Rank every document against the query and print the three best matches.
Semantic_Search(query_emb=query_emb, doc_emb=doc_emb, docs=docs, top_k=3)