# Sentence Transformers Demo Notebook

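This notebook demonstrates how to compute sentence embeddings with the `sentence-transformers` library and use them for pairwise similarity scoring and semantic search.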

In [None]:
!pip install -q sentence-transformers


In [None]:
from sentence_transformers import SentenceTransformer, util
import numpy as np


In [None]:
# Instantiate the 'all-MiniLM-L6-v2' checkpoint by name, then report the class
# of the object that was created as a quick sanity check.
model = SentenceTransformer('all-MiniLM-L6-v2')
print('Model loaded:', type(model).__name__)


In [None]:
# Sample sentences for the pairwise-similarity demo; the first two are the
# pair that gets compared below.
sentences = [
    "I love playing football.",
    "Soccer is my favorite sport.",
    "Artificial intelligence is transforming the world.",
    "Deep learning and neural networks are part of AI.",
    "How to fix a bike tire?"
]

# Encode every sentence in one call; convert_to_tensor=True requests a tensor
# result so it can be passed straight to the similarity helper.
embeddings = model.encode(sentences, convert_to_tensor=True)
print('Embeddings shape:', embeddings.shape)

# Use util.cos_sim (the same helper the similarity-matrix cell uses) instead
# of the deprecated util.pytorch_cos_sim alias. `.item()` below pulls the
# single score out of the returned tensor for formatting.
sim_score = util.cos_sim(embeddings[0], embeddings[1])
print(f"Similarity between '{sentences[0]}' and '{sentences[1]}' -> {sim_score.item():.4f}")


In [None]:
# Documents that the queries are matched against.
corpus = [
    "How do I replace a punctured bicycle tire?",
    "Best ways to repair a flat bike tire.",
    "Tips for maintaining your bicycle chain.",
    "Where to learn Python programming.",
    "What is the difference between AI and machine learning?",
    "Steps to change a car tire.",
    "Soccer techniques for beginners."
]

# Free-text questions to look up in the corpus.
queries = [
    "How to fix a punctured bike tire?",
    "I want to learn about AI vs machine learning"
]

# Embed both sides with the same model so their vectors are comparable.
corpus_embeddings = model.encode(corpus, convert_to_tensor=True)
query_embeddings = model.encode(queries, convert_to_tensor=True)

# One list of hits per query, limited to the 3 best matches each.
hits = util.semantic_search(query_embeddings, corpus_embeddings, top_k=3)

# Walk queries and their hit lists in lockstep, printing the hits in the order
# they were returned.
for query_text, query_hits in zip(queries, hits):
    print(f"\nQuery: {query_text}")
    for position, match in enumerate(query_hits, start=1):
        matched_doc = corpus[match['corpus_id']]
        print(f"  {position}. (score: {match['score']:.4f}) {matched_doc}")


In [None]:
# Pairwise cosine similarity of every corpus document against every other,
# moved to the CPU and converted to a NumPy array.
cosine_scores = util.cos_sim(corpus_embeddings, corpus_embeddings).cpu().numpy()
print('\nCosine similarity matrix shape:', cosine_scores.shape)

# Top similar document per corpus item (excluding self).
# Mask the diagonal on a copy so `cosine_scores` keeps its true self-similarity
# values for any later use. -inf is used as the mask because -1.0 is itself a
# valid cosine score and could tie with a real entry.
masked_scores = cosine_scores.copy()
np.fill_diagonal(masked_scores, -np.inf)

# Row-wise argmax gives, for each document, the index of its best other match.
for idx, top_idx in enumerate(masked_scores.argmax(axis=1)):
    print(f"  '{corpus[idx]}' -> '{corpus[top_idx]}' (score: {masked_scores[idx, top_idx]:.4f})")
