<a href="https://colab.research.google.com/github/UtkarshRasal/MusicSearchAI/blob/master/EmbeddingsExample.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [19]:
import faiss
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer

# 1. Load sentence transformer model
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# 2. Sample documents (e.g., research paper abstracts)
documents = [
    {"id": "doc1", "text": "Deep learning techniques applied to computer vision tasks."},
    {"id": "doc2", "text": "Quantum computing promises exponential speed-up for some problems."},
    {"id": "doc3", "text": "Natural language processing in healthcare applications."},
]

# 3. Create embeddings (one row per document, in the same order as `documents`)
texts = [doc["text"] for doc in documents]
ids = [doc["id"] for doc in documents]
embeddings = model.encode(texts, convert_to_numpy=True)
# Read the vector width from the encoded array instead of hard-coding it, so
# changing the model name above cannot leave the index with the wrong dimension.
embedding_dim = embeddings.shape[1]

# 4. Create FAISS index
index = faiss.IndexFlatL2(embedding_dim)  # L2 distance (Euclidean)
index.add(embeddings)  # Store vectors

# 5. Map vector index to document metadata
# Row i of `embeddings` was built from texts[i] / ids[i], so the position
# returned by a search can be used directly as the key here.
id_map = {
    i: {"id": doc_id, "text": text}
    for i, (doc_id, text) in enumerate(zip(ids, texts))
}

# 6. Semantic search function
def search(query, top_k=3):
    """Return the indexed documents closest in meaning to ``query``.

    Args:
        query: Text to encode with ``model`` and look up in ``index``.
        top_k: Maximum number of results to return.

    Returns:
        A list of dicts with the keys ``"score"``, ``"id"`` and ``"text"``,
        in the order returned by ``index.search``. ``"score"`` is the
        distance reported by the index, so a smaller value is a closer
        match. The list holds fewer than ``top_k`` entries when the index
        stores fewer vectors, and is empty when ``top_k`` is not positive.
    """
    # Never request more neighbours than are stored: FAISS pads missing
    # results with index -1, which is not a key of id_map.
    k = min(top_k, index.ntotal)
    if k <= 0:
        return []

    query_vec = model.encode([query], convert_to_numpy=True)
    distances, indices = index.search(query_vec, k)

    results = []
    for score, idx in zip(distances[0], indices[0]):
        if idx < 0:
            # Defensive: skip any padding slot the index still reports.
            continue
        entry = id_map[int(idx)]
        results.append({
            "score": float(score),
            "id": entry["id"],
            "text": entry["text"],
        })
    return results

# 7. Run a sample query and show each hit as an ID / Score / Text group
results = search("tasks")
for r in results:
    # One print call per hit; sep="\n" puts each field on its own line.
    print(
        f"\nID: {r['id']}",
        f"Score: {r['score']:.4f}",
        f"Text: {r['text']}",
        sep="\n",
    )



ID: doc1
Score: 1.4572
Text: Deep learning techniques applied to computer vision tasks.

ID: doc3
Score: 1.6505
Text: Natural language processing in healthcare applications.

ID: doc2
Score: 1.7976
Text: Quantum computing promises exponential speed-up for some problems.
