In [1]:
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Sentence-embedding model used for both the documents and incoming queries.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Two small in-memory corpora standing in for separate knowledge sources.
documents_source1 = [
    "Machine learning is a core part of AI.",
    "AI is transforming industries.",
]
documents_source2 = [
    "Deep learning is used in image and speech recognition.",
    "Neural networks are a type of machine learning model.",
]

# Embed each document once at import time (one ``model.encode`` call per
# document); the resulting lists are index-aligned with the document lists.
emb_source1 = list(map(model.encode, documents_source1))
emb_source2 = list(map(model.encode, documents_source2))

# Hybrid RAG function
def hybrid_rag(query, top_k=2):
    """Return the documents most similar to ``query``, joined into one string.

    Documents from both sources are pooled, scored against the query by
    cosine similarity of their embeddings, and the best ``top_k`` are
    concatenated (highest score first) with single spaces.

    Args:
        query: Text to embed with the module-level ``model`` and match
            against the precomputed document embeddings.
        top_k: Maximum number of documents to include. Values larger than
            the pooled corpus simply return every document; zero or a
            negative value returns an empty string.

    Returns:
        The selected documents joined by ``" "``, or ``""`` when nothing is
        selected.
    """
    # Guard first: the slice ``[-0:]`` selects *everything*, so without this
    # check ``top_k=0`` would return every document instead of none, and a
    # negative ``top_k`` would return an arbitrary tail of the ranking.
    if top_k <= 0:
        return ""

    # Pool both sources; order here must match the embedding order below.
    all_docs = documents_source1 + documents_source2
    if not all_docs:
        return ""

    query_emb = model.encode(query)

    # Score every document in a single call instead of one call per document.
    doc_matrix = np.vstack([*emb_source1, *emb_source2])
    all_scores = cosine_similarity([query_emb], doc_matrix)[0]

    # argsort is ascending: take the last ``top_k`` and reverse for best-first.
    top_indices = np.argsort(all_scores)[-top_k:][::-1]

    # Generate final answer (simplified): plain concatenation of the hits.
    return " ".join(all_docs[i] for i in top_indices)

# Demo run: retrieve the two best-matching documents for a sample question
# and print the question alongside the combined answer.
query = "What is core to AI?"
answer = hybrid_rag(query, top_k=2)

for label, text in (("Query:", query), ("Hybrid RAG Answer:", answer)):
    print(label, text)


  from .autonotebook import tqdm as notebook_tqdm


Query: What is core to AI?
Hybrid RAG Answer: Machine learning is a core part of AI. AI is transforming industries.
