In [None]:
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# 1. Sample corpus of text documents to search over
documents_text = [
    "Machine learning is a core part of AI.",
    "AI is transforming industries.",
]
# Image documents are not enabled yet; placeholder paths kept for reference:
# documents_images = ["image1.png", "image2.png"]

# 2. Load the sentence-embedding model used for text
text_model = SentenceTransformer('all-MiniLM-L6-v2')
# Optional model for real image embeddings (currently disabled):
# image_model = SentenceTransformer('clip-ViT-B-32')

# 3. Embed every document once up front (one encode call per document),
#    so queries only need to embed the query itself
text_embeddings = list(map(text_model.encode, documents_text))

# 4. Simple RAG retrieval function (text-only; image retrieval is not wired in)
def multimodal_rag_simple(query):
    """Return the text document most similar to ``query``.

    The query is embedded with the module-level ``text_model`` and compared
    against the precomputed ``text_embeddings`` using cosine similarity.
    Despite the function's name, only text documents are searched: image
    scoring is not implemented here.

    Args:
        query: The search input; it is passed unchanged to
            ``text_model.encode``.

    Returns:
        The entry of ``documents_text`` whose embedding has the highest cosine
        similarity to the query. Ties resolve to the earliest document.

    Raises:
        ValueError: If there are no document embeddings to search.
    """
    # len() rather than truthiness: text_embeddings may be a list or an array.
    if len(text_embeddings) == 0:
        raise ValueError("No documents available to search.")

    query_emb = text_model.encode(query)

    # One batched call scores the query against every document at once;
    # row 0 holds the query's similarity to each document.
    scores = cosine_similarity([query_emb], text_embeddings)[0]

    # argmax returns the first maximum, matching list.index(max(...)).
    best_idx = int(np.argmax(scores))
    return documents_text[best_idx]

# 5. Demo: run one query through the retriever and show the result
query = "What is core to AI?"
answer = multimodal_rag_simple(query)

# Print each label next to its value, one pair per line
for label, value in (("Query:", query), ("Multimodal RAG Answer:", answer)):
    print(label, value)


  from .autonotebook import tqdm as notebook_tqdm


Query: What is core to AI?
Multimodal RAG Answer: Machine learning is a core part of AI.
