# Multimodal RAG Demo
This notebook demonstrates the full pipeline: PDF ingestion → embeddings → FAISS → retrieval → generation.

In [None]:
from src.config import Config
from src.embedding import EmbeddingService
from src.generator import GeneratorService
from src.vectorstore import FaissVectorStore
from src.rag import retrieve, rag_ask
from src.data_processing import download_pdf, create_directories, process_text_chunks, process_tables, process_images, process_page_images
import pymupdf
from tqdm import tqdm
from langchain_text_splitters import RecursiveCharacterTextSplitter


In [None]:
# Build the shared configuration object and the two services used by the
# rest of the notebook (embedding and generation).
config = Config()
embedder = EmbeddingService()
generator = GeneratorService()

# Show which providers the configuration reports, as a quick sanity check
# before running the pipeline.
embedding_provider = config.get_embedding_provider()
print('Embedding provider:', embedding_provider)
llm_provider = config.get_llm_provider()
print('LLM provider:', llm_provider)

In [None]:
# Look up the data directories from the configuration; `paths` is reused by
# later cells.
paths = config.get_data_paths()

# Source document: arXiv paper 1706.03762, stored locally as attention.pdf.
pdf_url, filename = 'https://arxiv.org/pdf/1706.03762.pdf', 'attention.pdf'

# Download into the input directory; the value returned by download_pdf is
# kept as `filepath` and is what the extraction cell opens.
input_dir = paths['input_dir']
filepath = download_pdf(pdf_url, input_dir, filename)

# Prepare the output directory tree that the extraction helpers write into.
output_dir = paths['output_dir']
create_directories(output_dir)

In [None]:
# Extract text chunks, tables, embedded images and page images from the PDF.
# Every helper appends its results to the shared `items` list.

# Read the pipeline settings once instead of once per splitter argument.
pipeline_cfg = config.get_pipeline_config()
splitter = RecursiveCharacterTextSplitter(
    chunk_size=pipeline_cfg['chunk_size'],
    chunk_overlap=pipeline_cfg['chunk_overlap'],
)

# Loop-invariant: all helpers receive the same output directory.
output_dir = paths['output_dir']
items = []

# Open the document as a context manager so the underlying file handle is
# released once extraction finishes, even if one of the helpers raises.
with pymupdf.open(filepath) as doc:
    for page_num, page in enumerate(doc):
        text = page.get_text()
        # Skip text chunking for pages whose text is empty or whitespace-only.
        if text.strip():
            process_text_chunks(filepath, text, splitter, page_num, output_dir, items)
        process_tables(filepath, doc, page_num, output_dir, items)
        process_images(doc, page, page_num, output_dir, items)
        process_page_images(page, page_num, output_dir, items)

print('Extracted', len(items), 'items')

In [None]:
# Compute one embedding per extracted item. Each result is stored on the item
# under 'embedding' and also collected, in the same order, in `embeddings`.
embeddings = []
for item in tqdm(items, desc='Embedding items'):
    kind = item['type']
    if kind == 'text':
        vector = embedder.embed(text=item['text'])
    elif kind in ('image', 'page'):
        vector = embedder.embed(image_b64=item['image'])
    else:
        # NOTE(review): any other item type gets no embedding, and the None
        # placeholder is still handed to store.build below. Confirm that
        # FaissVectorStore.build tolerates None entries.
        vector = None
    item['embedding'] = vector
    embeddings.append(vector)

# Build the vector store from the embeddings and items, write it to the
# configured index/metadata paths, then load it back.
vs_cfg = config.get_vectorstore_config()
store = FaissVectorStore(
    index_path=vs_cfg['index_path'],
    metadata_path=vs_cfg['metadata_path'],
)
store.build(embeddings, items)
store.save()
store.load()
print('✅ Index built and loaded')

In [None]:
# Retrieval only: request the top 3 results for a sample question and print
# a short preview of each one.
question = 'Which optimizer was used for training?'
results = retrieve(store, question, top_k=3)
for hit in results:
    print(f"Page {hit['page']} ({hit['type']})")
    # Only results that carry a 'text' field have something to preview;
    # the preview is capped at 300 characters.
    if 'text' in hit:
        snippet = hit['text'][:300]
        print(snippet)
    print('---')

In [None]:
# Pass the sample question to rag_ask together with the store (top_k=3) and
# print whatever it returns.
question = 'Which optimizer was used for training?'
answer = rag_ask(store, question, top_k=3)
print('🤖 Answer:\n', answer)