In [None]:
from embeddings.titan_embedder import TitanTextEmbedder
from vectorstore.chroma_wrapper import ChromaVectorStore
from llm.bedrock_llm import BedrockLLM
from rag.pipeline import RAGPipeline
from ingestion.pdf_ingestion import extract_text_from_pdf_s3
from ingestion.chunking import chunk_text

In [None]:
# Run configuration: S3 location of the source PDF and the question to ask.
# NOTE(review): "your-bucket" looks like a placeholder — set a real bucket
# name before running.
query = "Summarize the MRI findings"
bucket, key = "your-bucket", "medical-reports/aws-only.pdf"

In [None]:
# Load + chunk
# Fetch the document text with extract_text_from_pdf_s3 using the bucket/key
# configured above, then split it with chunk_text using its default arguments.
# NOTE(review): chunk size/overlap come from chunk_text's defaults, which are
# not visible here — confirm they suit the embedding model's input limit.
text = extract_text_from_pdf_s3(bucket, key)
chunks = chunk_text(text)

In [None]:
# Titan embedding
# Embed all chunks with a single embed_texts call on a default-constructed
# TitanTextEmbedder. `embeddings` is later handed to the vector store together
# with `chunks`, so it presumably holds one vector per chunk in the same
# order — TODO confirm against TitanTextEmbedder.embed_texts.
embedder = TitanTextEmbedder()
embeddings = embedder.embed_texts(chunks)

In [None]:
# Store + Search
# Index the chunks with their embeddings in a ChromaVectorStore created with
# the argument "aws-only" (presumably the collection name — verify).
# NOTE(review): re-running this cell calls add_texts again; whether that
# duplicates entries depends on ChromaVectorStore — confirm.
vs = ChromaVectorStore("aws-only")
vs.add_texts(chunks, embeddings)
# The query goes through the same embedder as the chunks. embed_texts is given
# a list, so the single query is wrapped in one and the first result taken.
query_vec = embedder.embed_texts([query])[0]
# Both the raw query text and its embedding are passed to the search.
top_chunks = vs.similarity_search(query=query, query_embedding=query_vec)

In [None]:
# Generate
# Cell-local import: the prompt template is only needed for this step.
from llm.prompt_templates import qa_prompt_template

llm = BedrockLLM()
# The retrieved chunks are merged into one space-separated context string and
# passed to the template together with the question.
prompt = qa_prompt_template(
    " ".join(top_chunks),
    query,
)
print(llm.generate(prompt))
