### Set up asyncio


In [1]:
# Apply the nest_asyncio patch before any other cell runs.
# NOTE(review): presumably required so libraries that drive their own event
# loop can run inside the notebook's already-running loop — confirm.
import nest_asyncio
nest_asyncio.apply()

### Set up Qdrant vector DB

In [1]:
import qdrant_client

# Name of the Qdrant collection used by the rest of the notebook.
collection_name = "chat_with_docs"

# Client for the Qdrant instance reachable at localhost:6333.
client = qdrant_client.QdrantClient(host="localhost", port=6333)

### Read the documents

In [2]:
from llama_index.core import SimpleDirectoryReader
# Directory that is scanned (including sub-directories) for PDF files.
input_dir_path = "./docs"
loader = SimpleDirectoryReader(
    input_dir=input_dir_path,
    recursive=True,
    required_exts=[".pdf"],
)

In [3]:
# Read the files selected by `loader` into `docs`.
docs = loader.load_data()

In [None]:
# Sanity check: container type and number of loaded documents.
type(docs), len(docs)

In [None]:
# Peek at the first loaded document.
docs[0]

### Indexing Data 

In [10]:
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core import VectorStoreIndex, ServiceContext, StorageContext

def create_index(documents, qdrant=None, collection=None):
    """Build a vector index over ``documents`` backed by a Qdrant collection.

    Args:
        documents: Documents to index; passed unchanged to
            ``VectorStoreIndex.from_documents``.
        qdrant: Optional Qdrant client. When omitted, the notebook-level
            ``client`` is used.
        collection: Optional collection name. When omitted, the notebook-level
            ``collection_name`` is used.

    Returns:
        The index returned by ``VectorStoreIndex.from_documents``.
    """
    # Resolve the notebook globals at call time so existing
    # ``create_index(docs)`` calls keep working exactly as before.
    if qdrant is None:
        qdrant = client
    if collection is None:
        collection = collection_name

    vector_store = QdrantVectorStore(client=qdrant, collection_name=collection)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    return VectorStoreIndex.from_documents(documents, storage_context=storage_context)



### Loading Embedding Model & Indexing the Data

In [None]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings

# BAAI/bge-large-en-v1.5 loads without `trust_remote_code`; leaving the flag
# off avoids allowing Python code shipped in a Hub repository to be executed.
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-large-en-v1.5")

# Ensure the same embedding model is used throughout the RAG pipeline.
Settings.embed_model = embed_model

# Index the loaded documents in the Qdrant-backed vector store.
index = create_index(docs)

### Load the LLM 

In [None]:
from llama_index.llms.ollama import Ollama
import os

# The Ollama endpoint is machine-specific, so allow overriding it through the
# OLLAMA_BASE_URL environment variable. The original address remains the
# default (also used when the variable is set but empty).
ollama_base_url = os.environ.get("OLLAMA_BASE_URL") or "http://172.18.176.1:11434"

llm = Ollama(model="llama3.2:1b", request_timeout=120.0, base_url=ollama_base_url)
# Register the LLM on the shared Settings object.
Settings.llm = llm

### Define the Prompt Template 

In [18]:
from llama_index.core import PromptTemplate

# Question-answering prompt with two placeholders: {context_str} and
# {query_str}. The text is kept flush-left on purpose so that no source-code
# indentation ends up inside the prompt string.
template = """Context information is below:
---------------------
{context_str}
---------------------
Given the context information above I want you to think
step by step to answer the query in a crisp manner,
in case you don't know the answer say 'I don't know!'

Query: {query_str}

Answer:"""

# Wrap the raw template string in a PromptTemplate object.
qa_prompt_tmpl = PromptTemplate(template)

### Reranking

In [19]:
from llama_index.core.postprocessor import SentenceTransformerRerank

# Sentence-transformer reranker configured with a cross-encoder model and top_n=3.
rerank = SentenceTransformerRerank(top_n=3, model="cross-encoder/ms-marco-MiniLM-L-2-v2")

### Querying the Document

In [20]:
# Query engine over the index: similarity_top_k=10, with `rerank` applied as a
# node postprocessor.
query_engine = index.as_query_engine(
    node_postprocessors=[rerank],
    similarity_top_k=10,
)

# Install the custom question-answering prompt on the response synthesizer.
prompt_overrides = {"response_synthesizer:text_qa_template": qa_prompt_tmpl}
query_engine.update_prompts(prompt_overrides)

response = query_engine.query("What exactly is DSPy?")

In [None]:
from IPython.display import Markdown, display
# Render the response text as Markdown in the cell output.
display(Markdown(str(response)))

## Ollama OCR

In [None]:
import cv2
import matplotlib.pyplot as plt

image_path = r"../rewe_2025/01.jpg"
a = cv2.imread(image_path)
# cv2.imread reports an unreadable or missing file by returning None instead
# of raising, so fail here with a clear message rather than inside imshow.
if a is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

# OpenCV decodes images in BGR channel order while matplotlib expects RGB;
# convert before plotting so the preview colours are not swapped.
plt.imshow(cv2.cvtColor(a, cv2.COLOR_BGR2RGB))

In [None]:
# Enable IPython's autoreload extension so edits to imported modules are picked up live.
%load_ext autoreload
%autoreload 2

In [15]:
from ollama_ocr import OCRProcessor

# Initialize the OCR processor with the model name it should use.
ocr = OCRProcessor(model_name='llama3.2-vision:latest')  # You can use any vision model available on Ollama

# Process an image


In [22]:
# Run OCR on a single image and request JSON-formatted output.
# format_type options: markdown, text, json, structured, key_value
result = ocr.process_image(format_type="json", image_path="../rewe_2025/04.png")
print(result)

Here is the structured data extracted from the German supermarket receipt in a JSON format:

```
{
    "receipt": {
        "items": [
            {
                "name": "Kart. SP.",
                "quantity": 1,
                "total": "0,842"
            },
            {
                "name": "APFEL SALA",
                "quantity": 4.94,
                "total": "2,99"
            },
            {
                "name": LIMETTE",
                "quantity": 4.49,
                "total": "3,66"
            },
            {
                "name": RISPENTONATE BIO",
                "quantity": 2.26,
                "total": "2,39"
            },
            {
                "name": ESL MILCH 3,5% GEMÜSESMISCHUNG",
                "quantity": 2.19,
                "total": "1,99"
            },
            {
                "name": RÖHRZUCKER",
                "quantity": 2.85,
                "total": "2,09"
            },
            {
                "name": PENE VOLKORN-