In [1]:
import numpy as np
print(np.__version__)

1.26.4


In [None]:
import llama_index
from importlib.metadata import version
print("LlamaIndex Version:", version("llama_index"))

Langchain Version: 0.12.19


In [None]:
import langchain
from importlib.metadata import version
print("Langchain Version:", version("llama_index"))

In [None]:
from langchain_community.llms import Ollama
from langchain_community.embeddings import HuggingFaceEmbeddings


In [24]:
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [26]:
# Load PDF
pdf_loader = PyPDFLoader("chunks/DSPy.pdf")
pdf_docs = pdf_loader.load()

In [27]:

# Split text into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = text_splitter.split_documents(pdf_docs)

In [28]:
# Ensure 'chunks/' directory exists
os.makedirs("chunks", exist_ok=True)

In [29]:
# Save chunks to files
for i, chunk in enumerate(chunks):
    chunk_path = f"chunks/chunk_{i}.txt"
    with open(chunk_path, "w", encoding="utf-8") as f:
        f.write(chunk.page_content)
print(f"✅ Extracted and saved {len(chunks)} chunks in 'chunks/' directory.")

✅ Extracted and saved 2 chunks in 'chunks/' directory.


In [35]:
from langchain.vectorstores import Qdrant
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document

In [31]:

# Initialize Qdrant client
qdrant = QdrantClient(":memory:")  # Use persistent DB if needed


In [32]:
# Define collection name
collection_name = "naive_rag_embeddings"

In [33]:
# Create Qdrant Collection (Ensuring it Exists)
qdrant.recreate_collection(
    collection_name=collection_name,
    vectors_config=VectorParams(size=384, distance=Distance.COSINE)
)

  qdrant.recreate_collection(


True

In [38]:
# Load Embedding Model
embedding_model = HuggingFaceEmbeddings(
    model_name="BAAI/bge-small-en-v1.5"
)

In [45]:
# Define Vector Store
vector_store = Qdrant(
    client=qdrant,
    collection_name=collection_name,
    embeddings=embedding_model
)

In [46]:
# Convert text chunks into LangChain Document format
docs_to_store = [Document(page_content=chunk.page_content, metadata={"source": f"chunk_{i}.txt"}) for i, chunk in enumerate(chunks)]


In [47]:
# Store Documents in Qdrant
vector_store.add_documents(docs_to_store)

print(f"✅ Successfully stored {len(docs_to_store)} chunks in Qdrant.")

✅ Successfully stored 2 chunks in Qdrant.


In [64]:
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.question_answering import load_qa_chain
from langchain_core.prompts import PromptTemplate

In [65]:
# Define a custom prompt (optional, but recommended)
custom_prompt = PromptTemplate(
    template="""Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know. Don't try to make up an answer.
Context:
{context}
Question: {question}
Helpful Answer:""",
    input_variables=["context", "question"],
)

In [66]:
# Load a standard QA chain
combine_documents_chain = load_qa_chain(llm, chain_type="stuff", prompt=custom_prompt)

# Create Naive RAG pipeline correctly
qa_chain = RetrievalQAWithSourcesChain(
    retriever=retriever,  # Correct way to pass retriever
    combine_documents_chain=combine_documents_chain,  # Required in latest versions
)

stuff: https://python.langchain.com/docs/versions/migrating_chains/stuff_docs_chain
map_reduce: https://python.langchain.com/docs/versions/migrating_chains/map_reduce_chain
refine: https://python.langchain.com/docs/versions/migrating_chains/refine_chain
map_rerank: https://python.langchain.com/docs/versions/migrating_chains/map_rerank_docs_chain

See also guides on retrieval and question-answering here: https://python.langchain.com/docs/how_to/#qa-with-rag
  combine_documents_chain = load_qa_chain(llm, chain_type="stuff", prompt=custom_prompt)


In [67]:
# Run a query
query = "What is DSPy?"
response = qa_chain.invoke({"question": query})

print("📌 Answer:", response)

📌 Answer: {'question': 'What is DSPy?', 'answer': '<think>\nOkay, so I need to figure out what DSPy is based on the context provided. Let me read through it carefully.\n\nThe first sentence says, "DSPy - Programming—not prompting—LMs." So right away, I know that DSPy isn\'t about prompting language models directly but instead something else related to programming them or working with them in a different way.\n\nNext, it explains that DSPy is the framework for programming language models. Instead of using prompts like you would in a traditional AI setup, you use declarative Python code. That makes me think it\'s more about writing code than giving commands through text prompts.\n\nThen it mentions thatDSPy allows iterating quickly on building modular AI systems. Modular systems are those that can be built by connecting different parts or components together without too much interference from each other. So this framework probably helps in creating such systems efficiently.\n\nIt also ta

In [None]:
#From Here on, I've implemented using LlamaIndex

In [None]:
import llama_index
from importlib.metadata import version
print("LlamaIndex Version:", version("llama_index"))

In [1]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.ollama import Ollama
from llama_index.core import Settings

Settings.llm = Ollama(model='deepseek-r1:latest', request_timeout=120.0)
Settings.embed_model = HuggingFaceEmbedding(model_name='sentence-transformers/all-MiniLM-L6-v2', trust_remote_code=True)


  from .autonotebook import tqdm as notebook_tqdm


In [None]:
#Define the directory path
input_dir_path = "chunks"


In [5]:
from llama_index.core import SimpleDirectoryReader
#Load Data 
loader = SimpleDirectoryReader(input_dir=input_dir_path, required_exts=['.pdf'], recursive=True)

docs = loader.load_data()


In [6]:
docs

[Document(id_='c9755c72-75d2-4cf4-ade0-379285232e57', embedding=None, metadata={'page_label': '1', 'file_name': 'DSPy.pdf', 'file_path': '/Users/macbookpro/Projects/python-ml/langchain-ollama-streamlit/chunks/DSPy.pdf', 'file_type': 'application/pdf', 'file_size': 401852, 'creation_date': '2025-02-23', 'last_modified_date': '2025-02-23'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource(embeddings=None, data=None, text="DSPy - Programming—not prompting—LMs \nDSPy is the framework for programming—rather than prompting—language models. It allows \nyou to iterate fast on building modular AI systems and offers algorithms for optimizing their \nprompts and weights, whether you'r

In [7]:
import numpy as np
print("Numpy is available:", np.__version__)
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
try:
    import torch
    print("PyTorch is available:", torch.__version__)
except ImportError: 
    print("PyTorch is not available")               

try:
    #Create vector store index
    index = VectorStoreIndex.from_documents(docs)
    print("Index created successfully")
except RuntimeError as e:
    print("Error creating index:", e)
except Exception as e:
    print("Unexpected error:", e)
    

Numpy is available: 1.26.4
PyTorch is available: 2.2.2
Index created successfully


In [8]:
index

<llama_index.core.indices.vector_store.base.VectorStoreIndex at 0x138650df0>

In [9]:
query_engine = index.as_query_engine(streaming=True, similarity_top_k=3)

In [10]:
from llama_index.core import PromptTemplate

In [11]:
qa_prompt_tmpl_str = (
            "Context information is below.\n"
            "---------------------\n"
            "{context_str}\n"
            "---------------------\n"
            "Given the context information above I want you to think step by step to answer the query in a crisp manner, incase case you don't know the answer say 'I don't know!'.\n"
            "Query: {query_str}\n"
            "Answer: "
            )

qa_prompt_tmpl = PromptTemplate(qa_prompt_tmpl_str)
query_engine.update_prompts({"response_synthesizer:text_qa_template": qa_prompt_tmpl})

response = query_engine.query('What exactly is DSPy?')
print(response)

<think>
Okay, so I need to figure out what DSPy is based on the context provided. Let me read through the information again.

The context starts by introducing a file calledDSPy.pdf and setting up some prerequisites like installing Ollama and using pip to install dspy. Then it explains what DSPy does. 

From the first paragraph, I gather that DSPy is a framework for programming language models in a different way than just prompting them. Instead of telling the model what to do with prompts, you write Python code. So it's about building modular AI systems and optimizing their prompts and weights.

DSPy stands for Declarative Self-improving Python, which I think means that instead of using rigid prompts, you write Python code that's more declarative, letting the model figure things out on its own through iteration and optimization. This allows for faster development and better performance in tasks like classification, RAG pipelines, or agent loops.

The "Getting Started" part mentions in

In [12]:
response = query_engine.query('How to install DSPy')
print(response)

<think>
Okay, so I need to figure out how to install DSPy. From what I remember, the context provided mentions that DSPy is a framework for programming language models instead of prompting them. But right now, my focus is on installation.

Looking at the context, under "Getting Started I," it says that you can install DSPy using pip with the command "pip install -U dspy." That seems straightforward enough. So step one would be to open my terminal or command prompt and run that pip install command.

Wait, but sometimes package names can change or there might be different ways depending on whether I'm on a Mac, Windows, or Linux. However, the context doesn't specify platform differences for installing DSPy, so I'll assume it's compatible across common platforms with the same installation method.

So to break it down: first, make sure my Python environment is set up correctly if using virtual environments because pip installs in the current environment by default. But the context doesn't 