In [1]:
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader("./test.pdf")
pages = loader.load()


In [2]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splitted_docs = splitter.split_documents(pages)

### Remove previous container and create a new one
```
    docker rm pgvector-container
    docker run     --name pgvector-container     -e POSTGRES_USER=langchain     -e POSTGRES_PASSWORD=langchain     -e POSTGRES_DB=langchain     -p 6024:5432     -d pgvector/pgvector:pg16
```


In [3]:
from langchain_ollama import OllamaEmbeddings
from langchain_core.documents import Document
from langchain_postgres.vectorstores import PGVector

embeddings_model = OllamaEmbeddings(model="nomic-embed-text")

connection = 'postgresql+psycopg://langchain:langchain@localhost:6024/langchain'
db = PGVector.from_documents(splitted_docs, embeddings_model, connection=connection)

In [4]:
# create retriever
retriever = db.as_retriever(search_kwargs={"k": 5})

# fetch relevant documents
docs = retriever.invoke("""How much money did Tesla make in 2022?""")


In [5]:
print (docs)

[Document(id='d2fd08a1-10ec-40cf-b4ef-3a1bda3a1406', metadata={'page': 48, 'title': '', 'source': './test.pdf', 'creator': 'wkhtmltopdf 0.12.6', 'producer': 'Qt 5.15.2', 'page_label': '49', 'total_pages': 251, 'creationdate': '2023-01-31T11:10:39+00:00'}, page_content='Tesla,\tInc.\nConsolidated\tStatements\tof\tOperations\n(in\tmillions,\texcept\tper\tshare\tdata)\n\t\t\n\t\n\t\nYear\tEnded\tDecember\t31,\n\t\n\t\n\t\n2022\n\t\n\t\n2021\n\t\n\t\n2020\n\t\nRevenues\n\t\n\t\n\t\n\t\n\t\n\t\n\t\n\t\n\t\nAutomotive\tsales\n\t\n$\n67,210\n\t\n\t\n$\n44,125\n\t\n\t\n$\n24,604\n\t\nAutomotive\tregulatory\tcredits\n\t\n\t\n1,776\n\t\n\t\n\t\n1,465\n\t\n\t\n\t\n1,580\n\t\nAutomotive\tleasing\n\t\n\t\n2,476\n\t\n\t\n\t\n1,642\n\t\n\t\n\t\n1,052\n\t\nTotal\tautomotive\trevenues\n\t\n\t\n71,462\n\t\n\t\n\t\n47,232\n\t\n\t\n\t\n27,236\n\t\nEnergy\tgeneration\tand\tstorage\n\t\n\t\n3,909\n\t\n\t\n\t\n2,789\n\t\n\t\n\t\n1,994\n\t\nServices\tand\tother\n\t\n\t\n6,091\n\t\n\t\n\t\n3,802\n\t\n\t\n\t\n2

In [6]:
from langchain_ollama import ChatOllama
from langchain_core.prompts import ChatPromptTemplate

llm = ChatOllama(model="llama3.2:3b") 

retriever = db.as_retriever()

prompt = ChatPromptTemplate.from_template("""Answer the question based only on 
    the following context:
{context}

Question: {question}
""")

chain = prompt | llm

# fetch relevant documents 
docs = retriever.invoke("""What was Tesla's revenue increase in 2022 compared to 2021 ?""")

# run
chain.invoke({"context": docs,"question": """What was Tesla's revenue increase in 2022 compared to 2021 ?"""})

AIMessage(content='The data provided indicates that Tesla\'s used vehicle revenue increased by $2.29 billion (60% compared to the previous year). However, this is not the same as a "revenue increase" but rather an increase in "used vehicle revenue". The question asks for the increase in total revenue, which is stated to be 51% compared to the previous year.\n\nThe correct answer based on the data provided is: Tesla\'s used vehicle revenue increased by $2.29 billion (60%), and the overall revenue increased by 51%.', additional_kwargs={}, response_metadata={'model': 'llama3.2:3b', 'created_at': '2025-06-07T21:51:22.722527517Z', 'done': True, 'done_reason': 'stop', 'total_duration': 1068904699, 'load_duration': 8955233, 'prompt_eval_count': 2048, 'prompt_eval_duration': 213000000, 'eval_count': 109, 'eval_duration': 845000000, 'model_name': 'llama3.2:3b'}, id='run--d681f39c-7699-4755-ad8c-a61ee391fa60-0', usage_metadata={'input_tokens': 2048, 'output_tokens': 109, 'total_tokens': 2157})

In [7]:
from langchain_ollama import ChatOllama, OllamaEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain_core.output_parsers import StrOutputParser
from langchain_postgres.vectorstores import PGVector
from langchain_core.documents import Document
from operator import itemgetter

# --- 1. Set up the Embedding Model and Vector Store (Retriever) ---
# embeddings_model = OllamaEmbeddings(model="nomic-embed-text")

# connection = 'postgresql+psycopg://langchain:langchain@localhost:6024/langchain'

# Initialize the PGVector database and get the retriever
# db = PGVector.from_documents(splitted_docs, embeddings_model, connection=connection)
retriever = db.as_retriever()

# --- 2. Set up the Language Model (LLM) ---
llm = ChatOllama(model="llama3.2:3b")

# --- 3. Define the Prompt Template ---
prompt = ChatPromptTemplate.from_template("""Answer the question based only on
    the following context:
{context}

Question: {question}
""")

# --- 4. Construct the RAG Chain using LCEL ---

# This chain prepares the input for the prompt by fetching context and passing the question.
# It ensures that 'context' (from the retriever) and 'question' (the original query)
# are available as keys in the dictionary passed to the prompt.
rag_chain = (
    RunnableParallel(
        context=itemgetter("question") | retriever, # Takes the "question" from input, passes to retriever
        question=RunnablePassthrough()              # Passes the original input (the dictionary containing "question") through
    )
    | prompt                                        # Formats the prompt with context and question
    | llm                                           # Generates the answer using the LLM
    | StrOutputParser()                             # Parses the LLM's output to a string
)

# --- 5. Run the Chain ---
question_to_ask = "What was Tesla's revenue increase in 2022 compared to 2021 ?"

print(f"Asking: {question_to_ask}\n")
response = rag_chain.invoke({"question": question_to_ask})
print("Answer:")
print(response)

Asking: What was Tesla's revenue increase in 2022 compared to 2021 ?

Answer:
Tesla's revenue increased by 51% in 2022 compared to 2021. This is stated in the provided text:

"...Automotive leasing revenue increased $834 million, or 51%, in the year ended December 31, 2022..."


In [8]:
from langchain.prompts import ChatPromptTemplate

perspectives_prompt = ChatPromptTemplate.from_template("""You are an AI language 
    model assistant. Your task is to generate five different versions of the 
    given user question to retrieve relevant documents from a vector database. 
    By generating multiple perspectives on the user question, your goal is to 
    help the user overcome some of the limitations of the distance-based 
    similarity search. Provide these alternative questions separated by 
    newlines. Original question: {question}""")

def parse_queries_output(message):
    return message.content.split('\n')

query_gen = perspectives_prompt | llm | parse_queries_output

In [9]:
def get_unique_union(document_lists):
    # Flatten list of lists, and dedupe them
    deduped_docs = {
        doc.page_content: doc
        for sublist in document_lists for doc in sublist
    }
    # return a flat list of unique docs
    return list(deduped_docs.values())

retrieval_chain = query_gen | retriever.batch | get_unique_union

In [10]:
prompt = ChatPromptTemplate.from_template("""Answer the question based only on
    the following context:
{context}

Question: {question}
""")
chain = prompt | llm

input = {"question": """What was Tesla's revenue increase in 2022 compared to 2021 ?"""}
docs = retrieval_chain.invoke(input)
formatted = prompt.invoke({"context": docs, "question": input})
answer = llm.invoke(formatted)
# run
print(answer)

content="According to the document, Tesla's revenue increase in 2022 compared to 2021 is:\n\n* Automotive sales: $67,210 - $44,125 = $23,085 (increase of 52.3%)\n* Automotive regulatory credits: $1,776 - $1,465 = $311 (increase of 21.2%)\n* Automotive leasing: $2,476 - $1,642 = $834 (increase of 50.9%)\n* Total automotive revenues: $71,462 - $47,232 = $24,230 (increase of 51.3%)\n\nOverall, Tesla's total revenue increase in 2022 compared to 2021 is:\n\n$6,091 (services and other) + $24,230 (automotive) = $30,321 (increase of 60%)" additional_kwargs={} response_metadata={'model': 'llama3.2:3b', 'created_at': '2025-06-07T21:51:31.197180526Z', 'done': True, 'done_reason': 'stop', 'total_duration': 1501696631, 'load_duration': 12364668, 'prompt_eval_count': 2048, 'prompt_eval_duration': 217000000, 'eval_count': 170, 'eval_duration': 1271000000, 'model_name': 'llama3.2:3b'} id='run--acd174a9-482f-4917-b33b-ca282c1c05b4-0' usage_metadata={'input_tokens': 2048, 'output_tokens': 170, 'total_to

### Semantic Routing

In [11]:
from langchain.utils.math import cosine_similarity
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import chain
from langchain_ollama import ChatOllama, OllamaEmbeddings

# Two prompts
physics_template = """You are a very smart physics professor. You are great at 
    answering questions about physics in a concise and easy-to-understand manner like your audience is an 8 years old kid. 
    When you don't know the answer to a question, you admit that you don't know.

Here is a question:
{query}"""

math_template = """You are a very good mathematician. You are great at answering 
    math questions. You are so good because you are able to break down hard 
    problems into their component parts, answer the component parts, and then 
    put them together to answer the broader question.

Here is a question:
{query}"""

# Embed prompts
embeddings = OllamaEmbeddings(model="nomic-embed-text")
prompt_templates = [physics_template, math_template]
prompt_embeddings = embeddings.embed_documents(prompt_templates)

# Route question to prompt
@chain
def prompt_router(query):
    # Embed question
    query_embedding = embeddings.embed_query(query)
    # Compute similarity
    similarity = cosine_similarity([query_embedding], prompt_embeddings)[0]
    # Pick the prompt most similar to the input question
    most_similar = prompt_templates[similarity.argmax()]
    return PromptTemplate.from_template(most_similar)

semantic_router = (
    prompt_router
    | ChatOllama(model="llama3.2:3b")
    | StrOutputParser()
)

print(semantic_router.invoke("What's an imaginary number"))

A fundamental concept in mathematics! An imaginary number is a complex number that, when squared, gives a negative result. In other words, it's a number that, when multiplied by itself, results in a "negative" value.

To understand why we need imaginary numbers, let's consider the real number system. We're familiar with numbers like 1, 2, 3, and so on, which are all positive or zero. However, some mathematical operations, such as solving quadratic equations, can result in values that don't fit this familiar pattern.

For example, consider the equation x^2 + 1 = 0. This equation has no real solutions, meaning there is no value of x that can make both sides equal to each other. When we try to solve for x using only real numbers, we run into a problem.

That's where imaginary numbers come in. We introduce a new number, often represented as "i" (imagine the word "imaginary" without the words...), which satisfies the following properties:

1. i^2 = -1

In other words, when we square the ima

In [12]:
print(semantic_router.invoke("What's gravity"))

Gravity is a really cool force that pulls everything towards each other!

Imagine you have a ball, and you throw it up in the air. What happens? The ball comes back down, right?

That's because the Earth is pulling on the ball with its gravity. The Earth wants to keep the ball close to itself, so it pulls on it with a strong force.

Gravity is like a magic string that connects everything in the world to each other. It's what keeps us on the ground and what makes things fall down instead of up.

But here's a secret: gravity isn't just about big things like Earth. Everything has gravity, even you! That means you have gravity too, and it's pulling on everything around you.

Isn't that awesome?


In [13]:
from langchain_ollama import ChatOllama
import os
from langchain_postgres.vectorstores import PGVector
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document

llm = ChatOllama(model="llama3.2:3b", temperature=0)
embeddings = OllamaEmbeddings(model="nomic-embed-text")

# 2. Create some sample documents with metadata
# In a real application, you'd load these from a database, files, etc.
docs = [
    Document(
        page_content="The Lord of the Rings is an epic fantasy novel by J.R.R. Tolkien.",
        metadata={"genre": "fantasy", "year": 1954, "author": "J.R.R. Tolkien", "rating": 9.5}
    ),
    Document(
        page_content="Dune is a science fiction novel by Frank Herbert, set in a desert world.",
        metadata={"genre": "science fiction", "year": 1965, "author": "Frank Herbert", "rating": 8.8}
    ),
    Document(
        page_content="Pride and Prejudice is a romantic novel of manners written by Jane Austen.",
        metadata={"genre": "romance", "year": 1813, "author": "Jane Austen", "rating": 9.0}
    ),
    Document(
        page_content="Foundation is a science fiction novel series by Isaac Asimov.",
        metadata={"genre": "science fiction", "year": 1951, "author": "Isaac Asimov", "rating": 9.2}
    ),
    Document(
        page_content="The Hitchhiker's Guide to the Galaxy is a comedy science fiction series.",
        metadata={"genre": "comedy, science fiction", "year": 1979, "author": "Douglas Adams", "rating": 9.1}
    ),
    Document(
        page_content="1984 is a dystopian social science fiction novel by George Orwell.",
        metadata={"genre": "dystopian, science fiction", "year": 1949, "author": "George Orwell", "rating": 8.7}
    ),
]

connection = 'postgresql+psycopg://langchain:langchain@localhost:6024/langchain'

# Initialize the PGVector database and get the retriever
vectorstore = PGVector.from_documents(docs, embeddings, connection=connection)

# 4. Define the metadata fields and their types
# This is crucial for the SelfQueryRetriever to understand how to parse queries.
metadata_field_info = [
    AttributeInfo(
        name="genre",
        description="The genre of the book. Can be 'fantasy', 'science fiction', 'romance', 'comedy', or 'dystopian'. It can also contain multiple genres separated by commas.",
        type="string",
    ),
    AttributeInfo(
        name="year",
        description="The year the book was published",
        type="integer",
    ),
    AttributeInfo(
        name="author",
        description="The author of the book",
        type="string",
    ),
    AttributeInfo(
        name="rating",
        description="A 1-10 rating for the book",
        type="float",
    ),
]

# 5. Provide a description of the document content
document_content_description = "Brief summary of a book"

# 6. Initialize the SelfQueryRetriever
# The `verbose=True` argument can be very helpful for debugging.
retriever = SelfQueryRetriever.from_llm(
    llm,
    vectorstore,
    document_content_description,
    metadata_field_info,
    verbose=True # Set to True to see the structured query generated by the LLM
)

# 7. Test the retriever with various natural language queries

print("--- Query 1: Classic fantasy novels ---")
query_1 = "Classic fantasy novels"
results_1 = retriever.invoke(query_1)
for doc in results_1:
    print(f"- {doc.page_content} (Metadata: {doc.metadata})")

print("\n--- Query 2: Sci-fi books published after 1960 ---")
query_2 = "Sci-fi books published after 1960"
results_2 = retriever.invoke(query_2)
for doc in results_2:
    print(f"- {doc.page_content} (Metadata: {doc.metadata})")

print("\n--- Query 3: Books by Jane Austen with a rating above 8.5 ---")
query_3 = "Books by Jane Austen with a rating above 8.5"
results_3 = retriever.invoke(query_3)
for doc in results_3:
    print(f"- {doc.page_content} (Metadata: {doc.metadata})")

print("\n--- Query 4: Comedy science fiction books ---")
query_4 = "Comedy science fiction books"
results_4 = retriever.invoke(query_4)
for doc in results_4:
    print(f"- {doc.page_content} (Metadata: {doc.metadata})")

print("\n--- Query 5: Dystopian novels from the 1950s ---")
query_5 = "Dystopian novels from the 1950s"
results_5 = retriever.invoke(query_5)
for doc in results_5:
    print(f"- {doc.page_content} (Metadata: {doc.metadata})")



--- Query 1: Classic fantasy novels ---


OutputParserException: Parsing text
```json
{
    "query": "classic fantasy",
    "filter": "and(eq(\"genre\", \"fantasy\"), eq(\"year\", 19|20|21))"
}
```
 raised following error:
No terminal matches '|' in the current parser context, at line 1 col 42

nd(eq("genre", "fantasy"), eq("year", 19|20|21))
                                        ^
Expected one of: 
	* COMMA
	* RPAR
	* RSQB

Previous tokens: Token('SIGNED_INT', '19')

For troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/OUTPUT_PARSING_FAILURE 

In [14]:
from langchain_community.tools import QuerySQLDatabaseTool
from langchain_community.utilities import SQLDatabase
from langchain.chains import create_sql_query_chain
from langchain_ollama import ChatOllama

# replace this with the connection details of your db
db = SQLDatabase.from_uri("sqlite:///Chinook.db")
llm = ChatOllama(model="llama3.2:3b", temperature=0)

# convert question to sql query
write_query = create_sql_query_chain(llm, db)

# Execute SQL query
execute_query = QuerySQLDatabaseTool(db=db)

# combined
chain = write_query | execute_query

# invoke the chain with a dictionary input
# The key 'question' is the default expected by create_sql_query_chain
result = chain.invoke({'question': 'How many employees are there?'})

print(result)

Error: (sqlite3.OperationalError) near "Question": syntax error
[SQL: Question: How many employees are there?
SQLQuery: SELECT COUNT(*) FROM "employees";]
(Background on this error at: https://sqlalche.me/e/20/e3q8)
