In [1]:
!pip -q install langchain langchain-google-genai langchain-huggingface langchain-core

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.7/50.7 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m28.6 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-generativeai 0.8.5 requires google-ai-generativelanguage==0.6.15, but you have google-ai-generativelanguage 0.9.0 which is incompatible.[0m[31m
[0m

In [2]:
from google.colab import userdata
GOOGLE_API_KEY=userdata.get('GOOGLE_API_KEY')

In [3]:
from langchain_google_genai import ChatGoogleGenerativeAI
model = ChatGoogleGenerativeAI(model="gemini-2.5-flash", google_api_key=GOOGLE_API_KEY)

model.invoke("what is USD value in INR today. Answer in one sentence").content

'One US Dollar is approximately 83.30 Indian Rupees today.'

In [None]:
from langchain_core.prompts import PromptTemplate, load_prompt

template = PromptTemplate(template="""
what is USD value in {currency} today. Answer in less than {no_of_char} characters.
You should also provide percentage change in value in last {no_of_year} year from today, answer in less than {no_of_char} characters.
""",
input_variables=["currency", "no_of_char", "no_of_year"],
validate_template=True
)

template.save("template.json")
template2 = load_prompt("template.json")


prompt = template.invoke(input={
    "currency":"INR",
    "no_of_char":"20",
    "no_of_year":"1"
})

prompt2 = template2.invoke(input={
    "currency":"SAR",
    "no_of_char":"20",
    "no_of_year":"1"
})

print(model.invoke(prompt2).content)

3.75 SAR
0.00%


# Chaining

In [None]:
chain = template | model

chain.invoke(input={
    "currency":"GBP",
    "no_of_char":"20",
    "no_of_year":"1"
})

# ChatPromptTemplate

In [None]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage

prev_history = []

template = ChatPromptTemplate([
    ('system',"you are a pirate always reply in a tone and slang of pirates"),
    MessagesPlaceholder(variable_name="chat_history"),
    ('human', "{human_input}")
  ])


while True:
  human_input = input("Enter your message: ")

  prompt = template.invoke(input={
    "no_of_year":"1",
    "human_input": human_input,
    "chat_history":prev_history})

  content = model.invoke(prompt).content
  print(content)
  prev_history.append(HumanMessage(content=human_input))
  prev_history.append(AIMessage(content=content))
  print("\n\n\nHISTORY:",prev_history)


Enter your message: where is capital of India
Ahoy there, me heartie! Ye be askin' 'bout the capital o' India, eh? Well, that be New Delhi, a grand city indeed! Now, where be me spyglass... I got a feelin' there's treasure to be found 'round those parts! Har har!



HISTORY: [HumanMessage(content='where is capital of India', additional_kwargs={}, response_metadata={}), AIMessage(content="Ahoy there, me heartie! Ye be askin' 'bout the capital o' India, eh? Well, that be New Delhi, a grand city indeed! Now, where be me spyglass... I got a feelin' there's treasure to be found 'round those parts! Har har!", additional_kwargs={}, response_metadata={})]
Enter your message: what is the currency there?
Ahoy there, me bucko! Ye be wonderin' 'bout the shiny jingly bits they use for trade in New Delhi, eh? Well, that be the **Indian Rupee**! Aye, a fine coin it is, though not quite as grand as a chest o' gold doubloons, mind ye! Har har!



HISTORY: [HumanMessage(content='where is capital of Indi

KeyboardInterrupt: Interrupted by user

# Structured output

In [None]:
!pip -q install pydantic

In [4]:
from pydantic import BaseModel, Field

class CurrencyValue(BaseModel):
  source_currency: str = Field(description="internationally recognised currency short form in 3 characters example INR USD SAR etc from which 1 unit is getting converted to")
  target_currency: str = Field(description="internationally recognised currency short form in 3 characters example INR USD SAR etc to which currency value to which getting converted in")
  currency_value: float = Field(description="value of the target_currency after converting 1 unit of source_currency into target_currency")
  llm_output: str = Field(description="the raw output of the LLM")

new_model = model.with_structured_output(CurrencyValue)
output = new_model.invoke("what is average USD value in INR on 1st Jan 2022")

In [5]:
print(output)
print(type(output))
print(output.currency_value, output.source_currency, output.target_currency, output.llm_output)


source_currency='USD' target_currency='INR' currency_value=74.29 llm_output='On January 1st, 2022, 1 USD was approximately 74.29 INR.'
<class '__main__.CurrencyValue'>
74.29 USD INR On January 1st, 2022, 1 USD was approximately 74.29 INR.


In [None]:
model.invoke("""what is the capital of india. """)

AIMessage(content='The capital of India is **New Delhi**.', additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.5-flash', 'safety_ratings': []}, id='run--68af28c2-4379-436b-825d-437a7731836a-0', usage_metadata={'input_tokens': 9, 'output_tokens': 26, 'total_tokens': 35, 'input_token_details': {'cache_read': 0}, 'output_token_details': {'reasoning': 17}})

In [10]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableSequence, RunnableParallel, R
from langchain_core.prompts import PromptTemplate

template1 = PromptTemplate(template="""
what is the longitute & lattitude of place mentioned in following sentence: {input}
""",
input_variables=["input"],
validate_template=True
)


parser = StrOutputParser()

chain = template1 | model | parser
#chain = RunnableSequence(template1 , model , parser) #above and this line both are same

chain.invoke({"input": "what is the capital of india."})

'The place mentioned in the sentence "what is the capital of india" is **New Delhi**.\n\nThe approximate longitude and latitude for New Delhi are:\n\n*   **Latitude:** 28.6139° N\n*   **Longitude:** 77.2090° E'

# RunnableParallel

In [11]:
template2 = PromptTemplate(template="""{input}""",
input_variables=["input"],
validate_template=True
)

parallel_chain = RunnableParallel(
    {
        "output1": chain,
        "output2": template2 | model | parser
    }
)

result = parallel_chain.invoke({"input": "what is the capital of india."})

In [12]:
print(result)

{'output1': 'The sentence "what is the capital of india" refers to **New Delhi**, which is the capital of India.\n\nThe approximate longitude and latitude for New Delhi are:\n\n*   **Latitude:** 28.6139° N\n*   **Longitude:** 77.2090° E', 'output2': 'The capital of India is **New Delhi**.'}


In [15]:
!pip install -q langchain-community pypdf

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/323.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m [32m317.4/323.9 kB[0m [31m10.4 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m323.9/323.9 kB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [18]:
from langchain_community.document_loaders import PyPDFLoader
loader = PyPDFLoader("shahjahan.pdf")
pages = loader.load()
#pages  #contains List[ Document(metadata={}, page_content="") ]

In [17]:
from pprint import pprint
pprint(pages)

[Document(metadata={'producer': 'PDFium', 'creator': 'PDFium', 'creationdate': 'D:20251025011727', 'source': 'shahjahan.pdf', 'total_pages': 6, 'page': 0, 'page_label': '1'}, page_content='AIJRA Vol. I Issue III  www.ijcms2015.co  ISSN 2455-5967 \n \n The Mughal Empire Under Shah Jahan in Indian History: A Study  \nHarish Chandra \n \n40.1 \nThe Mughal Empire Under Shah Jahan in Indian History: A Study  \n  \n \n \n \n*Harish Chandra \n  \nAbstract \nEmperor Shah Jahan (1592 -1666) was one of the greatest Mughal Emperors of India. He ruled an \nEmpire that was one of the largest in the history of the world. It covered today’s Afghanistan, \nPakistan, Iran, India, Nepal, Bhutan and Bangladesh. The size of the huge Empire was easily \ncomparable to the gig antic Roman Empire and British Empire. The rule of Emperor Shah Jahan was \none of the most peaceful, prosperous and progressive times of Indian history. There were \nunimaginable developments in the field of art and architecture. The 

In [42]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200
)

chunks = splitter.split_documents(pages)

# chunks = []
# for page in pages:
#   chunks.append(splitter.split_text(page))

pprint(chunks)

[Document(metadata={'producer': 'PDFium', 'creator': 'PDFium', 'creationdate': 'D:20251025011727', 'source': 'shahjahan.pdf', 'total_pages': 6, 'page': 0, 'page_label': '1'}, page_content='AIJRA Vol. I Issue III  www.ijcms2015.co  ISSN 2455-5967 \n \n The Mughal Empire Under Shah Jahan in Indian History: A Study  \nHarish Chandra \n \n40.1 \nThe Mughal Empire Under Shah Jahan in Indian History: A Study  \n  \n \n \n \n*Harish Chandra \n  \nAbstract \nEmperor Shah Jahan (1592 -1666) was one of the greatest Mughal Emperors of India. He ruled an \nEmpire that was one of the largest in the history of the world. It covered today’s Afghanistan, \nPakistan, Iran, India, Nepal, Bhutan and Bangladesh. The size of the huge Empire was easily \ncomparable to the gig antic Roman Empire and British Empire. The rule of Emperor Shah Jahan was \none of the most peaceful, prosperous and progressive times of Indian history. There were \nunimaginable developments in the field of art and architecture. The 

In [None]:
!pip install -q langchain-chroma

In [34]:
from langchain_chroma import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings

embedding_model = GoogleGenerativeAIEmbeddings(
    model="models/gemini-embedding-001",
    google_api_key=GOOGLE_API_KEY
)

# out = embedding_model.embed_query("Hello world")
# print(len(out)) #3072

chroma_vector = Chroma(
    collection_name="shahjahan", #table name
    persist_directory="shahjahan", #folder name
    embedding_function=embedding_model #embedding model wrapped in Langchain Class
)

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/67.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m20.7/20.7 MB[0m [31m40.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m278.2/278.2 kB[0m [31m17.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m55.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m103.3/103.3 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.4/17.4 MB[0m [31m70.6 MB/s[0m eta [36m0:00:

In [41]:
out = chroma_vector.add_documents(chunks)
out

['80e34e3e-d1b2-427a-940b-b614de3fdba8',
 '630e8e94-c1b8-4b84-b9fb-dc4216e64aa6',
 '91bcfe02-96f8-458c-a0f7-6562b6a5d918',
 '6d958841-4b19-4565-a553-8d58f7095472',
 '1b57c7f9-a38a-46a6-a269-ac1c01ce4c3b',
 'a8e20ce6-2abb-4c17-8e9d-97d25872757d',
 '7f1fd5cf-cc88-4004-be43-ba050197df46',
 'c1057609-e7f5-44de-8985-59cead4b8e25',
 'f01b7adc-57e1-434a-87c3-38f3e91f818f',
 '5f8d07ad-c53b-412f-8aa3-5c5d12d81bab',
 '5e2a1862-d9fa-4982-a4fd-e107528f58ce',
 '325be261-6efd-4f00-9823-60ca1f3c34c1',
 'a6bfa23e-6743-4443-a715-4a86c77256e2',
 '81c55369-c4e2-4dbd-8fa8-4b4f4316c125',
 'd2a01791-6d1f-40ce-ac1c-2e170fd5eab1',
 'df9a9bb3-d963-46d5-b258-607dac04a804',
 'd57f7b51-95f5-41dc-b629-dd2ba7802931',
 '869bbe19-de48-46b8-b449-1065f089fc41',
 'ac07c81e-684b-4e88-b960-7bf197c56d0b',
 '142a0192-7d9a-4553-a9cf-ef04044e3320',
 '0d4ea2ca-6164-4336-823a-5726a6ab6ebc',
 '4f421fe3-faac-4237-b4c9-d64b2acf4f2c']

In [47]:
query = "who was wife of shahjahan"
result = chroma_vector.similarity_search(query=query, k=2)
result

[Document(id='a8e20ce6-2abb-4c17-8e9d-97d25872757d', metadata={'source': 'shahjahan.pdf', 'creationdate': 'D:20251025011727', 'creator': 'PDFium', 'page': 1, 'producer': 'PDFium', 'total_pages': 6, 'page_label': '2'}, page_content='was ignorant about th e Empire and his subjects. So referring to Shah Jahan’s reign as the golden era \nof Indian history raises question marks and looks like a distant dream. \n \nDate of Birth: January 5, 1592 \nPlace of Birth: Lahore, Pakistan \nBirth Name: Shahab-ud-din Muhammad Khurram \nDate of Death: January 22, 1666 \nPlace of Death: Agra, India \nReign: January 19, 1628 to July 31, 1658 \nSpouses: Kandahari Mahal, Akbarabadi Mahal, Mumtaz Mahal, Fatehpuri Mahal, Muti Begum \nChildren: Aurangzeb, Dara Shukoh, Jahanara Begum, Shah Shuja,  Murad Bakhsh, Roshanara \nBegum, Gauhara Begum, Parhez Banu Begum, Husnara Begum, Sultan Luftallah, Sultan Daulat \nAfza, Huralnissa Begum, Shahzadi Surayya Banu Begum, Sultan Ummid Baksh \nFather:Jahangir \nMother: 

In [49]:
from langchain_core.prompts import PromptTemplate



PROMPT = PromptTemplate(
    template="""
    Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
    {context}
    Question: {question}
    """,
    input_variables=["context", "question"]
)

chain = PROMPT | model | parser

out = chain.invoke({
    "context": result[0],
    "question": query
})

out

AIMessage(content="Shah Jahan's wives were Kandahari Mahal, Akbarabadi Mahal, Mumtaz Mahal, Fatehpuri Mahal, and Muti Begum.", additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.5-flash', 'safety_ratings': []}, id='run--e4fcd6a7-4594-4619-8c06-2cccc6bb3779-0', usage_metadata={'input_tokens': 386, 'output_tokens': 174, 'total_tokens': 560, 'input_token_details': {'cache_read': 0}, 'output_token_details': {'reasoning': 143}})