In [17]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.documents import Document

# Initialize HuggingFace embeddings (local)
embedding_function = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Documents with longer content, metadata, and timing references
docs = [
    Document(
        page_content="""Artificial intelligence (AI) has transformed modern technology over the past decade (2010-2025). 
Machine learning algorithms can process and analyze vast amounts of data in seconds, something that used to take hours in the early 2010s. 
Natural language processing allows computers to understand and generate human language with near-human accuracy in 2024. 
Computer vision enables machines to interpret visual information in real time, with applications in surveillance and autonomous vehicles since 2015. 
AI systems are increasingly sophisticated, capable of reasoning, problem-solving, and decision-making in complex environments as of 2025.""",
        metadata={"source": "ai_file.txt"}
    ),
    Document(
        page_content="""Climate change has accelerated in the past 50 years (1970-2025), becoming one of the most urgent global challenges. 
Rising temperatures have caused polar ice caps to melt faster since the 1990s, leading to a 20 cm increase in sea levels over the past three decades. 
Extreme weather events, such as floods and hurricanes, have increased in frequency by 30% since 2000. 
Transitioning to renewable energy sources such as solar, wind, and hydroelectric power is critical by 2030 to mitigate environmental damage. 
International collaboration and policy reforms are ongoing, with major climate summits held annually since 1992.""",
        metadata={"source": "climate_file.txt"}
    ),
    Document(
        page_content="""Maintaining a healthy lifestyle has become increasingly emphasized over the last 20 years (2005-2025). 
Regular exercise strengthens the cardiovascular system and muscles, with recommendations of at least 150 minutes per week. 
Balanced nutrition provides essential vitamins and minerals, a focus of dietary guidelines updated every five years. 
Adequate sleep, typically 7-9 hours per night, allows the body to recover physically and mentally. 
Stress management techniques, including mindfulness and meditation, have been widely adopted in workplaces since 2015, enhancing quality of life and preventing burnout.""",
        metadata={"source": "health_file.txt"}
    ),
    Document(
        page_content="""Renewable energy technologies have advanced rapidly worldwide over the last 15 years (2010-2025). 
Solar panels convert sunlight directly into electricity, with efficiency improving by 50% since 2010. 
Wind turbines harness atmospheric currents, producing record-breaking energy outputs in 2023. 
Hydroelectric power generates electricity from flowing water, with new plants built between 2015-2022. 
Geothermal energy has been utilized increasingly since 2018, contributing to 2% of global electricity in 2024. 
Adopting these sustainable technologies by 2030 is critical to combat climate change.""",
        metadata={"source": "renewable_file.txt"}
    ),
    Document(
        page_content="""Space exploration has progressed significantly from 1960 to 2025, revealing many cosmic mysteries. 
Telescopes launched in the 1990s and 2000s observe distant galaxies, nebulae, and other celestial phenomena. 
Rovers exploring Mars since 2004 continue to search for signs of past or present life. 
The International Space Station, operational since 2000, supports microgravity research and international collaboration. 
Private companies have been developing commercial space travel since 2010, with several manned missions planned between 2023-2025.""",
        metadata={"source": "space_file.txt"}
    ),
    Document(
        page_content="""Digital transformation has reshaped business operations over the past decade (2010-2025). 
Cloud computing provides scalable infrastructure and has grown at an annual rate of 25% since 2015. 
Big data analytics started booming in the early 2010s, enabling insights for smarter decision-making. 
Cybersecurity measures have become critical, with incidents increasing yearly since 2012. 
Automation and AI-driven tools streamline repetitive tasks, increasing efficiency by up to 40% in large enterprises from 2020-2025.""",
        metadata={"source": "digital_file.txt"}
    )
]

# Create Chroma vector database
db = Chroma.from_documents(docs, embedding_function)

print("Chroma database created successfully with 6 documents, each including metadata and timing references!")


Chroma database created successfully with 6 documents, each including metadata and timing references!


In [18]:
retriever = db.as_retriever(search_type="mmr", search_kwargs={"k":3})

In [19]:
retriever.invoke("When was telescope launched?")

[Document(metadata={'source': 'space_file.txt'}, page_content='Space exploration continues to reveal the mysteries of the universe. \nTelescopes allow observation of distant galaxies, nebulae, and other celestial phenomena. \nRovers and landers explore planetary surfaces, searching for signs of past or present life. \nThe International Space Station supports microgravity research and international collaboration in space science. \nPrivate companies are developing commercial space travel, aiming to make space more accessible for humanity in the near future.'),
 Document(metadata={'source': 'digital_file.txt'}, page_content='Digital transformation has reshaped business operations over the past decade (2010-2025). \nCloud computing provides scalable infrastructure and has grown at an annual rate of 25% since 2015. \nBig data analytics started booming in the early 2010s, enabling insights for smarter decision-making. \nCybersecurity measures have become critical, with incidents increasing 

In [None]:
from langchain_core.prompts import ChatMessagePromptTemplate

template = """
Answer the question based only on the following context: {context}
Question: {question}"""

prompt = ChatMessagePromptTemplate.from_template(template, role="human")

print(prompt)


ValidationError: 1 validation error for ChatMessagePromptTemplate
role
  Field required [type=missing, input_value={'prompt': PromptTemplate...nQuestion: {question}')}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.11/v/missing

In [None]:
from langchain_core.output_parsers import StrOutputParser
from langchain_google_genai import ChatGoogleGenerativeAI
import os
from dotenv import load_dotenv

load_dotenv()

llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=os.getenv("GOOGLE_API_KEY")
)

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

qa_chain = (
    {
        "context": lambda x: format_docs(retriever.invoke(x)),
        "question": lambda x: x
    }
    | prompt
    | llm
    | StrOutputParser()
)

qa_chain.invoke("")
