In [3]:
! pip install langchain
! pip install langchain_community
! pip install langchain_openai

^C


Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Collecting langchain
  Downloading langchain-0.3.1-py3-none-any.whl.metadata (7.1 kB)
Collecting PyYAML>=5.3 (from langchain)
  Downloading PyYAML-6.0.2-cp310-cp310-win_amd64.whl.metadata (2.1 kB)
Collecting SQLAlchemy<3,>=1.4 (from langchain)
  Downloading SQLAlchemy-2.0.35-cp310-cp310-win_amd64.whl.metadata (9.9 kB)
Collecting aiohttp<4.0.0,>=3.8.3 (from langchain)
  Downloading aiohttp-3.10.8-cp310-cp310-win_amd64.whl.metadata (7.8 kB)
Collecting async-timeout<5.0.0,>=4.0.0 (from langchain)
  Downloading async_timeout-4.0.3-py3-none-any.whl.metadata (4.2 kB)
Collecting langchain-core<0.4.0,>=0.3.6 (from langchain)
  Downloading langchain_core-0.3.8-py3-none-any.whl.metadata (6.3 kB)
Collecting langchain-text-splitters<0.4.0,>=0.3.0 (from langchain)
  Downloading langchain_text_splitters-0.3.0-py3-none-any.whl.metadata (2.3 kB)
Collecting langsmith<0.2.0,>=0.1.17 (from langchain)
  Downloading langsmith-0.1.130-

In [11]:
from typing import List
from pydantic import BaseModel, Field
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate

class DocumentRelevance(BaseModel):
    """Evaluate the relevance of multiple documents to a question."""
    relevant_indices: List[int] = Field(..., description="Indices of documents that are relevant to the question")
    explanation: str = Field(..., description="Brief explanation of why these documents were selected as relevant")

# Define the prompt to grade documents
grade_documents_prompt = """
You are a grader tasked with assessing the relevance of retrieved documents to a set of user questions.
Instructions:
1. Carefully examine each user question.
2. Analyze the content of each provided document.
3. Evaluate if each document contains keywords or semantic meaning related to AT LEAST ONE of the user questions.
4. The evaluation should not be overly strict. The goal is to filter out erroneous retrievals.
5. For each document, give a binary score 'yes' or 'no' to indicate whether it is relevant to AT LEAST ONE of the questions.
6. Consider linguistic variations and possible synonyms between the questions and the documents.
7. Take into account the overall context of each document, not just word-for-word matches.
8. If a document provides partial or indirect information related to a question, consider it relevant. Evaluate if the partial information is substantial enough to be useful to the user.
9. Pay attention to temporal aspects - a document might be relevant even if it doesn't exactly match the time frame of the question. Consider if dated information could still be relevant or provide useful historical context.
10. Consider cultural and contextual nuances that might affect the relevance of the documents to the questions.
11. If multiple questions are asked, a document is considered relevant if it substantially answers at least one of the questions.
12. If a document directly contradicts the premise of a question but provides correct and relevant information, consider it relevant.
13. While assessing reliability is not the primary focus, note any information that is clearly false or misleading.
14. For long documents, focus on the sections most relevant to the question.
15. For complex questions requiring synthesis, evaluate if a document provides key elements to construct an answer, even if it doesn't directly answer the question.
Note: A document is considered relevant if it addresses at least one of the questions, even if it doesn't address all of them.

User questions: {question}
Documents:
{documents}
Relevant document indices:
"""

# Initialize the language model
OPENAI_API_KEY = "sx-xxx"
llm_grade_docs = ChatOpenAI(model="gpt-4o-mini", temperature=0, openai_api_key=OPENAI_API_KEY)
structured_llm_grader = llm_grade_docs.with_structured_output(DocumentRelevance)

# Create the grading prompt
grade_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", grade_documents_prompt)
    ]
)

# Create the retrieval grader
retrieval_grader = grade_prompt | structured_llm_grader

# Example usage
question = "What are the primary factors contributing to climate change, and how do they interact with global economic policies? Additionally, what are some proposed solutions and their potential impacts on developing nations?"

documents = [
    "The Intergovernmental Panel on Climate Change (IPCC) identifies greenhouse gas emissions, primarily CO2 from fossil fuel combustion, as the main driver of climate change. These emissions are closely tied to economic activities, particularly in energy-intensive industries.",
    "Economic globalization has led to increased industrial production in developing nations, often with less stringent environmental regulations. This shift in manufacturing has contributed to higher global emissions while also driving economic growth in these countries.",
    "Deforestation, particularly in tropical regions, contributes significantly to climate change by reducing the Earth's capacity to absorb CO2. Many developing nations face pressure to clear forests for agriculture or resource extraction to support their economies.",
    "The Paris Agreement aims to limit global temperature increase to well below 2°C above pre-industrial levels. It requires nations to set their own emissions reduction targets, considering their economic circumstances.",
    "Carbon pricing mechanisms, such as cap-and-trade systems or carbon taxes, are proposed as market-based solutions to reduce emissions. However, implementing these in developing nations can be challenging due to concerns about economic competitiveness.",
    "Renewable energy technologies, including solar and wind power, are becoming increasingly cost-competitive with fossil fuels. This trend presents opportunities for developing nations to leapfrog traditional fossil fuel infrastructure.",
    "The history of ancient Rome is fascinating, spanning over a thousand years. From its founding as a small village to becoming a vast empire, Rome's influence on Western civilization cannot be overstated.", # not relevant
    "Climate change disproportionately affects developing nations, exacerbating existing socio-economic challenges such as food security, water scarcity, and vulnerability to extreme weather events.",
    "The concept of 'just transition' emphasizes the need to ensure that the shift to a low-carbon economy doesn't unfairly burden vulnerable communities or nations. This includes providing support for workers and communities dependent on fossil fuel industries.",
    "Artificial Intelligence is rapidly evolving, with applications ranging from natural language processing to autonomous vehicles. As AI becomes more sophisticated, it raises important ethical and societal questions.",
    "International climate finance mechanisms aim to support developing nations in adopting clean technologies and adapting to climate change impacts. However, the scale of funding required remains a significant challenge.",
    "The global coffee industry faces numerous challenges, including price volatility, changing consumer preferences, and the impact of climate change on coffee-growing regions. Sustainability initiatives are becoming increasingly important in this sector." # not relevant
]

result = retrieval_grader.invoke({"question": question, "documents": documents})
print(f"Relevant documents: {result.relevant_indices}")
print(f"Explanation: {result.explanation}")

Relevant documents: [0, 1, 2, 3, 4, 5, 6, 8, 9, 10]
Explanation: These documents address various aspects of climate change, including its primary factors (greenhouse gas emissions, deforestation), the interaction with economic policies (economic globalization, carbon pricing), and proposed solutions (renewable energy technologies, climate finance mechanisms) that impact developing nations. They provide substantial information relevant to the user's questions.
