In [1]:
import glob
import json
import os

import boto3

from langchain.chains import LLMChain, StuffDocumentsChain
from langchain.chat_models import AzureChatOpenAI, ChatOpenAI
from langchain.document_loaders import (
    PDFPlumberLoader,
    PyPDFLoader,
)
from langchain.llms.bedrock import Bedrock, LLMInputOutputAdapter
from langchain.output_parsers import (
    PydanticOutputParser,
)
from langchain.prompts import PromptTemplate

from langchain.schema.document import Document

from utils import ImpactAreas, ImpactAreaTags

VERBOSE = True

AZURE_OPENAI_API_BASE = "https://so-azure-openai-dev.openai.azure.com/"
AZURE_OPENAI_API_VERSION = "2023-07-01-preview"
AZURE_DEPLOYMENT_NAME = "gpt35-baseline"
AZURE_OPENAI_API_KEY = ""
AZURE_OPENAI_API_TYPE = "azure"

OPENAI_API_KEY = ""

session = boto3.Session(region_name='us-east-1')
boto3_bedrock = boto3.client(service_name='bedrock', region_name='us-east-1')
boto3_bedrock_stream = boto3.client(service_name='bedrock-runtime', region_name='us-east-1')

### Create Custom Bedrock class

In [2]:
from typing import Any, Iterator
from langchain.schema.output import GenerationChunk

class NewBedrock(Bedrock):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def _prepare_input_and_invoke_stream(self, prompt, stop=None, run_manager=None, **kwargs) -> Iterator[GenerationChunk]:
        _model_kwargs = {"prompt": Any, "max_tokens_to_sample": 8192, "temperature": 0.0}  #  "max_tokens_to_sample": Any
        provider = "anthropic"
        # _model_kwargs[self.provider_stop_sequence_key_name_map.get(provider)] = stop
        model_kwargs = {**_model_kwargs, **kwargs}
        input_body = LLMInputOutputAdapter.prepare_input(provider, prompt, model_kwargs)
        body = json.dumps(input_body)
        try:
            response = boto3_bedrock_stream.invoke_model(
                body=body,
                modelId="anthropic.claude-v2",
                accept="application/json",
                contentType="application/json",
            )
        except Exception as e:
            raise ValueError(f"Error raised by bedrock service: {e}")

        for chunk in LLMInputOutputAdapter.prepare_output_stream(
            provider, response, stop
        ):
            yield chunk

    def _prepare_input_and_invoke(self, prompt, stop=None, run_manager=None, **kwargs):
        _model_kwargs = {"prompt": Any, "max_tokens_to_sample": 8192, "temperature": 0.0}  # "max_tokens_to_sample": Any
       # _model_kwargs = {"prompt": Any, "max_tokens_to_sample": 8192}  # "max_tokens_to_sample": Any
        provider = "anthropic"
        # _model_kwargs[self.provider_stop_sequence_key_name_map.get(provider)] = stop
        model_kwargs = {**_model_kwargs, **kwargs}
        input_body = LLMInputOutputAdapter.prepare_input(provider, prompt, model_kwargs)
        body = json.dumps(input_body)
        print(body)
        try:
            response = boto3_bedrock_stream.invoke_model(
                body=body,
                modelId="anthropic.claude-v2",
                accept="application/json",
                contentType="application/json",
            )
            text = LLMInputOutputAdapter.prepare_output(provider, response)

        except Exception as e:
            raise ValueError(f"Error raised by bedrock service: {e}")

        return text

### Load Evidence

In [11]:
root_path = "/home/ubuntu/data/2023"

_folder_paths = [f.path for f in os.scandir(root_path) if f.is_dir()]
result_codes = [os.path.basename(f) for f in _folder_paths]
ID_CODE_EVIDENCE_MAP = {r: [] for r in result_codes}

for p in _folder_paths:
    base = os.path.basename(p)
    files = [f for f in (glob.glob(os.path.join(p, '**', '*.xlsx'), recursive=True) + \
                glob.glob(os.path.join(p, '**', '*.pdf'), recursive=True) + \
                glob.glob(os.path.join(p, '**', '*.pptx'), recursive=True)) \
                if not f.endswith('result.pdf')]
    ID_CODE_EVIDENCE_MAP[base].extend(files)

print(ID_CODE_EVIDENCE_MAP)

{'4747': ['/home/ubuntu/data/2023/4747/s42452-023-05499-1.pdf'], '5009': ['/home/ubuntu/data/2023/5009/0f68fd2242e2a7622578f1f480f45963.pdf'], '5316': ['/home/ubuntu/data/2023/5316/U23ArtKareemAntioxidantInthomDev.pdf'], '4959': ['/home/ubuntu/data/2023/4959/Soremi_2023_allEarth.pdf'], '4698': ['/home/ubuntu/data/2023/4698/journal%20article.pdf'], '5004': ['/home/ubuntu/data/2023/5004/journal%20article.pdf'], '4770': ['/home/ubuntu/data/2023/4770/s11250-023-03561-z.pdf'], '5326': ['/home/ubuntu/data/2023/5326/U23ArtEylandUnravellingInthomNodev.pdf'], '5294': ['/home/ubuntu/data/2023/5294/Genotypic%20variation%20in%20rice.pdf'], '4949': ['/home/ubuntu/data/2023/4949/H051684.pdf'], '5118': ['/home/ubuntu/data/2023/5118/journal%20article.pdf'], '5301': ['/home/ubuntu/data/2023/5301/article.pdf'], '5279': ['/home/ubuntu/data/2023/5279/U23ArtTsindiAnalysisInthomDev.pdf'], '5380': ['/home/ubuntu/data/2023/5380/U23ArtAbebeDiallelInthomNodev.pdf'], '5128': ['/home/ubuntu/data/2023/5128/journal

### Load Results

In [3]:
root_path = "/home/ubuntu/data/2023"

_folder_paths = [f.path for f in os.scandir(root_path) if f.is_dir()]
result_codes = [os.path.basename(f) for f in _folder_paths]
ID_CODE_RESULT_MAP = {r: None for r in result_codes}

# print(ID_CODE_RESULT_MAP)

for code in result_codes:
    path = f"/home/ubuntu/data/2023/{code}/result.pdf"
    if os.path.exists(path):
        ID_CODE_RESULT_MAP[code] = path

print(ID_CODE_RESULT_MAP)

{'4747': '/home/ubuntu/data/2023/4747/result.pdf', '5009': '/home/ubuntu/data/2023/5009/result.pdf', '5316': '/home/ubuntu/data/2023/5316/result.pdf', '4959': '/home/ubuntu/data/2023/4959/result.pdf', '4698': '/home/ubuntu/data/2023/4698/result.pdf', '5004': '/home/ubuntu/data/2023/5004/result.pdf', '4770': '/home/ubuntu/data/2023/4770/result.pdf', '5326': '/home/ubuntu/data/2023/5326/result.pdf', '5294': '/home/ubuntu/data/2023/5294/result.pdf', '4949': '/home/ubuntu/data/2023/4949/result.pdf', '5118': '/home/ubuntu/data/2023/5118/result.pdf', '5301': '/home/ubuntu/data/2023/5301/result.pdf', '5279': '/home/ubuntu/data/2023/5279/result.pdf', '5380': '/home/ubuntu/data/2023/5380/result.pdf', '5128': '/home/ubuntu/data/2023/5128/result.pdf', '5276': '/home/ubuntu/data/2023/5276/result.pdf', '5028': '/home/ubuntu/data/2023/5028/result.pdf', '5245': '/home/ubuntu/data/2023/5245/result.pdf', '5274': '/home/ubuntu/data/2023/5274/result.pdf', '5016': '/home/ubuntu/data/2023/5016/result.pdf',

### Load Azure OpenAI

In [4]:
llm_azure = AzureChatOpenAI(
    openai_api_base=AZURE_OPENAI_API_BASE,
    openai_api_version=AZURE_OPENAI_API_VERSION,
    deployment_name=AZURE_DEPLOYMENT_NAME,
    openai_api_key=AZURE_OPENAI_API_KEY,
    openai_api_type=AZURE_OPENAI_API_TYPE,
)

### Load OpenAI

In [5]:
llm_openai = ChatOpenAI(openai_api_key=OPENAI_API_KEY, model_name="gpt-4", temperature=0)
llm_openai_35 = ChatOpenAI(
    openai_api_key=OPENAI_API_KEY, model_name="gpt-3.5-turbo-16k", temperature=0
)

### Load Anthropic Claude 2

In [6]:
llm_bedrock = NewBedrock(
    model_id="anthropic.claude-v2",
)

In [7]:
# result = llm_bedrock("What does a researcher at CGIAR do?")
# result = llm_bedrock("What regions are part of the United Nations geoscheme? List all subregions for all continents.")
# result = llm_bedrock("What are the WHO subnational regions for Ethiopia? List all subregions for the country.")
# result = llm_bedrock("What are the UN 2030 Sustainable Development Goals")
result = llm_bedrock("What is the Kunming-Montreal Global Biodiversity Framework?")

{"prompt": "\n\nHuman: What is the Kunming-Montreal Global Biodiversity Framework?\n\nAssistant:", "max_tokens_to_sample": 8192, "temperature": 0.0}


In [8]:
print(result)

 The Kunming-Montreal Global Biodiversity Framework is an international agreement aimed at halting and reversing the loss of biodiversity worldwide by 2030. Some key details about it:

- It was originally slated to be finalized and adopted at the 15th Conference of Parties (COP15) to the UN Convention on Biological Diversity in Kunming, China in 2020, but this was postponed due to Covid-19. The framework is now expected to be adopted at COP15 in Montreal, Canada in December 2022.

- The framework sets 21 targets for 2030 related to things like protecting ecosystems, reducing extinctions, managing invasive species, reducing pollution, ensuring sustainable use of natural resources, and equitably sharing benefits from genetic resources.

- It is meant to build on and replace the previous Aichi Biodiversity Targets that were set for 2020 under the UN's Strategic Plan for Biodiversity 2011-2020. Most Aichi Targets were not met.

- The framework aims to take a "whole of government" and "whol

### Load Documents

#### Load PRMS Result, Pdf & Excel Evidence

In [None]:
code = "4785"
evidence_list = ID_CODE_EVIDENCE_MAP.get(code)
result = PyPDFLoader(ID_CODE_RESULT_MAP.get(code)).load_and_split()

print(evidence_list)

evidence_docs = []
for path in evidence_list:
    if os.path.splitext(os.path.basename(path))[1] == ".pdf":
        _evidence_docs = PyPDFLoader(path).load_and_split()
    elif os.path.splitext(os.path.basename(path))[1] == ".pptx":
        _evidence_docs = PDFPlumberLoader(path).load_and_split()
    else:
        fname = os.path.basename(path)
        print(f"Error: Unable to load {fname}")
        continue
    for _doc in _evidence_docs:
        evidence_docs.append(_doc)

print("Result: \n", result)
print("Evidence: \n", evidence_docs)

In [13]:
print(len(evidence_docs))

31


### Extract PRMS Results

In [14]:
output_parser = PydanticOutputParser(pydantic_object=ImpactAreas)
format_instructions = output_parser.get_format_instructions()

In [15]:
document_prompt = PromptTemplate(
            input_variables=["page_content"],
            template="{page_content}"
        )

In [16]:
extraction_template = (
    "You are a researcher at CGIAR. Your task is to review reports submitted by other researchers and evaluate "
    "how innovative they are across a number of dimensions. A researcher has submitted the following research results "
    "containing key points about the research they conducted. Extract the necessary information from the document provided "
    "and returning this information as a JSON instance.\n\n"

    "{format_instructions}\n\n"

    "The text you are extracting information from can be found below.\n\n"

    "{text}"
)
extraction_prompt = PromptTemplate.from_template(template=extraction_template, partial_variables={"format_instructions": format_instructions})

In [17]:
llm_chain = LLMChain(llm=llm_openai_35, prompt=extraction_prompt, verbose=VERBOSE)

In [18]:
stuff_chain = StuffDocumentsChain(llm_chain=llm_chain, document_prompt=document_prompt, document_variable_name="text", verbose=VERBOSE)

In [None]:
output = stuff_chain.run({"input_documents": result})

In [20]:
print(output)

{
  "project_title": "ClimBeR: Building Systemic Resilience Against Climate Variability and Extremes",
  "description": {
    "description": "The increasing interconnected nature of our world means that failing to achieve coherence between climate- and peace and security-related policy domains forms a significant climate-related security risk. Poorly designed climate policies that are insensitive to pre-existing insecurities and conflict dynamics may undermine political stability, amplify social inequalities and grievances, and accelerate a loss of biodiversity and climate change-related impacts. Peace and security policies that do not account for climate risks may conversely promote ineffective and unresponsive interventions and risk locking communities into vicious cycles of insecurity and climate vulnerability. This work contributes to the climate security proofing of policy outputs by developing a methodological framework that assesses the degree to which policy outputs and strateg

In [21]:
result = json.loads(output)

### Summarize Evidence

In [23]:
project_title = result["project_title"]
description = result["description"]["description"]
geographic_focus = result["geographic_location"]["geographic_focus"]
region = result["geographic_location"]["region"]
country = result["geographic_location"]["country"]
ia_tag_gender = result["impact_areas"]["gender_tag"]
ia_tag_climate = result["impact_areas"]["climate_change_tag"]
ia_tag_nutrition = result["impact_areas"]["nutrition_tag"]
ia_tag_environment = result["impact_areas"]["environment_tag"]
ia_tag_poverty = result["impact_areas"]["poverty_tag"]

In [24]:
summary_template = (
    "Human:<admin>You are a researcher at CGIAR. Your task is to review projects submitted by other researchers and evaluate "
    "them across a number of dimensions.<admin>\n\nHuman: "
    "This task consist of the following steps:\n"
    "Step 1: Review the project title and its description. Stop and think about it. This will provide you a general understanding of the evidence that will be presented in the next step.\n"
     "Step 2: Review the evidence provided.\n"
    "Step 3: Review the texts inside the <geographic_focus></geographic_focus> and <topics_of_interest></topics_of_interest> XML tags. These texts will be useful in carrying out the next step.\n"
    "Step 4: Generate a list of quotes from the evidence that highlight the important activities that took place as well as the findings made and any results. "
    "Also include quotes that characterize the geographic focus of the aforementioned activities and any topics of interest that are explicitly referenced in the evidence. "
    "Write them down word for word inside <thinking></thinking> XML tags. This is a space for you to write down relevant content and will not be shown to the user.\n"
    "Step 5: Using the quotes inside <thinking></thinking>, write a 750 word summary in a professional, academic 3rd person voice. "
    "Make sure to note all major activities that took place within the evidence. "
    "In addition to all major activites, the summary should identify the geographic focus of these activities and make sure to explicitly state "
    "any of the topics of interest if and only if they are referenced in the evidence. Both geographic focus and topics of interest are clearly defined below."
    "Make sure to justify why all particular topics of interest are identified. Finally, close the summary by restating all key findings.\n"
    "Step 6: Review the summary written in step 5. If necessary re-write it to emphasize conciseness without losing any details. Return the summary without any introduction between the XML tag <summary>.\n"
    "IMPORTANT: Keep in mind that the audience consist of academic researchers. Never refer to yourself in the first person in this summary. "
    "Refer to all actions in the past tense. Be sure to read the entire set of instructions carefully before beginning. "
    "Do not go to the next step without making sure the previous step has been completed.\n"

    "<project_title>\n"
    "{project_title}\n"
    "</project_title>\n"

    "<description>\n"
    "{description}\n"
    "</description>\n"

    "<geographic_focus>\n"
    "'Geographic focus' consist of determining if the major activities are 'Global', 'Regional', 'National', or 'Sub-national'. "
    "'Regional' refers to whether the activities focus on a United Nations geoscheme subregion. "
    "'National' refers to whether the activities focus on a country. "
    "'Sub-national' refers to whether the activites focus on a WHO subnational region without a country. "
    "The geographic focus is the largest category that is explicitly mentioned in the context of an activity carried out in the evidence.\n"
    "</geographic_focus>\n"

    "<topics_of_interest>\n"
    "'Topics of interest' are topics that should be highlighted in the summary if they are discussed because the impact of activites on these topics "
    "are especially important. These topics are nutrition, health, food security, poverty reduction, livelihood, jobs, gender equality, youth inlcusion, social inclusion, "
    "climate adaptation, climate mitigation, environmental health, and biodiversity.\n"
    "</topics_of_interest>\n"

    "<evidence>\n"
    "{text}\n"
    "</evidence>"
    "\n\nAssistant:\n<summary>\n"
)
summary_prompt = PromptTemplate.from_template(template=summary_template, partial_variables={"project_title": project_title, "description": description})

In [25]:
print(summary_template)

Human:<admin>You are a researcher at CGIAR. Your task is to review projects submitted by other researchers and evaluate them across a number of dimensions.<admin>

Human: This task consist of the following steps:
Step 1: Review the project title and its description. Stop and think about it. This will provide you a general understanding of the evidence that will be presented in the next step.
Step 2: Review the evidence provided.
Step 3: Review the texts inside the <geographic_focus></geographic_focus> and <topics_of_interest></topics_of_interest> XML tags. These texts will be useful in carrying out the next step.
Step 4: Generate a list of quotes from the evidence that highlight the important activities that took place as well as the findings made and any results. Also include quotes that characterize the geographic focus of the aforementioned activities and any topics of interest that are explicitly referenced in the evidence. Write them down word for word inside <thinking></thinking>

In [40]:
llm_chain = LLMChain(llm=llm_bedrock, prompt=summary_prompt, verbose=VERBOSE)

In [41]:
stuff_chain = StuffDocumentsChain(llm_chain=llm_chain, document_prompt=document_prompt, document_variable_name="text", verbose=VERBOSE)

In [None]:
evidence_summary = stuff_chain.run({"input_documents": evidence_docs})
evidence_summary = "<summary>\n"+evidence_summary

In [45]:
print(evidence_summary)

<summary>
The evidence presented describes a project that developed a methodological framework to assess the degree to which policy outputs and strategy documents in eight African countries display awareness of climate-related security risks and climate peace opportunities. The key activities carried out include:

- Developing a methodological framework to assess the climate security sensitivity of policies and strategies from climate/environment and peace/security sectors. The framework evaluates awareness, strategic coherence, and implementation coherence.

- Applying the framework to analyze 85 national and regional policies/strategies from 8 African countries - Kenya, Mali, Nigeria, Senegal, Somalia, South Sudan, Uganda, and Zimbabwe. 

- Finding that despite some recognition of climate-conflict linkages, a clear conceptual understanding of climate security is lacking. Climate/environment policies demonstrate greater awareness and cross-sectoral engagement than peace/security polic

### Determine Geographic Location/Impact Area Tags

In [46]:
output_parser = PydanticOutputParser(pydantic_object=ImpactAreaTags)
format_instructions = output_parser.get_format_instructions()

In [47]:
print(format_instructions)

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"geographic_location": {"$ref": "#/definitions/GeographicLocationOutput"}, "impact_areas": {"$ref": "#/definitions/ImpactAreasOutput"}, "impact_justifications": {"$ref": "#/definitions/ImpactJustifications"}}, "required": ["geographic_location", "impact_areas", "impact_justifications"], "definitions": {"GeographicLocationOutput": {"title": "GeographicLocationOutput", "type": "object", "properties": {"geographic_focus": {"title": "Geographic Focus", "description": "The geographic focus of the project.", "type": "string"}, "regio

In [48]:
GEO_LOC_LABELS = """
The labels for Geographic focus are: 'Global', 'Regional', 'National', 'Sub-national'.
The labels for Region are all the United Nations geoscheme subregions".
The labels for Country are any of the countries in the world.
"""

IA_OBJECTIVES = """
Below is a lookup table of CGIAR strategic objectives and their definitions.

| Impact Area | Impact Area Abbreviation | Objective |
| Gender equality, youth and social inclusion | Gender | To close the gender gap in rights to economic resources, access to ownership and control over land and natural resources for over 500 million women who work in food, land and water systems. |
| Gender equality, youth and social inclusion | Gender | To offer rewardable opportunities to 267 million young people who are not in employment, education or training. |
| Climate adaptation and mitigation | Climate | Turn agriculture and forest systems into a net sink for carbon by 2050 (Climate Mitigation target). |
| Climate adaptation and mitigation | Climate | Equip 500 million small-scale producers to be more resilient by 2030 to climate shocks, with climate adaptation solutions available through national innovation systems (Climate Adaptation target). |
| Climate adaptation and mitigation | Climate | Support countries in implementing National Adaptation Plans and Nationally Determined Contributions, and increased ambition in climate actions by 2030 (Climate Policy target). This support could possibly be in the form of education or training. |
| Nutrition, health and food security | Nutrition | To end hunger for all and enable affordable, healthy diets for the 3 billion people who do not currently have access to safe and nutritious food. |
| Nutrition, health and food security | Nutrition | To reduce cases of foodborne illness (600 million annually) and zoonotic disease (1 billion annually) by one third. |
| Environmental health and biodiversity | Environment | Stay within planetary and regional environmental boundaries: 1. consumptive water use in food production of less than 2,500 km3 per year (with a focus on the most stressed basins) 2. zero net deforestation 3. nitrogen application of 90 Tg per year (with a redistribution towards low-input farming systems) and increased use efficiency; and phosphorus application of 10 Tg per year. |
| Environmental health and biodiversity | Environment | Maintain the genetic diversity of seed varieties, cultivated plants and farmed and domesticated animals and their related wild species, including through soundly managed genebanks at the national, regional, and international levels. |
| Environmental health and biodiversity | Environment | In addition, water conservation and management, restoration of degraded lands/soils, restoration of biodiversity in situ, and management of pollution related to food systems are key areas of environmental impacts to which the CGIAR should contribute. |
| Environmental health and biodiversity | Environment | All of the 2030 targets that are organized as part of the 4 long-term goals for 2050 included in the Kunming-Montreal Global Biodiversity Framework. |
| Environmental health and biodiversity | Environment | All of the targets and indicators included in UN Sustainable Development Goals 2, 6, 12, 14, 15 and 17 |
| Poverty reduction, livelihoods and jobs | Poverty | Lift at least 500 million people living in rural areas above the extreme poverty line of US $1.90 per day (2011 PPP). |
| Poverty reduction, livelihoods and jobs | Poverty | Reduce by at least half the proportion of men, women and children of all ages living in poverty in all its dimensions, according to national definitions. |
"""

IA_LABELS = """
The Impact Area labels are defined as follows:

| Tag | Objective |
| Principal | The research activity principally addressses one of the objectives for the impact area. The impact area is the main objective of the research activity and fundamental to its design and expected results. The research activity would not have been undertaken without consideration of this impact area. |
| Significant | The research activity contributes in significant ways to the impact area, even though it is not the principal focus of the activity. The impact area is an important and deliberate objective of the research activity but not the main reason for its undertaking. |
| Not Targeted | The research activity has not been found to target any aspect of the impact area. |
"""

# source - QA Assessor Guidance 2023_v5_pub.pdf; Guidelines for gender equality tagging.pdf - https://drive.google.com/file/d/13PSfIH-nN32hCsPTznACmwhU6N5ggoje/view

In [49]:
geo_loc_ia_tags_template = (
     "You are a researcher at CGIAR. Your task is to review projects submitted by other researchers and determine "
     "the appropriate label for these projects for 8 dimensions."
     "Three of these dimensions are 'Geographic focus', 'Region' and 'Country'. They are collectively referred to as 'Geographic Location'.\n"
     "The remaining 5 dimensions are 'Gender equality, youth and social inclusion', 'Climate adaptation and mitigation', "
     "'Nutrition, health and food security', 'Environmental health and biodiversity' and 'Poverty reduction, livelihoods and jobs'. "
     "These dimensions are collectively referred to as 'Impact Areas'.\n"
     "Another researcher has already prepared a summary of the project for you to use as a reference for selecting the appropriate label for each dimension.\n\n"

     "The steps for the first task are as follows:\n"
     "This task consist of two sub-tasks. The first is to determine the Geographical Location labels. The second sub-task is to determine the Impact Area labels.\n"
     "Step 1: Review the sections labeled Project Title, Description, and Geographic Location Labels. Geographic Location Labels provides a list of the labels for each label.\n"
     "Step 2: Review the summary provided and return the labels that corresponds with it for 'Geographic focus', 'Region' and 'Country'.\n"
     "The steps for the second task are as follows:\n"
     "Step 1: Review the section labeled Impact Area Objectives. It contains a table of the impact areas and the objectives that comprise each of them.\n"
     "Step 2: Review the section labeled Impact Area Labels. It provides definitions of the tags for tagging the Impact Areas.\n"

     "IMPORTANT: Keep in mind that the audience consist of academic researchers. Never refer to yourself in the first person in this summary. "
     "Refer to all actions in the past tense. Be sure to read the entire set of instructions carefully before beginning. "
     "Do not go to the next step without making sure the previous step has been completed.\n\n"

     "Geographic Location Labels:\n {geo_loc_labels}\n\n"
     "Impact Area Objectives:\n {ia_objectives}\n\n"
     "Impact Area Labels:\n {ia_labels}\n\n"
     "Summary:\n{text}\n\n"

     "{format_instructions}"
     "Only return the resulting JSON object. DO NOT return any other text."
)
geo_loc_ia_tags_prompt = PromptTemplate.from_template(template=geo_loc_ia_tags_template, partial_variables={"geo_loc_labels": GEO_LOC_LABELS, "ia_objectives": IA_OBJECTIVES, "ia_labels": IA_LABELS, "format_instructions": format_instructions})

In [50]:
llm_chain = LLMChain(llm=llm_openai, prompt=geo_loc_ia_tags_prompt, verbose=VERBOSE)

In [51]:
stuff_chain = StuffDocumentsChain(llm_chain=llm_chain, document_prompt=document_prompt, document_variable_name="text", verbose=VERBOSE)

In [52]:
summary = Document(page_content=evidence_summary, metadata={"source": "Claude v2"})

In [53]:
print(summary.page_content)

<summary>
The evidence presented describes a project that developed a methodological framework to assess the degree to which policy outputs and strategy documents in eight African countries display awareness of climate-related security risks and climate peace opportunities. The key activities carried out include:

- Developing a methodological framework to assess the climate security sensitivity of policies and strategies from climate/environment and peace/security sectors. The framework evaluates awareness, strategic coherence, and implementation coherence.

- Applying the framework to analyze 85 national and regional policies/strategies from 8 African countries - Kenya, Mali, Nigeria, Senegal, Somalia, South Sudan, Uganda, and Zimbabwe. 

- Finding that despite some recognition of climate-conflict linkages, a clear conceptual understanding of climate security is lacking. Climate/environment policies demonstrate greater awareness and cross-sectoral engagement than peace/security polic

In [54]:
geo_loc_ia_tags = stuff_chain.run({"input_documents": [summary]})



[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mYou are a researcher at CGIAR. Your task is to review projects submitted by other researchers and determine the appropriate label for these projects for 8 dimensions.Three of these dimensions are 'Geographic focus', 'Region' and 'Country'. They are collectively referred to as 'Geographic Location'.
The remaining 5 dimensions are 'Gender equality, youth and social inclusion', 'Climate adaptation and mitigation', 'Nutrition, health and food security', 'Environmental health and biodiversity' and 'Poverty reduction, livelihoods and jobs'. These dimensions are collectively referred to as 'Impact Areas'.
Another researcher has already prepared a summary of the project for you to use as a reference for selecting the appropriate label for each dimension.

The steps for the first task are as follows:
This task consist of two sub-tasks. The first is to determine 

In [55]:
print(geo_loc_ia_tags)

{
  "geographic_location": {
    "geographic_focus": "Regional",
    "region": "Sub-Saharan Africa",
    "country": "Kenya, Mali, Nigeria, Senegal, Somalia, South Sudan, Uganda, Zimbabwe"
  },
  "impact_areas": {
    "gender_tag": "Not Targeted",
    "climate_change_tag": "Principal",
    "nutrition_tag": "Not Targeted",
    "environment_tag": "Principal",
    "poverty_tag": "Not Targeted"
  },
  "impact_justifications": {
    "gender_tag_just": "The project did not target any aspect of gender equality, youth and social inclusion.",
    "climate_change_tag_just": "The project principally addressed climate adaptation and mitigation by developing a framework to assess climate security sensitivity of policies and strategies.",
    "nutrition_tag_just": "The project did not target any aspect of nutrition, health and food security.",
    "environment_tag_just": "The project principally addressed environmental health and biodiversity by integrating climate security considerations into existi

In [56]:
result = json.loads(geo_loc_ia_tags)
print(result)

{'geographic_location': {'geographic_focus': 'Regional', 'region': 'Sub-Saharan Africa', 'country': 'Kenya, Mali, Nigeria, Senegal, Somalia, South Sudan, Uganda, Zimbabwe'}, 'impact_areas': {'gender_tag': 'Not Targeted', 'climate_change_tag': 'Principal', 'nutrition_tag': 'Not Targeted', 'environment_tag': 'Principal', 'poverty_tag': 'Not Targeted'}, 'impact_justifications': {'gender_tag_just': 'The project did not target any aspect of gender equality, youth and social inclusion.', 'climate_change_tag_just': 'The project principally addressed climate adaptation and mitigation by developing a framework to assess climate security sensitivity of policies and strategies.', 'nutrition_tag_just': 'The project did not target any aspect of nutrition, health and food security.', 'environment_tag_just': 'The project principally addressed environmental health and biodiversity by integrating climate security considerations into existing objectives, priorities, instruments, and activities.', 'pover