In [3]:
# Uncomment and run once to install the third-party packages this notebook imports:
# %pip install markdown
# %pip install annoy
# %pip install openai

In [1]:
from openai import OpenAI # OpenAI API
import json
import requests # to download some resources
import os # file operations
import numpy as np # linear algebra
import pandas as pd # data processing
from markdown import markdown # to render markdown
from IPython.display import Markdown
import annoy # Approximate Nearest Neighbors Oh Yeah for fast searching
import pickle
from annoy import AnnoyIndex

In [2]:
# Read our API key. strip() removes any surrounding whitespace (a trailing "\n",
# a Windows "\r\n", stray spaces) so none of it ends up inside the key.
with open('../api-keys/our_api_key.txt', 'r') as file:
    api_key = file.read().strip()

# Read Finn's API key (the one used for testing)
with open('../api-keys/finns_api_key.txt', 'r') as file:
    finns_api_key = file.read().strip()

# OpenAI client used by run_prompt; authenticated with Finn's key while testing
client = OpenAI(api_key=finns_api_key)

In [None]:
# Sample of countries; each name is matched as a substring of the Markdown file names.
sample_countries = [
    "Albania",
    "Brunei",  # the comma here was missing, which silently merged this entry with the next
    "Buthan",  # NOTE(review): probably meant "Bhutan"; kept as-is because it must match the file name — confirm
    "Eswatini",
    "Fiji",
    "Kuwait",
    "Lebanon",
    "Liberia",
    "Maldives",
    "Micronesia",
    "Nepal",
    "Nigeria",
    "Palestine",
    "Saudi Arabia",
    "Sri Lanka",
    "Turkmenistan",
]

# Folder with the Markdown files to load
folder_mds = "../data/3-naps-md"

# Load the Markdown file of every sampled country into a module-level variable.
# sorted() fixes the iteration order, so if several files match the same country
# the one that wins no longer depends on the arbitrary order of os.listdir().
for file in sorted(os.listdir(folder_mds)):
    for country in sample_countries:
        if country in file:
            with open(os.path.join(folder_mds, file), "r", encoding="utf-8") as md_file:
                content = md_file.read()
            # Original binding: the lower-cased country name (e.g. "nepal").
            globals()[country.lower()] = content
            # Multi-word names such as "saudi arabia" are not valid identifiers, so
            # also bind an underscore alias (saudi_arabia) that can be typed directly.
            globals()[country.lower().replace(" ", "_")] = content

In [None]:
# System message sent by run_prompt with every request: it lists the 23 questions
# asked about a NAP document and the JSON layout expected back (top-level
# "period_start" / "period_end" plus a "questions" list).
# NOTE(review): the instructions ask for a yes/no answer to every question, but
# question 1 asks for a period, which the JSON layout carries in "period_start" /
# "period_end" — confirm whether the model is expected to omit it from "questions".
system_prompt = """
You are an assistant that extracts and summarizes national action plans for antimicrobial resistance (NAPs). Your task is to answer the following questions concisely while relying strictly on the document content. 

For each question, provide:
1. A 'yes' or 'no' answer.
2. The specific text from the document that supports the answer.

Here are the questions:

1. What is the period considered for the NAP? (Provide the exact years or date range)
2. Does the NAP adopt a One Health approach by addressing multiple priority sectors (such as human health, animal health, environment, and agriculture/food security) through strategies, policies, goals, or actions? (yes/no + supporting text)  
3. Does the NAP include any distinct strategy/policy/goal/action explicitly targeting the priority sector of 'Human Health'? (yes/no + supporting text)  
4. Does the NAP include any distinct strategy/policy/goal/action explicitly targeting the priority sector of 'Animal Health'? (yes/no + supporting text)  
5. Does the NAP include any distinct strategy/policy/goal/action explicitly targeting the priority sector of 'Environment'? (yes/no + supporting text)  
6. Does the NAP include any distinct strategy/policy/goal/action explicitly targeting the priority sector of 'Agriculture/Food Security'? (yes/no + supporting text)  
7. Does the NAP include any mechanisms for progress reporting to track how its objectives are being met, such as an annual or semi-annual progress report, a dashboard displaying the status, or similar documents/tools? (yes/no + supporting text)  
8. Does the NAP include any specific, time-bound targets to track progress? (yes/no + supporting text)  
9. Does the NAP outline quantitative targets for AMR prevalence and antimicrobial use? (yes/no + supporting text)  
10. Does the NAP specify any budget allocation assigned for the strategies/policies/goals/actions directed at tackling AMR? (yes/no + supporting text)  
11. Does the NAP contain an assessment of future budget requirements for different activities listed in the document? (yes/no + supporting text)  
12. Does the NAP establish a multisectoral committee or task force for AMR coordination? (yes/no + supporting text)  
13. Does the NAP include any strategy/policy/goal/action related to strengthening the legislative and regulatory framework for AMR, including laws and regulations for antimicrobial use? (yes/no + supporting text)  
14. Does the NAP include any strategy/policy/goal/action related to training and educational initiatives to increase awareness about AMR in school curricula or professional training programs for doctors and pharmacists? (yes/no + supporting text)  
15. Does the NAP include any strategy/policy/goal/action related to public awareness campaigns in combating AMR, such as media involvement and community engagement? (yes/no + supporting text)  
16. Does the NAP include any strategy/policy/goal/action related to international collaboration and support for AMR initiatives, such as partnerships with organizations like World Health Organization (WHO), Food and Agriculture Organization (FAO), and World Organisation for Animal Health (OIE)? (yes/no + supporting text)  
17. Does the NAP include any strategy/policy/goal/action related to data collection and analysis for AMR surveillance, such as developing surveillance systems, data collection tools, setting up laboratories for AMR monitoring, or other similar actions? (yes/no + supporting text)  
18. Does the NAP include any strategy/policy/goal/action related to developing the capacity to detect and report newly emerged resistance that may constitute a public health emergency of international concern, as required by the International Health Regulations (2005)? (yes/no + supporting text)  
19. Does the NAP include any strategy/policy/goal/action related to specific prevention strategies such as sanitation, hygiene measures, and infection prevention and control (IPC)? (yes/no + supporting text)  
20. Does the NAP include any strategy/policy/goal/action related to implementing international standards and guidelines for antimicrobial resistance (AMR) control, such as (but not limited to) the OIE Terrestrial and Aquatic Animal Health Codes or the FAO/WHO Codex Alimentarius Code of Practice to Minimize and Contain AMR? (yes/no + supporting text)  
21. Does the NAP include any strategy/policy/goal/action related to regulating the distribution, prescription, and dispensation of antibiotics, such as developing or maintaining essential medicine lists for antibiotics? (yes/no + supporting text)  
22. Does the NAP include any strategy/policy/goal/action related to requiring the rational use of antibiotics through regulations/laws? (yes/no + supporting text)  
23. Does the NAP include any strategy/policy/goal/action related to participating in international collaborative research to support the development of new medicines, diagnostic tools, and vaccines? (yes/no + supporting text)

Format the response as a JSON object:
{
  "period_start": "XXXX",
  "period_end":"YYYY",
  "questions": [
    {
      "question": "Does the NAP include any strategy/policy/goal/action related to the priority sector of 'Human Health'?",
      "answer": "yes",
      "supporting_text": "The NAP mentions specific actions in the human health sector, including..."
    },
    ...
  ]
}
"""

In [None]:
def run_prompt(country_name, country_test):
    """Ask the model the NAP questionnaire for one document and save the answers as JSON.

    Sends ``system_prompt`` and the document text to the chat-completions API
    through the module-level ``client``, flattens the JSON reply into one record
    per row, and writes the records to ``summary_response_{country_name}.json``
    in the ``Testing_FullSample`` output folder (created if it does not exist).

    Parameters
    ----------
    country_name : str
        Value stored in the ``country`` field of every record; also used in the
        output file name.
    country_test : str
        Text sent as the user message (presumably the Markdown text of the
        country's NAP — confirm against the caller).

    Returns
    -------
    None
        The records are written to disk and the output path is printed.

    Raises
    ------
    json.JSONDecodeError
        If the model reply is not valid JSON.
    KeyError
        If the reply lacks ``period_start``, ``period_end`` or ``questions``.
    """
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        temperature=0,  # lowest sampling temperature, to limit run-to-run variation
        top_p=0.5,  # nucleus-sampling cutoff
        response_format={"type": "json_object"},  # request a JSON object as the reply
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": country_test}
        ]
    )

    # Parse the reply; the prompt asks for "period_start", "period_end" and "questions".
    parsed = json.loads(response.choices[0].message.content)

    # The two period fields become the first two rows, ahead of the question rows.
    period_rows = pd.DataFrame([
        {"question": "period_start", "answer": parsed["period_start"], "supporting_text": None},
        {"question": "period_end", "answer": parsed["period_end"], "supporting_text": None}
    ])
    df = pd.concat([period_rows, pd.DataFrame(parsed["questions"])], ignore_index=True)

    df["country"] = country_name
    df = df.rename(columns={"question": "question_text", "answer": "answer_full"})

    # "question" keeps the labels "period_start"/"period_end" for rows 0-1; every
    # later row gets its 1-based position in the returned "questions" list instead
    # of the question text, which is then dropped.
    df["question"] = df["question_text"]
    df.loc[2:, "question"] = range(1, len(df) - 1)
    df = df.drop(columns=["question_text"])
    output_json = df.to_dict(orient="records")

    # Saving the JSON
    output_path = f"../data/4-summary-responses-json/version_yesno_updated/Testing_FullSample/summary_response_{country_name}.json"
    # Create the folder first so a finished (billed) API call is not lost to a
    # FileNotFoundError on a fresh checkout.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(output_json, f, indent=4)

    print(f"Saved response to {output_path}")
