In [2]:
# Geographic lookup tree used as reference material for the classifier:
# the whole dict is interpolated (via its Python repr) into the system
# prompt built in call_api.
#
# Shape: continent -> subregion -> list of country/territory names.
# Some subregions ("Sub-Saharan Africa", "Latin America and the Caribbean",
# "North America") carry one extra level of nesting (a dict of intermediate
# regions) instead of a flat list, so consumers must handle both dict and
# list values.
# NOTE(review): names and groupings look like the UN M49 geoscheme with
# bracketed common-name aliases added — confirm the source before editing.
geo_hierarchy = {
    "Africa": {
        "Northern Africa": [
            "Algeria", "Egypt", "Libya", "Morocco", "Sudan", "Tunisia", 
            "Western Sahara (disputed)"
        ],
        "Sub-Saharan Africa": {
            "Eastern Africa": [
                "British Indian Ocean Territory", "Burundi", "Comoros", "Djibouti", 
                "Eritrea", "Ethiopia", "French Southern Territories", "Kenya", 
                "Madagascar", "Malawi", "Mauritius", "Mayotte", "Mozambique", 
                "Réunion", "Rwanda", "Seychelles", "Somalia", "South Sudan", 
                "Uganda", "United Republic of Tanzania", "Zambia", "Zimbabwe"
            ],
            "Middle Africa": [
                "Angola", "Cameroon", "Central African Republic", "Chad", 
                "Congo [Republic of the Congo]", "Democratic Republic of the Congo [DR Congo]", 
                "Equatorial Guinea", "Gabon", "Sao Tome and Principe"
            ],
            "Southern Africa": [
                "Botswana", "Eswatini [Swaziland]", "Lesotho", "Namibia", "South Africa"
            ],
            "Western Africa": [
                "Benin", "Burkina Faso", "Cabo Verde [Cape Verde]", "Côte d'Ivoire [Ivory Coast]", 
                "Gambia", "Ghana", "Guinea", "Guinea-Bissau", "Liberia", "Mali", 
                "Mauritania", "Niger", "Nigeria", "Saint Helena [Saint Helena, Ascension and Tristan da Cunha]", 
                "Senegal", "Sierra Leone", "Togo"
            ]
        }
    },
    "Americas": {
        "Latin America and the Caribbean": {
            "Caribbean": [
                "Anguilla", "Antigua and Barbuda", "Aruba", "Bahamas", "Barbados", 
                "Bonaire, Sint Eustatius and Saba [Caribbean Netherlands]", 
                "British Virgin Islands", "Cayman Islands", "Cuba", "Curaçao", 
                "Dominica", "Dominican Republic", "Grenada", "Guadeloupe", "Haiti", 
                "Jamaica", "Martinique", "Montserrat", "Puerto Rico", "Saint Barthélemy", 
                "Saint Kitts and Nevis", "Saint Lucia", "Saint Martin (French part)", 
                "Saint Vincent and the Grenadines", "Sint Maarten (Dutch part)", 
                "Trinidad and Tobago", "Turks and Caicos Islands", "United States Virgin Islands"
            ],
            "Central America": [
                "Belize", "Costa Rica", "El Salvador", "Guatemala", "Honduras", 
                "Mexico", "Nicaragua", "Panama"
            ],
            "South America": [
                "Argentina", "Bolivia (Plurinational State of)", "Bouvet Island", "Brazil", 
                "Chile", "Colombia", "Ecuador", "Falkland Islands (Malvinas)", "French Guiana", 
                "Guyana", "Paraguay", "Peru", "South Georgia and the South Sandwich Islands", 
                "Suriname", "Uruguay", "Venezuela (Bolivarian Republic of)"
            ]
        },
        "North America": {
            "Northern America": [
                "Bermuda", "Canada", "Greenland", "Saint Pierre and Miquelon", 
                "United States of America"
            ]
        }
    },
    "Antarctica": {
        # Antarctica has no named subregion here, hence the empty-string key.
        "": [
            "Antarctic Treaty Antarctica"
        ]
    },
    "Asia": {
        "Central Asia": [
            "Kazakhstan", "Kyrgyzstan", "Tajikistan", "Turkmenistan", "Uzbekistan"
        ],
        "Eastern Asia": [
            "China", "China, Hong Kong Special Administrative Region", 
            "China, Macao Special Administrative Region", "Democratic People's Republic of Korea [North Korea]", 
            "Japan", "Mongolia", "Republic of Korea [South Korea]", "Taiwan Province of China [Taiwan]"
        ],
        "South-eastern Asia": [
            "Brunei Darussalam", "Cambodia", "Indonesia", "Lao People's Democratic Republic", 
            "Malaysia", "Myanmar [Burma]", "Philippines", "Singapore", "Thailand", 
            "Timor-Leste [East Timor]", "Viet Nam"
        ],
        "Southern Asia": [
            "Islamic Republic of Afghanistan", "Bangladesh", "Bhutan", "India", 
            "Iran (Islamic Republic of)", "Maldives", "Nepal", "Pakistan", "Sri Lanka"
        ],
        "Western Asia": [
            "Armenia", "Azerbaijan", "Bahrain", "Cyprus", "Georgia", "Iraq", "Israel", 
            "Jordan", "Kuwait", "Lebanon", "Oman", "Qatar", "Saudi Arabia", 
            "State of Palestine", "Syrian Arab Republic", "Türkiye", "United Arab Emirates", "Yemen"
        ]
    },
    "Europe": {
        "Eastern Europe": [
            "Belarus", "Bulgaria", "Czechia [Czech Republic]", "Hungary", "Poland", 
            "Republic of Moldova", "Romania", "Russian Federation", "Slovakia", "Ukraine"
        ],
        "Northern Europe": [
            "Åland Islands", "Denmark", "Estonia", "Faroe Islands", "Finland", 
            "Guernsey", "Iceland", "Ireland", "Isle of Man", "Jersey", "Latvia", 
            "Lithuania", "Norway", "Svalbard and Jan Mayen Islands", "Sweden", 
            "United Kingdom of Great Britain and Northern Ireland"
        ],
        "Southern Europe": [
            "Albania", "Andorra", "Bosnia and Herzegovina", "Croatia", "Gibraltar", 
            "Greece", "Holy See [Vatican City]", "Italy", "Kosovo", "Malta", 
            "Montenegro", "North Macedonia", "Portugal", "San Marino", "Serbia", 
            "Slovenia", "Spain"
        ],
        "Western Europe": [
            "Austria", "Belgium", "France [French Republic]", "Germany", "Liechtenstein", 
            "Luxembourg", "Monaco", "Netherlands", "Switzerland"
        ]
    },
    "Oceania": {
        "Australia and New Zealand": [
            "Australia", "Christmas Island", "Cocos (Keeling) Islands", 
            "Heard Island and McDonald Islands", "New Zealand", "Norfolk Island"
        ],
        "Melanesia": [
            "Fiji", "New Caledonia", "Papua New Guinea", "Solomon Islands", "Vanuatu"
        ],
        "Micronesia": [
            "Guam", "Kiribati", "Marshall Islands", "Micronesia (Federated States of)", 
            "Nauru", "Northern Mariana Islands", "Palau", "United States Minor Outlying Islands"
        ],
        "Polynesia": [
            "American Samoa", "Cook Islands", "French Polynesia", "Niue", 
            "Pitcairn [Pitcairn Islands]", "Samoa", "Tokelau", "Tonga", 
            "Tuvalu", "Wallis and Futuna Islands"
        ]
    }
}

In [8]:
import os
import logging
import pandas as pd
from typing import List, Optional
from dotenv import load_dotenv
from pydantic import BaseModel, Field
from openai import OpenAI


# Populate the process environment from a local .env file, then read the
# key that call_api hands to the OpenAI client (None when API_KEY is unset).
load_dotenv()
api_key = os.environ.get("API_KEY")

# Timestamped INFO-level logging for the whole notebook; `logger` is the
# handle every cell below writes through.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)


# One geographic tag attached to a resolution: a continent, optionally
# narrowed to a subregion, optionally narrowed again to a country.
# Every field defaults to None; the system prompt in call_api asks the
# model to leave the finer-grained levels as None when only a broader
# area is mentioned. Instances are the elements of
# ResolutionTarget.classifications.
# NOTE(review): documented with comments rather than a docstring on
# purpose — pydantic is expected to copy a model docstring into its JSON
# schema, which would change what is sent as response_format; confirm
# before converting this to a docstring.
class LocationClassifications(BaseModel):
    continent: Optional[str] = Field(None, description="Continent of the country")
    subregion: Optional[str] = Field(None, description="Subregion of the country")
    country: Optional[str] = Field(None, description="Country")

# Structured-output envelope for one resolution: the required list of
# location tags the model assigned. call_api passes this class as
# response_format and returns the parsed instance.
# NOTE(review): kept as comments rather than a docstring so the schema
# derived from this model stays unchanged — confirm before converting.
class ResolutionTarget(BaseModel):
    classifications: List[LocationClassifications] = Field(..., description="List of relevant classifications for this resolution")
                           
# Single-entry cache mapping the API key in use to its client, so the
# per-row calls share one client instead of constructing a new one for
# every resolution. Re-keyed automatically if `api_key` is reassigned.
_client_cache: dict = {}


def _get_client() -> OpenAI:
    """Return the OpenAI client for the current ``api_key``, creating it on first use."""
    client = _client_cache.get(api_key)
    if client is None:
        client = OpenAI(api_key=api_key)
        _client_cache.clear()
        _client_cache[api_key] = client
    return client


def _error_result() -> ResolutionTarget:
    """Build the sentinel result that marks a resolution which could not be classified."""
    return ResolutionTarget(
        classifications=[
            LocationClassifications(
                continent="error",
                subregion="processing_error",
                country=None
            )
        ]
    )


def call_api(Title: str) -> ResolutionTarget:
    """
    Ask the model which continents, subregions and countries a UN resolution refers to.

    The resolution title is sent together with a system prompt that embeds
    ``geo_hierarchy``; the reply is parsed into a ``ResolutionTarget`` via
    structured output.

    Args:
        Title: Title of the resolution and description to analyze.

    Returns:
        ResolutionTarget: The parsed classifications. Three special cases:
          * blank or non-string ``Title`` -> empty ``classifications`` list,
            no API request is made;
          * the request raises, or the response carries no parsed payload
            (e.g. a model refusal) -> a single sentinel entry with
            ``continent="error"`` and ``subregion="processing_error"``.

    Raises:
        Whatever ``OpenAI(...)`` raises while constructing the client (for
        example when no API key is available); only errors from the request
        itself are caught and converted to the sentinel result.
    """
    # Missing values in the source data arrive as NaN/None; there is nothing
    # to classify, so do not spend an API request on them.
    if not isinstance(Title, str) or not Title.strip():
        logger.warning("Empty or non-text title received; skipping API call.")
        return ResolutionTarget(classifications=[])

    # Deliberately outside the try block: a client that cannot be built is a
    # configuration error and should surface instead of tagging rows "error".
    client = _get_client()

    # Prepare the system prompt (geo_hierarchy is embedded as its Python repr)
    system_prompt = f"""You are a UN document classification assistant. Your task is to analyze UN resolutions given their Title, which contains the name and some details,
 if a specific continent, subregion, or country is mentioned in the resolution. you should identify the continent, subregion, and country mentioned in the resolution. 
 If the resolution mentions refers to a continent you should just keep the continent and subregion and country should be None.
 If the resolution mentions refers to a subregion you should keep the continent and subregion and country should be None.
 If the resolution mentions refers to a country you should keep the continent, subregion, and country.:

{geo_hierarchy}

For each resolution text, identify ALL relevant tags that apply. For each tag:
1. Select the appropriate continent (e.g., Africa, Americas, Antarctica, Asia, Europe, Oceania) (if applicable)
2. Select the appropriate subregion (e.g., Northern Africa, Sub-Saharan Africa, Latin America and the Caribbean, Northern America) (if applicable)
3. Select the appropriate country (e.g., Algeria, Egypt, Libya, Morocco, Sudan, Tunisia, Western Sahara (disputed)) (if applicable)

A resolution may match multiple categories, so return all that apply.
"""
    # Call the API with structured output
    try:
        logger.info("Calling OpenAI API for resolution classification.")
        response = client.beta.chat.completions.parse(
            model="gpt-4o-mini",
            temperature=0.3,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"Resolution text: {Title}"}
            ],
            max_tokens=1000,
            response_format=ResolutionTarget,
        )
        message = response.choices[0].message
        classification_result: Optional[ResolutionTarget] = message.parsed
    except Exception as e:
        # Best-effort by design: one failed row must not abort a long batch.
        logger.error(f"Error during API call: {e}")
        return _error_result()

    # `parsed` is None when the model answered without a structured payload
    # (for instance a refusal); returning it would break the declared return
    # type and crash callers that read `.classifications`.
    if classification_result is None:
        logger.error(
            "API returned no parsed result (refusal: %s).",
            getattr(message, "refusal", None),
        )
        return _error_result()

    logger.info("API call successful.")
    return classification_result

def get_target_location(Title: str) -> List[List[Optional[str]]]:
    """
    Classify one resolution title and flatten the result into plain lists.

    Args:
        Title: Title of the resolution and description to analyze; passed
            unchanged to ``call_api``.

    Returns:
        One ``[continent, subregion, country]`` list per classification that
        has a non-empty continent; ``subregion`` and ``country`` may be
        ``None``. An empty list means nothing usable was returned. Note that
        the ``continent="error"`` sentinel produced by ``call_api`` on
        failure passes this filter and therefore appears in the output as
        ``["error", "processing_error", None]``.
    """
    logger.info("Getting tags for provided resolution text.")
    classification_result = call_api(Title)

    # call_api returns the response's parsed payload as-is, and that payload
    # can be None; treat a missing result as "no tags" instead of raising
    # AttributeError part-way through a batch.
    classifications = getattr(classification_result, "classifications", None)
    if classifications is None:
        logger.warning("No classification result returned; treating as no tags.")
        classifications = []

    # Only keep entries that have at least a continent.
    result = [
        [entry.continent, entry.subregion, entry.country]
        for entry in classifications
        if entry.continent
    ]

    logger.info(f"Extracted tags: {result}")
    return result

In [10]:
# Load the UN voting records that the following cells annotate.
df_sample_complete = pd.read_csv(
    filepath_or_buffer="data/gpt4o-mini_UN_VOTING_DATA_RAW_with_tags.csv",
)

In [11]:
# Add a 'target_location' column holding the list of
# [continent, subregion, country] tags for each row's Title.
# Only the Title column is needed, so apply over that Series directly
# rather than materialising every full row with DataFrame.apply(axis=1).
# This issues one API request per row and can take a long time.
logger.info("Adding tags to sample dataframe.")
df_sample_complete['target_location'] = df_sample_complete['Title'].apply(get_target_location)
logger.info("Tags added to sample dataframe.")

2025-03-12 22:13:51,956 - INFO - Adding tags to sample dataframe.
2025-03-12 22:13:51,958 - INFO - Getting tags for provided resolution text.
2025-03-12 22:13:52,274 - INFO - Calling OpenAI API for resolution classification.
2025-03-12 22:13:53,131 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-03-12 22:13:53,131 - INFO - API call successful.
2025-03-12 22:13:53,131 - INFO - Extracted tags: [['Asia', 'Western Asia', 'Iraq']]
2025-03-12 22:13:53,131 - INFO - Getting tags for provided resolution text.
2025-03-12 22:13:53,459 - INFO - Calling OpenAI API for resolution classification.
2025-03-12 22:13:53,980 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-03-12 22:13:53,982 - INFO - API call successful.
2025-03-12 22:13:53,982 - INFO - Extracted tags: []
2025-03-12 22:13:53,990 - INFO - Getting tags for provided resolution text.
2025-03-12 22:13:54,551 - INFO - Calling OpenAI API for resolution cla

In [12]:
# Write df_sample_complete to disk as CSV, leaving out the index column.
df_sample_complete.to_csv(
    path_or_buf="data/tags+country_gpt4o-mini_UN_VOTING_DATA_RAW.csv",
    index=False,
)