In [2]:
import logging
import os
import pandas as pd
from typing import List, Optional
from pydantic import BaseModel, Field
from openai import OpenAI

# Configure logging for the whole notebook: INFO and above, each record
# rendered as "<timestamp> - <level> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Logger shared by every cell below.
logger = logging.getLogger(__name__)

# Three-level tag hierarchy used for classification:
#   main tag -> subtag1 -> list of allowed subtag2 values.
# Key order is kept as written because the dict's repr is interpolated
# verbatim into the system prompt.
un_classification = {
    "war": {
        "conflict": ["civil war", "international war"],
        "peacekeeping": ["UN missions"],
    },
    "trade": {
        "agreements": ["NAFTA", "EU trade deal"],
        "disputes": ["WTO cases", "bilateral disputes"],
    },
    "energy": {
        "renewable": ["solar", "wind"],
        "non-renewable": ["coal", "oil"],
    },
}

# One hierarchical label for a resolution (tag -> subtag1 -> optional subtag2)
# together with a confidence value and the reasoning behind it.
# NOTE(review): documented with `#` comments rather than a class docstring on
# purpose -- pydantic presumably copies a model docstring into the generated
# JSON schema, which would change what is sent as `response_format`; confirm
# before adding one.
class TagClassification(BaseModel):
    # Required. Top-level category.
    tag: str = Field(..., description="Main category tag (e.g., 'war', 'trade', 'energy')")
    # Required. Second-level category under `tag`.
    subtag1: str = Field(..., description="Subcategory of the main tag (e.g., 'conflict', 'agreements', 'renewable')")
    # Optional, defaults to None. Third-level item under `subtag1`.
    subtag2: Optional[str] = Field(None, description="Specific item within subcategory (e.g., 'civil war', 'NAFTA', 'solar')")
    # Required. The 0-1 range is stated only in the description text; no
    # numeric bound is declared on the field itself.
    confidence: float = Field(..., description="Confidence score between 0 and 1")
    # Required. Free-text justification for the chosen tags.
    reasoning: str = Field(..., description="Reasoning for this classification")

# Top-level structured result for one resolution: a list of TagClassification
# entries. This is the model passed as `response_format` in call_api.
# NOTE(review): `#` comments are used instead of a class docstring because a
# docstring would presumably end up in the generated JSON schema -- confirm.
class ResolutionClassification(BaseModel):
    # Required. One entry per classification that applies to the resolution.
    classifications: List[TagClassification] = Field(..., description="List of relevant classifications for this resolution")

def _error_classification(message: str) -> ResolutionClassification:
    """Build the single-entry fallback result that signals a failed classification."""
    return ResolutionClassification(
        classifications=[
            TagClassification(
                tag="error",
                subtag1="processing_error",
                confidence=0.0,
                reasoning=f"Error during classification: {message}",
            )
        ]
    )


def call_api(resolution_text: str, context: str) -> ResolutionClassification:
    """
    Analyzes a UN resolution text and classifies it according to the UN classification system.

    The OpenAI API key is read from the ``OPENAI_API_KEY`` environment variable
    instead of being written into the notebook source.

    Args:
        resolution_text: Text of the resolution to analyze
        context: Additional context about the resolution

    Returns:
        ResolutionClassification: Structured classification results. If anything
        goes wrong (missing API key, request failure, refusal, unparsable
        response) the error is logged and a single sentinel entry is returned
        instead of raising: tag="error", subtag1="processing_error",
        subtag2=None, confidence=0.0, with the error text in ``reasoning``.
    """
    # Prepare the system prompt
    system_prompt = f"""You are a UN document classification assistant. Your task is to analyze UN resolutions given their name and some context,
and classify them according to the following hierarchical classification system:

{un_classification}

For each resolution text, identify ALL relevant tags that apply. For each tag:
1. Select the appropriate main tag (e.g., war, trade, energy)
2. Select the appropriate subtag1 (e.g., conflict, agreements, renewable)
3. Select the appropriate subtag2 if applicable (e.g., civil war, NAFTA, solar)
4. Provide a confidence score (0.0-1.0)
5. Provide brief reasoning for your classification

A resolution may match multiple categories, so return all that apply.
"""
    # Call the API with structured output
    try:
        # Created inside the try block so that a missing or invalid key is
        # reported through the same fallback path as any other failure.
        client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

        logger.info("Calling OpenAI API for resolution classification.")
        response = client.beta.chat.completions.parse(
            model="gpt-4o",
            temperature=0.3,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"Resolution text: {resolution_text}\n\nAdditional context: {context}"}
            ],
            max_tokens=1000,
            response_format=ResolutionClassification,
        )

        message = response.choices[0].message
        classification_result = message.parsed
        if classification_result is None:
            # `parsed` is empty when the model refuses; surface that as an
            # error rather than returning None to callers.
            refusal = getattr(message, "refusal", None)
            raise ValueError(f"No parsed classification returned (refusal: {refusal})")

        logger.info("API call successful.")
        return classification_result

    except Exception as e:
        # Deliberate best-effort: one failed resolution must not abort a batch run.
        logger.error("Error during API call: %s", e)
        return _error_classification(str(e))

def get_tags(resolution_text: str, context: str) -> List[List]:
    """
    Calls the API to get classification tags and returns a list of tag details.

    Args:
        resolution_text: Text of the resolution to classify.
        context: Additional context about the resolution.

    Returns:
        List of lists containing [tag, subtag1, subtag2, confidence, reasoning]
        for each classification where subtag2 is not None. Entries without a
        subtag2 are dropped, which includes the error sentinel produced by
        call_api. An empty list is returned when call_api yields no result.
    """
    logger.info("Getting tags for provided resolution text.")
    classification_result = call_api(resolution_text, context)

    # call_api may hand back None when the response could not be parsed;
    # treat that as "no tags" instead of failing on attribute access.
    if classification_result is None:
        logger.warning("No classification result returned; no tags extracted.")
        return []

    # Single pass over the classifications instead of five parallel lists.
    result = [
        [item.tag, item.subtag1, item.subtag2, item.confidence, item.reasoning]
        for item in classification_result.classifications
        if item.subtag2 is not None
    ]
    logger.info(f"Extracted tags: {result}")
    return result

In [None]:
# NOTE(review): this cell has no execution count (In [None]), so it appears
# never to have been run. Python reads the line below as "import submodule
# `ipnyb` of package `data_clean`"; `ipnyb` looks like a misspelling of the
# `.ipynb` notebook extension, and a notebook file cannot be imported with a
# plain import statement -- confirm what was intended. A later cell in this
# notebook creates `df_sample` directly via `df.sample(...)`.
from data_clean.ipnyb import df_sample

In [2]:
# Build a small demo DataFrame: three made-up resolutions, each paired with a
# short context sentence.
data = {
    'resolution_text': [
        "nuclear energy is a renewable energy source who's with me",
        "EU trade deal  on solar energy tariff barriers",
        "establishing peacekeeping operations in conflict regions affected by civil war",
    ],
    'context': [
        "This resolution discusses the use of nuclear energy in the context of renewable energy sources.",
        "This resolution addresses climate change mitigation strategies through advantageous tariffs on solar.",
        "This resolution proposes deploying UN peacekeepers to regions experiencing internal armed conflicts.",
    ],
}
df = pd.DataFrame(data)

logger.info("Applying get_tags function to each row of the DataFrame.")
# Classify the rows one at a time, in order, and keep each row's tag list in a
# new 'tags' column.
df['tags'] = [
    get_tags(text, ctx)
    for text, ctx in zip(df['resolution_text'], df['context'])
]

logger.info("DataFrame processing complete.")
print(df)

2025-03-05 22:41:10,710 - INFO - Applying get_tags function to each row of the DataFrame.
2025-03-05 22:41:10,715 - INFO - Getting tags for provided resolution text.
2025-03-05 22:41:10,857 - INFO - Calling OpenAI API for resolution classification.
2025-03-05 22:41:22,199 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-03-05 22:41:22,334 - INFO - API call successful.
2025-03-05 22:41:22,334 - INFO - Extracted tags: [['energy', 'renewable', 'solar', 0.7, 'The resolution discusses nuclear energy in the context of renewable energy sources. Although nuclear energy is not traditionally classified as renewable like solar or wind, the context suggests a classification under renewable energy. However, since nuclear is not explicitly listed under renewable subcategories, the confidence is moderate.']]
2025-03-05 22:41:22,339 - INFO - Getting tags for provided resolution text.
2025-03-05 22:41:22,358 - INFO - Calling OpenAI API for resolution classif

                                     resolution_text  \
0  nuclear energy is a renewable energy source wh...   
1     EU trade deal  on solar energy tariff barriers   
2  establishing peacekeeping operations in confli...   

                                             context  \
0  This resolution discusses the use of nuclear e...   
1  This resolution addresses climate change mitig...   
2  This resolution proposes deploying UN peacekee...   

                                                tags  
0  [[energy, renewable, solar, 0.7, The resolutio...  
1  [[trade, agreements, EU trade deal, 0.9, The r...  
2  [[war, conflict, civil war, 0.9, The resolutio...  


In [3]:
# Bare expression as the cell's last line: the notebook renders `df` as the
# cell output.
df

Unnamed: 0,resolution_text,context,tags
0,nuclear energy is a renewable energy source wh...,This resolution discusses the use of nuclear e...,"[[energy, renewable, solar, 0.7, The resolutio..."
1,EU trade deal on solar energy tariff barriers,This resolution addresses climate change mitig...,"[[trade, agreements, EU trade deal, 0.9, The r..."
2,establishing peacekeeping operations in confli...,This resolution proposes deploying UN peacekee...,"[[war, conflict, civil war, 0.9, The resolutio..."


In [3]:
# Load the cleaned UN voting data.
# NOTE(review): this rebinds `df`, which an earlier cell used for the
# three-row demo frame -- re-run cells in order after executing this one.
df = pd.read_csv('data/UN_VOTING_DATA_CLEANED.csv')

# Draw a random sample of up to 50 rows from the loaded frame.
# - random_state pins the draw so Restart & Run All gives the same sample.
# - min(...) avoids the ValueError raised by DataFrame.sample when the frame
#   has fewer rows than requested.
df_sample = df.sample(n=min(50, len(df)), random_state=42)

  df = pd.read_csv('data/UN_VOTING_DATA_CLEANED.csv')
