In [1]:
from openai import AzureOpenAI
import pandas as pd
import re
import os

In [2]:
# Azure credentials and configuration
# The API key is read from the environment so that it never lands in the notebook,
# its saved outputs, or version-control history. Any key that has ever been pasted
# into a notebook should be treated as compromised and rotated in the Azure portal.
api_key = os.environ.get("AZURE_OPENAI_API_KEY")
if not api_key:
    raise RuntimeError(
        "AZURE_OPENAI_API_KEY is not set. Export it before starting the Jupyter kernel."
    )
# The endpoint can be overridden per environment; the default is this project's resource.
endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT", "https://ppbai6350320563.openai.azure.com")
api_version = "2024-12-01-preview"
deployment = "o1"  # passed as `model=` in every request below

# Initialize Azure OpenAI client
client = AzureOpenAI(
    api_key=api_key,
    api_version=api_version,
    azure_endpoint=endpoint,
)

# Smoke test: one small chat completion to confirm that the credentials and the
# deployment name work. The response length is capped with max_completion_tokens,
# so the printed answer may stop mid-sentence.
response = client.chat.completions.create(
    model=deployment,
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "I am going to Paris, what should I see?"}
    ],
    max_completion_tokens=400
)

# Print the assistant's response
print(response.choices[0].message.content)

Paris is a city rich with history, culture, art, and fabulous cuisine. Whether you love art museums, grand architecture, strolling along charming streets, or sampling French pastries, there’s something for everyone. Below are some highlights to consider adding to your itinerary:

1. The Eiffel Tower
   • An iconic symbol of Paris. 
   • Climb to the top or take the elevator for stunning views of the city.
   • The surrounding Champ de Mars park is wonderful for picnics and photos.

2. The Louvre Museum
   • Home to some of the world’s most famous artworks (Mona Lisa, Venus de Milo, and beyond).
   • The grand palace itself is a sight to see. 
   • Plan ahead as the museum is huge—decide which sections you’d like most to explore, such as the Egyptian Antiquities or the Italian paintings.

3. Notre-Dame Cathedral (under restoration but worth visiting the area)
   • A masterpiece of Gothic architecture, currently being restored due to the 2019 fire.
   • Explore the surrounding Île de la 

In [3]:
# Load the raw and pivot sheets
def _read_all_sheets(path):
    """Read every worksheet of the workbook at `path` (sheet_name=None requests all sheets)."""
    return pd.read_excel(path, sheet_name=None)


# Despite the `_df` suffix, each name below is bound to whatever read_excel returns for
# sheet_name=None: a mapping keyed by sheet name (see the printed keys).
youth_df = _read_all_sheets("Data/Youth - jeunesse - export_250416.xlsx")
crime_df = _read_all_sheets("Data/Crime - Criminalité-export_250416.xlsx")
digital_df = _read_all_sheets("Data/Digital - numérique - export_250416.xlsx")
education_df = _read_all_sheets("Data/Education-éducation-all-countries-export_250416.xlsx")
mining_df = _read_all_sheets("Data/Mining-Mine_export_250416.xlsx")
illicit_df = _read_all_sheets("Data/Illicit - illicite - export_250416.xlsx")
iff_df = _read_all_sheets("Data/IFF - export_250416.xlsx")

# Show all sheet names to identify pivot vs raw
for label, sheets in (
    ("Youth sheets:", youth_df),
    ("Crime sheets:", crime_df),
    ("Digital sheets:", digital_df),
    ("Education sheets:", education_df),
    ("Mining sheets:", mining_df),
    ("Illicit sheets:", illicit_df),
    ("IFF sheets:", iff_df),
):
    print(label, sheets.keys())

Youth sheets: dict_keys(['Youth - Pivot Table', 'Youth', 'Youth - SubOutputs'])
Crime sheets: dict_keys(['Crime - Pivot Table', 'Crime', 'Crime - SubOutputs'])
Digital sheets: dict_keys(['Digital - Pivot Table', 'Digital - SubOutputs', 'Digital'])
Education sheets: dict_keys(['Education - Pivote Table', 'Education', 'Education - SubOutputs'])
Mining sheets: dict_keys(['Mining - Pivot Table', 'Mining', 'Mining - SubOutputs'])
Illicit sheets: dict_keys(['Illicit - Pivot Table', 'Illicit - SubOutputs', 'Illicit'])
IFF sheets: dict_keys(['IFF - Pivot Table', 'IFF', 'IFF - SubOutputs'])


In [4]:
# File and sheet configuration
# Maps each theme name to:
#   "file"   - path of the theme's Excel export
#   "sheets" - the worksheets that are scanned for sub-output text
# File names use the same capitalisation as the cell that loads the workbooks, so the
# paths also resolve on case-sensitive filesystems.
# NOTE(review): confirm the spelling against the files on disk.
theme_configs = {
    "Youth": {
        "file": "Data/Youth - jeunesse - export_250416.xlsx",
        "sheets": ["Youth - Pivot Table", "Youth - SubOutputs"]
    },
    "Crime": {
        "file": "Data/Crime - Criminalité-export_250416.xlsx",
        "sheets": ["Crime - Pivot Table", "Crime - SubOutputs"]
    },
    "Digital": {
        "file": "Data/Digital - numérique - export_250416.xlsx",
        "sheets": ["Digital - Pivot Table", "Digital - SubOutputs"]
    },
    "Education": {
        "file": "Data/Education-éducation-all-countries-export_250416.xlsx",
        # "Pivote" is how the sheet is named in the workbook itself; do not correct it here.
        "sheets": ["Education - Pivote Table", "Education - SubOutputs"]
    },
    "Mining": {
        "file": "Data/Mining-Mine_export_250416.xlsx",
        "sheets": ["Mining - Pivot Table", "Mining - SubOutputs"]
    },
    "Illicit": {
        "file": "Data/Illicit - illicite - export_250416.xlsx",
        "sheets": ["Illicit - Pivot Table", "Illicit - SubOutputs"]
    },
    "IFF": {
        "file": "Data/IFF - export_250416.xlsx",
        "sheets": ["IFF - Pivot Table", "IFF - SubOutputs"]
    },
}

# Extract sub-outputs from the relevant sheets
def extract_sub_outputs(file_path, sheet_names):
    """Collect sub-output text entries from selected sheets of one workbook.

    For each sheet, the first column whose label contains "sub-output"
    (case-insensitive) is used; sheets without such a column contribute nothing.

    Args:
        file_path: Path of the Excel workbook to read.
        sheet_names: List of worksheet names to read, in the order their
            entries should appear in the result.

    Returns:
        A list of strings: the non-null values of the matched columns, converted
        to text, keeping only values longer than 10 characters.

    Raises:
        Whatever ``pd.read_excel`` raises, e.g. when the file or one of the
        requested sheets does not exist.
    """
    combined = []
    xls = pd.read_excel(file_path, sheet_name=sheet_names)

    for sheet in sheet_names:
        df = xls[sheet]
        # Column labels are not guaranteed to be strings (numeric or date headers
        # occur in spreadsheets), so coerce to text before the substring match
        # instead of calling .lower() on the raw label.
        sub_col = next(
            (c for c in df.columns if "sub-output" in str(c).lower()),
            None,
        )
        if sub_col is None:
            continue
        entries = df[sub_col].dropna().astype(str)
        entries = entries[entries.str.len() > 10]  # Filter short or blank content
        combined.extend(entries.tolist())
    return combined

# Create the prompt
def build_prompt(theme, sub_outputs, max_items=50):
    """Build the analysis prompt for one theme.

    Args:
        theme: Theme name inserted into the prompt text.
        sub_outputs: Sequence of sub-output entries; each becomes one "- " bullet.
        max_items: Maximum number of entries (taken from the start of
            ``sub_outputs``) to include. Defaults to 50; pass ``None`` to
            include every entry. Expected to be non-negative.

    Returns:
        The prompt as a single string, beginning and ending with a newline.
    """
    # Only the leading slice is sent, which bounds the prompt size.
    bullets = "\n".join(f"- {item}" for item in sub_outputs[:max_items])
    return f"""
You are an AI assistant analyzing UN INFO Cooperation Framework (CF JWP) data from 2024.

Theme: {theme}

Based on the following extracted sub-output entries from UN country programming in Africa, please answer:

1. What are the 4 main areas of focus for {theme} in Africa where the UN is supporting (2024)?
2. What are potential challenges or gaps in support?

Data:
{bullets}

Please return:
- A list of 4 main areas of focus (bullets)
- 2–3 sentences summarizing key challenges/gaps
"""

# Create output directory (one text file per theme is written into it)
output_dir = "o1_outputs"
os.makedirs(output_dir, exist_ok=True)

# Process each theme: extract entries -> build prompt -> query the model -> save the answer
for theme, config in theme_configs.items():
    print(f"\n🔍 Processing theme: {theme}")
    file_path, sheet_names = config["file"], config["sheets"]

    # A workbook that cannot be read is reported and the theme is skipped,
    # so one bad file does not stop the remaining themes.
    try:
        sub_outputs = extract_sub_outputs(file_path, sheet_names)
    except Exception as e:
        print(f"❌ Error reading data for {theme}: {e}")
        continue

    # Nothing to analyse: do not spend a model call on an empty prompt.
    if not sub_outputs:
        print(f"⚠️ Skipping {theme} – no valid sub-output entries found.")
        continue

    prompt = build_prompt(theme, sub_outputs)

    # The request and the file write share one handler, so a failure in either is
    # reported under the same message and the loop moves on to the next theme.
    try:
        # max_completion_tokens is deliberately omitted so the answer is not cut short.
        response = client.chat.completions.create(
            model=deployment,
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt},
            ],
        )

        result = response.choices[0].message.content
        if not result:
            print(f"⚠️ o1 model returned empty content for {theme}")
            continue

        filepath = os.path.join(output_dir, f"{theme}_Output.txt")
        with open(filepath, "w", encoding="utf-8") as f:
            f.write(result)
        print(f"✅ Output saved: {filepath}")

    except Exception as e:
        print(f"❌ Error during Azure o1 call for {theme}: {e}")


🔍 Processing theme: Youth
✅ Output saved: o1_outputs/Youth_Output.txt

🔍 Processing theme: Crime
✅ Output saved: o1_outputs/Crime_Output.txt

🔍 Processing theme: Digital
✅ Output saved: o1_outputs/Digital_Output.txt

🔍 Processing theme: Education
✅ Output saved: o1_outputs/Education_Output.txt

🔍 Processing theme: Mining
✅ Output saved: o1_outputs/Mining_Output.txt

🔍 Processing theme: Illicit
✅ Output saved: o1_outputs/Illicit_Output.txt

🔍 Processing theme: IFF
✅ Output saved: o1_outputs/IFF_Output.txt
