In [0]:
# Install the third-party packages this notebook imports below (instructor, azure.identity, openai).
# NOTE(review): versions are unpinned, so a fresh run may pick up a breaking release — consider pinning.
# NOTE(review): notebook tooling generally recommends `%pip` over `!pip` so the install targets the
# kernel's own environment — confirm which form this workspace/pipeline expects.
!pip install instructor
!pip install azure.identity openai

# Restart the Python process; presumably so the packages installed above are importable in later cells.
dbutils.library.restartPython()

In [0]:
import pandas as pd
from random import sample
from time import sleep
from pydantic import BaseModel, Field
import os
import instructor
from pydantic import create_model, BaseModel
from enum import Enum
from typing import Optional, Type
import requests
from abc import ABC
from openai import OpenAI
import json
from tqdm import tqdm
from openai import AzureOpenAI
from azure.identity import get_bearer_token_provider, ClientSecretCredential
from pydantic import BaseModel, Field

In [0]:
import dlt
import json
import unicodedata
from pyspark.sql.functions import col, lower, regexp_extract, regexp_replace, when, lit, substring, expr, floor, concat, udf, lpad
from pyspark.sql.types import StringType, DoubleType
from glob import glob
from functools import reduce


In [0]:
# Azure OpenAI client set-up.
# Every credential is read from the Databricks secret scope "DAPGPTKEYVAULT"; nothing is hard-coded.
credential = ClientSecretCredential(
    **{
        kwarg: dbutils.secrets.get(scope="DAPGPTKEYVAULT", key=secret_key)
        for kwarg, secret_key in (
            ("tenant_id", "GPT-APIM-Tenant-ID"),
            ("client_id", "GPT-APIM-Client-ID"),
            ("client_secret", "GPT-APIM-Client-Secret"),
        )
    }
)

# Azure AD bearer-token provider; its second argument is also stored as a secret.
token_provider = get_bearer_token_provider(
    credential,
    dbutils.secrets.get(scope="DAPGPTKEYVAULT", key="GPT-APIM-Token-Cred"),
)

# AzureOpenAI client that authenticates through the token provider above.
client = AzureOpenAI(
    api_version="2024-08-01-preview",
    azure_ad_token_provider=token_provider,
    azure_endpoint="https://azapim.worldbank.org/conversationalai/v2/",
)

class BaseService(ABC):
    """
    Common parent for service wrappers.

    Every subclass must declare a class-level string attribute ``service_name``.
    The check runs when the subclass is defined, so a subclass without a valid
    name cannot be created at all.
    """

    def __init_subclass__(cls, **kwargs):
        """
        Validate a newly defined subclass.

        Raises:
            AttributeError: if ``service_name`` is missing or is not a ``str``.
        """
        super().__init_subclass__(**kwargs)

        declared_name = getattr(cls, "service_name", None)
        if isinstance(declared_name, str):
            return

        raise AttributeError(
            f"Service class '{cls.__name__}' must define a string 'service_name'."
        )


class AzureOpenAIService(BaseService):
    """
    Structured chat-completion service backed by Azure OpenAI and Instructor.

    The supplied ``AzureOpenAI`` client is wrapped with ``instructor.from_openai``
    in JSON mode; ``execute`` forwards a ``response_model`` to that wrapped client
    and returns whatever it produces.
    """

    service_name = "azureopenai"

    # NOTE(review): this default still contains the placeholder text "(same as before)".
    # It is only used when no ``system_prompt`` is passed to ``__init__`` — confirm the
    # intended default wording.
    DEFAULT_SYSTEM_PROMPT = '''You are a public finance expert working with a multilateral development institution...

    (same as before)
    '''

    def __init__(
        self,
        client: AzureOpenAI,
        system_prompt: Optional[str] = None,
    ):
        """
        Wrap an existing AzureOpenAI client.

        Args:
            client: An already-configured ``AzureOpenAI`` client.
            system_prompt: System prompt used by ``execute`` when the call does not
                supply its own. Falls back to ``DEFAULT_SYSTEM_PROMPT`` when empty or None.
        """
        self.SYSTEM_PROMPT = system_prompt or self.DEFAULT_SYSTEM_PROMPT

        # Instructor wrapper that adds the ``response_model`` argument to completions.
        self.client = instructor.from_openai(
            client,
            mode=instructor.Mode.JSON,
        )

    def execute(
        self,
        prompt: str,
        model: str,
        response_model: Type,
        max_tokens: Optional[int] = None,
        temperature: Optional[float] = None,
        system_message: Optional[str] = None,
        **kwargs,
    ):
        """
        Run one structured chat completion.

        Args:
            prompt: User message; must be a non-empty, non-blank string.
            model: Model / deployment name passed to the API.
            response_model: Type handed to Instructor as ``response_model``.
            max_tokens: Forwarded unchanged (``None`` is passed through as-is).
            temperature: Forwarded unchanged (``None`` is passed through as-is).
            system_message: Per-call system prompt; defaults to the instance prompt.
            **kwargs: Extra keyword arguments forwarded to ``chat.completions.create``.

        Returns:
            The object returned by the Instructor-wrapped ``chat.completions.create``.

        Raises:
            ValueError: if ``prompt`` is empty/not a string or ``model`` is not a string.
            Exception: wrapping any error raised by the API call; the original error
                is attached as ``__cause__``.
        """
        if not isinstance(prompt, str) or not prompt.strip():
            raise ValueError("The 'prompt' must be a non-empty string.")
        if not isinstance(model, str):
            raise ValueError("The 'model' must be a string.")

        system_message = system_message or self.SYSTEM_PROMPT

        messages = [
            {"role": "system", "content": system_message},
            {"role": "user", "content": prompt},
        ]

        try:
            return self.client.chat.completions.create(
                model=model,
                response_model=response_model,
                messages=messages,
                temperature=temperature,
                max_tokens=max_tokens,
                **kwargs,
            )
        except Exception as e:
            # Chain explicitly so the underlying API/validation error stays attached
            # to the wrapper as its direct cause.
            raise Exception(f"Error in AzureOpenAI API call: {e}") from e

In [0]:
# Three-level rating of how well a piece of evidence supports a bottleneck.
# The ``str`` mix-in makes every member a real string equal to its value.
# Used as the type of the ``confidence`` fields on BottleneckBase and BottleneckValidation_1_1.
# (Documented with comments rather than a class docstring so the class's __doc__ is unchanged.)
class ConfidenceLevel(str, Enum):
    strong = "strong"
    borderline = "borderline"
    weak = "weak"
    
# Alternative high / medium / low confidence scale (string-valued enum).
# NOTE(review): not referenced anywhere else in the visible source — the validation model's
# ``confidence`` field is typed with ConfidenceLevel instead. Confirm whether this is dead code.
class ValidationConfidence(str, Enum):
    high = "high"
    medium = "medium"
    low = "low"

# Shared base for the per-bottleneck extraction models (see Bottleneck_1_1).
# Contributes one optional field, ``confidence``, rated on the ConfidenceLevel scale.
# The Field description below is runtime text that belongs to the model definition, not a comment,
# so its wording must not be edited casually.
class BottleneckBase(BaseModel):
    # Optional: defaults to None when the model response does not supply a rating.
    confidence: Optional[ConfidenceLevel] = Field(
        None,
        description=(
            "How confidently the extracted evidence supports the bottleneck. "
            "Choose 'strong' if the evidence clearly and directly supports the bottleneck, "
            "'borderline' if it is somewhat relevant but may be open to interpretation, "
            "and 'weak' if the evidence is tenuous, ambiguous, or only indirectly related."
        )
    )

    

# Catalogue of public-finance challenges and the bottlenecks that belong to each.
# Each challenge dict holds the role/challenge context plus a "bottlenecks" list; each bottleneck
# has an id, a name and a description. The name/description strings are interpolated verbatim into
# the LLM prompts built later in this notebook, so their wording is behaviour, not documentation.
# NOTE(review): "model_key" is not read anywhere in the visible source (response models are looked
# up by "bottleneck_id"), and no entry defines a "bottleneck_examples" list.
pf_challenges = [
    {
        "challenge_id": 1,
        "role_of_public_finance": "Commitment to Feasible Policy",
        "role_description": (
            "Finance ministries and other central agencies play a fundamental coordinating role when it comes to policy decisions, "
            "functioning as a clearing house for various policy proposals across sector institutions, promoting dialogue and consultation around policy trade-offs, "
            "and ultimately linking policy objectives with resource availability, mobilization and use. "
            "This helps build commitment to policy decisions that are made and improves their feasibility and the likelihood that they will be implemented."
        ),
        "challenge_name": "Insufficient Stakeholder Commitment to Policy Action",
        "challenge_description": (
            "Focuses on whether political and technical stakeholders demonstrate sustained commitment to implementing approved policies, "
            "including challenges around ownership, continuity, and buy-in."
        ),
        "bottlenecks": [
            {
                # Ids are strings of the form "<challenge>.<n>"; they key the model registries.
                "bottleneck_id": "1.1",
                "bottleneck_name": "Inadequate Commitment of Political and Technical Leadership",
                "bottleneck_description": (
                    "This bottleneck applies when there is a clear lack of sustained commitment by political or technical leaders to implement approved policies. "
                    "This includes delays, resistance, or failure to act when reforms threaten the status quo, require politically difficult trade-offs, "
                    "or demand resource shifts that are not followed through despite stated priorities. "
                    "Examples include: approved reforms not being enacted, persistent underfunding of a priority despite commitments, or misalignment between stated goals and actual budget execution. "
                    "Do **not** classify general governance weakness, vague statements, or budget/funding gaps **unless** directly tied to political/technical unwillingness or inaction. "
                    "Be careful to distinguish from other bottlenecks like 2.1 (coordination failures), 5.2 (disconnect between budgets and policy), or 6.3 (weak execution)."
                ),
                "model_key": "bottleneck_1_1"
            }
        ]
    },
]

# Extraction response model for bottleneck 1.1.
# Adds two optional text fields to the ``confidence`` rating inherited from BottleneckBase.
# Both default to None, which is how "no evidence found" is represented.
# The Field descriptions are runtime text that belongs to the model definition; keep them verbatim.
class Bottleneck_1_1(BottleneckBase):
    # Verbatim quote from the analysed text (None when nothing was found).
    extracted_evidence: Optional[str] = Field(
        None,
        description=(
            "Verbatim excerpt from the text that provides concrete evidence of political or technical leadership failing to commit to implementing approved policies. "
            "Relevant evidence may include examples of delays, failure to act, resistance to disrupting the status quo, or refusal to reallocate resources despite stated goals. "
            "Mere descriptions of policy ambitions or general intentions are not sufficient. "
            "Focus only on failures of action, follow-through, or resource application. "
            "Use only direct text from the source; do not paraphrase or infer."
        )
    )
    # Short justification tying the quote to the bottleneck.
    reasoning: Optional[str] = Field(
        None,
        description=(
            "Short explanation of how the extracted text demonstrates weak or absent leadership commitment. "
            "The reasoning must be grounded entirely in the quoted text and explain how inaction, delay, or resistance is evident."
        )
    )

# Validation response model for bottleneck 1.1.
# Layout: a series of optional yes/no diagnostic flags (all default to None), one optional free-text
# pointer to a better-fitting bottleneck, then the final verdict fields. ``is_valid`` is the only
# required field. The fields are declared flags-first, verdict-last, mirroring the validation prompt,
# which asks for the intermediate evaluations before the final decision.
# The Field descriptions are runtime text that belongs to the model definition; keep them verbatim.
class BottleneckValidation_1_1(BaseModel):
    # Reform or policy presence
    mentions_approved_reform: Optional[bool] = Field(
        None,
        description="True only if the text explicitly refers to a specific reform, policy, program, recommendation, or budget measure that has been officially approved, agreed upon, or endorsed by government or leadership. Do not mark true for general ambitions, intentions, or proposed reforms without clear approval."
    )

    reform_not_followed_through: Optional[bool] = Field(
        None,
        description="True only if the text states or clearly describes that the approved reform or policy has not been implemented, enacted, or operationalized. Do not infer this based on slow progress or ambiguous phrasing."
    )

    followthrough_failure_attributed_to_leadership: Optional[bool] = Field(
        None,
        description="True only if the failure to act is explicitly linked to political or technical leadership. The text must name or clearly point to decision-makers (e.g., Cabinet, ministry, Parliament) as responsible. Do not mark true if no actors are identified or if alternative causes are equally plausible."
    )

    # Resistance or interference
    political_resistance_described: Optional[bool] = Field(
        None,
        description="True if the text clearly describes political actors actively resisting, avoiding, or opposing a proposed or ongoing reform. Must include reference to politicians, political parties, or government decision-makers."
    )

    resistance_due_to_political_cost: Optional[bool] = Field(
        None,
        description="True if the resistance is clearly described as stemming from political cost, disruption of patronage, fear of losing influence, or other vested interests. Do not mark true if the cost is described in technical, not political, terms."
    )

    interference_in_execution: Optional[bool] = Field(
        None,
        description="True if the text describes political or technical actors interfering with, overriding, or distorting the execution of an approved reform or budgeted action."
    )

    interference_is_discretionary: Optional[bool] = Field(
        None,
        description="True only if the interference is described as intentional, discretionary, or politically motivated—not due to weak capacity, technical errors, or administrative bottlenecks."
    )

    # Resource prioritization and tradeoffs
    failure_to_prioritize_resources: Optional[bool] = Field(
        None,
        description="True if the text explicitly describes leadership or ministry unwillingness or failure to reallocate, constrain, or prioritize resources when tradeoffs are necessary. Do not mark true for passive budget misalignment or general underfunding unless refusal to act is clearly shown."
    )

    # Passive signals
    demoralization_or_abandonment_described: Optional[bool] = Field(
        None,
        description="True if the text describes clear symptoms such as demoralization, disengagement, abandonment of reform, or loss of momentum. These must reflect failure of implementation or leadership follow-through."
    )

    passive_signals_linked_to_leadership: Optional[bool] = Field(
        None,
        description="True only if the above symptoms are explicitly or plausibly linked to disengagement or inaction by named political or technical leadership. Do not infer this if no actor is identified."
    )

    # Top-down failure
    central_decision_harmed_implementation: Optional[bool] = Field(
        None,
        description="True if the text shows that a central or national-level decision (or inaction) undermined subnational or delegated implementation or follow-through."
    )

    failure_due_to_top_level_coordination: Optional[bool] = Field(
        None,
        description="True only if the issue is clearly attributed to a failure of leadership coordination, consultation, or direction—not to generic system design problems."
    )

    # Attribution and explicitness checks
    actor_named_or_identifiable: Optional[bool] = Field(
        None,
        description="True only if a responsible political or technical actor (e.g., a named ministry, Cabinet, Parliament) is explicitly mentioned or clearly implied in the text."
    )

    cause_of_inaction_explicit: Optional[bool] = Field(
        None,
        description="True only if the cause of inaction is clearly described as reluctance, disinterest, avoidance, resistance, or disengagement by leadership. Do not infer this from neutral language like 'progress is slow'."
    )

    reform_tied_to_government_commitment: Optional[bool] = Field(
        None,
        description="True if the reform or priority is described as originating from government commitments, statements, or strategies—not just from donor reports, technical plans, or analyst recommendations."
    )

    alternative_explanations_ruled_out: Optional[bool] = Field(
        None,
        description="True if the text rules out other plausible causes for inaction—such as technical constraints, funding limitations, or institutional capacity gaps. Do not mark true if ambiguity remains."
    )

    # Red flags (a True value here counts against the evidence)
    uses_conditional_language: Optional[bool] = Field(
        None,
        description="True if the text uses conditional or speculative phrases (e.g., 'should', 'could', 'must', 'may') instead of making clear statements about what has or has not happened."
    )

    too_vague_or_generic: Optional[bool] = Field(
        None,
        description="True if the text lacks specific examples, actor mentions, or implementation details and instead offers general complaints or aspirations."
    )

    # Free text rather than a flag: the id of a better-fitting bottleneck, if any.
    fits_other_bottleneck: Optional[str] = Field(
        None,
        description="If the chunk better supports another bottleneck (e.g., 2.1, 5.2, 5.3, 8.2), indicate which. Leave blank if unsure."
    )

    # Final validation judgment
    # ``...`` marks the field as required: a response without ``is_valid`` fails validation.
    is_valid: bool = Field(
        ...,
        description="True only if the evidence clearly and directly supports Bottleneck 1.1 according to the criteria above. False if vague, misattributed, inferential, or better fits another bottleneck."
    )

    validation_reasoning: Optional[str] = Field(
        None,
        description="Explain how the answers to the above flags led you to your decision. Highlight any red flags or failure to meet admission conditions."
    )

    # Note the field name is ``confidence`` (not ``validation_confidence``).
    confidence: Optional[ConfidenceLevel] = Field(
        None,
        description="How confidently you judge this to support Bottleneck 1.1: 'strong' for clear and direct evidence, 'borderline' for partial or ambiguous support, 'weak' for tenuous or indirect relevance."
    )


# Maps a bottleneck id to the response model used when extracting evidence for it.
MODEL_REGISTRY = {
    "1.1": Bottleneck_1_1,
}

# Maps a bottleneck id to the response model used when validating extracted evidence.
# Ids without an entry here are skipped by the validation step.
VALIDATION_MODEL_REGISTRY = {
    "1.1": BottleneckValidation_1_1,

}



def make_bottleneck_prompt(
    text: str,
    role_of_public_finance: str,
    role_description: str,
    challenge_name: str,
    challenge_description: str,
    bottleneck_name: str,
    bottleneck_description: str,
    bottleneck_examples: Optional[list[str]] = None
) -> str:
    """
    Build the evidence-extraction prompt for one bottleneck and one text excerpt.

    Args:
        text: The excerpt to analyse; placed at the very end of the prompt.
        role_of_public_finance: Name of the public-finance role (context line).
        role_description: Description shown under the role.
        challenge_name: Name of the PFM challenge.
        challenge_description: Description shown under the challenge.
        bottleneck_name: Name of the specific bottleneck.
        bottleneck_description: Description shown under the bottleneck.
        bottleneck_examples: Optional example evidence, rendered as a "- item" list.
            When empty or None the examples section is left blank.

    Returns:
        The prompt with leading/trailing whitespace stripped. Because the template is
        indented in the source, every line after the first keeps eight leading spaces.
    """
    if not bottleneck_examples:
        examples_block = ""
    else:
        bullets = "\n".join([f"- {example}" for example in bottleneck_examples])
        # Same text as an indented triple-quoted block: heading, then the bullets,
        # with surrounding whitespace trimmed.
        examples_block = (
            "\n"
            "        Examples of valid evidence for this bottleneck include:\n"
            f"        {bullets}\n"
            "        "
        ).strip()

    prompt = f"""
        You are analyzing a public finance document to identify specific bottlenecks affecting development outcomes.
        
        The context for your analysis is as follows:
        
        Role of Public Finance: {role_of_public_finance}
        → {role_description}
        
        PFM Challenge: {challenge_name}
        → {challenge_description}
        
        Specific Bottleneck: {bottleneck_name}
        → {bottleneck_description}
        
        {examples_block}
        
        ---
        
        Your task:
        
        - Carefully read the excerpt below.
        - Extract direct evidence from the text that clearly supports the presence of the specific bottleneck listed above.
        - Only extract text that is explicitly present in the excerpt.
        - Do not infer, assume, or include information that is not stated.
        - If you find no clear evidence, return null.
        
        For each piece of extracted evidence, briefly explain your reasoning (i.e., why this excerpt indicates the bottleneck), and indicate if the match is ambiguous.
        
        Text to analyze:
        
        {text}
        """
    return prompt.strip()



def make_validation_prompt(
    extracted_evidence: str,
    reasoning: str,
    role_of_public_finance: str,
    role_description: str,
    challenge_name: str,
    challenge_description: str,
    bottleneck_name: str,
    bottleneck_description: str,
    validation_model_cls=None,
    bottleneck_examples: Optional[list[str]] = None
) -> str:
    """
    Build the prompt that asks the model to validate previously extracted evidence.

    Args:
        extracted_evidence: The quote produced by the extraction step.
        reasoning: The extraction step's justification for that quote.
        role_of_public_finance: Name of the public-finance role (context line).
        role_description: Description shown under the role.
        challenge_name: Name of the PFM challenge.
        challenge_description: Description shown under the challenge.
        bottleneck_name: Name of the specific bottleneck.
        bottleneck_description: Description shown under the bottleneck.
        validation_model_cls: Optional class; if it exposes ``validation_guidance()``,
            the returned text is appended after "Additional evaluation criteria:".
        bottleneck_examples: Optional example evidence, rendered as a "- item" list.

    Returns:
        The prompt with leading/trailing whitespace stripped. Because the template is
        indented in the source, lines after the first keep their leading spaces.
    """
    # Model-specific guidance is opt-in: only classes that define the hook contribute text.
    has_guidance = bool(validation_model_cls) and hasattr(validation_model_cls, "validation_guidance")
    guidance = validation_model_cls.validation_guidance() if has_guidance else ""
    criteria_line = ("Additional evaluation criteria:" + guidance) if guidance else ""

    # Heading plus bullet list of examples, or nothing at all.
    if bottleneck_examples:
        bullet_list = "\n".join([f"- {example}" for example in bottleneck_examples])
        examples_line = (
            "Here are some representative examples of valid evidence for this bottleneck:\n"
            f"{bullet_list}"
        )
    else:
        examples_line = ""

    prompt = f"""
        You are validating whether a previously extracted piece of text is strong evidence of a specific bottleneck in public finance.

        Here is the context:

        Role of Public Finance: {role_of_public_finance}
        → {role_description}

        PFM Challenge: {challenge_name}
        → {challenge_description}

        Specific Bottleneck: {bottleneck_name}
        → {bottleneck_description}

        {criteria_line}
        {examples_line}

        ---

        Extracted Evidence:
        {extracted_evidence}

        Reasoning for Extraction:
        {reasoning}

        ---

        Your task:

        - First, evaluate whether the evidence is general, misplaced, insufficient, or misclassified using the bottleneck-specific criteria above.
        - Then, reflect on those evaluations to decide:
            - Does this evidence clearly and directly support the bottleneck?
            - Is the reasoning plausible and grounded in the evidence?
        - If the evidence is vague, general, or fits another bottleneck better, mark `is_valid` as False.
        - Ensure the `is_valid` field reflects your judgment based on the flags you select.
        - Your validation reasoning must explain how the intermediate evaluations informed your final decision.
        """
    return prompt.strip()

        
def make_structured_summary_prompt(
    context_text: str,
    extracted_evidence: str,
    bottleneck_name: str,
    bottleneck_description: str,
    metadata_tuple: tuple
) -> str:
    """
    Build the prompt asking for a 1–2 sentence summary of validated evidence.

    Args:
        context_text: The surrounding text chunk, quoted under "Text Context".
        extracted_evidence: The validated quote, quoted under "Extracted Evidence".
        bottleneck_name: Name of the bottleneck in focus.
        bottleneck_description: Description shown under the bottleneck name.
        metadata_tuple: Exactly four items, in order:
            (country, document name, region, issue area).

    Returns:
        The prompt with leading/trailing whitespace stripped.

    Raises:
        ValueError: if ``metadata_tuple`` does not unpack into exactly four items.
    """
    # Positional contract: the tuple must carry exactly these four entries.
    (country, document, region, issue_area) = metadata_tuple

    prompt = f"""
You are summarizing validated evidence from a public finance diagnostic document.

Document Metadata:
- Country: {country}
- Document: {document}
- Region: {region}
- Issue Area: {issue_area}

Bottleneck Focus:
- {bottleneck_name}
→ {bottleneck_description}

Text Context:
\"\"\"{context_text}\"\"\"

Extracted Evidence:
\"\"\"{extracted_evidence}\"\"\"

Your task:
Write a concise 1–2 sentence summary that:
- Names the **specific challenge**
- Describes its **consequence**
- Mentions the relevant **policy or issue area** if identifiable

Avoid jargon, hedging, or hallucination.
"""
    return prompt.strip()



In [0]:
# Ids of the bottlenecks this run should process; all other catalogue entries are ignored.
ACTIVE_BOTTLENECK_IDS = ["1.1"]

# Flatten the catalogue: one dict per active bottleneck, carrying its parent challenge's
# context fields next to the bottleneck's own keys (the bottleneck's keys win on a name clash).
active_bottlenecks = [
    {
        "challenge_name": challenge["challenge_name"],
        "challenge_id": challenge["challenge_id"],
        "challenge_description": challenge["challenge_description"],
        "role_of_public_finance": challenge["role_of_public_finance"],
        "role_description": challenge["role_description"],
        **bottleneck,
    }
    for challenge in pf_challenges
    for bottleneck in challenge["bottlenecks"]
    if bottleneck["bottleneck_id"] in ACTIVE_BOTTLENECK_IDS
]
# System prompt shared by every LLM call made through ``service`` below
# (extraction, validation and summarisation all go through the same service object).
sys_prompt = """
You are a public finance expert working at a multilateral development bank. 
Your job is to identify and evaluate evidence of bottlenecks in Public Financial Management (PFM) systems, 
based on diagnostic reports, budget support documents, and public expenditure reviews.

You are especially trained to distinguish between:
- vague or aspirational language vs. concrete implementation gaps,
- general governance issues vs. specific failures of leadership commitment,
- PFM system reform vs. follow-through on approved policy actions.

Only validate a chunk as evidence for a bottleneck if it meets the precise criteria described.
"""
# Single service instance used by the pipeline tables; passing ``system_prompt`` overrides the
# class-level DEFAULT_SYSTEM_PROMPT.
service = AzureOpenAIService(client=client, system_prompt=sys_prompt)

In [0]:
@dlt.table(name="source_chunks")
def load_chunks():
    """
    DLT table ``source_chunks``: text chunks for a fixed set of documents.

    Reads ``prd_mega.sboost4.per_pfr_chunks``, keeps the rows whose ``node_id`` is in
    the hard-coded list below, and projects ``node_id``, ``chunk_id`` and ``text``.
    """
    # Documents included in this run (matched against the ``node_id`` column).
    wanted_nodes = ["9612673", "8846482", "5669851"]

    all_chunks = spark.read.table("prd_mega.sboost4.per_pfr_chunks")
    selected = all_chunks.filter(col("node_id").isin(wanted_nodes))
    return selected.select("node_id", "chunk_id", "text")

In [0]:
@dlt.table(name="bottleneck_extractions")
def extract_evidence():
    """
    DLT table ``bottleneck_extractions``: LLM-extracted evidence per chunk and bottleneck.

    For every row of ``source_chunks`` and every active bottleneck, builds the extraction
    prompt, calls the service with the bottleneck's response model, and records the
    quote, the reasoning and the confidence rating.

    Columns (all strings): node_id, chunk_id, bottleneck_id, extracted_evidence,
    reasoning, extraction_confidence. ``extraction_confidence`` holds the plain rating
    value (e.g. "strong") or null when the model gave none.

    A failed LLM call is logged and skipped; it does not abort the table.
    """
    from pyspark.sql.types import StructType, StructField, StringType

    schema = StructType([
        StructField("node_id", StringType()),
        StructField("chunk_id", StringType()),
        StructField("bottleneck_id", StringType()),
        StructField("extracted_evidence", StringType()),
        StructField("reasoning", StringType()),
        StructField("extraction_confidence", StringType()),
    ])

    df = dlt.read("source_chunks").toPandas()

    results = []
    for row in df.itertuples():
        for b in active_bottlenecks:
            # A missing registry entry is a configuration error, so let the KeyError surface.
            model_cls = MODEL_REGISTRY[b["bottleneck_id"]]
            prompt = make_bottleneck_prompt(
                text=row.text,
                role_of_public_finance=b["role_of_public_finance"],
                role_description=b["role_description"],
                challenge_name=b["challenge_name"],
                challenge_description=b["challenge_description"],
                bottleneck_name=b["bottleneck_name"],
                bottleneck_description=b["bottleneck_description"],
                # Key was misspelled ("boAttleneck_examples"), so examples could never be passed.
                bottleneck_examples=b.get("bottleneck_examples"),
            )
            try:
                result = service.execute(prompt, model="gpt-4o", response_model=model_cls)
            except Exception as e:
                # Keep the best-effort behaviour, but make the failure visible.
                print(
                    f"Extraction error for {row.node_id}/{row.chunk_id} "
                    f"(bottleneck {b['bottleneck_id']}): {e}"
                )
                continue

            # str() on a str-mixin Enum yields "ConfidenceLevel.strong" and on None yields
            # "None"; store the member's value instead and keep a missing rating as null.
            confidence = result.confidence
            results.append({
                "node_id": row.node_id,
                "chunk_id": row.chunk_id,
                "bottleneck_id": b["bottleneck_id"],
                "extracted_evidence": result.extracted_evidence,
                "reasoning": result.reasoning,
                "extraction_confidence": getattr(confidence, "value", confidence),
            })

    return spark.createDataFrame(results, schema=schema)


In [0]:
@dlt.table(name="bottleneck_validations")
def validate_evidence():
    """
    DLT table ``bottleneck_validations``: LLM validation of each extracted quote.

    For every row of ``bottleneck_extractions`` that actually contains evidence, builds
    the validation prompt, calls the service with the bottleneck's validation model, and
    records the verdict together with all diagnostic flags.

    Columns: node_id, chunk_id, bottleneck_id (strings), is_valid (boolean),
    validation_reasoning, validation_confidence (strings), followed by one column per
    remaining field of the registered validation models (boolean for yes/no flags,
    string otherwise). The schema is fixed up front, so it is the same whether or not
    any row was produced and regardless of which optional flags the model filled in.

    Rows are skipped when the bottleneck has no validation model or configuration, or
    when no evidence was extracted. A failed LLM call is logged and skipped.
    """
    from pyspark.sql.types import StructType, StructField, StringType, BooleanType

    # Model fields that are written under explicit column names below; everything else
    # on the model is treated as a diagnostic flag. The model's field is ``confidence``
    # (stored as ``validation_confidence``), so that is the name to exclude from the dump.
    core_fields = {"is_valid", "validation_reasoning", "confidence"}

    # Derive the flag columns from the registered models rather than from the data, so
    # all-null flags and unset optional fields cannot change or break the schema.
    flag_types = {}
    for model_cls in VALIDATION_MODEL_REGISTRY.values():
        for field_name, field_info in model_cls.model_fields.items():
            if field_name in core_fields or field_name in flag_types:
                continue
            is_boolean = field_info.annotation in (bool, Optional[bool])
            flag_types[field_name] = BooleanType() if is_boolean else StringType()

    schema = StructType(
        [
            StructField("node_id", StringType()),
            StructField("chunk_id", StringType()),
            StructField("bottleneck_id", StringType()),
            StructField("is_valid", BooleanType()),
            StructField("validation_reasoning", StringType()),
            StructField("validation_confidence", StringType()),
        ]
        + [StructField(name, dtype) for name, dtype in flag_types.items()]
    )

    df = dlt.read("bottleneck_extractions").toPandas()

    results = []
    for row in df.itertuples():
        validation_cls = VALIDATION_MODEL_REGISTRY.get(row.bottleneck_id)
        # Default of None avoids an uncaught StopIteration for an id that is no longer active.
        config = next(
            (b for b in active_bottlenecks if b["bottleneck_id"] == row.bottleneck_id),
            None,
        )
        if not validation_cls or config is None:
            continue

        # The extraction step returns null when it finds nothing; there is no quote to validate.
        evidence = row.extracted_evidence
        if not isinstance(evidence, str) or not evidence.strip():
            continue

        try:
            prompt = make_validation_prompt(
                extracted_evidence=evidence,
                reasoning=row.reasoning,
                role_of_public_finance=config["role_of_public_finance"],
                role_description=config["role_description"],
                challenge_name=config["challenge_name"],
                challenge_description=config["challenge_description"],
                bottleneck_name=config["bottleneck_name"],
                bottleneck_description=config["bottleneck_description"],
                validation_model_cls=validation_cls
            )

            val = service.execute(
                prompt,
                model="gpt-4o",
                response_model=validation_cls
            )

            record = {
                "node_id": row.node_id,
                "chunk_id": row.chunk_id,
                "bottleneck_id": row.bottleneck_id,
                "is_valid": val.is_valid,
                "validation_reasoning": val.validation_reasoning,
                # Store the rating's plain value ("strong", ...) rather than the Enum object.
                "validation_confidence": getattr(val.confidence, "value", val.confidence),
            }

            # mode="json" turns enums and similar values into plain JSON-compatible types.
            flags = val.model_dump(mode="json", exclude=core_fields)
            for name, dtype in flag_types.items():
                value = flags.get(name)
                if value is not None and isinstance(dtype, StringType) and not isinstance(value, str):
                    value = str(value)
                record[name] = value

            results.append(record)

        except Exception as e:
            print(f"Validation error: {e}")
            continue

    return spark.createDataFrame(results, schema=schema)


In [0]:
@dlt.table(name="bottleneck_summaries")
def summarize_validated_chunks():
    """
    DLT table ``bottleneck_summaries``: a short LLM summary for each validated quote.

    ``bottleneck_validations`` only carries the keys, the verdict and the flags, so the
    quote and the chunk text are joined back in from ``bottleneck_extractions`` and
    ``source_chunks``, and the bottleneck name/description are looked up in
    ``active_bottlenecks``.

    Columns (all strings): node_id, chunk_id, bottleneck_id, summary.

    A row whose summary call fails is logged and skipped.
    """
    from pyspark.sql.types import StructType, StructField, StringType

    schema = StructType([
        StructField("node_id", StringType()),
        StructField("chunk_id", StringType()),
        StructField("bottleneck_id", StringType()),
        StructField("summary", StringType()),
    ])

    validated_keys = (
        dlt.read("bottleneck_validations")
        .filter("is_valid = true")
        .select("node_id", "chunk_id", "bottleneck_id")
    )
    evidence = dlt.read("bottleneck_extractions").select(
        "node_id", "chunk_id", "bottleneck_id", "extracted_evidence"
    )
    # All columns of source_chunks are kept, so any document metadata that table carries
    # becomes available to the prompt below.
    chunks = dlt.read("source_chunks")

    df = (
        validated_keys
        .join(evidence, ["node_id", "chunk_id", "bottleneck_id"], "inner")
        .join(chunks, ["node_id", "chunk_id"], "inner")
        .toPandas()
    )

    bottleneck_by_id = {b["bottleneck_id"]: b for b in active_bottlenecks}

    results = []
    for row in df.itertuples():
        try:
            config = bottleneck_by_id[row.bottleneck_id]
            prompt = make_structured_summary_prompt(
                context_text=row.text,
                extracted_evidence=row.extracted_evidence,
                bottleneck_name=config["bottleneck_name"],
                bottleneck_description=config["bottleneck_description"],
                # NOTE(review): source_chunks as defined in this notebook selects only
                # node_id/chunk_id/text, so these metadata values fall back to "" unless
                # the columns are added upstream — confirm the source column names.
                metadata_tuple=(
                    getattr(row, "cntry_name", ""),
                    getattr(row, "doc_name", ""),
                    getattr(row, "region", ""),
                    getattr(row, "ent_topic_text", ""),
                )
            )
            summary = service.execute(prompt, model="gpt-4o", response_model=str)
            results.append({
                "node_id": row.node_id,
                "chunk_id": row.chunk_id,
                "bottleneck_id": row.bottleneck_id,
                "summary": summary.strip()
            })
        except Exception as e:
            print(f"Error summarizing {row.node_id}/{row.chunk_id}: {e}")
            continue

    return spark.createDataFrame(results, schema=schema)
