# Mappr

> Map report to evaluation frameworks

In [None]:
#| default_exp mappr

In [None]:
from pathlib import Path
from functools import reduce
from toolslm.md_hier import *
from rich import print


In [None]:
#| eval: false
# Load the sample evaluation report (markdown) used throughout this notebook.
report_path = Path("../_data/md_library/49d2fba781b6a7c0d94577479636ee6f/fixed_eval_report.md")
# Decode explicitly as UTF-8 so the text does not depend on the platform's
# default locale encoding; `report` is only ever bound to the file's text.
report = report_path.read_text(encoding="utf-8")
print(report[:500])  # preview the first 500 characters

# **PPMi** .... page 1

**Final Evaluation of the EU-IOM Joint Initiative for migrant protection and reintegration in the horn of Africa**

Final Evaluation Report, 17 March 2023

![img-0.jpeg](img-0.jpeg)

**EU-IOM** Joint Initiative for Migrant Protection and Reintegration

Project funded by the European Union
Project implemented by IOM
This Final Evaluation Report was commissioned by IOM and developed by the evaluation team of PPMI Group, including: Loes van der Graaf, Rimantas Dumcius, Radvi


In [None]:
from dotenv import load_dotenv
import os

# Load variables from a local .env file (if one exists) into the environment.
load_dotenv()
# os.getenv returns None when GEMINI_API_KEY is not set.
GEMINI_API_KEY = os.getenv('GEMINI_API_KEY')

In [None]:
import dspy

# Alternative model identifier kept for reference (not used):
# lm = dspy.LM('google/gemini-2.0-flash-exp', api_key=GEMINI_API_KEY)
# Language model client used by every DSPy predictor defined below.
lm = dspy.LM('gemini/gemini-2.0-flash-exp', api_key=GEMINI_API_KEY)
# Register `lm` as the default LM in DSPy's global settings.
dspy.configure(lm=lm)
# Sanity check: show which LM object is currently configured.
print(dspy.settings.lm)

<dspy.clients.lm.LM object>


In [None]:
from typing import List, Dict, Optional

In [None]:
class InitialOverview(dspy.Signature):
    """Identify strategic sections to start theme analysis"""
    # NOTE: DSPy uses the docstring above and the `desc` strings below when
    # building the prompt, so they are runtime text -- edit them deliberately.
    # Inputs: the theme text and the stringified heading structure of the report.
    theme: str = dspy.InputField(desc="Theme to analyze")
    all_headings: str = dspy.InputField(desc="Complete document structure")
    # Outputs: an ordered list of heading keys to read first, plus the rationale.
    priority_sections: List[str] = dspy.OutputField(desc="Ordered list of section keys to explore first")
    strategy: str = dspy.OutputField(desc="Reasoning for this exploration strategy")

In [None]:
# Chain-of-thought predictor built from the InitialOverview signature.
overview = dspy.ChainOfThought(InitialOverview)

In [None]:
# Theme description passed as `theme` to the predictors below.
query = "Facilitating pathways for regular migration. Channels for regular migration are sustainable and responsive to current and emerging trends. Migrants are able to return to their homes voluntarily, safely and in dignity. Member states are supported to develop or improve existing evidence-based policy and legal frameworks to facilitate effective and sustainable regular migration channels for voluntary return."

In [None]:
# print(hdgs['**PPMi** .... page 1']['2. Background of the JI-HoA .... page 5']['2.2. External factors affecting the implementation of the JI .... page 7'].text)

In [None]:
# Build the nested heading dictionary for the report (keys are heading strings,
# as shown in the output below); the trailing `hdgs` displays it in the notebook.
hdgs = create_heading_dict(report); hdgs

{'**PPMi** .... page 1': {'CONTENTS .... page 3': {},
  '1. Introduction .... page 4': {},
  '2. Background of the JI-HoA .... page 5': {'2.1. Context and design of the JI-HoA .... page 5': {},
   '2.2. External factors affecting the implementation of the JI .... page 7': {}},
  '3. Methodology .... page 8': {},
  '4. Findings .... page 10': {'4.1. Relevance .... page 10': {'4.1.1. Relevance of programme activities for migrants, returnees, and communities .... page 10': {'Overall performance score for relevance: $3.9 / 5$ <br> Robustness score for the evidence: $4.5 / 5$ .... page 10': {},
     '4.1.1.1 Needs of migrants .... page 10': {},
     '4.1.1.2 Needs of returnees .... page 10': {},
     '4.1.1.3 Needs of community members .... page 12': {}},
    "4.1.2. Programme's relevance to the needs of stakeholders .... page 12": {'4.1.2.1 Needs of governments .... page 12': {},
     '4.1.2.2 Needs of other stakeholders .... page 13': {}}},
   '4.2. Coherence .... page 13': {"4.2.1. The J

In [None]:
# Ask the model which sections to read first; the heading dict is passed as its
# string representation because `all_headings` is declared as a `str` input.
result = overview(theme=query, all_headings=str(hdgs))
print("Priority sections:", result.priority_sections)
print("Strategy:", result.strategy)

In [None]:
class ExplorationDecision(dspy.Signature):
    """Decide next exploration step based on current findings"""
    # NOTE: DSPy uses the docstring above and the `desc` strings below when
    # building the prompt, so they are runtime text -- edit them deliberately.
    # Inputs: the theme, the evidence gathered so far, and the candidate sections.
    theme: str = dspy.InputField(desc="Theme being analyzed")
    current_findings: str = dspy.InputField(desc="Evidence found so far")
    available_sections: str = dspy.InputField(desc="Remaining sections to explore")
    # Outputs: the chosen section key (or the sentinel 'DONE') and the rationale.
    next_section: str = dspy.OutputField(desc="Next section key to explore, or 'DONE' if sufficient")
    reasoning: str = dspy.OutputField(desc="Why this section or why stopping")

In [None]:
# Chain-of-thought predictor built from the ExplorationDecision signature.
explore = dspy.ChainOfThought(ExplorationDecision)

In [None]:
# First exploration step: nothing has been read yet, so the findings are a
# placeholder and the candidates are the priority sections from the overview.
decision = explore(
    theme=query,
    current_findings="No evidence collected yet",
    available_sections=str(result.priority_sections)
)

print("Next section:", decision.next_section)
print("Reasoning:", decision.reasoning)

In [None]:
def get_content_tool(hdgs, keys_list):
    "Descend from `hdgs` through each key of `keys_list` in order and return the `.text` of the node reached"
    node = hdgs
    for key in keys_list:
        # Each key must match exactly; a missing key raises KeyError.
        node = node[key]
    return node.text

In [None]:
def find_section_path(hdgs, target_section):
    """Find the nested key path for a given section name.

    Performs a depth-first, pre-order search of the nested heading dict and
    returns the path to the first key that equals `target_section` exactly.

    Args:
        hdgs: Nested dict whose keys are section headings and whose values are
            dicts of sub-headings.
        target_section: Heading key to look for (exact string match).

    Returns:
        List of keys from the top level down to `target_section`, or None when
        no key matches.
    """
    def _search(node, prefix):
        for key, child in node.items():
            here = prefix + [key]  # new list each time; `prefix` is never mutated
            if key == target_section:
                return here
            if isinstance(child, dict):
                found = _search(child, here)
                if found is not None:
                    return found
        return None

    # Start with an explicit fresh prefix instead of a mutable default argument.
    return _search(hdgs, [])

In [None]:
# Look up the full key path of one known heading; the trailing `path` displays
# the result in the notebook.
path = find_section_path(hdgs, "4.1.1.1 Needs of migrants .... page 10"); path

['**PPMi** .... page 1',
 '4. Findings .... page 10',
 '4.1. Relevance .... page 10',
 '4.1.1. Relevance of programme activities for migrants, returnees, and communities .... page 10',
 '4.1.1.1 Needs of migrants .... page 10']

In [None]:
# Fetch the text of the section located above and preview its first 500 characters.
content = get_content_tool(hdgs, path)
print(content[:500])

In [None]:
class EvidenceAssessment(dspy.Signature):
    """Assess if current evidence is sufficient for theme analysis"""
    theme: str = dspy.InputField(desc="Theme being analyzed")
    evidence_so_far: str = dspy.InputField(desc="All evidence collected")
    sections_explored: str = dspy.InputField(desc="Sections already checked")
    sufficient: bool = dspy.OutputField(desc="Is evidence sufficient to make conclusion?")
    confidence_score: float = dspy.OutputField(desc="Confidence in current findings (0-1)")
    next_priority: str = dspy.OutputField(desc="If continuing, what type of section to prioritize")

In [None]:
# Chain-of-thought predictor built from the EvidenceAssessment signature.
assess = dspy.ChainOfThought(EvidenceAssessment)

In [None]:
# Assess sufficiency using only the single section fetched above; the explored
# list is passed as a string literal naming that same heading.
assessment = assess(
    theme=query,
    evidence_so_far=content,
    sections_explored="['4.1.1.1 Needs of migrants .... page 10']"
)

print("Sufficient:", assessment.sufficient)
print("Confidence score:", assessment.confidence_score)
print("Next priority:", assessment.next_priority)

In [None]:
class FinalSynthesis(dspy.Signature):
    """Provide detailed rationale and synthesis of theme analysis"""
    # NOTE: DSPy uses the docstring above and the `desc` strings below when
    # building the prompt, so they are runtime text -- edit them deliberately.
    # Inputs: the theme, all evidence text, and the list of sections that were read.
    theme: str = dspy.InputField(desc="Theme being analyzed")
    all_evidence: str = dspy.InputField(desc="All collected evidence")
    sections_explored: str = dspy.InputField(desc="List of sections explored")
    # Outputs: the coverage verdict with its explanation, supporting evidence, and gaps.
    theme_covered: bool = dspy.OutputField(desc="Final decision on theme coverage")
    confidence_explanation: str = dspy.OutputField(desc="Detailed explanation of confidence score")
    evidence_summary: str = dspy.OutputField(desc="Key evidence supporting the conclusion")
    gaps_identified: str = dspy.OutputField(desc="Any gaps or missing aspects")

In [None]:
class ThemeAnalyzer(dspy.Module):
    """Explore a report section by section to judge whether a theme is covered.

    Four predictors are chained: `InitialOverview` proposes candidate sections,
    `ExplorationDecision` picks the next one to read, `EvidenceAssessment`
    decides whether exploration can stop early, and `FinalSynthesis` produces
    the verdict from everything that was read.

    Args:
        max_iterations: Upper bound on exploration rounds (default 10).
        confidence_threshold: Exploration stops early once the assessment is
            `sufficient` with a confidence strictly above this value
            (default 0.8).
    """

    def __init__(self, max_iterations=10, confidence_threshold=0.8):
        # Let dspy.Module initialise its own state before predictors are attached.
        super().__init__()
        self.max_iterations = max_iterations
        self.confidence_threshold = confidence_threshold
        self.overview = dspy.ChainOfThought(InitialOverview)
        self.explore = dspy.ChainOfThought(ExplorationDecision)
        self.assess = dspy.ChainOfThought(EvidenceAssessment)
        self.synthesize = dspy.ChainOfThought(FinalSynthesis)

    def forward(self, theme, headings, get_content_fn):
        """Run the overview -> explore/assess loop -> synthesis pipeline.

        Args:
            theme: Theme description to look for in the report.
            headings: Nested heading dict of the report.
            get_content_fn: Callable `(headings, path) -> str` returning the
                text of the section at the given key path.

        Returns:
            The prediction produced by the `FinalSynthesis` step (fields
            `theme_covered`, `confidence_explanation`, `evidence_summary`,
            `gaps_identified`). Progress is also printed along the way.
        """
        # Step 1: Get strategic overview
        overview = self.overview(theme=theme, all_headings=str(headings))
        print("Overview result:", overview.priority_sections)

        evidence_collected = []
        sections_explored = []
        # Work on a copy so the overview prediction itself is never mutated.
        available_sections = list(overview.priority_sections or [])

        # Step 2: Adaptive iterative exploration
        for i in range(self.max_iterations):
            print(f"\n--- Iteration {i+1} ---")

            # Decide next section
            decision = self.explore(
                theme=theme,
                current_findings="\n\n".join(evidence_collected) if evidence_collected else "No evidence collected yet",
                available_sections=str(available_sections),
            )
            print("Decision:", decision.next_section)
            print("Reasoning:", decision.reasoning)

            # Model output may carry stray whitespace or quotes around the sentinel.
            next_section = (decision.next_section or "").strip()
            if next_section.strip("'\"") == 'DONE':
                print("Decision says DONE, breaking")
                break

            # Re-reading a section would only duplicate evidence in later prompts.
            if next_section in sections_explored:
                print("Section already explored, skipping")
                continue

            # Get content
            path = find_section_path(headings, next_section)
            print("Path found:", path)

            # Whether or not it resolves, the section should not be offered again.
            if next_section in available_sections:
                available_sections.remove(next_section)

            if not path:
                print("No path found for section!")
                # Nothing new was collected, so re-assessing would repeat the
                # previous assessment on identical evidence.
                continue

            content = get_content_fn(headings, path)
            print("Content length:", len(content))
            evidence_collected.append(f"Section: {next_section}\nContent: {content}")
            sections_explored.append(next_section)

            # Assess sufficiency
            assessment = self.assess(
                theme=theme,
                evidence_so_far="\n\n".join(evidence_collected),
                sections_explored=str(sections_explored),
            )
            print("Assessment - Sufficient:", assessment.sufficient, "Confidence:", assessment.confidence_score)

            if assessment.sufficient and assessment.confidence_score > self.confidence_threshold:
                print("Assessment says sufficient, breaking")
                break

        # Final synthesis
        synthesis = self.synthesize(
            theme=theme,
            all_evidence="\n\n".join(evidence_collected),
            sections_explored=str(sections_explored),
        )
        print("Synthesis result:", synthesis.theme_covered)
        print("Synthesis reasoning:", synthesis.confidence_explanation)
        print("Synthesis evidence:", synthesis.evidence_summary)
        print("Synthesis gaps:", synthesis.gaps_identified)
        # Hand the verdict back so callers get more than printed output.
        return synthesis
    

In [None]:
# Usage example: run the whole pipeline on the sample report and theme.
# Progress is printed by `forward`; note that `result` rebinds the name used
# earlier in this notebook for the overview prediction.
analyzer = ThemeAnalyzer()
result = analyzer(theme=query, headings=hdgs, get_content_fn=get_content_tool)