In [1]:
import inspect
from textwrap import dedent
from typing import List, Callable, Tuple, Any
import re

def extract_placeholders(text: str) -> List[str]:
    return re.findall(r'\{(\w+)\}', text)

class Prompt:
    def __init__(self, func: Callable):
        self.func = func
        self.signature = inspect.signature(func)

    def __call__(self, **inputs) -> Tuple[str, List[str]]:
        # Get the source code of the function
        source = inspect.getsource(self.func)
        
        # Extract the function body
        function_body = self.extract_function_body(source)
        
        # Create a new namespace and add input parameters
        namespace = inputs.copy()
        
        # Execute the function body in this namespace
        exec(function_body, namespace)
        
        # Filter out function parameters and built-in variables
        filtered_locals = {k: v for k, v in namespace.items() 
                           if k not in inputs and not k.startswith('__')}
        
        placeholders = []
        template_parts = []
        for text in filtered_locals.values():
            if isinstance(text, str):
                placeholders.extend(extract_placeholders(text))
                template_parts.append(dedent(text))
        
        template = "\n".join(template_parts)
        return template, list(set(placeholders))

    @staticmethod
    def extract_function_body(source: str) -> str:
        lines = source.split('\n')
        # Find the line where the function body starts
        body_start = next(i for i, line in enumerate(lines) if line.strip().endswith(':'))
        # Extract the function body, including nested functions if any
        body_lines = lines[body_start + 1:]
        min_indent = min(len(line) - len(line.lstrip()) for line in body_lines if line.strip())
        return '\n'.join(line[min_indent:] for line in body_lines)

# Decorator to automatically wrap the function
def metaprompt(func):
    return Prompt(func)

In [2]:
@metaprompt
def prompt(output_format: str, image_paths: List[str], context: str):
    system_prompt = "You are an ESG data analyst with expertise in extracting and analyzing environmental, social, and governance (ESG) metrics."
    
    match output_format:
        case "json":
            output_instructions = "Output a table according to a json schema: {json_schema}"
        case "markdown":
            output_instructions = "Output a table in markdown with the following columns: {columns}"

    rules = """
    - Ensure the 'Year' is in the format YYYY.
    - Provide total amounts for each metric per 'Year' without a detailed breakdown.
    - Convert units using simple multiplication factors:
        - Multiply by 1000 for "Thousands".
        - Multiply by 1000000 for "Millions".
    - Use restated values if corrected post external audit.
    - Focus on extracting information for the main company in the ESG report.
    - Extract only explicitly mentioned information; do not make assumptions.
    - Indicate "N/A" for any missing information.
    - Ensure all years explicitly mentioned in the tables or figures are included.\n"""
    rules += "- remove ',' in numbers" if output_format == "json" else ""

    verification = """
    ## Verify Accuracy
    - Check that the data aligns with the provided 'Data Type', 'Unit of Measure', and 'Description' for each metric:
    {metrics_information}"""

    image_instructions = "Analyze images to ensure accuracy and match exact colors as per the legend and graph." if len(image_paths) > 0 else ""

    context_preview = "Here is the extracted information from the report: {context}" if len(context) > 0 else ""

In [3]:
template, vars = prompt(output_format="json", image_paths=[], context="Sample context")

In [4]:
template

'You are an ESG data analyst with expertise in extracting and analyzing environmental, social, and governance (ESG) metrics.\nOutput a table according to a json schema: {json_schema}\n\n- Ensure the \'Year\' is in the format YYYY.\n- Provide total amounts for each metric per \'Year\' without a detailed breakdown.\n- Convert units using simple multiplication factors:\n    - Multiply by 1000 for "Thousands".\n    - Multiply by 1000000 for "Millions".\n- Use restated values if corrected post external audit.\n- Focus on extracting information for the main company in the ESG report.\n- Extract only explicitly mentioned information; do not make assumptions.\n- Indicate "N/A" for any missing information.\n- Ensure all years explicitly mentioned in the tables or figures are included.\n- remove \',\' in numbers\n\n## Verify Accuracy\n- Check that the data aligns with the provided \'Data Type\', \'Unit of Measure\', and \'Description\' for each metric:\n{metrics_information}\n\nHere is the extra

In [5]:
vars

['context', 'metrics_information', 'json_schema']