In [3]:
!pip install instructor anthropic[bedrock]

Collecting instructor
  Downloading instructor-1.7.0-py3-none-any.whl.metadata (17 kB)
Collecting docstring-parser<0.17,>=0.16 (from instructor)
  Downloading docstring_parser-0.16-py3-none-any.whl.metadata (3.0 kB)
Collecting jiter<0.7,>=0.6.1 (from instructor)
  Downloading jiter-0.6.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.2 kB)
Collecting requests<3.0.0,>=2.32.3 (from instructor)
  Downloading requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting tenacity<10.0.0,>=9.0.0 (from instructor)
  Downloading tenacity-9.0.0-py3-none-any.whl.metadata (1.2 kB)
Collecting typer<1.0.0,>=0.9.0 (from instructor)
  Downloading typer-0.15.1-py3-none-any.whl.metadata (15 kB)
Collecting shellingham>=1.3.0 (from typer<1.0.0,>=0.9.0->instructor)
  Downloading shellingham-1.5.4-py2.py3-none-any.whl.metadata (3.5 kB)
Downloading instructor-1.7.0-py3-none-any.whl (70 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m70.1/70.1 kB[0m [31m1.0 MB/s[0

In [70]:
import instructor
from anthropic import AnthropicBedrock
from pydantic import BaseModel
from typing import List
from IPython.display import Markdown, display
from tabulate import tabulate
import textwrap
import json
from utils import read_file, save_file

In [53]:
i_client = instructor.from_anthropic(AnthropicBedrock())
a_client = AnthropicBedrock()

In [44]:
class User(BaseModel):
    name: str
    age: int

In [45]:
# note that client.chat.completions.create will also work
resp = i_client.messages.create(
    model="anthropic.claude-3-haiku-20240307-v1:0",
    max_tokens=1024,
    messages=[
        {
            "role": "user",
            "content": "Extract Jason is 25 years old.",
        }
    ],
    response_model=User,
)

In [46]:
assert isinstance(resp, User)
assert resp.name == "Jason"
assert resp.age == 25
resp

User(name='Jason', age=25)

In [106]:
ab_guidance = read_file('data/whitepaper/AB_2013-07_Model_Risk_Management_Guidance.md')
moody_paper = read_file('data/whitepaper/riskcalc-3.1-whitepaper.md')

In [48]:
class Task(BaseModel):
    description: str
    instructons: str
    exmaples: str

In [102]:
def generate_tasks(question, document, model="anthropic.claude-3-haiku-20240307-v1:0"):
    message = f"""
    Generate a comprehensive list of model analysis tasks based on provided model whitepaper. Each task includes short description, detailed instructions and list of examples to answer this question: {question}.
    Be as detailed as possible. Number of identified tasks should ensure comprehensive analysis.
    Your response should be a valid json object and nothing else.
    """
    example = """
      Example:
    {'tasks': [
        {
         'descripiton': 'task desciption',
         'insturctions': 'task instructions',
         'examples': ['example', 'example',...]
        },
      ...
     ]}
    """
    whitepaper = f"""
    <whitepaper>
    {document}
    </whitepaper>
    """

    return a_client.messages.create(
        model=model,
        system= whitepaper,
        max_tokens=3000,
        messages=[
            {
                "role": "user",
                "content": message + example,
            }
        ]#,
        #response_model=Task,
        )

In [103]:
qq = ['Identify any specific limitations and model usage risk in stagflation environment']#,
      #'Indentify any specific limitations and model usage risks in hyper-inflation scenario']

for i, q in enumerate(qq):
    tasks = generate_tasks(q, moody_paper)#, model= 'anthropic.claude-3-sonnet-20240229-v1:0')
    #display(Markdown(f"```json\n{tasks.content[0].text}```"))
    display(Markdown(f"```json\n{tasks.content[0].text}\n```"))


```json
{
    "tasks": [
        {
            "description": "Assess the model's performance during periods of economic stress and volatility",
            "instructions": "Review the model validation results in Section 4 of the whitepaper, specifically the analysis of model performance over the credit cycle. Identify how the model's predictive power and calibration changes during periods of high default rates and economic volatility, such as the 1998-2002 period. Assess whether the model's performance is stable across different economic conditions, or if it exhibits sensitivity to the credit environment.",
            "examples": [
                "Evaluate the model's accuracy ratio and likelihood gains during the 1998-2002 period of high default rates compared to other time periods",
                "Analyze how well the model's default probability estimates align with realized default rates during periods of economic stress",
                "Assess whether the model's ranking of credits from high to low risk remains consistent across different credit environments"
            ]
        },
        {
            "description": "Examine the model's ability to capture industry-specific effects during stagflation",
            "instructions": "Review the discussion in Section 3.3 on the importance of controlling for industry variation in the RiskCalc v3.1 model. Assess whether the model's industry-specific adjustments would adequately capture the differential impact of stagflation on various sectors. Identify any potential limitations in the industry modeling approach that could affect the model's performance during a stagflation scenario.",
            "examples": [
                "Evaluate how well the model's industry-level adjustments would account for the varying impact of rising prices and stagnant economic growth on different industries",
                "Assess whether the model's industry classifications are granular enough to capture heterogeneous effects within broader sectors during a stagflation environment",
                "Analyze the stability of the industry-level parameters in the model and whether they would remain representative during a stagflation scenario"
            ]
        },
        {
            "description": "Examine the model's ability to incorporate forward-looking market information during stagflation",
            "instructions": "Review the discussion in Section 3.2 on the use of the distance-to-default measure to incorporate forward-looking market information into the RiskCalc v3.1 model. Assess whether this market-based input would continue to provide reliable leading indicators of default risk during a stagflation scenario, where equity market performance may diverge from underlying credit conditions.",
            "examples": [
                "Evaluate how well the distance-to-default measure would capture the market's assessment of default risk during a stagflation environment, where equity prices may not fully reflect the deterioration in credit quality",
                "Analyze the timeliness and responsiveness of the distance-to-default measure in signaling changes in default risk compared to firm-specific financial statement information during stagflation",
                "Assess the model's ability to differentiate the impact of systematic market factors versus idiosyncratic firm-specific factors on default risk during a stagflation scenario"
            ]
        },
        {
            "description": "Assess the model's sensitivity to changes in key financial ratios during stagflation",
            "instructions": "Review the list of financial ratios used in the RiskCalc v3.1 model's Financial Statement Only (FSO) mode in the Appendix. Identify which ratios are likely to be most affected by the unique characteristics of a stagflation environment, such as rising prices, stagnant economic growth, and potential accounting distortions. Assess the model's robustness to changes in these key ratios during a stagflation scenario.",
            "examples": [
                "Evaluate the impact of changes in profitability ratios (e.g., ROA) on the model's default probability estimates during a stagflation environment",
                "Analyze the sensitivity of the model's leverage and debt coverage ratios to the potential distortions in financial statements caused by stagflation",
                "Assess how changes in growth and activity ratios (e.g., sales growth, inventory turnover) would affect the model's performance in a stagflation scenario"
            ]
        },
        {
            "description": "Examine the model's ability to provide accurate default probability estimates during stagflation",
            "instructions": "Review the discussion in Section 4 on the model's calibration, or its ability to accurately predict default rates. Assess whether the model's calibration would remain stable during a stagflation environment, where default rates may deviate from historical patterns. Identify any potential limitations in the model's ability to provide reliable default probability estimates under stagflation conditions.",
            "examples": [
                "Analyze how well the model's default probability estimates would align with realized default rates during a stagflation scenario, where the relationship between firm characteristics and default risk may change",
                "Evaluate the model's performance in accurately predicting default rates for different industry sectors that may be impacted differently by stagflation",
                "Assess the model's ability to provide accurate default probability estimates at different risk levels (e.g., high-risk, low-risk credits) during a stagflation environment"
            ]
        },
        {
            "description": "Evaluate the model's stress testing capabilities in a stagflation scenario",
            "instructions": "Review the discussion in Section 2.3 on the model's stress testing features, which allow users to assess a firm's sensitivity to default risk under different economic conditions. Assess the model's ability to provide meaningful stress test results for a stagflation environment, where both systematic market factors and firm-specific factors may be significantly impacted.",
            "examples": [
                "Analyze the model's capability to simulate a firm's default probability under different stagflation scenarios, such as varying degrees of economic growth, inflation, and industry-specific effects",
                "Evaluate the model's ability to differentiate the impact of systematic market risks versus firm-specific risks on default probability during a stagflation stress test",
                "Assess the model's stress testing results for firms in industries that may be particularly vulnerable to the unique challenges of a stagflation environment"
            ]
        },
        {
            "description": "Review the model's data quality management processes in the context of a stagflation environment",
            "instructions": "Examine the model's data quality management techniques discussed in Section 3.4.1, such as the use of Benford's Law and the detection of misclassification errors. Assess whether these techniques would be effective in identifying and addressing potential data quality issues that may arise during a stagflation scenario, where accounting practices and data reporting may be distorted.",
            "examples": [
                "Evaluate the model's ability to detect potential rounding errors or other accounting anomalies in financial statements that may become more prevalent during a stagflation environment",
                "Analyze the model's effectiveness in identifying and correcting misclassification errors in the identification of default events, which may be more challenging to capture during periods of economic stress",
                "Assess the model's robustness to changes in data quality and reporting practices that may occur during a stagflation scenario"
            ]
        },
        {
            "description": "Assess the model's validation and performance testing in the context of a stagflation environment",
            "instructions": "Review the comprehensive validation approach described in Section 4, including out-of-sample testing, walk-forward analysis, and the use of a pure holdout sample. Evaluate whether these validation techniques would adequately test the model's performance and stability in a stagflation scenario, where the relationship between firm characteristics and default risk may be different from historical patterns.",
            "examples": [
                "Analyze the model's performance on the holdout sample in Section 4.3, and assess whether the results would be representative of the model's behavior during a stagflation environment",
                "Evaluate the model's stability and consistency across the various out-of-sample and out-of-time testing approaches described, and identify any potential vulnerabilities to the unique characteristics of a stagflation scenario",
                "Assess the comprehensiveness of the model's validation in capturing the potential impact of systematic and idiosyncratic factors on default risk during a stagflation environment"
            ]
        }
    ]
}
```

In [143]:
def generate_compliance_tasks(question, document, model="anthropic.claude-3-haiku-20240307-v1:0"):
    message = f"""
    Generate a comprehensive list of tasks to be used to analyze model whitepare compiance with provided AB guidance. Each task includes short description, detailed instructions and list of examples to answer this compliance question: {question}.
    Be as detailed as possible. Number of identified tasks should ensure comprehensive analysis.
    Your response should be a valid json object and nothing else. It should pass json validation when creating loading response into json object using joson.loads python funciton.
    """
    example = """
      Example:
    {[
        {
         'description': 'task desciption',
         'insturctions': 'task instructions',
         'examples': ['example', 'example',...]
        },
      ...
     ]}
    """
    guidance = f"""
    <guidnace>
    {document}
    </guidnace>
    """

    tasks = a_client.messages.create(
        model=model,
        system= guidance,
        max_tokens=3000,
        temperature=0,
        messages=[
            {
                "role": "user",
                "content": message + example,
            }
        ])
    print(tasks.content[0].text)
    return json.loads(tasks.content[0].text)

In [144]:
qq = ['Assess model whitepaper for compliance with AB guidance',
      'Assess model whitepaper for compliance with AB guidance requirements for model documentation']

for i, q in enumerate(qq):
    tasks = generate_compliance_tasks(q, ab_guidance)#, model= 'anthropic.claude-3-sonnet-20240229-v1:0')
    title = (f"## {q}")
    display(Markdown(title))
    headers = ['Task', 'Instructions', 'Examples']
    data = []
    for task in tasks['tasks']:
        data.append([task['description'], task['instrucitons'], task['examples']])
    #display(Markdown(f"```json\n{json.dumps(tasks)}\n```"))
    display(Markdown(tabulate(data, headers=headers, tablefmt='pipe')))


{
    "tasks": [
        {
            "description": "Assess model inventory management",
            "instructions": "Review the model whitepaper to ensure the following:
- The entity maintains a comprehensive inventory of all models, including internally developed models, vendor models, and models shared with other regulated entities.
- The inventory includes key attributes of each model such as its use, purpose, classification, owner, governance committee, last update, and validation schedule.
- For large, complex enterprises, the inventory also includes major assumptions, key sensitivities, performance thresholds, and significant adjustments for each model.
- The inventory is updated at least quarterly.",
            "examples": [
                "Verify the model inventory includes all models that affect risk management, business decisions, and financial statements and disclosures.",
                "Ensure the inventory captures the version number of vendor models in use as well

JSONDecodeError: Invalid control character at: line 5 column 82 (char 172)