# Validate Analysis Results

This notebook compares analysis outputs between a **production run** (baseline) and a **test run** to validate that the test produced expected results.

**Endpoints compared:**
- Statistics (`/stats`)
- EP Metrics (`/ep`)
- Event Loss Table (`/elt`)
- Period Loss Table (`/plt`) - HD analyses only; skipped unless `INCLUDE_PLT` is set to `True` in the Configuration section

## 1. Setup & Imports

In [1]:
%load_ext autoreload
%autoreload 2

import math
from typing import Dict, List, Any, Tuple, Optional
from dataclasses import dataclass
from helpers.irp_integration import IRPClient

# Single shared API client; every fetch in the cells below goes through it.
irp_client = IRPClient()
print("Setup complete.")

Setup complete.


## 2. Configuration

Enter the **appAnalysisId** values for the production and test analyses to compare.

The `appAnalysisId` is the ID shown in the Moody's RiskModeler UI (e.g., 35810).

In [2]:
# Analysis IDs to compare.
# These are appAnalysisId values; they are resolved to analysisId and
# exposureResourceId in the "Fetch Analysis Metadata" cell.
PRODUCTION_APP_ANALYSIS_ID = 35839  # Replace with your production analysis ID
TEST_APP_ANALYSIS_ID = 35662        # Replace with your test analysis ID

# Perspective code: 'GR' (Gross), 'GU' (Ground-Up), 'RL' (Reinsurance Layer)
# The same perspective is passed to every endpoint fetch for both analyses.
PERSPECTIVE_CODE = 'GU'

# Include PLT comparison? (only for HD analyses)
# When False the PLT endpoint is neither fetched nor reported.
INCLUDE_PLT = False

# Comparison settings
# RELATIVE_TOLERANCE is forwarded as `rel_tol` to the numeric comparison
# (math.isclose); no absolute tolerance is applied.
RELATIVE_TOLERANCE = 1e-9  # For floating-point comparison
# Caps how many entries of each kind (missing / extra / differing) are printed
# in the detailed report; the totals are still shown.
MAX_DIFFERENCES_TO_SHOW = 50  # Limit output for large datasets

## 3. Comparison Helper Functions

In [3]:
# Fields to IGNORE when comparing (metadata, not analysis results)
# Only consulted when no allowlist is supplied to compare_records; an
# allowlist takes precedence and this set is then not applied.
IGNORED_FIELDS = {
    'analysisId',
    'jobId', 
    'uri',
    'exposureResourceId',
    'exposureResourceType',
    'perspectiveCode',
    'appAnalysisId',
    'createdAt',
    'modifiedAt',
    'createdBy',
    'modifiedBy',
}

# Meaningful fields to compare per endpoint (allowlist approach)
# If empty, compare all fields except IGNORED_FIELDS
# NOTE: records are read with dict.get(), so an allowlisted field that is
# absent from BOTH records reads as None on each side and counts as a match.
# NOTE(review): the field names below are not validated against the API
# payloads in this notebook - confirm they match the actual response keys,
# otherwise a comparison can pass without checking anything.

# Allowlist used for the ELT comparison (records matched on 'eventId').
ELT_FIELDS = {
    'eventId',
    'positionValue',
    'stdDevI', 
    'stdDevC',
    'exposureValue',
    'eventRate',
}

# Allowlist used for the EP Metrics comparison (records matched by position).
EP_FIELDS = {
    'epType',           # OEP, AEP, CEP, TCE-OEP, TCE-AEP
    'returnPeriod',
    'loss',
    'lossValue',
    'probability',
}

# Allowlist used for the Statistics comparison (records matched by position).
STATS_FIELDS = {
    'metricCode',
    'metricName', 
    'value',
    'stdDev',
    'mean',
    'aep',
    'oep',
    'aal',              # Average Annual Loss
    'stdDevAal',
    'cv',               # Coefficient of Variation
}

# Allowlist used for the optional PLT comparison (records matched on 'eventId').
PLT_FIELDS = {
    'eventId',
    'eventDate',
    'lossDate',
    'loss',
    'lossValue',
    'positionValue',
}


@dataclass
class ComparisonResult:
    """Outcome of comparing one endpoint's production data against test data.

    Produced by compare_datasets / compare_by_index, and also built directly
    by the fetch cell when an endpoint call raises.
    """
    # Display name of the endpoint (e.g. 'Statistics', 'ELT').
    endpoint: str
    # True only when no differences, missing or extra records were found.
    passed: bool
    # Number of records received from the production analysis.
    total_records_prod: int
    # Number of records received from the test analysis.
    total_records_test: int
    # One entry per differing record: {'key': ..., 'differences': [...]},
    # where each inner item has 'field', 'prod_value' and 'test_value'.
    differences: List[Dict[str, Any]]
    # Key values present in production but absent from test.
    missing_in_test: List[Any]
    # Key values present in test but absent from production.
    extra_in_test: List[Any]
    # Human-readable failure reason (fetch error or record-count mismatch);
    # None when the comparison itself ran normally.
    error: Optional[str] = None


def values_match(a: Any, b: Any, rel_tol: float = 1e-9) -> bool:
    """Return True when two field values should be treated as equal.

    Rules, in order:
      * ``None`` matches only ``None``.
      * Two numbers (int or float) match when ``math.isclose`` accepts them
        under ``rel_tol``. Two NaN values are treated as a match, because the
        same "not a number" result in both runs is not a regression; NaN
        against any real number is a mismatch.
      * Anything else falls back to ``==``.

    Args:
        a: Value from the production record.
        b: Value from the test record.
        rel_tol: Relative tolerance passed to ``math.isclose``. No absolute
            tolerance is used, so a value is only "close" to zero when it is
            exactly zero.

    Returns:
        True if the values match under the rules above, else False.
    """
    if a is None or b is None:
        # Equal only when both sides are missing.
        return a is None and b is None

    if isinstance(a, (int, float)) and isinstance(b, (int, float)):
        # math.isclose(nan, nan) is False, which would flag identical NaN
        # results as a difference; handle NaN explicitly. The isinstance guard
        # avoids converting arbitrarily large ints to float just to test them.
        a_is_nan = isinstance(a, float) and math.isnan(a)
        b_is_nan = isinstance(b, float) and math.isnan(b)
        if a_is_nan or b_is_nan:
            return a_is_nan and b_is_nan
        # Exact equality (including 0 == 0 and matching infinities) is
        # accepted by isclose itself, so no special case is needed.
        return math.isclose(a, b, rel_tol=rel_tol)

    return a == b


def compare_records(
    prod_record: Dict[str, Any],
    test_record: Dict[str, Any],
    key_field: str,
    fields_to_compare: Optional[set] = None,
    rel_tol: float = 1e-9
) -> List[Dict[str, Any]]:
    """Compare two records field by field and return the mismatches.

    Args:
        prod_record: Production record.
        test_record: Test record.
        key_field: Field used as the record key; never compared.
        fields_to_compare: Allowlist of field names. When given and non-empty,
            only these fields are compared. When None or empty, every field
            found in either record is compared, except IGNORED_FIELDS.
            Any iterable of names is accepted.
        rel_tol: Relative tolerance for numeric comparison.

    Returns:
        A list of ``{'field', 'prod_value', 'test_value'}`` dicts, one per
        mismatching field, ordered by field name. Empty when the records agree.

    Note:
        Values are read with ``dict.get``, so a field missing from a record is
        seen as None. A field missing from both records therefore matches.
    """
    # Determine which fields to compare
    if fields_to_compare:
        # Allowlist: only the named fields, minus the key itself.
        candidate_fields = set(fields_to_compare) - {key_field}
    else:
        # No allowlist: union of both records' fields, minus metadata and key.
        candidate_fields = (
            (set(prod_record.keys()) | set(test_record.keys()))
            - IGNORED_FIELDS
            - {key_field}
        )

    differences = []
    # Iterate in sorted order so the report is identical from run to run;
    # plain set iteration order for strings varies between kernel sessions.
    # key=str keeps sorting safe even if a record has non-string field names.
    for field in sorted(candidate_fields, key=str):
        prod_val = prod_record.get(field)
        test_val = test_record.get(field)

        if not values_match(prod_val, test_val, rel_tol):
            differences.append({
                'field': field,
                'prod_value': prod_val,
                'test_value': test_val
            })

    return differences


def compare_datasets(
    prod_data: List[Dict[str, Any]],
    test_data: List[Dict[str, Any]],
    key_field: str,
    endpoint_name: str,
    fields_to_compare: Optional[set] = None,
    rel_tol: float = 1e-9
) -> ComparisonResult:
    """Compare two datasets by matching records on ``key_field``.

    Args:
        prod_data: Records from the production analysis.
        test_data: Records from the test analysis.
        key_field: Name of the field whose value identifies a record.
        endpoint_name: Label stored on the result and used in warnings.
        fields_to_compare: Optional allowlist forwarded to compare_records.
        rel_tol: Relative tolerance forwarded to compare_records.

    Returns:
        A ComparisonResult that passes only when both datasets contain the
        same key values and every shared record matches. Missing/extra keys
        and per-record differences are listed in sorted key order.

    Warns:
        UserWarning: when several records in one dataset share a key value
            (records lacking ``key_field`` all share the key None). Only the
            last record per key is compared, so the result may not cover
            every row.
    """
    import warnings  # local import keeps this helper self-contained

    def _index_by_key(records: List[Dict[str, Any]], label: str) -> Dict[Any, Dict[str, Any]]:
        """Build a key -> record lookup, warning if keys are not unique."""
        by_key: Dict[Any, Dict[str, Any]] = {}
        collisions = 0
        for record in records:
            key = record.get(key_field)
            if key in by_key:
                collisions += 1
            by_key[key] = record  # last occurrence wins
        if collisions:
            warnings.warn(
                f"{endpoint_name}: {collisions} {label} record(s) share a "
                f"'{key_field}' value with an earlier record; only the last "
                f"record per key is compared.",
                stacklevel=3,
            )
        return by_key

    def _stable_order(keys) -> List[Any]:
        """Sort keys naturally, falling back to repr() for mixed types."""
        try:
            return sorted(keys)
        except TypeError:
            return sorted(keys, key=repr)

    # Build lookup dictionaries
    prod_by_key = _index_by_key(prod_data, 'production')
    test_by_key = _index_by_key(test_data, 'test')

    prod_keys = set(prod_by_key)
    test_keys = set(test_by_key)

    # Find missing/extra records (sorted so the report is reproducible)
    missing_in_test = _stable_order(prod_keys - test_keys)
    extra_in_test = _stable_order(test_keys - prod_keys)

    # Compare records present on both sides
    all_differences = []
    for key in _stable_order(prod_keys & test_keys):
        diffs = compare_records(
            prod_by_key[key],
            test_by_key[key],
            key_field,
            fields_to_compare,
            rel_tol
        )
        if diffs:
            all_differences.append({
                'key': key,
                'differences': diffs
            })

    passed = not (missing_in_test or extra_in_test or all_differences)

    return ComparisonResult(
        endpoint=endpoint_name,
        passed=passed,
        total_records_prod=len(prod_data),
        total_records_test=len(test_data),
        differences=all_differences,
        missing_in_test=missing_in_test,
        extra_in_test=extra_in_test
    )


def compare_by_index(
    prod_data: List[Dict[str, Any]],
    test_data: List[Dict[str, Any]],
    endpoint_name: str,
    fields_to_compare: set = None,
    rel_tol: float = 1e-9
) -> ComparisonResult:
    """Compare two datasets pairwise by list position.

    Intended for endpoints whose records carry no unique key (stats / EP).
    Record i of production is compared with record i of test, so the result
    depends on both lists being in the same order.

    If the lists differ in length nothing is compared: the result fails with
    an ``error`` describing the count mismatch and an empty differences list.
    Otherwise each differing pair is reported under the key ``record_<i>``.
    """
    if len(prod_data) != len(test_data):
        # Positional pairing is meaningless when the lengths differ.
        failure_reason = f"Record count mismatch: prod={len(prod_data)}, test={len(test_data)}"
        record_diffs = []
        is_match = False
    else:
        failure_reason = None
        # '_index_' is a placeholder key name: there is no real key field to
        # exclude, so every requested field takes part in the comparison.
        per_position = (
            (
                position,
                compare_records(
                    prod_rec,
                    test_rec,
                    key_field='_index_',
                    fields_to_compare=fields_to_compare,
                    rel_tol=rel_tol
                ),
            )
            for position, (prod_rec, test_rec) in enumerate(zip(prod_data, test_data))
        )
        record_diffs = [
            {'key': f'record_{position}', 'differences': field_diffs}
            for position, field_diffs in per_position
            if field_diffs
        ]
        is_match = not record_diffs

    return ComparisonResult(
        endpoint=endpoint_name,
        passed=is_match,
        total_records_prod=len(prod_data),
        total_records_test=len(test_data),
        differences=record_diffs,
        missing_in_test=[],
        extra_in_test=[],
        error=failure_reason
    )


# Confirm the cell ran and echo the metadata fields that are skipped when a
# comparison runs without an allowlist.
print("Helper functions loaded.")
print(f"Ignoring metadata fields: {sorted(IGNORED_FIELDS)}")

Helper functions loaded.
Ignoring metadata fields: ['analysisId', 'appAnalysisId', 'createdAt', 'createdBy', 'exposureResourceId', 'exposureResourceType', 'jobId', 'modifiedAt', 'modifiedBy', 'perspectiveCode', 'uri']


## 4. Fetch Analysis Metadata

In [4]:
def _fetch_and_report(header: str, app_analysis_id: int) -> Dict[str, Any]:
    """Look up one analysis by appAnalysisId and print its identifying fields.

    Returns the analysis record so the caller can keep the IDs that the
    result endpoints need.
    """
    analysis = irp_client.analysis.get_analysis_by_app_analysis_id(app_analysis_id)
    # Read both IDs before printing so a malformed record fails up front.
    analysis_id = analysis['analysisId']
    exposure_resource_id = analysis['exposureResourceId']
    print(header)
    print(f"  appAnalysisId: {app_analysis_id}")
    print(f"  analysisId: {analysis_id}")
    print(f"  analysisName: {analysis['analysisName']}")
    print(f"  exposureResourceId: {exposure_resource_id}")
    print()
    return analysis


print("Fetching analysis metadata...")
print()

# Production (baseline) analysis
prod_analysis = _fetch_and_report("Production Analysis:", PRODUCTION_APP_ANALYSIS_ID)
prod_analysis_id = prod_analysis['analysisId']
prod_exposure_resource_id = prod_analysis['exposureResourceId']

# Test analysis
test_analysis = _fetch_and_report("Test Analysis:", TEST_APP_ANALYSIS_ID)
test_analysis_id = test_analysis['analysisId']
test_exposure_resource_id = test_analysis['exposureResourceId']

print(f"Perspective: {PERSPECTIVE_CODE}")

Fetching analysis metadata...

Production Analysis:
  appAnalysisId: 35839
  analysisId: 3428895
  analysisName: RM_EDM_202511_Quarterly_USFL: PORTFOLIO: USFL_Other_Other
  exposureResourceId: 8

Test Analysis:
  appAnalysisId: 35662
  analysisId: 3405266
  analysisName: USFL_Other_Other_LT
  exposureResourceId: 8

Perspective: GU


## 5. Fetch and Compare Results

In [5]:
def _failed_result(endpoint: str, exc: Exception) -> ComparisonResult:
    """Build the failing result recorded when an endpoint fetch/compare raises."""
    return ComparisonResult(
        endpoint=endpoint,
        passed=False,
        total_records_prod=0,
        total_records_test=0,
        differences=[],
        missing_in_test=[],
        extra_in_test=[],
        error=str(exc),
    )


# One ComparisonResult per endpoint, in the order the endpoints are processed.
# A failure on one endpoint is recorded and does not stop the others.
results: List[ComparisonResult] = []

# --- Statistics (no unique key: matched by position) ---
print("Fetching Statistics...")
try:
    prod_stats = irp_client.analysis.get_stats(prod_analysis_id, PERSPECTIVE_CODE, prod_exposure_resource_id)
    test_stats = irp_client.analysis.get_stats(test_analysis_id, PERSPECTIVE_CODE, test_exposure_resource_id)
    stats_result = compare_by_index(prod_stats, test_stats, 'Statistics', STATS_FIELDS, RELATIVE_TOLERANCE)
    results.append(stats_result)
    print(f"  Production: {len(prod_stats)} records")
    print(f"  Test: {len(test_stats)} records")
except Exception as exc:
    results.append(_failed_result('Statistics', exc))
    print(f"  Error: {exc}")

# --- EP Metrics (no unique key: matched by position) ---
print("\nFetching EP Metrics...")
try:
    prod_ep = irp_client.analysis.get_ep(prod_analysis_id, PERSPECTIVE_CODE, prod_exposure_resource_id)
    test_ep = irp_client.analysis.get_ep(test_analysis_id, PERSPECTIVE_CODE, test_exposure_resource_id)
    ep_result = compare_by_index(prod_ep, test_ep, 'EP Metrics', EP_FIELDS, RELATIVE_TOLERANCE)
    results.append(ep_result)
    print(f"  Production: {len(prod_ep)} records")
    print(f"  Test: {len(test_ep)} records")
except Exception as exc:
    results.append(_failed_result('EP Metrics', exc))
    print(f"  Error: {exc}")

# --- ELT (matched on eventId) ---
print("\nFetching ELT...")
try:
    prod_elt = irp_client.analysis.get_elt(prod_analysis_id, PERSPECTIVE_CODE, prod_exposure_resource_id)
    test_elt = irp_client.analysis.get_elt(test_analysis_id, PERSPECTIVE_CODE, test_exposure_resource_id)
    elt_result = compare_datasets(prod_elt, test_elt, 'eventId', 'ELT', ELT_FIELDS, RELATIVE_TOLERANCE)
    results.append(elt_result)
    print(f"  Production: {len(prod_elt)} events")
    print(f"  Test: {len(test_elt)} events")
except Exception as exc:
    results.append(_failed_result('ELT', exc))
    print(f"  Error: {exc}")

# --- PLT (optional, HD only; matched on eventId) ---
if INCLUDE_PLT:
    print("\nFetching PLT...")
    try:
        prod_plt = irp_client.analysis.get_plt(prod_analysis_id, PERSPECTIVE_CODE, prod_exposure_resource_id)
        test_plt = irp_client.analysis.get_plt(test_analysis_id, PERSPECTIVE_CODE, test_exposure_resource_id)
        plt_result = compare_datasets(prod_plt, test_plt, 'eventId', 'PLT', PLT_FIELDS, RELATIVE_TOLERANCE)
        results.append(plt_result)
        print(f"  Production: {len(prod_plt)} events")
        print(f"  Test: {len(test_plt)} events")
    except Exception as exc:
        results.append(_failed_result('PLT', exc))
        print(f"  Error: {exc}")

print("\nComparison complete.")

Fetching Statistics...
  Production: 1 records
  Test: 1 records

Fetching EP Metrics...
  Production: 4 records
  Test: 4 records

Fetching ELT...
  Production: 100 events
  Test: 100 events

Comparison complete.


## 6. Validation Results

In [6]:
# Header: which analyses and perspective this report covers.
print("=" * 60)
print("ANALYSIS VALIDATION RESULTS")
print("=" * 60)
print()
print(f"Production Analysis ID: {PRODUCTION_APP_ANALYSIS_ID}")
print(f"Test Analysis ID:       {TEST_APP_ANALYSIS_ID}")
print(f"Perspective:            {PERSPECTIVE_CODE}")
print()
print("-" * 60)
print("Endpoint Results:")
print("-" * 60)

# One summary line per endpoint; the run passes only if every endpoint does.
overall_pass = True
for result in results:
    if result.passed:
        status, icon = "PASS", "[OK]"
    else:
        status, icon = "FAIL", "[X]"
        overall_pass = False

    if result.error:
        # An error message replaces the per-category counts.
        details = f" (Error: {result.error})"
    elif result.passed:
        details = ""
    else:
        # List only the categories that actually contain entries.
        categories = (
            (result.differences, "value differences"),
            (result.missing_in_test, "missing in test"),
            (result.extra_in_test, "extra in test"),
        )
        issues = [f"{len(entries)} {label}" for entries, label in categories if entries]
        details = f" ({', '.join(issues)})"

    print(f"  {icon} {result.endpoint}: {status}{details}")

print()
print("=" * 60)
if overall_pass:
    overall_status = "PASS"
else:
    overall_status = "FAIL"
print(f"OVERALL: {overall_status}")
print("=" * 60)

ANALYSIS VALIDATION RESULTS

Production Analysis ID: 35839
Test Analysis ID:       35662
Perspective:            GU

------------------------------------------------------------
Endpoint Results:
------------------------------------------------------------
  [X] Statistics: FAIL (1 value differences)
  [OK] EP Metrics: PASS
  [OK] ELT: PASS

OVERALL: FAIL


## 7. Detailed Differences (if any)

In [7]:
# Print the detail behind every failing endpoint. Each category is truncated
# to MAX_DIFFERENCES_TO_SHOW entries, with a trailing count of what was cut.
for result in results:
    if result.passed:
        continue

    print()
    print("=" * 60)
    print(f"{result.endpoint} DIFFERENCES")
    print("=" * 60)

    if result.error:
        # An errored endpoint carries no record-level detail to show.
        print(f"\nError: {result.error}")
        continue

    # Keys only production has
    missing_keys = result.missing_in_test
    if missing_keys:
        print(f"\nRecords in PRODUCTION but missing in TEST ({len(missing_keys)} total):")
        for key in missing_keys[:MAX_DIFFERENCES_TO_SHOW]:
            print(f"  - {key}")
        hidden = len(missing_keys) - MAX_DIFFERENCES_TO_SHOW
        if hidden > 0:
            print(f"  ... and {hidden} more")

    # Keys only test has
    extra_keys = result.extra_in_test
    if extra_keys:
        print(f"\nRecords in TEST but not in PRODUCTION ({len(extra_keys)} total):")
        for key in extra_keys[:MAX_DIFFERENCES_TO_SHOW]:
            print(f"  - {key}")
        hidden = len(extra_keys) - MAX_DIFFERENCES_TO_SHOW
        if hidden > 0:
            print(f"  ... and {hidden} more")

    # Records present on both sides whose field values disagree
    differing = result.differences
    if differing:
        print(f"\nValue differences ({len(differing)} records with differences):")
        for diff in differing[:MAX_DIFFERENCES_TO_SHOW]:
            print(f"\n  Key: {diff['key']}")
            for field_diff in diff['differences']:
                print(f"    {field_diff['field']}:")
                print(f"      prod: {field_diff['prod_value']}")
                print(f"      test: {field_diff['test_value']}")
        hidden = len(differing) - MAX_DIFFERENCES_TO_SHOW
        if hidden > 0:
            print(f"\n  ... and {hidden} more records with differences")

# overall_pass is set by the Validation Results cell above.
if overall_pass:
    print("\nNo differences found - all endpoints match!")


Statistics DIFFERENCES

Value differences (1 records with differences):

  Key: record_0
    cv:
      prod: 2.794750131263206
      test: 2.9412708724306715
