# Check MRI Mapping Attributes

This notebook checks MRI import files (Account and Location CSVs) against a mapping.json file to identify any **missing attributes** in the mapping.

**What it does:**
- Reads the CSV headers from Account and Location import files
- Compares them against the `source` entries in the mapping.json file
- Reports any CSV columns that are **not mapped** (missing from mapping)

**This is a read-only operation** - it does not modify any files. Use this to preview what changes would be needed before running an MRI import.

**Note:** During actual MRI import submission, missing attributes are automatically added to the mapping file. This notebook lets you check beforehand.

## 1. Setup

In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

import os
from pathlib import Path
# MRIImportManager exposes check_missing_mapping_attributes(), which the
# check cells later in this notebook call.
from helpers.irp_integration.mri_import import MRIImportManager

print("Setup complete.")

Setup complete.


## 2. Configuration

Specify the paths to your import files and mapping file.

**Options:**
1. **Use active cycle files** - Files are resolved from the active cycle's data and mapping directories (`files/data/` and `files/mapping/` in the run shown below)
2. **Specify paths directly** - Provide full paths to each file

In [2]:
# === Option 1: Use active cycle directories ===
# Set USE_ACTIVE_CYCLE = True and specify just the filenames.
# The filenames are joined onto the directories reported for the active cycle.

USE_ACTIVE_CYCLE = True  # True: Option 1 (active cycle directories); False: Option 2 (full paths below)

# Filenames (used with Option 1 only; ignored when USE_ACTIVE_CYCLE is False)
ACCOUNTS_FILE_NAME = "Modeling_202503_Moodys_USOW_Account.csv"
LOCATIONS_FILE_NAME = "Modeling_202503_Moodys_USOW_Location.csv"
MAPPING_FILE_NAME = "mapping.json"

# === Option 2: Specify full paths directly ===
# Set USE_ACTIVE_CYCLE = False and specify full paths below.
# These values are used exactly as written; they are ignored when
# USE_ACTIVE_CYCLE is True.

ACCOUNTS_FILE_PATH = "/home/jovyan/workspace/workflows/_Tools/files/working_files/Modeling_202511_Moodys_Quarterly_USEQ_Account.csv"
LOCATIONS_FILE_PATH = "/home/jovyan/workspace/workflows/_Tools/files/working_files/Modeling_202511_Moodys_Quarterly_USEQ_Location.csv"
MAPPING_FILE_PATH = "/home/jovyan/workspace/workflows/_Template/files/mapping/mapping.json"

In [3]:
# Resolve the three input paths (accounts CSV, locations CSV, mapping JSON)
# from the configuration cell, print them, and stop with FileNotFoundError if
# any of them is not an existing file.
if USE_ACTIVE_CYCLE:
    # Imported here because it is only needed for Option 1.
    from helpers.irp_integration.utils import get_cycle_file_directories

    dirs = get_cycle_file_directories()
    accounts_path = os.path.join(dirs['data'], ACCOUNTS_FILE_NAME)
    locations_path = os.path.join(dirs['data'], LOCATIONS_FILE_NAME)
    mapping_path = os.path.join(dirs['mapping'], MAPPING_FILE_NAME)

    print("Using active cycle directories:")
    print(f"  Data directory: {dirs['data']}")
    print(f"  Mapping directory: {dirs['mapping']}")
else:
    accounts_path = ACCOUNTS_FILE_PATH
    locations_path = LOCATIONS_FILE_PATH
    mapping_path = MAPPING_FILE_PATH

    print("Using specified file paths.")

print()
print("Files to check:")
print(f"  Accounts:  {accounts_path}")
print(f"  Locations: {locations_path}")
print(f"  Mapping:   {mapping_path}")

# Validate that every input is an existing regular file.
# os.path.isfile is used instead of os.path.exists because a directory also
# "exists": with an empty filename os.path.join() returns the directory itself,
# which would pass an exists() check and only fail later inside the mapping
# check with a less helpful error.
missing_files = [
    f"{name}: {path}"
    for path, name in [
        (accounts_path, "Accounts"),
        (locations_path, "Locations"),
        (mapping_path, "Mapping"),
    ]
    if not os.path.isfile(path)
]

if missing_files:
    print()
    print("ERROR: The following files were not found:")
    for f in missing_files:
        print(f"  - {f}")
    raise FileNotFoundError("One or more files not found. Please check the paths.")
else:
    print()
    print("All files found.")

Using active cycle directories:
  Data directory: /home/jovyan/workspace/workflows/Active_Test-USEQ/files/data
  Mapping directory: /home/jovyan/workspace/workflows/Active_Test-USEQ/files/mapping

Files to check:
  Accounts:  /home/jovyan/workspace/workflows/Active_Test-USEQ/files/data/Modeling_202503_Moodys_USOW_Account.csv
  Locations: /home/jovyan/workspace/workflows/Active_Test-USEQ/files/data/Modeling_202503_Moodys_USOW_Location.csv
  Mapping:   /home/jovyan/workspace/workflows/Active_Test-USEQ/files/mapping/mapping.json

All files found.


## 3. Check Missing Attributes

Run the check to see which CSV columns are missing from the mapping file.

In [4]:
# Compare the headers of both import CSVs against the mapping file, then print
# a report: file names, one section per import file, and an overall verdict.
result = MRIImportManager.check_missing_mapping_attributes(
    accounts_file_path=accounts_path,
    locations_file_path=locations_path,
    mapping_file_path=mapping_path,
)

heavy_rule = "="*60
light_rule = "-"*60

print(heavy_rule)
print("MAPPING ATTRIBUTE CHECK RESULTS")
print(heavy_rule)
print()
print(f"Accounts file:  {result['accounts_file']}")
print(f"Locations file: {result['locations_file']}")
print(f"Mapping file:   {result['mapping_file']}")
print()

# One report section per import file. Each entry holds:
# (banner, key of all headers, key of missing attributes,
#  heading shown when something is missing, message shown when nothing is).
report_sections = (
    (
        "ACCOUNT FILE",
        'account_headers',
        'missing_account_attributes',
        "Missing account attributes:",
        "All account columns are mapped.",
    ),
    (
        "LOCATION FILE",
        'location_headers',
        'missing_location_attributes',
        "Missing location attributes:",
        "All location columns are mapped.",
    ),
)

for banner, headers_key, missing_key, missing_heading, all_mapped_message in report_sections:
    print(light_rule)
    print(banner)
    print(light_rule)
    print(f"Total columns in CSV: {len(result[headers_key])}")
    print(f"Missing from mapping: {len(result[missing_key])}")

    if result[missing_key]:
        print()
        print(missing_heading)
        # Sorted so the listing is stable between runs.
        for attr in sorted(result[missing_key]):
            print(f"  - {attr}")
    else:
        print()
        print(all_mapped_message)

    print()

print(heavy_rule)

# Overall verdict across both files.
if not result['has_missing']:
    print("RESULT: All attributes are mapped. No changes needed.")
else:
    total_missing = len(result['missing_account_attributes']) + len(result['missing_location_attributes'])
    print(f"RESULT: {total_missing} attribute(s) missing from mapping")
    print()
    print("These attributes will be automatically added during MRI import.")
    print("Each will be mapped as: SOURCE -> SOURCE (same name)")

print(heavy_rule)

MAPPING ATTRIBUTE CHECK RESULTS

Accounts file:  Modeling_202503_Moodys_USOW_Account.csv
Locations file: Modeling_202503_Moodys_USOW_Location.csv
Mapping file:   mapping.json

------------------------------------------------------------
ACCOUNT FILE
------------------------------------------------------------
Total columns in CSV: 47
Missing from mapping: 1

Missing account attributes:
  - ACCGRPNAME

------------------------------------------------------------
LOCATION FILE
------------------------------------------------------------
Total columns in CSV: 72
Missing from mapping: 0

All location columns are mapped.

RESULT: 1 attribute(s) missing from mapping

These attributes will be automatically added during MRI import.
Each will be mapped as: SOURCE -> SOURCE (same name)


## 4. View All Headers (Optional)

View all CSV column headers for reference.

In [5]:
# List every account CSV column with its 1-based position. Columns the check
# reported as absent from the mapping are tagged "[MISSING]".
print(f"Account CSV Headers ({len(result['account_headers'])} columns):")
print("-"*40)
for i, header in enumerate(result['account_headers'], start=1):
    if header in result['missing_account_attributes']:
        status = "[MISSING]"
    else:
        status = ""
    print(f"{i:3}. {header} {status}")

Account CSV Headers (47 columns):
----------------------------------------
  1. ACCNTNUM 
  2. ACCNTNAME 
  3. ACCGRPNAME [MISSING]
  4. UWRITRNAME 
  5. PRODID 
  6. BRANCHNAME 
  7. PRODNAME 
  8. CEDANTID 
  9. CEDANTNAME 
 10. POLICYNUM 
 11. LOBNAME 
 12. INCEPTDATE 
 13. EXPIREDATE 
 14. UNDCOVAMT 
 15. UNDCOVCUR 
 16. PARTOF 
 17. PARTOFCUR 
 18. POLICYTYPE 
 19. POLICYSTRUCTURE 
 20. MINDEDAMT 
 21. MINDEDCUR 
 22. MAXDEDAMT 
 23. MAXDEDCUR 
 24. BLANDEDAMT 
 25. BLANDEDCUR 
 26. BLANLIMAMT 
 27. BLANLIMCUR 
 28. BLANPREAMT 
 29. BLANPRECUR 
 30. COMBINEDLIM 
 31. COMBINEDLCUR 
 32. COMBINEDDED 
 33. COMBINEDDCUR 
 34. COMBINEDPREM 
 35. COMBINEDPCUR 
 36. COVBASE 
 37. LIMITGU 
 38. USERDEF1 
 39. USERDEF2 
 40. USERDEF3 
 41. USERDEF4 
 42. USERTXT1 
 43. USERTXT2 
 44. POLICYUSERTXT1 
 45. POLICYUSERTXT2 
 46. POLICYUSERTXT3 
 47. POLICYUSERTXT4 


In [6]:
# List every location CSV column with its 1-based position. Columns the check
# reported as absent from the mapping are tagged "[MISSING]".
print(f"Location CSV Headers ({len(result['location_headers'])} columns):")
print("-"*40)
for i, header in enumerate(result['location_headers'], start=1):
    if header in result['missing_location_attributes']:
        status = "[MISSING]"
    else:
        status = ""
    print(f"{i:3}. {header} {status}")

Location CSV Headers (72 columns):
----------------------------------------
  1. ACCNTNUM 
  2. LOCNUM 
  3. LOCNAME 
  4. ADDRESSNUM 
  5. STREETNAME 
  6. DISTRICT 
  7. DSTRCTCODE 
  8. CITY 
  9. CITYCODE 
 10. STATE 
 11. STATECODE 
 12. POSTALCODE 
 13. COUNTY 
 14. COUNTYCODE 
 15. CRESTA 
 16. LATITUDE 
 17. LONGITUDE 
 18. FLOORAREA 
 19. AREAUNIT 
 20. CNTRYSCHEME 
 21. CNTRYCODE 
 22. NUMBLDGS 
 23. BLDGSCHEME 
 24. BLDGCLASS 
 25. OCCSCHEME 
 26. OCCTYPE 
 27. YEARBUILT 
 28. NUMSTORIES 
 29. TOCV4VAL 
 30. TOCV4VCUR 
 31. TOCV5VAL 
 32. TOCV5VCUR 
 33. TOCV6VAL 
 34. TOCV6VCUR 
 35. TOCV7VAL 
 36. TOCV7VCUR 
 37. TOCV4LIMIT 
 38. TOCV4LCUR 
 39. TOCV5LIMIT 
 40. TOCV5LCUR 
 41. TOCV6LIMIT 
 42. TOCV6LCUR 
 43. TOCV7LIMIT 
 44. TOCV7LCUR 
 45. TOCV4DED 
 46. TOCV4DCUR 
 47. TOCV5DED 
 48. TOCV5DCUR 
 49. TOCV6DED 
 50. TOCV6DCUR 
 51. TOCV7DED 
 52. TOCV7DCUR 
 53. TOSITELIM 
 54. TOSITELCUR 
 55. TOSITEDED 
 56. TOSITEDCUR 
 57. TOCOMBINEDLIM 
 58. TOCOMBINEDLCUR 
 59. TOC

## 5. Batch Check Multiple File Pairs (Optional)

Check multiple account/location file pairs against the same mapping file.

In [7]:
# File pairs for the optional batch check.
# Every entry is a 2-tuple: (accounts_file_path, locations_file_path).
FILE_PAIRS = [
    # Example file pairs - update with your actual files
    # ("/path/to/accounts1.csv", "/path/to/locations1.csv"),
    # ("/path/to/accounts2.csv", "/path/to/locations2.csv"),
]

# Every pair is checked against this single mapping file: the mapping path
# resolved earlier in the notebook.
BATCH_MAPPING_PATH = mapping_path

# Report how many pairs are configured (an empty list means the batch run is skipped).
if FILE_PAIRS:
    print(f"Configured {len(FILE_PAIRS)} file pair(s) to check.")
else:
    print("No file pairs configured. Add file pairs to FILE_PAIRS list above.")

No file pairs configured. Add file pairs to FILE_PAIRS list above.


In [8]:
# Run the mapping check for every configured file pair and show one summary
# row per pair (column counts, missing counts, status, error text).
if FILE_PAIRS:
    import pandas as pd

    batch_results = []

    # The loop variables are deliberately NOT named `accounts_path`,
    # `locations_path` or `result`: those notebook-level names hold the primary
    # check's inputs and output, and the "View All Headers" cells read `result`.
    # Rebinding them here would make those cells show the last batch pair if
    # they were re-run after this cell.
    for batch_accounts_path, batch_locations_path in FILE_PAIRS:
        try:
            pair_result = MRIImportManager.check_missing_mapping_attributes(
                mapping_file_path=BATCH_MAPPING_PATH,
                accounts_file_path=batch_accounts_path,
                locations_file_path=batch_locations_path
            )

            batch_results.append({
                'accounts_file': pair_result['accounts_file'],
                'locations_file': pair_result['locations_file'],
                'account_columns': len(pair_result['account_headers']),
                'missing_account': len(pair_result['missing_account_attributes']),
                'location_columns': len(pair_result['location_headers']),
                'missing_location': len(pair_result['missing_location_attributes']),
                'status': 'MISSING' if pair_result['has_missing'] else 'OK',
                'error': None
            })
        except Exception as e:
            # Best effort: record the failure for this pair and keep going so
            # one bad pair does not hide the results of the others.
            batch_results.append({
                'accounts_file': os.path.basename(batch_accounts_path),
                'locations_file': os.path.basename(batch_locations_path),
                'account_columns': 0,
                'missing_account': 0,
                'location_columns': 0,
                'missing_location': 0,
                'status': 'ERROR',
                'error': str(e)
            })

    # Display results as DataFrame (built once from the list of row dicts)
    batch_df = pd.DataFrame(batch_results)
    print("Batch Check Results:")
    print("="*80)
    display(batch_df)

    # Summary: number of pairs per status
    ok_count = sum(1 for row in batch_results if row['status'] == 'OK')
    missing_count = sum(1 for row in batch_results if row['status'] == 'MISSING')
    error_count = sum(1 for row in batch_results if row['status'] == 'ERROR')

    print()
    print(f"Summary: {ok_count} OK, {missing_count} with missing attributes, {error_count} errors")
else:
    print("Skipped - no file pairs configured.")

Skipped - no file pairs configured.
