In [0]:
# - Jason Votaw 2025
# You need to run these to install the required packages (only once)
# %pip install pandas openpyxl
# %pip install faker

import pandas as pd
import random
from faker import Faker
from datetime import datetime
from collections import defaultdict

# Seed BOTH random sources so repeated runs draw the same values.
# The stdlib `random` module drives the sex / HCC selection, while Faker draws
# names, middle initials and birth dates from its own generator, which
# random.seed() does not touch.
fake = Faker()
Faker.seed(42)
random.seed(42)

# CNA_HCC codes taken from HCC_LIST. Only the category numbers are spelled out
# here; the "CNA_HCC" prefix is attached when the list is built. The order of
# this list is the order in which the per-member HCC flags are emitted.
CNA_HCC_CODES = [
    f"CNA_HCC{number}"
    for number in (
        1, 2, 6, 17, 18, 19, 20,
        21, 22, 23, 35, 36, 37, 38,
        48, 49, 50, 51, 62, 63, 64,
        65, 68, 77, 78, 79, 80, 81,
        92, 93, 94, 107, 108, 109, 111,
        112, 114, 115, 125, 126, 127, 135,
        136, 137, 138, 139, 151, 152, 153,
        154, 155, 180, 181, 182, 190, 191,
        192, 193, 195, 196, 197, 198, 199,
        200, 201, 202, 211, 212, 213, 221,
        222, 223, 224, 225, 226, 227, 228,
        229, 238, 248, 249, 253, 254, 263,
        264, 267, 276, 277, 278, 279, 280,
        282, 283, 298, 300, 326, 327, 328,
        329, 379, 380, 381, 382, 383, 385,
        387, 397, 398, 399, 401, 402, 405,
        409, 454, 463,
    )
]

# Record-type marker written as the first character of every member record
# ('D' as shown in the PDF examples).
RECORD_TYPE = "D"

# Age group mapping (digits '1'-'7', one per five-year band starting at 65)
def get_age_group_digit(birth_date, current_year=2023):
    """Return the age-group digit ('1'-'7') for a birth date.

    Args:
        birth_date: date string whose first four characters are the birth
            year (e.g. 'YYYYMMDD'); month and day are ignored.
        current_year: reference year; age is ``current_year - birth year``.

    Returns:
        '1' for 65-69, '2' for 70-74, '3' for 75-79, '4' for 80-84,
        '5' for 85-89, '6' for 90-94, '7' for 95 and older. Any age that
        matches no band (e.g. under 65) falls back to '1'.
    """
    years_old = current_year - int(birth_date[:4])

    # (lowest age, highest age, digit) for each closed five-year band.
    bands = (
        (65, 69, '1'),
        (70, 74, '2'),
        (75, 79, '3'),
        (80, 84, '4'),
        (85, 89, '5'),
        (90, 94, '6'),
    )
    for lowest, highest, digit in bands:
        if lowest <= years_old <= highest:
            return digit

    if years_old >= 95:
        return '7'
    return '1'  # Default fallback

def get_sex_digit(sex):
    """Map a sex code to its indicator digit.

    Returns '1' when ``sex`` equals 'M'; every other value (including 'F')
    maps to '2'.
    """
    if sex == 'M':
        return '1'
    return '2'

def generate_mor_flat_records(num_members=100, year=2023, hcc_prevalence=0.3):
    """Build the lines of a mock MOR-style fixed-width flat file.

    The first returned line is a header record: "1" followed by a contract
    id, run date and payment month (hard-coded sample values). Every
    following line is one synthetic member record.

    Member record layout (field widths in characters, in order):
        record type (1), MBI (11), last name (12), first name (12),
        middle initial (1), date of birth YYYYMMDD (8), sex digit (1),
        age-group digit (1), demographic indicators (20, all '0'),
        HCC flags (one '0'/'1' per entry of CNA_HCC_CODES, in list order),
        trailing padding (30, all '0').

    Args:
        num_members: number of member records to generate.
        year: reference year handed to get_age_group_digit.
        hcc_prevalence: probability that a member carries any HCC at all;
            such members receive 1-3 distinct codes from CNA_HCC_CODES.

    Returns:
        list[str]: header line followed by ``num_members`` member lines,
        without line terminators.
    """
    # Sample header values (contract, run date, payment month).
    contract_id = "H1234"
    run_date = "20190407"
    payment_month = "201905"
    records = [f"1{contract_id}{run_date}{payment_month}"]

    # Constant filler fields, hoisted out of the per-member loop.
    demographic_filler = '0' * 20
    trailing_filler = '0' * 30

    for i in range(num_members):
        # "A" + sequence number (A111111111, A111111112, ...), left-justified
        # into the 11-character MBI field. Right-aligning the digits instead
        # would leave a blank in the middle of the identifier.
        mbi = f"A{i + 111111111}".ljust(11)[:11]
        sex = random.choice(["M", "F"])

        # Pick a first name matching the member's sex; names are upper-cased
        # and padded/truncated to exactly 12 characters.
        if sex == "M":
            first_name = fake.first_name_male().upper().ljust(12)[:12]
        else:
            first_name = fake.first_name_female().upper().ljust(12)[:12]

        last_name = fake.last_name().upper().ljust(12)[:12]
        mi = fake.random_uppercase_letter()
        # NOTE(review): Faker appears to pick this age range relative to the
        # current date rather than `year`, so the age computed against `year`
        # may fall outside 65-90 -- confirm.
        dob = fake.date_of_birth(minimum_age=65, maximum_age=90).strftime('%Y%m%d')

        sex_digit = get_sex_digit(sex)
        age_group_digit = get_age_group_digit(dob, year)

        # Decide whether this member has any HCCs, and if so which ones.
        if random.random() < hcc_prevalence:
            num_hccs = random.randint(1, 3)
            member_hccs = set(random.sample(CNA_HCC_CODES, num_hccs))
        else:
            member_hccs = set()

        # One flag per code in CNA_HCC_CODES. The field is as wide as the
        # code list, so no code's flag is ever cut off.
        hcc_indicators = ''.join(
            '1' if hcc_code in member_hccs else '0'
            for hcc_code in CNA_HCC_CODES
        )

        records.append(
            f"{RECORD_TYPE}"          # Record Type (1 char)
            f"{mbi}"                  # MBI (11 chars)
            f"{last_name}"            # Last Name (12 chars)
            f"{first_name}"           # First Name (12 chars)
            f"{mi}"                   # Middle Initial (1 char)
            f"{dob}"                  # Date of Birth (8 chars)
            f"{sex_digit}"            # Sex indicator (1 char: 1=Male, 2=Female)
            f"{age_group_digit}"      # Age group indicator (1 char: 1-7)
            f"{demographic_filler}"   # Demographic category indicators (20 chars)
            f"{hcc_indicators}"       # HCC flags (len(CNA_HCC_CODES) chars)
            f"{trailing_filler}"      # Additional indicators/padding (30 chars)
        )

    return records

# Build the mock records and write them to disk, one record per line
records = generate_mor_flat_records(num_members=1000, hcc_prevalence=0.3)
with open("mor_mock_flatfile.txt", "w") as f:
    f.writelines(f"{line}\n" for line in records)

# Summarize what was generated (a single print call emits the whole report)
print("\n".join([
    "Mock MOR flat file generated conforming to CMS MOR specification.",
    "Format matches PDF examples:",
    "- Header record with contract, run date, payment month",
    "- Sex indicator: 1=Male, 2=Female",
    "- Age group indicator: 1=65-69, 2=70-74, 3=75-79, 4=80-84, 5=85-89, 6=90-94, 7=95+",
    f"- {len(CNA_HCC_CODES)} CNA_HCC flags (0 or 1 for each HCC)",
    "- Fixed-width format matching MOR data file specification from PDF",
    f"Generated {len(records)} total records (1 header + {len(records)-1} member records)",
]))

Mock MOR flat file generated conforming to CMS MOR specification.
Format matches PDF examples:
- Header record with contract, run date, payment month
- Sex indicator: 1=Male, 2=Female
- Age group indicator: 1=65-69, 2=70-74, 3=75-79, 4=80-84, 5=85-89, 6=90-94, 7=95+
- 115 CNA_HCC flags (0 or 1 for each HCC)
- Fixed-width format matching MOR data file specification from PDF
Generated 1001 total records (1 header + 1000 member records)
