In [16]:
import os
import requests

# URL of the dataset (CSV export hosted on data.chhs.ca.gov)
url = 'https://data.chhs.ca.gov/dataset/b79b3447-4c10-4ae6-84e2-1076f83bb24e/resource/3340c5d7-4054-4d03-90e0-5f44290ed095/download/independent-medical-review-imr-determinations-trends.csv'

# Path to the data folder and the renamed CSV file
data_folder = 'data'
csv_filename = 'original.csv'
csv_filepath = os.path.join(data_folder, csv_filename)

if os.path.exists(csv_filepath):
    # An existing file is treated as a cache hit; nothing is downloaded.
    print(f"File already exists: {csv_filepath}")
else:
    # Create the data folder if it doesn't exist (no-op when it already does)
    os.makedirs(data_folder, exist_ok=True)

    # Download the CSV file. requests has no default timeout, so without one
    # a stalled connection would block this cell forever. The tuple is
    # (connect timeout, per-read timeout) in seconds.
    response = requests.get(url, timeout=(10, 60))
    if response.status_code == 200:
        # Write to a temporary sibling and rename it into place, so that an
        # interrupted write can never leave a truncated file at csv_filepath
        # that the existence check above would later accept as complete.
        partial_filepath = csv_filepath + '.part'
        with open(partial_filepath, 'wb') as file:
            file.write(response.content)
        os.replace(partial_filepath, csv_filepath)
        print(f"File downloaded and saved as {csv_filepath}")
    else:
        print(f"Failed to download the file. Status code: {response.status_code}")


File already exists: data/original.csv


In [17]:
import pandas as pd

# Load the previously downloaded CSV into a pandas DataFrame
original_csv_path = 'data/original.csv'
df = pd.read_csv(original_csv_path)

# Bare last expression so the notebook renders the frame
df

Unnamed: 0,ReferenceID,ReportYear,DiagnosisCategory,DiagnosisSubCategory,TreatmentCategory,TreatmentSubCategory,Determination,Type,AgeRange,PatientGender,IMRType,DaysToReview,DaysToAdopt,Findings
0,MN24-41564,2024,Endocrine/Metabolic,Obesity,Pharmacy,Weight Control,Overturned Decision of Health Plan,Medical Necessity,31 to 40,Female,Standard,19.0,41,Nature of Statutory Criteria/Case Summary: The...
1,EI24-41563,2024,Digestive System/ GI,Crohn's Disease,Diag Imag & Screen,Lab Work,Overturned Decision of Health Plan,Experimental/Investigational,51 to 64,Female,Standard,21.0,45,Findings: The physician reviewer found that N...
2,MN24-41562,2024,Cancer,Brain,Cancer Care,Other,Overturned Decision of Health Plan,Medical Necessity,41 to 50,Male,Standard,21.0,47,Findings: The physician reviewer found that N...
3,MN24-41561,2024,Endocrine/Metabolic,Diabetes,Pharmacy,Weight Control,Overturned Decision of Health Plan,Medical Necessity,51 to 64,Female,Expedited,2.0,10,Nature of Statutory Criteria/Case Summary: A p...
4,MN24-41560,2024,Cardiac/Circ Problem,Varicose Veins,Cardio-Vasc Proc,Vein Ablation,Upheld Decision of Health Plan,Medical Necessity,51 to 64,Male,Expedited,2.0,11,Nature of Statutory Criteria/Case Summary: The...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37611,MN01-600,2001,Morbid Obesity,Other,Gen Surg Proc,Gastric Bypass,Upheld Decision of Health Plan,Medical Necessity,,,Standard,18.0,44,A 34-year-old female requested authorization a...
37612,MN01-596,2001,Orth/Musculoskeletal,Other,Diag Imag & Screen,MRI,Upheld Decision of Health Plan,Medical Necessity,,,Standard,21.0,44,A 64-year-old female enrollee requested author...
37613,MN01-594,2001,Infectious Disease,Lyme Disease,Pharmacy,Antibiotics,Upheld Decision of Health Plan,Medical Necessity,,,Expedited,0.0,15,A 33-year-old female enrollee requested author...
37614,EI01-592,2001,Cancer,Other,Cancer Care,Clin Trial (I),Upheld Decision of Health Plan,Experimental/Investigational,,,Expedited,7.0,14,A 55-year-old male enrollee requested authoriz...


In [18]:
# Remove the IMRType, DaysToReview and DaysToAdopt columns.
# errors='ignore' keeps this cell re-runnable: because the result is bound
# back to `df`, a second execution would otherwise raise KeyError for the
# columns that the first execution already removed.
df = df.drop(columns=['IMRType', 'DaysToReview', 'DaysToAdopt'], errors='ignore')
df

Unnamed: 0,ReferenceID,ReportYear,DiagnosisCategory,DiagnosisSubCategory,TreatmentCategory,TreatmentSubCategory,Determination,Type,AgeRange,PatientGender,Findings
0,MN24-41564,2024,Endocrine/Metabolic,Obesity,Pharmacy,Weight Control,Overturned Decision of Health Plan,Medical Necessity,31 to 40,Female,Nature of Statutory Criteria/Case Summary: The...
1,EI24-41563,2024,Digestive System/ GI,Crohn's Disease,Diag Imag & Screen,Lab Work,Overturned Decision of Health Plan,Experimental/Investigational,51 to 64,Female,Findings: The physician reviewer found that N...
2,MN24-41562,2024,Cancer,Brain,Cancer Care,Other,Overturned Decision of Health Plan,Medical Necessity,41 to 50,Male,Findings: The physician reviewer found that N...
3,MN24-41561,2024,Endocrine/Metabolic,Diabetes,Pharmacy,Weight Control,Overturned Decision of Health Plan,Medical Necessity,51 to 64,Female,Nature of Statutory Criteria/Case Summary: A p...
4,MN24-41560,2024,Cardiac/Circ Problem,Varicose Veins,Cardio-Vasc Proc,Vein Ablation,Upheld Decision of Health Plan,Medical Necessity,51 to 64,Male,Nature of Statutory Criteria/Case Summary: The...
...,...,...,...,...,...,...,...,...,...,...,...
37611,MN01-600,2001,Morbid Obesity,Other,Gen Surg Proc,Gastric Bypass,Upheld Decision of Health Plan,Medical Necessity,,,A 34-year-old female requested authorization a...
37612,MN01-596,2001,Orth/Musculoskeletal,Other,Diag Imag & Screen,MRI,Upheld Decision of Health Plan,Medical Necessity,,,A 64-year-old female enrollee requested author...
37613,MN01-594,2001,Infectious Disease,Lyme Disease,Pharmacy,Antibiotics,Upheld Decision of Health Plan,Medical Necessity,,,A 33-year-old female enrollee requested author...
37614,EI01-592,2001,Cancer,Other,Cancer Care,Clin Trial (I),Upheld Decision of Health Plan,Experimental/Investigational,,,A 55-year-old male enrollee requested authoriz...


In [19]:
from faker import Faker
import random

# Instantiate Faker
fake = Faker(locale='en_US')
fake.seed_instance(1234)

# The helper functions in this notebook draw ages, blood types, insurers and
# physicians from the stdlib `random` module, which the Faker instance seed
# above does not affect. Seed it as well so a fresh run reproduces the same
# generated dataset.
random.seed(1234)


In [20]:
def fake_name(gender: str):
    """Generate a fake full name appropriate for ``gender``.

    Args:
        gender: ``'Male'`` or ``'Female'`` select the matching Faker name
            generator; any other value (including a missing value) falls
            back to Faker's gender-neutral ``name()``.

    Returns:
        The generated name as produced by the notebook-level ``fake`` instance.
    """
    generate = (
        fake.name_male if gender == 'Male'
        else fake.name_female if gender == 'Female'
        else fake.name
    )
    return generate()
    
def fake_age_within_range(age_range):
    """Draw a random integer age inside the bucket named by ``age_range``.

    Args:
        age_range: One of the bucket labels listed below (e.g. ``'31 to 40'``).

    Returns:
        An ``int`` between the bucket's bounds (both inclusive), drawn with
        ``random.randint``; ``None`` when the label is not recognised.
    """
    # (label, youngest, oldest) with both bounds inclusive.
    buckets = (
        ('0 to 10', 0, 10),
        ('11 to 20', 11, 20),
        ('21 to 30', 21, 30),
        ('31 to 40', 31, 40),
        ('41 to 50', 41, 50),
        ('51 to 64', 51, 64),
        ('65+', 65, 100),  # Assuming maximum age of 100 for the example
    )
    for label, youngest, oldest in buckets:
        if age_range == label:
            return random.randint(youngest, oldest)
    return None

def fake_blood_type():
    """Return one randomly chosen blood type string (e.g. ``'O-'``).

    The choice is made with ``random.choice`` over the eight ABO/Rh
    combinations listed below.
    """
    blood_types = (
        'A+', 'A-',
        'B+', 'B-',
        'O+', 'O-',
        'AB+', 'AB-',
    )
    return random.choice(blood_types)

def fake_insurance_provider():
    """Return one insurance provider name picked with ``random.choice``.

    The candidates are the fixed list of provider names below.
    """
    providers = (
        "UnitedHealth Group",
        "Kaiser Foundation",
        "Anthem Inc.",
        "Centene Corporation",
        "Humana",
        "CVS Health (Aetna)",
        "HCSC (Health Care Service Corporation)",
        "Cigna Health",
        "Molina Healthcare",
        "Independence Health Group",
        "GuideWell Mutual Holding",
        "WellCare",
        "Blue Cross Blue Shield",
        "Highmark",
        "Medicare",
        "Medicaid",
    )
    selected = random.choice(providers)
    return selected

def fake_consulting_physicians():
    """Return one physician name picked with ``random.choice``.

    The pool is a fixed roster of six names, so the same physicians recur
    across many rows of the generated data.
    """
    roster = (
        'Dr. Alexandria Gaines',
        'Dr. Eddie Young',
        'Dr. James Barber',
        'Dr. Jerry Daniels',
        'Dr. Michelle Lamb',
        'Dr. Shelly Hunt',
    )
    return random.choice(roster)

In [21]:
# Add columns with fake data
#
# Name and age depend on an existing column, so they are mapped from it.
# The remaining columns are independent of the row contents: generate one
# value per row with a list comprehension instead of `df.apply(..., axis=1)`,
# which would build a Series for every row only to discard it (and returns a
# DataFrame rather than a Series when the frame is empty).
# Columns are filled in the same order as before, one value per row, so the
# sequence of generator calls is unchanged.
row_count = len(df)

df['PatientName'] = df['PatientGender'].apply(fake_name)
# Rows whose AgeRange is missing/unrecognised get None here, so this column
# contains missing values (the rendered frame shows it as floats, e.g. 38.0).
df['PatientAge'] = df['AgeRange'].apply(fake_age_within_range)
df['PatientPhone'] = [fake.bothify(text='+1-###-###-####') for _ in range(row_count)]
df['PatientAddress'] = [fake.address() for _ in range(row_count)]
df['PatientBloodType'] = [fake_blood_type() for _ in range(row_count)]
df['PatientSSN'] = [fake.ssn() for _ in range(row_count)]
df['PatientInsuranceProvider'] = [fake_insurance_provider() for _ in range(row_count)]
df['PatientInsuranceNumber'] = [
    fake.bothify(text='?#??##?#?##?', letters='ABCDEFGHIJKLMNOPQRSTUVWXYZ')
    for _ in range(row_count)
]
df['ConsultingPhysician'] = [fake_consulting_physicians() for _ in range(row_count)]

In [22]:
# Show the DataFrame, now including the generated fake patient columns
df

Unnamed: 0,ReferenceID,ReportYear,DiagnosisCategory,DiagnosisSubCategory,TreatmentCategory,TreatmentSubCategory,Determination,Type,AgeRange,PatientGender,Findings,PatientName,PatientAge,PatientPhone,PatientAddress,PatientBloodType,PatientSSN,PatientInsuranceProvider,PatientInsuranceNumber,ConsultingPhysician
0,MN24-41564,2024,Endocrine/Metabolic,Obesity,Pharmacy,Weight Control,Overturned Decision of Health Plan,Medical Necessity,31 to 40,Female,Nature of Statutory Criteria/Case Summary: The...,Mrs. Adrienne Bartlett,38.0,+1-346-618-8073,"385 Johnson Terrace Suite 563\nCoreyland, LA 2...",B-,450-31-6466,Medicare,M6ZQ03U3M80S,Dr. Michelle Lamb
1,EI24-41563,2024,Digestive System/ GI,Crohn's Disease,Diag Imag & Screen,Lab Work,Overturned Decision of Health Plan,Experimental/Investigational,51 to 64,Female,Findings: The physician reviewer found that N...,Lauren Powers,62.0,+1-034-435-2919,Unit 8509 Box 3674\nDPO AA 89104,AB-,365-48-3942,Cigna Health,M5ZP04O1X24U,Dr. Alexandria Gaines
2,MN24-41562,2024,Cancer,Brain,Cancer Care,Other,Overturned Decision of Health Plan,Medical Necessity,41 to 50,Male,Findings: The physician reviewer found that N...,Randall Young,47.0,+1-142-638-7824,"01604 Garcia Ports\nReedshire, VT 48487",B-,893-11-2424,GuideWell Mutual Holding,H2HP56W5L38K,Dr. Shelly Hunt
3,MN24-41561,2024,Endocrine/Metabolic,Diabetes,Pharmacy,Weight Control,Overturned Decision of Health Plan,Medical Necessity,51 to 64,Female,Nature of Statutory Criteria/Case Summary: A p...,Rachel Reed,51.0,+1-029-343-6913,"29007 David Shores\nIngramville, MH 56990",AB+,340-36-8472,HCSC (Health Care Service Corporation),S9AN49Z5E44D,Dr. Michelle Lamb
4,MN24-41560,2024,Cardiac/Circ Problem,Varicose Veins,Cardio-Vasc Proc,Vein Ablation,Upheld Decision of Health Plan,Medical Necessity,51 to 64,Male,Nature of Statutory Criteria/Case Summary: The...,Luis Hernandez,53.0,+1-570-685-8386,"22535 Alexander Knolls\nPort Susan, PW 24490",O-,374-35-3984,Medicare,C8EN14X7A17W,Dr. Jerry Daniels
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37611,MN01-600,2001,Morbid Obesity,Other,Gen Surg Proc,Gastric Bypass,Upheld Decision of Health Plan,Medical Necessity,,,A 34-year-old female requested authorization a...,Jason Smith,,+1-798-725-7327,"675 Chad Burgs\nLake Roger, MI 97790",O-,743-97-5425,Cigna Health,N6EU89D2C07B,Dr. James Barber
37612,MN01-596,2001,Orth/Musculoskeletal,Other,Diag Imag & Screen,MRI,Upheld Decision of Health Plan,Medical Necessity,,,A 64-year-old female enrollee requested author...,Stephanie Brown,,+1-566-786-7248,44109 Anderson Flats Suite 297\nNorth Rhondasi...,AB-,235-13-5333,Medicare,D0TM23A6D77X,Dr. James Barber
37613,MN01-594,2001,Infectious Disease,Lyme Disease,Pharmacy,Antibiotics,Upheld Decision of Health Plan,Medical Necessity,,,A 33-year-old female enrollee requested author...,Dana Hancock,,+1-266-917-6728,"26667 Brandon Junction Suite 305\nNorth Tyler,...",B-,095-09-1775,UnitedHealth Group,U2DD29H7H16Y,Dr. Jerry Daniels
37614,EI01-592,2001,Cancer,Other,Cancer Care,Clin Trial (I),Upheld Decision of Health Plan,Experimental/Investigational,,,A 55-year-old male enrollee requested authoriz...,Alex Bean,,+1-094-495-3590,"PSC 8527, Box 5451\nAPO AE 55533",O-,078-34-8605,HCSC (Health Care Service Corporation),B6GB69B0U89J,Dr. Alexandria Gaines


In [23]:
# Save the augmented frame (original columns plus the fake patient fields)
# as CSV, leaving out the DataFrame index.
pii_csv_path = 'data/pii.csv'
df.to_csv(pii_csv_path, index=False)