# Imports

In [1]:
import pandas as pd

# Load the cleaned data frames (one per bias attribute)

In [2]:
# Read the four cleaned CSVs in one pass. The generator yields the frames in
# the same order as the names on the left: age, race, sex, health insurance.
df_age, df_race, df_sex, df_health = (
    pd.read_csv(csv_path)
    for csv_path in (
        'processed_data/chexpert_plus_240401_cleaned_age.csv',
        'processed_data/chexpert_plus_240401_cleaned_race.csv',
        'processed_data/chexpert_plus_240401_cleaned_sex.csv',
        'processed_data/chexpert_plus_240401_cleaned_health_insurance.csv',
    )
)

# Encode sex, race, and health insurance type as integer category codes (0, 1, ...)

In [3]:
# Label -> integer code for each demographic attribute. The mapping is the same
# for every frame, so it is built once instead of on every loop iteration.
bias_var = {
    "sex": {"Male": 0, "Female": 1},
    "race": {"White": 0, "Asian": 1, "Black": 2},
    "insurance_type": {"Medicaid": 0, "Medicare": 1, "Private Insurance": 2}
}

# Replace the string labels with their integer codes, in place, on every frame.
# The loop variable keeps the name `df` and the frame order is unchanged, so
# `df` is still bound to df_sex once the loop finishes.
for df in [df_age, df_race, df_health, df_sex]:
    for column, codes in bias_var.items():
        # A column that is already numeric has been encoded by an earlier run
        # of this cell. Mapping it again would look the codes 0/1/2 up in a
        # dict keyed by strings and turn every value into NaN, so skip it to
        # keep the cell safe to re-run.
        if pd.api.types.is_numeric_dtype(df[column]):
            continue
        # Series.map yields NaN for any label that is not a key of `codes`.
        df[column] = df[column].map(codes)

In [4]:
# List the available columns. The frame is named explicitly instead of using
# the loop variable `df` left over from the encoding loop (which ends on df_sex).
df_sex.columns

Index(['path_to_image', 'path_to_dcm', 'frontal_lateral', 'ap_pa',
       'deid_patient_id', 'patient_report_date_order', 'report',
       'section_narrative', 'section_clinical_history', 'section_history',
       'section_comparison', 'section_technique', 'section_procedure_comments',
       'section_findings', 'section_impression', 'section_end_of_impression',
       'section_summary', 'section_accession_number', 'age', 'sex', 'race',
       'ethnicity', 'interpreter_needed', 'insurance_type', 'recent_bmi',
       'deceased', 'split', 'No Finding', 'Enlarged Cardiomediastinum',
       'Cardiomegaly', 'Lung Opacity', 'Lung Lesion', 'Edema', 'Consolidation',
       'Pneumonia', 'Atelectasis', 'Pneumothorax', 'Pleural Effusion',
       'Pleural Other', 'Fracture', 'Support Devices'],
      dtype='object')

In [4]:
# Every per-attribute frame keeps the same image-path columns and the same
# finding-label columns; only the single attribute column in between differs.
path_columns = ['path_to_image', 'path_to_dcm']
finding_columns = [
    'No Finding', 'Enlarged Cardiomediastinum', 'Cardiomegaly',
    'Lung Opacity', 'Lung Lesion', 'Edema', 'Consolidation', 'Pneumonia',
    'Atelectasis', 'Pneumothorax', 'Pleural Effusion', 'Pleural Other',
    'Fracture', 'Support Devices',
]

# Column order in each result: paths, the attribute, then the findings.
df_age = df_age[path_columns + ['age'] + finding_columns]
df_sex = df_sex[path_columns + ['sex'] + finding_columns]
df_race = df_race[path_columns + ['race'] + finding_columns]
df_health = df_health[path_columns + ['insurance_type'] + finding_columns]

# Save the data frames

In [8]:
# Write each reduced frame to its own CSV under final_data/.
# to_csv is called with its defaults, so the row index is written as well.
for frame, out_path in (
    (df_sex, 'final_data/chexpert_plus_240401_cleaned_label_sex.csv'),
    (df_race, 'final_data/chexpert_plus_240401_cleaned_label_race.csv'),
    (df_age, 'final_data/chexpert_plus_240401_cleaned_label_age.csv'),
    (df_health, 'final_data/chexpert_plus_240401_cleaned_label_health.csv'),
):
    frame.to_csv(out_path)