In [2]:
import altair as alt
import pandas as pd
import numpy as np
from vega_datasets import data

### READ IN RAW DATA ###
df_nat = pd.read_csv('2021natality_clean.csv')
df_map = pd.read_csv('merged_maternal_morbidity.csv')

  exec(code_obj, self.user_global_ns, self.user_ns)


In [3]:
### DATA FIONA ###
# rename columns of interest with long names
mm_county = df_map.rename(columns={"Average Age of Mother (years)_MM": "Avg_Age_MM", "% of Total Births_MM": "percent_MM"})

# reformat state and county code to match the vega dataset county ids
state_code = []
county_code = []
for i in range(len(mm_county)):
    if len(str(mm_county["County of Residence Code"][i])) == 6:
        state_code.append(str(mm_county["County of Residence Code"][i])[:1])
        county_code.append(str(mm_county["County of Residence Code"][i])[1:4])
    if len(str(mm_county["County of Residence Code"][i])) == 7:
        state_code.append(str(mm_county["County of Residence Code"][i])[:2])
        county_code.append(str(mm_county["County of Residence Code"][i])[2:5])
        
mm_id = []
for i in range(len(mm_county)):
    mm_id.append(int(state_code[i] + county_code[i]))

# change the percentage column from a string to floats
new_percent = []
for i in range(len(mm_county)):
    percent = mm_county["percent_MM"][i]
    new_percent.append(float(percent[:-1]))
    
# change county id and percent columns to the newly formatted columns from above
mm_county["county-id"] = mm_id
mm_county["percent_MM"] = new_percent

In [7]:
# Save Fiona's Data
mm_county.to_csv('data/mm_county_fiona.csv', index = False)

In [9]:
### DATA ISABEL ###
subset = df_nat[['dob_yy', 'dob_mm', 'dob_tt', 'no_mmorb', 'mm_mtr', 'mm_plac', 'mm_rupt', 'mm_uhyst', 'mm_aicu']]
morb = subset[['dob_mm', 'no_mmorb', 'dob_tt']]

# group and filter out unknown values
morb_group = morb.groupby(['dob_mm', 'no_mmorb']).count()
morb_group = morb_group.reset_index()
morb_group = morb_group.loc[morb_group['no_mmorb'] != 9]

# recode
morb_group['no_mmorb'] = morb_group['no_mmorb'].map({1: 'Survival', 0: 'Death'})
morb_group['dob_mm'] = morb_group['dob_mm'].map({1: 'January', 2: 'February', 3: 'March', 4: 'April', 5: 'May',
                                                6: 'June', 7: 'July', 8: 'August', 9: 'September', 10: 'October',
                                                11: 'November', 12: 'December'})

# filter unknown times and outcomes
morb = morb.loc[morb['dob_tt'] != 9999]
morb = morb.loc[morb['no_mmorb'] != 9]

# create bins to count the number of births per hour
morb['bins'] = pd.cut(x=morb['dob_tt'], bins=[0, 100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 
                                             1200, 1300, 1400, 1500, 1600, 1700, 1800, 1900, 2000, 
                                             2100, 2200, 2300, 2400], labels=['0:00-1:00', '1:01-2:00', '2:01-3:00', '3:01-4:00', '4:01-5:00',
                         '5:01-6:00', '6:01-7:00', '7:01-8:00', '8:01-9:00', '9:01-10:00', '10:01-11:00',
                         '11:01-12:00', '12:01-13:00', '13:01-14:00', '14:01-15:00', '15:01-16:00',
                         '16:01-17:00', '17:01-18:00', '18:01-19:00', '19:01-20:00', '20:01-21:00',
                         '21:01-22:00', '22:01-23:00', '23:01-24:00'])

morb['dob_mm'] = morb['dob_mm'].map({1: 'January', 2: 'February', 3: 'March', 4: 'April', 5: 'May',
                                                6: 'June', 7: 'July', 8: 'August', 9: 'September', 10: 'October',
                                                11: 'November', 12: 'December'})

morb_group2 = morb.groupby(['bins', 'dob_mm', 'no_mmorb']).count()
morb_group2 = morb_group2.reset_index()
morb_group2['no_mmorb'] = morb_group2['no_mmorb'].map({1: 'Survival', 0: 'Death'})

In [12]:
# Save Isabel's Data
morb_group.to_csv('data/morb_group_isabel.csv', index = False)
morb_group2.to_csv('data/morb_group2_isabel.csv', index = False)

In [36]:
### DATA ALICE ###
df_natality_clean = df_nat[df_nat['no_mmorb'] == 0]

df_natality_clean1 = df_natality_clean[['no_infec', 'rf_gdiab', 'rf_ghype', 'mm_mtr', 
                                        'mm_plac', 'mm_rupt', 'mm_uhyst', 'mm_aicu']]

for i in ['mm_mtr', 'mm_plac', 'mm_rupt', 'mm_uhyst', 'mm_aicu', 'rf_gdiab', 'rf_ghype']:
    df_natality_clean1[i] = df_natality_clean1[i] == 'Y'

df_natality_clean2 = pd.melt(
     df_natality_clean1, id_vars = ['no_infec', 'rf_gdiab', 'rf_ghype'], 
     var_name = 'Outcome', value_name = 'yn')

df_natality_clean2['yn'] = df_natality_clean2['yn'].astype(int)

df_natality_clean2 = df_natality_clean2.rename(columns = {'no_infec': 'No Infection', 
                                     'rf_gdiab': 'Gestational Diabetes', 
                                     'rf_ghype': 'Gestational Hypertension'})

df_natality_clean2['Outcome'] = df_natality_clean2['Outcome'].map({'mm_mtr': 'Maternal Transfusion', 
                                                                   'mm_plac': 'Perineal Laceration', 
                                                                   'mm_rupt': 'Ruptured Uterus', 
                                                                   'mm_uhyst': 'Unplanned Hysterectomy', 
                                                                   'mm_aicu': 'Admit to ICU'})

df_grouped1 = df_natality_clean2.groupby(['No Infection', 'Gestational Diabetes', 'Gestational Hypertension', 'Outcome'], 
    group_keys = False).sum().apply(lambda x: x).reset_index()

df_melt1 = pd.melt(
     df_natality_clean1, id_vars = ['mm_mtr', 'mm_plac', 'mm_rupt', 'mm_uhyst', 'mm_aicu'], 
     var_name = 'Risk Factor', value_name = 'Count'
)
df_melt1['Count'] = df_melt1['Count'].astype(int)
df_grouped2 = df_melt1.groupby('Risk Factor').sum().reset_index()

df_grouped2['Risk Factor'] = df_grouped2['Risk Factor'].map({'no_infec': 'No Infection', 
                                                     'rf_gdiab': 'Gestational Diabetes', 
                                                     'rf_ghype': 'Gestational Hypertension'})

df_sum_counts = df_grouped2[['Risk Factor', 'Count']]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [55]:
df_natality_clean2['No Infection'] = df_natality_clean2['No Infection'].map({0: 'Infection', 
                                                                             1: 'No Infection'})
df_natality_clean2['Gestational Diabetes'] = df_natality_clean2['Gestational Diabetes'].map({False: 'No Gest. Diabetes', 
                                                                                             True: 'Gest. Diabetes'})
df_natality_clean2['Gestational Hypertension'] = df_natality_clean2['Gestational Hypertension'].map({False: 'No Gest. Hypertension', 
                                                                                                     True: 'Gest. Hypertension'})

In [38]:
# Save Alice's Data
df_sum_counts.to_csv('data/sum_risk_alice.csv', index = False)
df_natality_clean2.to_csv('data/rate_risk_alice.csv', index = False)

In [41]:
# data risk averaging #
df_bw_risk = df_nat[['mager', 'mar_p', 'dmar',
                     'meduc', 'feduc', 'cig_0', 'dbwt']]

df_bw_risk_clean = df_bw_risk[df_bw_risk['mar_p'] != 'U']

df_bw_risk_clean.loc[:,'mage_cat'] = np.where(df_bw_risk_clean['mager'] < 17, '<20',
                               np.where(df_bw_risk_clean['mager'] < 26, '20-25',
                               np.where(df_bw_risk_clean['mager'] < 31, '26-30',
                               np.where(df_bw_risk_clean['mager'] < 36, '31-35',
                               np.where(df_bw_risk_clean['mager'] < 41, '36-40',
                               np.where(df_bw_risk_clean['mager'] < 46, '41-45', '46-50'))))))

df_bw_risk_clean.loc[:,'cig_cat'] = np.where(df_bw_risk_clean['cig_0'] < 1, 'none', 
                                    np.where(df_bw_risk_clean['cig_0'] < 5, '<5',
                                    np.where(df_bw_risk_clean['cig_0'] < 11, '5-10',
                                    np.where(df_bw_risk_clean['cig_0'] < 21, '11-20',
                                    np.where(df_bw_risk_clean['cig_0'] < 41, '21-40', '41+')))))

df_bw_risk_clean1 = df_bw_risk_clean.drop(columns = ['mager', 'cig_0'])

df_bw1 = df_bw_risk_clean1.groupby(['mage_cat', 'mar_p', 'dmar', 'meduc', 'feduc', 'cig_cat'], 
                                     group_keys=False).mean().apply(lambda x: x).reset_index()

#marital status
df_bw1['dmar'] = df_bw1['dmar'].map({1.0: 'Married',
                                     2.0: 'Unmarried'})
#meduc
#['< 8th grade', '9-12th grade', 'High school graduate/GED',
#'Some college', 'Associate', "Bachelor's", "Master's", 'Doctorate/Profession', 'Unknown']
df_bw1['meduc'] = df_bw1['meduc'].map({1: '< 8th grade',
                                       2: '9-12th grade',
                                       3: 'High school graduate/GED',
                                       4: 'Some college',
                                       5: 'Associate', 
                                       6: "Bachelor's", 7: "Master's", 
                                       8: 'Doctorate/Profession', 
                                       9: 'Unknown'})
#feduc
df_bw1['feduc'] = df_bw1['feduc'].map({1: '< 8th grade',
                                       2: '9-12th grade',
                                       3: 'High school graduate/GED',
                                       4: 'Some college',
                                       5: 'Associate', 
                                       6: "Bachelor's", 7: "Master's", 
                                       8: 'Doctorate/Profession', 
                                       9: 'Unknown'})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = value


In [43]:
# Save Alice's Data part 2
df_bw1.to_csv('data/bw_risk_alice.csv', index = False)