### CDC Low Birth Weight

We were able to find county-level data for 2019, broken down by the mother's race and ethnicity.

Centers for Disease Control and Prevention, National Center for Health Statistics. National Vital Statistics System, Natality on CDC WONDER Online Database. Data are from the Natality Records 2016-2020, as compiled from data provided by the 57 vital statistics jurisdictions through the Vital Statistics Cooperative Program. Accessed at http://wonder.cdc.gov/natality-expanded-current.html on Jul 7, 2022 11:46:06 AM

Note: low birth weight is defined as a weight under 2,500 grams at birth.

### Low Birth Weight Percent by Mother's Race and County

In [1]:
import os
# Default to the repository-relative data folder. When the COLAB_GPU
# environment variable is present (i.e. the notebook runs on Google Colab),
# mount Google Drive and read from the shared drive instead.
data_path = 'data'
if os.environ.get('COLAB_GPU') is not None:
    from google.colab import drive

    drive.mount('/drive')
    data_path = '/drive/Shared drives/Capstone/notebooks/data'


In [2]:
import pandas as pd
import numpy as np

def clean_input(data, dropping, low_weight, weight_col='Infant Birth Weight 12'):
    """Load a natality CSV export and label every row as low / not low birth weight.

    Parameters
    ----------
    data : str, path object or file-like
        Anything accepted by ``pandas.read_csv``.
    dropping : list of str
        Columns to discard right after loading.
    low_weight : list of str
        Values of ``weight_col`` that count as low birth weight.
    weight_col : str, default 'Infant Birth Weight 12'
        Name of the column holding the birth-weight bucket of each row.

    Returns
    -------
    pandas.DataFrame
        The remaining columns plus ``low_birth_weight`` ('low' or 'not low').
        Rows with a missing value in any column are removed and ``weight_col``
        itself is dropped.
    """
    cdc_df = pd.read_csv(data).drop(columns=dropping)

    # Every bucket that is not listed in `low_weight` is labelled 'not low'.
    # NOTE(review): if the export contains a "not stated"-style weight bucket it
    # is counted as 'not low' here as well — confirm that this is intended.
    cdc_df['low_birth_weight'] = np.where(
        cdc_df[weight_col].isin(low_weight), 'low', 'not low'
    )

    # Incomplete rows are removed only after labelling, so the order of these
    # two steps matches the original pipeline; nothing is mutated in place.
    return cdc_df.dropna(axis='rows').drop(columns=weight_col)

# 2019 natality export, broken down by the mother's race
data = f'{data_path}/external/natality_2019_race.csv'

# Birth-weight buckets that fall under the 2500 gram threshold
low_weight = [
    '499 grams or less',
    '500 - 999 grams',
    '1000 - 1499 grams',
    '1500 - 1999 grams',
    '2000 - 2499 grams',
]

# Columns that are discarded after loading ('Year' is kept)
to_drop = [
    'Notes',
    'Infant Birth Weight 12 Code',
    "Mother's Single Race 6 Code",
    'Year Code',
    'Average Birth Weight (grams)',
]

cdc_df = clean_input(data, to_drop, low_weight)

cdc_df.head()


Unnamed: 0,County of Residence,County of Residence Code,Mother's Single Race 6,Year,Births,low_birth_weight
0,"Jefferson County, AL",1073.0,Black or African American,2019.0,18.0,low
1,"Mobile County, AL",1097.0,Black or African American,2019.0,20.0,low
2,"Montgomery County, AL",1101.0,Black or African American,2019.0,10.0,low
3,"Tuscaloosa County, AL",1125.0,Black or African American,2019.0,10.0,low
4,"Unidentified Counties, AL",1999.0,Black or African American,2019.0,21.0,low


In [3]:
# Number of distinct county codes in the race export (626). The count includes
# state-level "Unidentified Counties" rows such as code 1999 for AL shown above.
cdc_df['County of Residence Code'].unique().size


626

In [4]:
# Name of the column holding the mother's race; passed to create_pivot below
# as the breakdown column.
race1 = "Mother's Single Race 6"

#one row per county, optionally broken down by race / ethnicity
def create_pivot(df, race_ethnicity=False):
    """Build a one-row-per-county table of low-birth-weight rates.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'County of Residence', 'County of Residence Code',
        'Births' and 'low_birth_weight' (values 'low' / 'not low'), plus the
        column named by ``race_ethnicity`` when one is given.
    race_ethnicity : str or False, default False
        Column to break the rates down by. Any falsy value yields a single
        overall ``low_birth_rate`` column.

    Returns
    -------
    pandas.DataFrame
        The two county columns followed by either ``low_birth_rate`` or one
        ``<group>_low_birth_rate`` column per distinct value of
        ``race_ethnicity`` (in order of first appearance in ``df``). With a
        breakdown, the columns are a two-level MultiIndex; callers flatten it
        with ``columns.droplevel(1)``. A rate is NaN whenever the 'low' or the
        'not low' count is missing for that county / group.
    """
    county_cols = ['County of Residence', 'County of Residence Code']
    status_col = 'low_birth_weight'
    statuses = ['low', 'not low']
    pivot_cols = [status_col, race_ethnicity] if race_ethnicity else [status_col]

    # Aggregate only 'Births'; summing every column would also add up 'Year'
    # and trip over unrelated text columns.
    births = df.groupby(county_cols + pivot_cols)['Births'].sum().reset_index()

    cdc_pivot = pd.pivot_table(births, values='Births', index=county_cols,
                               columns=pivot_cols, aggfunc='sum')

    if race_ethnicity:
        races = list(df[race_ethnicity].unique())
        expected = pd.MultiIndex.from_product([statuses, races], names=pivot_cols)
    else:
        races = []
        expected = pd.Index(statuses, name=status_col)

    # Make sure a count column exists for every status (and group), filled with
    # NaN when the data has no such rows. Without this, a group that only has
    # 'low' or only 'not low' rows raises KeyError in the lookups below.
    cdc_pivot = cdc_pivot.reindex(columns=expected)

    if race_ethnicity:
        for race in races:  # percent low birth weight per group
            low = cdc_pivot[('low', race)]
            not_low = cdc_pivot[('not low', race)]
            cdc_pivot[race + '_low_birth_rate'] = low / (low + not_low)
    else:
        low = cdc_pivot['low']
        not_low = cdc_pivot['not low']
        cdc_pivot['low_birth_rate'] = low / (low + not_low)

    # Only the rates are returned; the raw count columns are removed.
    cdc_pivot = cdc_pivot.drop(columns=list(expected))

    return cdc_pivot.reset_index()


# One row per county with one rate column per race. create_pivot returns
# two-level columns (second level = race), so that level is removed to leave
# plain column names.
race_pivot = create_pivot(cdc_df, race1).droplevel(1, axis='columns')
race_pivot.head()

low_birth_weight,County of Residence,County of Residence Code,Black or African American_low_birth_rate,White_low_birth_rate,Asian_low_birth_rate,More than one race_low_birth_rate,American Indian or Alaska Native_low_birth_rate,Native Hawaiian or Other Pacific Islander_low_birth_rate
0,"Ada County, ID",16001.0,,0.055351,0.057803,0.075758,,
1,"Adams County, CO",8001.0,0.106618,0.089228,0.101266,0.076433,,
2,"Adams County, PA",42001.0,,0.057953,,,,
3,"Aiken County, SC",45003.0,0.145228,0.065343,,,,
4,"Alachua County, FL",12001.0,0.176259,0.072773,,,,


### Low Birth Weight Percent by Mother's Ethnicity and County

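Data were requested through the CDC WONDER query form at https://wonder.cdc.gov/controller/datarequest/D149 (the session-specific `jsessionid` suffix has been removed from the link because it expires).

See the screenshot for the specific selections.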

In [5]:
# 2019 natality export, broken down by the mother's Hispanic origin
data1 = f'{data_path}/external/natality_2019_ethnicity.csv'

# Birth-weight buckets that fall under the 2500 gram threshold
low_weight = [
    '499 grams or less',
    '500 - 999 grams',
    '1000 - 1499 grams',
    '1500 - 1999 grams',
    '2000 - 2499 grams',
]

# Columns that are discarded after loading ('Year' is kept)
to_drop = [
    'Notes',
    'Infant Birth Weight 12 Code',
    'Year Code',
    'Average Birth Weight (grams)',
    "Mother's Hispanic Origin Code",
]

eth_df = clean_input(data1, to_drop, low_weight)

eth_df.head()

Unnamed: 0,County of Residence,County of Residence Code,Mother's Hispanic Origin,Year,Births,low_birth_weight
0,"Jefferson County, AL",1073.0,Not Hispanic or Latino,2019.0,24.0,low
1,"Mobile County, AL",1097.0,Not Hispanic or Latino,2019.0,27.0,low
2,"Montgomery County, AL",1101.0,Not Hispanic or Latino,2019.0,11.0,low
3,"Tuscaloosa County, AL",1125.0,Not Hispanic or Latino,2019.0,13.0,low
4,"Unidentified Counties, AL",1999.0,Not Hispanic or Latino,2019.0,36.0,low


In [6]:
# Name of the column holding the mother's Hispanic-origin category
ethnicity = "Mother's Hispanic Origin"

# One row per county with one rate column per ethnicity. The second column
# level returned by create_pivot (the ethnicity level) is removed to leave
# plain column names.
ethnicity_pivot = create_pivot(eth_df, ethnicity).droplevel(1, axis='columns')

ethnicity_pivot.head()

low_birth_weight,County of Residence,County of Residence Code,Not Hispanic or Latino_low_birth_rate,Hispanic or Latino_low_birth_rate,Unknown or Not Stated_low_birth_rate
0,"Ada County, ID",16001.0,0.056522,0.042308,
1,"Adams County, CO",8001.0,0.097646,0.088296,0.1
2,"Adams County, PA",42001.0,0.058215,,
3,"Aiken County, SC",45003.0,0.103023,,
4,"Alachua County, FL",12001.0,0.109038,0.070632,


### Overall Low Birth Weight Rate by County

In [7]:
# Path of the 2019 natality export without a race / ethnicity breakdown
data3 = f'{data_path}/external/natality_2019_all.csv'

In [8]:

# Birth-weight buckets that fall under the 2500 gram threshold
low_weight = [
    '499 grams or less',
    '500 - 999 grams',
    '1000 - 1499 grams',
    '1500 - 1999 grams',
    '2000 - 2499 grams',
]

# Columns that are discarded after loading ('Year' is kept)
to_drop = [
    'Notes',
    'Infant Birth Weight 12 Code',
    'Year Code',
    'Average Birth Weight (grams)',
]

overall_df = clean_input(data3, to_drop, low_weight)

overall_df.head()

Unnamed: 0,County of Residence,County of Residence Code,Year,Births,low_birth_weight
0,"Jefferson County, AL",1073.0,2019.0,25.0,low
1,"Mobile County, AL",1097.0,2019.0,27.0,low
2,"Montgomery County, AL",1101.0,2019.0,11.0,low
3,"Tuscaloosa County, AL",1125.0,2019.0,13.0,low
4,"Unidentified Counties, AL",1999.0,2019.0,36.0,low


In [9]:
# Number of distinct county codes in the overall export
overall_df['County of Residence Code'].unique().size

626

In [10]:

overall_pivot = create_pivot(overall_df)

# Combine the overall, ethnicity and race rates into one row per county.
# Outer joins keep counties that are present in only some of the three tables.
# validate='one_to_one' makes pandas raise if a county key is ever duplicated
# on either side, which would otherwise silently multiply rows.
merge_keys = ['County of Residence Code', 'County of Residence']
two_dfs = pd.merge(overall_pivot, ethnicity_pivot, how='outer',
                   on=merge_keys, validate='one_to_one')
all_df = pd.merge(two_dfs, race_pivot, how='outer',
                  on=merge_keys, validate='one_to_one')

In [11]:
# Preview the merged county table (long frames are abbreviated in the output)
all_df

low_birth_weight,County of Residence,County of Residence Code,low_birth_rate,Not Hispanic or Latino_low_birth_rate,Hispanic or Latino_low_birth_rate,Unknown or Not Stated_low_birth_rate,Black or African American_low_birth_rate,White_low_birth_rate,Asian_low_birth_rate,More than one race_low_birth_rate,American Indian or Alaska Native_low_birth_rate,Native Hawaiian or Other Pacific Islander_low_birth_rate
0,"Ada County, ID",16001.0,0.057352,0.056522,0.042308,,,0.055351,0.057803,0.075758,,
1,"Adams County, CO",8001.0,0.094523,0.097646,0.088296,0.1,0.106618,0.089228,0.101266,0.076433,,
2,"Adams County, PA",42001.0,0.059977,0.058215,,,,0.057953,,,,
3,"Aiken County, SC",45003.0,0.103391,0.103023,,,0.145228,0.065343,,,,
4,"Alachua County, FL",12001.0,0.106652,0.109038,0.070632,,0.176259,0.072773,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
621,"Yolo County, CA",6113.0,0.057617,0.046888,0.046936,,,0.051249,0.040956,,,
622,"York County, ME",23031.0,0.070667,0.071054,,,,0.062350,,,,
623,"York County, PA",42133.0,0.087653,0.084101,0.103506,,0.122407,0.080314,,0.087432,,
624,"York County, SC",45091.0,0.094486,0.095652,0.053922,,0.158967,0.062941,,,,


In [12]:


# Write the merged table to the interim data folder, creating it if needed.
interim_dir = f'{data_path}/interim'
os.makedirs(interim_dir, exist_ok=True)

# all_df is deliberately not rebound: `all_df = all_df.reset_index()` would add
# one more index column every time this cell is re-run. reset_index() is
# applied to the written copy only, so the CSV keeps its leading 'index' column.
# NOTE(review): that column only holds the row number — confirm whether any
# downstream reader of county_low_birth.csv actually needs it.
all_df.reset_index().to_csv(f'{interim_dir}/county_low_birth.csv', index=False)



<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=96aa4e71-9142-4f1e-8c2a-54e13d3b1860' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>