## What is in the notebook:

This notebook gathers state-level maternal mortality data, health-insurance data, and income data for the years 2010-2019. It also gathers various natality data for the same period (2010-2019).

In [2]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

### Loading Data

#### CDC Maternal mortality data

In [14]:
# Load the CDC maternal mortality extract and keep only the join keys
# (State, Year) plus the death and population counts.
# The path uses forward slashes so it resolves on Windows, macOS and Linux;
# the previous backslash-separated path only worked on Windows.
maternal_mortality_df = pd.read_csv(
    "../data/cdc_data/all_states_all_codes_by_year.csv"
)[["State", "Year", "Deaths", "Population"]]

# Bare last expression -> rich (truncated head/tail + shape) display instead of print().
maternal_mortality_df

          State  Year  Deaths  Population
0       Alabama  1999      10     1801848
1       Alabama  2005      12     1856905
2       Alabama  2006      10     1880993
3       Alabama  2015      12     1929191
4       Alabama  2016      35     1920571
...         ...   ...     ...         ...
1237  Wisconsin  1984       3     2421197
1238  Wisconsin  1985       4     2427815
1239  Wisconsin  1986       3     2430990
1240  Wisconsin  1987       3     2442233
1241  Wisconsin  1988       6     2463839

[1242 rows x 4 columns]


#### ACS Income Data

This data frame needs to be pivoted.

In [4]:
# Load the ACS income table. Forward slashes keep the relative path portable
# across operating systems (the backslash form only resolved on Windows).
income_raw = pd.read_csv("../data/acs_income_by_year/all_income_from_2010_to_2023.csv")

# Drop the first column by position, exactly as before
# (presumably a saved row index -- TODO confirm against the CSV).
income_raw = income_raw.iloc[:, 1:]

# Wide pivot: one row per Year, columns are a two-level index whose outer
# level is every remaining source column and whose inner level is `label`.
income_df_pivot = income_raw.pivot(index="Year", columns="label")

# Move the outer column level into the row index so each row is one
# (Year, State) pair with one column per income bracket.
# `future_stack=True` opts into the new stack implementation and avoids the
# FutureWarning raised by the legacy one. The new implementation neither
# drops all-NaN rows nor sorts the result, so both are done explicitly to
# reproduce the previous output.
income_df = (
    income_df_pivot
    .stack(level=0, future_stack=True)
    .dropna(how="all")
    .sort_index()
    .rename_axis(index=["Year", "State"])  # the stacked level is unnamed
    .reset_index()
)
income_df.head()


  income_df = income_df_pivot.stack(level=0)


label,Year,State,"$10,000 to $14,999","$100,000 to $149,999","$15,000 to $24,999","$150,000 to $199,999","$200,000 or more","$25,000 to $34,999","$35,000 to $49,999","$50,000 to $74,999","$75,000 to $99,999","Less than $10,000",Total
0,2010,Alabama,291136,462295,568262,155190,110550,515050,683128,805464,487371,418610,4499250
1,2010,Alaska,24791,122561,46620,45879,36225,52474,73631,121399,89373,23792,637189
2,2010,Arizona,301174,694479,620656,213517,196895,663570,884336,1119302,695469,414482,5802329
3,2010,Arkansas,186755,243784,379359,68553,56348,371449,442926,508686,289906,236611,2784750
4,2010,California,1486323,4692266,2969203,2048096,2047855,2756878,3942244,5360641,3913537,1690857,30895516


#### Natality Data

In [21]:
# Read the state natality table from the notebook's working directory.
# (The variable keeps its original spelling, `natility_df`, because the
# merge cell refers to it by that name.)
natility_df = pd.read_csv(filepath_or_buffer="StateNatalityDF.csv")

# Preview the first five rows.
natility_df.head(n=5)

Unnamed: 0,State,Year,Births,ChronicHypertensionBirths,DiabetesBirths,EnclampsiaBirths,Pregnancy-AssociatedHypertensionBirths,TobaccoUseBirths,HospitalBirths,BirthCenterBirths,...,NotReportedInitialCare,Under20Weeks,20-27Weeks,28-31Weeks,32-35Weeks,36Weeks,37-39Weeks,40Weeks,41Weeks,42PlusWeeks
0,Arizona,2010,87477,769.0,3694.0,769.0,3441.0,0.0,86346.0,437,...,87477.0,42.0,465,667.0,3579,3697.0,55528,18468,4671.0,337
1,Arizona,2011,85543,711.0,4014.0,772.0,3599.0,0.0,84380.0,454,...,85543.0,41.0,546,618.0,3421,3354.0,54646,18076,4482.0,321
2,Arizona,2012,86441,807.0,4452.0,634.0,3903.0,0.0,85177.0,505,...,86441.0,54.0,480,688.0,3476,3290.0,55425,18265,4441.0,271
3,Arizona,2013,85600,956.0,4919.0,604.0,4791.0,0.0,84450.0,459,...,85600.0,49.0,437,641.0,3430,3218.0,54667,18067,4787.0,261
4,Arizona,2014,86887,1039.0,6126.0,341.0,4567.0,4675.0,85628.0,511,...,0.0,35.0,474,664.0,3440,3206.0,55382,17920,5388.0,290


#### Merging Data Sets

In [22]:
# Combine the three tables on (Year, State). Both joins are inner joins, so
# only state/year pairs present in all three sources are kept.
merged_df = (
    maternal_mortality_df
    .merge(income_df, how="inner", on=["Year", "State"])
    .merge(natility_df, how="inner", on=["Year", "State"])
)

# (rows, columns) of the combined table.
merged_df.shape

(150, 54)