In [13]:
import pandas as pd
import plotly.express as px

In [7]:
us_births = pd.read_csv('https://raw.githubusercontent.com/avt1993/US-Births-2016-2021/main/us_births_2016_2021.csv')
us_births.head()
us_births.dtypes

State                             object
State Abbreviation                object
Year                               int64
Gender                            object
Education Level of Mother         object
Education Level Code               int64
Number of Births                   int64
Average Age of Mother (years)    float64
Average Birth Weight (g)         float64
dtype: object

In [3]:
education_level = us_births['Education Level of Mother'].unique().tolist()
education_level

['8th grade or less',
 '9th through 12th grade with no diploma',
 'High school graduate or GED completed',
 'Some college credit, but not a degree',
 'Associate degree (AA, AS)',
 "Bachelor's degree (BA, AB, BS)",
 "Master's degree (MA, MS, MEng, MEd, MSW, MBA)",
 'Doctorate (PhD, EdD) or Professional Degree (MD, DDS, DVM, LLB, JD)',
 'Unknown or Not Stated']

In [6]:
year_selected = 2020
state_selected = "California"
ed_level_code_selected = 1
grouped_df = us_births[us_births['Education Level Code'] == 1]
filtered_df = us_births[(us_births['State'] == state_selected) & (us_births['Year'] == year_selected) & (us_births['Education Level Code'] == ed_level_code_selected)]

new_df = us_births[us_births['Education Level Code'] == 1].groupby(['State', 'Year'])['Number of Births'].sum().reset_index(name = 'Total Births')
new_df


Unnamed: 0,State,Year,Total Births
0,Alabama,2016,2240
1,Alabama,2017,1990
2,Alabama,2018,1856
3,Alabama,2019,2350
4,Alabama,2020,2294
...,...,...,...
301,Wyoming,2017,80
302,Wyoming,2018,80
303,Wyoming,2019,59
304,Wyoming,2020,63


In [15]:
state_condition = us_births['State'] == 'Alabama'
year_condition = us_births['Year'].between(2017, 2020)
ed_condition = us_births['Education Level of Mother'] == '8th grade or less'
filter_df = us_births[state_condition & year_condition & ed_condition].groupby('Year')['Number of Births'].sum().reset_index()
filter_df

Unnamed: 0,Year,Number of Births
0,2017,1990
1,2018,1856
2,2019,2350
3,2020,2294


In [16]:
fig = px.bar(filter_df, x = 'Year', y = 'Number of Births')

fig.update_layout(
    title='Number of Births by Year'
)
fig.show()

In [26]:
us_births = us_births.rename(columns = {'State Abbreviation ': 'State_Code', 'Average Age of Mother (years)': 'Avg. Age of Mother', 'Average Birth Weight (g)' : 'Avg. Birth Weight in Grams'})
us_births.head(30)

Unnamed: 0,State,State Abbreviation,Year,Gender,Education Level of Mother,Education Level Code,Number of Births,Avg. Age of Mother,Avg. Birth Weight in Grams
0,Alabama,AL,2016,F,8th grade or less,1,1052,27.8,3116.9
1,Alabama,AL,2016,F,9th through 12th grade with no diploma,2,3436,24.1,3040.0
2,Alabama,AL,2016,F,High school graduate or GED completed,3,8777,25.4,3080.0
3,Alabama,AL,2016,F,"Some college credit, but not a degree",4,6453,26.7,3121.9
4,Alabama,AL,2016,F,"Associate degree (AA, AS)",5,2227,28.9,3174.3
5,Alabama,AL,2016,F,"Bachelor's degree (BA, AB, BS)",6,4453,30.3,3239.0
6,Alabama,AL,2016,F,"Master's degree (MA, MS, MEng, MEd, MSW, MBA)",7,1910,32.0,3263.5
7,Alabama,AL,2016,F,"Doctorate (PhD, EdD) or Professional Degree (M...",8,487,33.1,3196.7
8,Alabama,AL,2016,F,Unknown or Not Stated,-9,65,27.7,3083.9
9,Alabama,AL,2016,M,8th grade or less,1,1188,27.6,3232.9
