Determine Vaccination Rate across the nation - Yi
Vaccination rate by state, age, sex
Population picture (Education level, population density etc)
Find the correlation between vaccination rate and the population picture, discover the driver of the rate

Percent of the Total Population with at Least One Dose by States

In [78]:
import pandas as pd
from sodapy import Socrata # This is the database that contain all the CDC covid vacction information
import plotly.graph_objects as go


In [79]:
client = Socrata("data.cdc.gov", None) #Pulling CDC data from Socrata

# Filter to only 2023/03/15 data, returned as JSON from API / converted to Python list of
# dictionaries by sodapy.
results = client.get("unsk-b7fc", where="date='2023-03-15T00:00:00.000'")

# Convert to pandas DataFrame
results_df = pd.DataFrame.from_records(results)



In [80]:
#Filter only US states and exclue other jurisdictions
state_set = ["AK","AL","AR","AZ","CA","CO","CT","DE","FL","GA","HI","IA","ID","IL","IN","KS","KY","LA","MA","MD","ME","MI","MN","MO","MS","MT","NC","ND","NE","NH","NJ","NM","NV","NY","OH","OK","OR","PA","RI","SC","SD","TN","TX","UT","VA","VT","WA","WI","WV","WY"]
results_df = results_df[results_df["location"].isin(state_set)]
len(results_df)

50

In [81]:
#Transform the column data type to string

for col in results_df.columns:
    results_df[col] = results_df[col].astype(str)


#Generate the heatmap

fig = go.Figure(data=go.Choropleth(
    locations=results_df['location'],
    z=results_df['administered_dose1_pop_pct'].astype(float),
    locationmode='USA-states',
    colorscale='Blues',
    autocolorscale=False,
    marker_line_color='white', # line markers between states
    colorbar_title="Percentage (%)"
))

#Format the map layout

fig.update_layout(
    title_text='Percent of the Total Population with at Least One Dose by States',
    geo = dict(
        scope='usa',
        projection=go.layout.geo.Projection(type = 'albers usa'),
        showlakes=False, # lakes
        ),
    mapbox_style="light",
)

# change background to transparent

fig.update_layout({
    'plot_bgcolor': 'rgba(0,0,0,0)',
    'paper_bgcolor': 'rgba(0,0,0,0)'
})
fig.show()

Population Density

In [82]:
# Read population density file
file = 'population density.csv'
pop = pd.read_csv(file, encoding="ISO-8859-1")
pop = pop[["#","State","Population Density (2022)"]]

pop.head()

Unnamed: 0,#,State,Population Density (2022)
0,1,NJ,1283.4
1,2,RI,1074.3
2,3,MA,919.82
3,4,CT,746.7
4,5,MD,648.84


In [83]:
# Create population density heat map

fig = go.Figure(data=go.Choropleth(
    locations=pop['State'],
    z=pop['Population Density (2022)'].astype(float),
    locationmode='USA-states',
    colorscale='Oranges',
    autocolorscale=False,
    marker_line_color='white', # line markers between states
    colorbar_title="Population Density (per mi²)"
))

#Format the map layout

fig.update_layout(
    title_text='Population Density by States',
    geo = dict(
        scope='usa',
        projection=go.layout.geo.Projection(type = 'albers usa'),
        showlakes=False, # lakes 
        ),
        mapbox_style="light"
)

fig.update_layout({
    'plot_bgcolor': 'rgba(0,0,0,0)',
    'paper_bgcolor': 'rgba(0,0,0,0)'
})

fig.show()

Third Picture - Vaccination rate by Sex

In [84]:
# Filter to only 2023/03/15 data, returned as JSON from API / converted to Python list of
# dictionaries by sodapy.
sex_df = client.get("5i5k-6cmh", where="date='2023-03-15T00:00:00.000'", limit=10000)

# Convert to pandas DataFrame
sex_df = pd.DataFrame.from_records(sex_df)
sex_df.head()

Unnamed: 0,administered_dose1,administered_dose1_pct_agegroup,booster_doses,booster_doses_vax_pct_agegroup,census,date,demographic_category,location,second_booster,second_booster_vax_pct_agegroup,series_complete_pop_pct_agegroup,series_complete_yes
0,56124,85.3,27546.0,56.4,65787,2023-03-15T00:00:00.000,Female_Ages_50-64_yrs,ND,11970.0,43.5,74.3,48863.0
1,559450,81.1,278485.0,55.1,689738,2023-03-15T00:00:00.000,Female_Ages_50-64_yrs,TN,108100.0,38.8,73.3,505455.0
2,4842,6.0,96.0,3.9,80632,2023-03-15T00:00:00.000,Female_Ages_2-4_yrs,KY,,,3.1,2476.0
3,11,0.6,,,1983,2023-03-15T00:00:00.000,Female_Ages_2-4_yrs,VI,,,,
4,91271,95.0,43355.0,55.5,91824,2023-03-15T00:00:00.000,Male_Ages_25-49_yrs,VT,,,85.0,78080.0


In [85]:
ussex_df=sex_df[(sex_df["location"]=="US")&((sex_df["demographic_category"]=="Sex_Male")|(sex_df["demographic_category"]=="Sex_Female"))]

ussex_df["administered_dose1_pct_agegroup"] = ussex_df["administered_dose1_pct_agegroup"].astype(float)
# ussex_df[["demographic_category", "administered_dose1_pct_agegroup"]]





A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



In [86]:
#Use plotly to draw the picture

import plotly.express as px
fig = px.bar(ussex_df, 
             x="demographic_category", 
             y=["administered_dose1_pct_agegroup"], 
             text_auto='.2s',
             title="Vaccination rate by Sex in US",
             labels={'demographic_category':'Gender',
                     "value":"Percentage %",
                     "variable":"Vaccination Rate"}
             )

fig.update_layout({
    'plot_bgcolor': 'rgba(0,0,0,0)',
    'paper_bgcolor': 'rgba(0,0,0,0)'
})

fig.show()

Fourth Picture - Geomap by sex difference

In [87]:
# Filter only demographic_category with Sex_Male and Sex_Female
allsex_df = sex_df[(sex_df["demographic_category"]=="Sex_Male")|(sex_df["demographic_category"]=="Sex_Female")]

# Only choose column "location","demographic_category", "administered_dose1_pct_agegroup"
allsex_df = allsex_df[["location","demographic_category", "administered_dose1_pct_agegroup"]]

# Change the data type of administered_dose1_pct_agegroup from string to float
allsex_df["administered_dose1_pct_agegroup"] = allsex_df["administered_dose1_pct_agegroup"].astype(float)

# Pivot Sex Male data and Sex Fale data
allsex_df = allsex_df.pivot(index = "location",columns="demographic_category",values="administered_dose1_pct_agegroup")

# Calculate the difference for each state
rate_difference = allsex_df["Sex_Male"] - allsex_df["Sex_Female"]
allsex_df["rate_difference"] = rate_difference

#Reset location as column
allsex_df = allsex_df.reset_index()

allsex_df.sort_values(by=["rate_difference"],ascending=True).head()



demographic_category,location,Sex_Female,Sex_Male,rate_difference
48,SD,86.4,77.8,-8.6
32,NC,95.0,86.9,-8.1
43,PA,91.8,84.2,-7.6
35,NH,90.8,83.8,-7.0
19,KS,79.1,72.8,-6.3


In [88]:
#Generate the heatmap

fig = go.Figure(data=go.Choropleth(
    locations=allsex_df['location'],
    z=allsex_df['rate_difference'].astype(float),
    locationmode='USA-states',
    # colorscale='Blues',
    autocolorscale=False,
    marker_line_color='white', # line markers between states
    colorbar_title="Percentage (%)"
))

#Format the map layout

fig.update_layout(
    # title_text='Covid Vaccine Rate Differece by Sex',
    geo = dict(
        scope='usa',
        projection=go.layout.geo.Projection(type = 'albers usa'),
        showlakes=False, # lakes 
        ),
        mapbox_style="light"
)

fig.update_layout({
    'plot_bgcolor': 'rgba(0,0,0,0)',
    'paper_bgcolor': 'rgba(0,0,0,0)'
})

fig.show()

In [104]:
# Filter Age group
age_group = ["Ages_<2yrs","Ages_<5yrs","Ages_2-4_yrs","Ages_5-11_yrs","Ages_12-17_yrs","Ages_18-24_yrs","Ages_25-39_yrs","Ages_40-49_yrs","Ages_50-64_yrs","Ages_65-74_yrs","Ages_75+_yrs"]
US = ["US"]
age_df = sex_df[sex_df["demographic_category"].isin(age_group)]
age_df = age_df[age_df["location"].isin(US)]

age_df.head()

Unnamed: 0,administered_dose1,administered_dose1_pct_agegroup,booster_doses,booster_doses_vax_pct_agegroup,census,date,demographic_category,location,second_booster,second_booster_vax_pct_agegroup,series_complete_pop_pct_agegroup,series_complete_yes
56,1899854,9.6,77840,7.6,19743791,2023-03-15T00:00:00.000,Ages_<5yrs,US,,,5.2,1018411
503,36260369,88.9,14985616,48.0,40789796,2023-03-15T00:00:00.000,Ages_40-49_yrs,US,,,76.6,31228362
974,60917319,95.0,30919776,58.0,63659835,2023-03-15T00:00:00.000,Ages_50-64_yrs,US,13468830.0,43.6,83.8,53323300
1014,626824,8.2,27643,8.6,7674438,2023-03-15T00:00:00.000,Ages_<2yrs,US,,,4.2,319611
1316,34977453,95.0,22124139,71.8,31886229,2023-03-15T00:00:00.000,Ages_65-74_yrs,US,12931944.0,58.5,95.0,30794789


In [113]:
#Choose columns needed
byage_df = age_df[["location","demographic_category", "administered_dose1_pct_agegroup"]]

# Change the data type of administered_dose1_pct_agegroup from string to float
byage_df["administered_dose1_pct_agegroup"] = byage_df["administered_dose1_pct_agegroup"].astype(float)

# Change column order
# byage_df= byage_df.set_index(["demographic_category"])

byage_df = pd.DataFrame(byage_df, index=[1014,1852,56,1435,1647,1549,2308,503,974,1316,2541])

byage_df



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



Unnamed: 0,location,demographic_category,administered_dose1_pct_agegroup
1014,US,Ages_<2yrs,8.2
1852,US,Ages_2-4_yrs,10.5
56,US,Ages_<5yrs,9.6
1435,US,Ages_5-11_yrs,39.9
1647,US,Ages_12-17_yrs,72.1
1549,US,Ages_18-24_yrs,82.1
2308,US,Ages_25-39_yrs,83.2
503,US,Ages_40-49_yrs,88.9
974,US,Ages_50-64_yrs,95.0
1316,US,Ages_65-74_yrs,95.0


In [135]:
import plotly.graph_objects as go


fig = px.bar(byage_df, 
             x = "demographic_category", 
             y = "administered_dose1_pct_agegroup", 
            #  color="nation", 
             text_auto=True,
             )

fig.update_layout(
    # title='US Vaccination Rate by Age Group',
    # labels= {"demographic_category":"Age Group"},
    xaxis_tickfont_size=14,
    yaxis=dict(
        title='%',
        titlefont_size=16,
        tickfont_size=14,
    ),
    legend=dict(
        x=0,
        y=1.0,
        bgcolor='rgba(255, 255, 255, 0)',
        bordercolor='rgba(255, 255, 255, 0)'
    ),
    barmode='group',
    bargap=0.15, # gap between bars of adjacent location coordinates.
    bargroupgap=0.1 # gap between bars of the same location coordinate.
)
fig.update_layout({
    'plot_bgcolor': 'rgba(0,0,0,0)',
    'paper_bgcolor': 'rgba(0,0,0,0)'
})

fig.show()

In [133]:
#Choose columns needed
doses_df = age_df[["location","demographic_category", "booster_doses_vax_pct_agegroup"]]

# Change the data type of administered_dose1_pct_agegroup from string to float
doses_df["booster_doses_vax_pct_agegroup"] = doses_df["booster_doses_vax_pct_agegroup"].astype(float)

# Change column order
doses_df = pd.DataFrame(doses_df, index=[1014,1852,56,1435,1647,1549,2308,503,974,1316,2541])

import plotly.graph_objects as go


fig = px.bar(doses_df, 
             x = "demographic_category", 
             y = "booster_doses_vax_pct_agegroup", 
            #  color="nation", 
             text_auto=True,
             )

fig.update_layout(
    # title='US Booster Rate by Age Group',
    xaxis_tickfont_size=14,
    yaxis=dict(
        title='%',
        titlefont_size=16,
        tickfont_size=14,
    ),
    legend=dict(
        x=0,
        y=1.0,
        bgcolor='rgba(255, 255, 255, 0)',
        bordercolor='rgba(255, 255, 255, 0)'
    ),
    barmode='group',
    bargap=0.15, # gap between bars of adjacent location coordinates.
    bargroupgap=0.1 # gap between bars of the same location coordinate.
)

fig.update_layout({
    'plot_bgcolor': 'rgba(0,0,0,0)',
    'paper_bgcolor': 'rgba(0,0,0,0)'
})


fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

