In [2]:
# Dependencies
import json
import matplotlib.pyplot as plt
import requests
from pprint import pprint
from scipy import stats
import pandas as pd
from sodapy import Socrata

import numpy as np
from IPython.display import display

In [3]:
# Specify URL
client = Socrata("data.cdc.gov", None)

# Make & store response
response = client.get("n8mc-b4w4", limit=1000000)



In [20]:
# Make DataFrame
response_df = pd.DataFrame.from_records(response)
response_df.head()

Unnamed: 0,case_month,res_state,state_fips_code,res_county,county_fips_code,age_group,sex,race,ethnicity,case_positive_specimen,case_onset_interval,process,exposure_yn,current_status,symptom_status,hosp_yn,icu_yn,death_yn,underlying_conditions_yn
0,2021-12,PA,42,LACKAWANNA,42069,0 - 17 years,Male,White,Hispanic/Latino,0.0,0.0,Missing,Missing,Laboratory-confirmed case,Symptomatic,Unknown,Unknown,Unknown,
1,2021-08,FL,12,MIAMI-DADE,12086,18 to 49 years,Male,White,Hispanic/Latino,,,Missing,Missing,Laboratory-confirmed case,Missing,No,Missing,No,
2,2021-08,IL,17,WILLIAMSON,17199,18 to 49 years,Female,White,Non-Hispanic/Latino,0.0,0.0,Missing,Missing,Laboratory-confirmed case,Missing,Missing,Missing,Missing,
3,2022-03,AL,1,BALDWIN,1003,50 to 64 years,Female,White,Non-Hispanic/Latino,,,Missing,Missing,Laboratory-confirmed case,Missing,Missing,Missing,Missing,
4,2022-07,NC,37,WILKES,37193,65+ years,Female,White,Non-Hispanic/Latino,1.0,0.0,Missing,Missing,Probable Case,Symptomatic,No,Unknown,No,Yes


In [21]:
# Create variable
case_count_by_state = response_df["res_state"].groupby(response_df["res_state"]).count()

In [22]:
# Sanity check
total_cases = case_count_by_state.sum()
total_cases

1000000

In [23]:
# Total unique states
total_states = case_count_by_state.nunique()
total_states

45

In [46]:
# Create Df showing cases in each state
cases_count_by_state_df = response_df[['res_state']]

cases_count_by_state_df['case_count'] = 0
cases_count_by_state_df = cases_count_by_state_df.groupby(['res_state'])['case_count'].agg('count').reset_index()
cases_count_by_state_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cases_count_by_state_df['case_count'] = 0


Unnamed: 0,res_state,case_count
0,AK,3086
1,AL,28118
2,AR,14124
3,AZ,15427
4,CA,55534


In [47]:
# Create variable to hold cases by month per state

cases_count_by_state_month_df = response_df[['res_state', 'case_month']]

cases_count_by_state_month_df['case_count'] = 0
cases_count_by_state_month_df = cases_count_by_state_month_df.groupby(['res_state','case_month'])['case_count'].agg('count').reset_index()
cases_count_by_state_month_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cases_count_by_state_month_df['case_count'] = 0


Unnamed: 0,res_state,case_month,case_count
0,AK,2020-06,11
1,AK,2020-07,41
2,AK,2020-08,74
3,AK,2020-09,112
4,AK,2020-10,293


In [48]:
us_state_to_abbrev = {
    "Alabama": "AL",
    "Alaska": "AK",
    "Arizona": "AZ",
    "Arkansas": "AR",
    "California": "CA",
    "Colorado": "CO",
    "Connecticut": "CT",
    "Delaware": "DE",
    "Florida": "FL",
    "Georgia": "GA",
    "Hawaii": "HI",
    "Idaho": "ID",
    "Illinois": "IL",
    "Indiana": "IN",
    "Iowa": "IA",
    "Kansas": "KS",
    "Kentucky": "KY",
    "Louisiana": "LA",
    "Maine": "ME",
    "Maryland": "MD",
    "Massachusetts": "MA",
    "Michigan": "MI",
    "Minnesota": "MN",
    "Mississippi": "MS",
    "Missouri": "MO",
    "Montana": "MT",
    "Nebraska": "NE",
    "Nevada": "NV",
    "New Hampshire": "NH",
    "New Jersey": "NJ",
    "New Mexico": "NM",
    "New York": "NY",
    "North Carolina": "NC",
    "North Dakota": "ND",
    "Ohio": "OH",
    "Oklahoma": "OK",
    "Oregon": "OR",
    "Pennsylvania": "PA",
    "Rhode Island": "RI",
    "South Carolina": "SC",
    "South Dakota": "SD",
    "Tennessee": "TN",
    "Texas": "TX",
    "Utah": "UT",
    "Vermont": "VT",
    "Virginia": "VA",
    "Washington": "WA",
    "West Virginia": "WV",
    "Wisconsin": "WI",
    "Wyoming": "WY",
    "District of Columbia": "DC",
    "American Samoa": "AS",
    "Guam": "GU",
    "Northern Mariana Islands": "MP",
    "Puerto Rico": "PR",
    "United States Minor Outlying Islands": "UM",
    "U.S. Virgin Islands": "VI",
}
    
# invert the dictionary
abbrev_to_us_state = dict(map(reversed, us_state_to_abbrev.items()))

In [49]:
abr_df = pd.DataFrame(abbrev_to_us_state.items(), columns=["res_state", "jurisdiction"])
abr_df.head()

Unnamed: 0,res_state,jurisdiction
0,AL,Alabama
1,AK,Alaska
2,AZ,Arizona
3,AR,Arkansas
4,CA,California


In [50]:
cases_df = cases_count_by_state_df.merge(abr_df, how="inner")
cases_df.head()

Unnamed: 0,res_state,case_count,jurisdiction
0,AK,3086,Alaska
1,AL,28118,Alabama
2,AR,14124,Arkansas
3,AZ,15427,Arizona
4,CA,55534,California


In [51]:
#read moderna and pfizer data
moderna_df = pd.read_csv("Resources/cdc-moderna-covid-19-vaccine-distribution-by-state.csv")
pfizer_df = pd.read_csv("Resources/cdc-pfizer-covid-19-vaccine-distribution-by-state.csv")

In [52]:
moderna_df.head()

Unnamed: 0,jurisdiction,week_of_allocations,_1st_dose_allocations,_2nd_dose_allocations
0,Connecticut,2021-06-21T00:00:00.000,41220,41220
1,Maine,2021-06-21T00:00:00.000,15800,15800
2,Massachusetts,2021-06-21T00:00:00.000,79500,79500
3,New Hampshire,2021-06-21T00:00:00.000,15800,15800
4,Rhode Island,2021-06-21T00:00:00.000,12480,12480


In [53]:
pfizer_df.head()

Unnamed: 0,jurisdiction,week_of_allocations,_1st_dose_allocations,_2nd_dose_allocations
0,Connecticut,2021-06-21T00:00:00.000,54360,54360
1,Maine,2021-06-21T00:00:00.000,21420,21420
2,Massachusetts,2021-06-21T00:00:00.000,104580,104580
3,New Hampshire,2021-06-21T00:00:00.000,21420,21420
4,Rhode Island,2021-06-21T00:00:00.000,17280,17280


In [54]:
vaccines_merge_df = moderna_df.merge(pfizer_df, how="left")
vaccines_merge_df.head()

Unnamed: 0,jurisdiction,week_of_allocations,_1st_dose_allocations,_2nd_dose_allocations
0,Connecticut,2021-06-21T00:00:00.000,41220,41220
1,Maine,2021-06-21T00:00:00.000,15800,15800
2,Massachusetts,2021-06-21T00:00:00.000,79500,79500
3,New Hampshire,2021-06-21T00:00:00.000,15800,15800
4,Rhode Island,2021-06-21T00:00:00.000,12480,12480


In [55]:
vaccines_merge_clean_df = vaccines_merge_df[["jurisdiction", "_1st_dose_allocations", "_2nd_dose_allocations"]]
vaccines_merge_clean_df.head()

Unnamed: 0,jurisdiction,_1st_dose_allocations,_2nd_dose_allocations
0,Connecticut,41220,41220
1,Maine,15800,15800
2,Massachusetts,79500,79500
3,New Hampshire,15800,15800
4,Rhode Island,12480,12480


In [56]:
total_jurisdictions = vaccines_merge_clean_df.groupby(['jurisdiction'])['_1st_dose_allocations', '_2nd_dose_allocations'].agg('sum').reset_index()
total_jurisdictions.head()

  total_jurisdictions = vaccines_merge_clean_df.groupby(['jurisdiction'])['_1st_dose_allocations', '_2nd_dose_allocations'].agg('sum').reset_index()


Unnamed: 0,jurisdiction,_1st_dose_allocations,_2nd_dose_allocations
0,Alabama,834960,834960
1,Alaska,151860,151860
2,American Samoa,5000,0
3,Arizona,1183560,1183560
4,Arkansas,506920,506920


In [57]:
cases_and_vaccinations_df = cases_df.merge(total_jurisdictions, how="inner")
cases_and_vaccinations_df.head()

Unnamed: 0,res_state,case_count,jurisdiction,_1st_dose_allocations,_2nd_dose_allocations
0,AK,3086,Alaska,151860,151860
1,AL,28118,Alabama,834960,834960
2,AR,14124,Arkansas,506920,506920
3,AZ,15427,Arizona,1183560,1183560
4,CA,55534,California,6658300,6658300


In [59]:
cases_and_vaccinations_clean_df = cases_and_vaccinations_df[["res_state", "case_count", "_1st_dose_allocations","_2nd_dose_allocations"]]
cases_and_vaccinations_clean_df

Unnamed: 0,res_state,case_count,_1st_dose_allocations,_2nd_dose_allocations
0,AK,3086,151860,151860
1,AL,28118,834960,834960
2,AR,14124,506920,506920
3,AZ,15427,1183560,1183560
4,CA,55534,6658300,6658300
5,CO,16971,946800,946800
6,CT,6170,626920,626920
7,DC,1543,126120,126120
8,DE,4629,166240,166240
9,FL,50906,3642160,3642160


In [60]:
cases_and_vaccinations_clean_df.to_csv('out.csv')