# CDC: Monkeypox cases, by state

In [2]:
import pandas as pd
import us
import urllib.request, json
import datetime as dt

In [3]:
# Take one snapshot of the current Pacific time so the date and the clock time
# always describe the same instant (two separate now() calls can straddle a
# minute or day boundary).
now_pacific = pd.Timestamp.now(tz='America/Los_Angeles')

# ISO date, e.g. "2022-08-05".
today = now_pacific.strftime("%Y-%m-%d")

# 12-hour clock without a leading zero, e.g. "9:05 AM". "%-I" is a glibc-only
# extension that raises on Windows, so format with "%I" and strip the padding.
time = now_pacific.strftime("%I:%M %p").lstrip("0")

---

## State populations

#### Figures from ACS 2020, 5-year estimates

In [4]:
# Load the state population table (ACS 2020, 5-year estimates) from the raw data folder.
states_pop = pd.read_csv(filepath_or_buffer='data/raw/states_population.csv')

#### Map FIPS codes to each state

In [5]:
# Look up each state name in the `us` package's name -> FIPS mapping.
# Names with no entry in the mapping end up as NaN in the new column.
pop_name_to_fips = us.states.mapping('name', 'fips')
states_pop['state_fips'] = states_pop['state'].map(pop_name_to_fips)

---

## CDC Monkeypox

#### Aggregated by state

In [6]:
# CDC endpoint serving the state-level monkeypox JSON that the next cell downloads.
states_url = 'https://www.cdc.gov/poxvirus/monkeypox/modules/data-viz/mpx-maps.json'

In [7]:
# Download the CDC state-level JSON feed and load its 'data' records into a DataFrame.
# The timeout keeps a stalled connection from hanging the notebook indefinitely.
with urllib.request.urlopen(states_url, timeout=30) as response:
    # json.load reads the raw bytes and detects the encoding itself, so a
    # BOM-prefixed payload no longer breaks parsing.
    data = json.load(response)

# Build the frame once the connection is closed; it only needs the parsed payload.
states_src = pd.DataFrame(data['data'])

In [8]:
# Normalize the column labels to lower case so later cells can refer to them consistently.
states_src.columns = states_src.columns.map(str.lower)

#### Map FIPS codes and AP abbreviations to each state

In [9]:
# Derive two lookup columns from the state name using the `us` package:
# the FIPS code and the AP-style abbreviation. Unmatched names become NaN.
src_state_names = states_src['state']
states_src['state_fips'] = src_state_names.map(us.states.mapping('name', 'fips'))
states_src['state_ap'] = src_state_names.map(us.states.mapping('name', 'ap_abbr'))

---

## Merge

#### Population and cases to create a rate

In [10]:
# Join case counts to populations on both the FIPS code and the state name.
# This is an inner join (the pandas default): rows present in only one table are dropped.
df = states_src.merge(states_pop, how='inner', on=['state_fips', 'state'])

In [11]:
# Cast the case counts to integers before they are used in arithmetic below.
case_counts = df['cases']
df['cases'] = case_counts.astype(int)

In [13]:
# Rate = cases per resident, scaled to one million residents and rounded to two decimals.
cases_per_resident = df['cases'] / df['pop_acs_2020_5tr']
df['cases_per_million'] = (cases_per_resident * 1_000_000).astype(float).round(2)

In [14]:
# Remove the 'case range' column from the merged table in place.
df.drop(columns=['case range'], inplace=True)

In [15]:
# Display the total number of cases across all rows of the merged table.
df['cases'].sum()

7099

---

## Exports

In [None]:
# df.to_csv(f'data/processed/monkeypox_cases_states_cdc_{today}.csv', index=False)
# df.to_json(f'data/processed/monkeypox_cases_states_cdc_{today}.json', orient='records', indent=4)