# CDC: Monkeypox derived timeseries

In [1]:
import pandas as pd
import us
import urllib.request, json
import datetime as dt

In [2]:
# Capture the current Pacific time once so the date stamp and the time stamp
# always describe the same instant (two separate now() calls could straddle
# midnight and disagree).
now_pacific = pd.Timestamp.now(tz='America/Los_Angeles')
today = now_pacific.strftime("%Y-%m-%d")  # ISO date string, e.g. "2022-08-04"
# "%-I" (hour without zero padding) is a platform-specific strftime extension
# that is not available everywhere (e.g. Windows). Format with the portable
# "%I" and strip the leading zero instead; the result is the same.
time = now_pacific.strftime("%I:%M %p").lstrip("0")

---

## Get historical case timeseries

#### May 17-Aug. 3

In [3]:
# Load the previously saved timeseries, order it newest date first and
# renumber the rows 0..n-1.
historical_path = 'data/processed/monkeypox_cases_timeseries_cdc_historical.csv'
historical_src = pd.read_csv(historical_path, parse_dates=['date'])
historical_src = historical_src.sort_values('date', ascending=False)
historical_src = historical_src.reset_index(drop=True)

In [4]:
# Convert the parsed dates to strings so they can be compared with the
# `today` string.
historical_src = historical_src.assign(date=historical_src['date'].astype(str))

In [5]:
# Keep only rows dated strictly before today (string comparison against
# `today`), then renumber the remaining rows.
before_today = historical_src['date'] < today
historical_src = historical_src.loc[before_today].reset_index(drop=True)

---

## CDC Monkeypox

#### Latest totals, aggregated by state

In [6]:
# JSON endpoint fetched below to obtain the per-state case records.
states_url = "https://www.cdc.gov/poxvirus/monkeypox/modules/data-viz/mpx-maps.json"

In [7]:
# Download and parse the JSON payload. A timeout is set so that a stalled
# connection raises an error instead of hanging the run indefinitely.
with urllib.request.urlopen(states_url, timeout=30) as response:
    data = json.loads(response.read().decode())

# Build the frame once the connection has been released; the records are read
# from the payload's 'data' key.
states_src = pd.DataFrame(data['data'])

In [8]:
# Normalise column labels: lower-case them and turn spaces into underscores.
states_src.columns = [label.lower().replace(' ', '_') for label in states_src.columns]

In [9]:
# Discard the 'case_range' column; it is not used in the steps that follow.
states_src = states_src.drop(columns=['case_range'])

In [10]:
# Cast the case counts to integers so they can be summed numerically.
states_src = states_src.astype({'cases': int})

---

#### Aggregate totals among all states to add to timeseries

In [11]:
# Current overall total: the sum of the 'cases' column across every row.
state_case_counts = states_src['cases']
latest_total = state_case_counts.sum()

In [12]:
# Cumulative total recorded for the most recent date in the historical series.
# Positional .iloc[0] replaces the label lookup [0]: after boolean filtering,
# the surviving index labels only include 0 while the frame is sorted
# newest-first with a freshly reset index, so label access could raise KeyError
# if that ordering ever changed.
is_latest_date = historical_src['date'] == historical_src['date'].max()
historical_total = historical_src.loc[is_latest_date, 'cumulative_sum'].iloc[0]

In [13]:
# Increase since the most recent historical entry: the freshly summed total
# minus the last recorded cumulative total.
change = latest_total - historical_total

In [14]:
# Record for today's row: the date stamp, the change since the last entry and
# the new cumulative total.
updated_data = dict(date=today, cases=change, cumulative_sum=latest_total)

In [15]:
# Wrap today's record in a one-row frame whose single row is labelled 0.
single_row_columns = {column: [value] for column, value in updated_data.items()}
updated_data_df = pd.DataFrame(single_row_columns, index=[0])

In [16]:
# Put today's row on top of the history. ignore_index=True renumbers the rows
# 0..n-1; without it both frames contribute a row labelled 0, leaving duplicate
# index labels in the result. concat already returns a new object, so the
# extra .copy() is unnecessary.
df = pd.concat([updated_data_df, historical_src], ignore_index=True)

---

## Exports

In [18]:
# Write the combined series out: overwrite the historical CSV (the same file
# read at the start of the notebook), then the "latest" CSV and JSON copies.
historical_csv = 'data/processed/monkeypox_cases_timeseries_cdc_historical.csv'
latest_csv = 'data/processed/monkeypox_cases_derived_timeseries_latest.csv'
latest_json = 'data/processed/monkeypox_cases_derived_timeseries_latest.json'

for csv_path in (historical_csv, latest_csv):
    df.to_csv(csv_path, index=False)
df.to_json(latest_json, orient='records', indent=4)