# CDC: Monkeypox derived timeseries

In [1]:
import pandas as pd
import us
import urllib.request, json
import datetime as dt

In [2]:
# Read the Pacific-time clock once so `today` and `time` always describe the
# same instant (two separate now() calls could straddle midnight).
now_pacific = pd.Timestamp.now(tz='America/Los_Angeles')

# ISO date string; compared lexicographically against the timeseries dates below.
today = now_pacific.strftime("%Y-%m-%d")

# 12-hour clock without a leading zero, e.g. "9:05 AM". "%-I" is a
# platform-specific strftime extension, so strip the padding manually instead.
time = now_pacific.strftime("%I:%M %p").lstrip("0")

---

## Get historical case timeseries

#### May 17-Aug. 3 baseline, extended by one row on each run of this notebook

In [3]:
# Load the stored timeseries, newest date first, with a clean 0..n-1 index.
historical_src = (
    pd.read_csv(
        'data/processed/monkeypox_cases_timeseries_cdc_historical.csv',
        parse_dates=['date'],
    )
    .sort_values('date', ascending=False)
    .reset_index(drop=True)
)

In [4]:
# Cast the parsed dates to strings so they can be compared with the
# `today` string in the next step.
historical_src = historical_src.astype({'date': str})

In [5]:
# Keep only rows dated strictly before today, so a same-day re-run does not
# carry a previously written row for today forward.
historical_src = (
    historical_src
    .loc[lambda frame: frame['date'].lt(today)]
    .reset_index(drop=True)
)

In [6]:
# Display the filtered historical timeseries for inspection.
historical_src

Unnamed: 0,date,cases,cumulative_sum
0,2022-08-09,558,9489
1,2022-08-08,1424,8931
2,2022-08-07,0,7507
3,2022-08-06,408,7507
4,2022-08-05,483,7099
...,...,...,...
79,2022-05-21,2,4
80,2022-05-20,1,2
81,2022-05-19,0,1
82,2022-05-18,0,1


---

## CDC Monkeypox

#### Latest totals, aggregated by state

In [26]:
# CDC JSON endpoint that is fetched below for the per-location case totals.
states_url = (
    'https://www.cdc.gov/poxvirus/monkeypox/modules/data-viz/'
    'mpx_US_Total_databite.json'
)

In [27]:
# Fetch the CDC payload. The timeout keeps a stalled connection from hanging
# the notebook indefinitely (urlopen otherwise waits on the global default).
with urllib.request.urlopen(states_url, timeout=30) as response:
    data = json.loads(response.read().decode())

# The per-location records live under the payload's top-level 'data' key.
states_src = pd.DataFrame(data['data'])

In [28]:
# Normalise column labels to snake_case: lower-case, spaces -> underscores.
states_src.columns = states_src.columns.map(
    lambda label: label.lower().replace(' ', '_')
)

In [29]:
# Remove the 'case_range' column, which is not used below.
# Reassigning with errors='ignore' (instead of an inplace drop) keeps this cell
# safe to re-run: a second execution no longer raises KeyError.
states_src = states_src.drop(columns=['case_range'], errors='ignore')

In [30]:
# Cast the case counts to integers so they can be summed.
states_src = states_src.astype({'cases': int})

In [38]:
# Exclude the 'Total' and 'Non-US Resident' rows; everything else is kept.
excluded_locations = ['Total', 'Non-US Resident']
states = states_src[~states_src['location'].isin(excluded_locations)].copy()

---

#### Aggregate totals among all states to add to timeseries

In [39]:
# Latest cumulative count: the sum of cases across the retained locations.
latest_total = states.loc[:, 'cases'].sum()

In [40]:
# Display the summed case count as a sanity check.
latest_total

10389

In [13]:
# Cumulative total recorded on the most recent historical date.
latest_historical_rows = historical_src.loc[
    historical_src['date'] == historical_src['date'].max(), 'cumulative_sum'
]
if latest_historical_rows.empty:
    raise ValueError('historical_src is empty; cannot determine the previous cumulative total')

# Take the first match by position. A label lookup (`[0]`) only works while
# the newest row happens to carry index label 0.
historical_total = latest_historical_rows.iloc[0]

In [14]:
# Cases added since the last historical row: the current summed total minus
# the most recent stored cumulative total.
change = latest_total - historical_total

In [15]:
# Today's record, using the same fields as the historical timeseries.
updated_data = dict(
    date=today,
    cases=change,
    cumulative_sum=latest_total,
)

In [16]:
# Wrap today's record in a single-row frame (row label 0) ready for concatenation.
updated_data_df = pd.DataFrame([updated_data])

In [17]:
# Put today's row on top of the historical rows (newest first).
# ignore_index=True renumbers the result 0..n-1; without it both the new row
# and the first historical row carry index label 0, which breaks label-based
# lookups. concat already returns a new frame, so no extra .copy() is needed.
df = pd.concat([updated_data_df, historical_src], ignore_index=True)

In [18]:
# Display the combined timeseries: today's row followed by the historical rows.
df

Unnamed: 0,date,cases,cumulative_sum
0,2022-08-11,11295,20784
0,2022-08-09,558,9489
1,2022-08-08,1424,8931
2,2022-08-07,0,7507
3,2022-08-06,408,7507
...,...,...,...
79,2022-05-21,2,4
80,2022-05-20,1,2
81,2022-05-19,0,1
82,2022-05-18,0,1


---

## Exports

In [18]:
# Write the combined timeseries to both CSV targets. The first path is the
# same file that is read in as the historical source at the top of the notebook.
for csv_path in (
    'data/processed/monkeypox_cases_timeseries_cdc_historical.csv',
    'data/processed/monkeypox_cases_derived_timeseries_latest.csv',
):
    df.to_csv(csv_path, index=False)

# JSON copy of the same rows, one object per record.
df.to_json(
    'data/processed/monkeypox_cases_derived_timeseries_latest.json',
    orient='records',
    indent=4,
)