# CDC: Monkeypox cases, by country

In [1]:
import pandas as pd
import us
import urllib.request, json
import datetime as dt

In [2]:
# Raise pandas' display limits so wide/long frames are shown in full
# when inspected in the notebook.
for _option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(_option, 1000)

In [3]:
# Read the Pacific-time clock once so the date stamp and the time stamp
# always describe the same instant; two independent now() calls could fall
# on opposite sides of midnight and disagree.
_now_pacific = pd.Timestamp.now(tz='America/Los_Angeles')

# ISO date (YYYY-MM-DD), interpolated into the dated export file names.
today = _now_pacific.strftime("%Y-%m-%d")

# 12-hour clock without a leading zero, e.g. "9:05 AM".
# NOTE(review): `%-I` is a platform-specific strftime extension (works with
# glibc/BSD, not with the Windows C runtime) -- confirm the run environment.
# NOTE(review): the name `time` matches the stdlib module name; it is kept
# unchanged here because other cells may reference it.
time = _now_pacific.strftime("%-I:%M %p")

---

## Country populations

#### Figures from World Bank

In [5]:
# Country population lookup table (World Bank figures, per the heading above).
# Later cells rely on its `name` column as the join key and its
# `population` column as the denominator for the per-million rate.
countries_pop = pd.read_csv(
    filepath_or_buffer='data/raw/countries_population.csv',
)

---

## CDC Monkeypox

#### Aggregated by country

In [6]:
# CDC-hosted CSV of monkeypox case counts by country. The literal is split
# across two lines for readability only; the pieces join into one URL.
countries_url = (
    'https://www.cdc.gov/wcms/vizdata/poxvirus/monkeypox/data/'
    'MPX-Cases-by-Country.csv'
)

In [7]:
# Download the CDC country-level table straight from its URL into a frame.
# This is a live network read, so the result reflects whatever the CDC is
# serving at the moment the cell runs.
countries_src = pd.read_csv(filepath_or_buffer=countries_url)

In [8]:
# Normalise the header to lower case so later cells can refer to columns
# as `asof`, `category`, `cases` and `country` regardless of source casing.
countries_src.columns = countries_src.columns.map(str.lower)

In [9]:
# Derive a proper datetime column from the free-text `asof` stamp.
#
# The stamp is expected to look like "Data as of <date> 5:00 PM EDT".
# Rather than hard-coding that exact clock time and zone (which stops
# matching as soon as the feed is stamped at another hour or switches to
# EST), strip:
#   1. the leading "Data as of " label, and
#   2. any trailing "<h>:<mm> AM/PM <ZONE>" suffix,
# leaving only the date text for pd.to_datetime.
# `regex=True` is passed explicitly because the default for
# Series.str.replace differs between pandas versions.
countries_src['updated_date'] = pd.to_datetime(
    countries_src['asof']
    .str.replace(r'^\s*Data as of\s+', '', regex=True)
    .str.replace(r'\s+\d{1,2}:\d{2}\s*[AP]M\s+[A-Z]{2,4}\s*$', '', regex=True)
)

In [10]:
# Collapse the verbose `category` labels into a short flag column:
#   "Has historically reported monkeypox"     -> "Has"
#   "Has not historically reported monkeypox" -> "Has not"
# Any other text in `category` passes through unchanged.
countries_src['hist_had'] = (
    countries_src['category']
    .str.replace('Has historically reported monkeypox', 'Has')
    .str.replace('Has not historically reported monkeypox', 'Has not')
)

In [11]:
# Working frame: discard the raw text columns that have been replaced by
# `hist_had` and `updated_date`, then rank countries from most to fewest cases.
df = (
    countries_src
    .drop(columns=['category', 'asof'])
    .sort_values(by='cases', ascending=False)
)

---

## Merge

#### Population and cases to create a rate

In [13]:
# Attach population figures to the case counts by matching the CDC
# `country` label against the population table's `name` column.
# This is an inner join (the pandas default, spelled out here), so any
# country whose name has no exact match in the population table is dropped.
merged_df = df.merge(
    countries_pop,
    how='inner',
    left_on='country',
    right_on='name',
)

In [14]:
# Case rate per one million residents, rounded to two decimal places.
merged_df['cases_per_million'] = (
    merged_df['cases']
    .div(merged_df['population'])
    .mul(1000000)
    .astype(float)
    .round(2)
)

In [15]:
# `name` was only needed as the join key and duplicates `country`;
# remove it in place before exporting.
merged_df.drop(columns=['name'], inplace=True)

---

## Exports

In [16]:
# Write the merged table to disk:
#   * a fixed-name "latest" CSV that is overwritten on every run,
#   * a CSV and a JSON snapshot whose names carry the run date (`today`).
# The "latest" path has no placeholders, so it is a plain string rather
# than an f-string.
merged_df.to_csv('data/processed/monkeypox_cases_countries_cdc_latest.csv', index=False)
merged_df.to_csv(f'data/processed/monkeypox_cases_countries_cdc_{today}.csv', index=False)
# One JSON object per row (orient='records'), pretty-printed with 4 spaces.
merged_df.to_json(f'data/processed/monkeypox_cases_countries_cdc_{today}.json', orient='records', indent=4)