# Visualizing COVID-19 Internationally

This notebook uses the Johns Hopkins University (JHU) COVID-19 time-series data to plot confirmed cases, recoveries, and deaths by country.

#### Notes
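- As of 3/24/20, the original "recovered" file is no longer maintained, and the data is replaced by new "confirmed" and "deaths" .csv files. "Recovered" plots were removed from this notebook at that point; the notebook now reads the separate `time_series_covid19_recovered_global.csv` file, so the "Recovered" plots are included again.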
- Unfortunately, the "global" time series datasets no longer have individual states' data. Need to pull directory of daily reports from github directly and then assemble a new dataframe
- 7/14/20: Split notebook into separate "Countries" and "States" notebooks to keep things tidy.


### CURRENTLY WORKING ON:
- Get per-capita case rates

### Notes on countries'/states' containment efforts and events
*Insert notes here about containment efforts performed*
- States instituted isolation:
- Hong Kong eased restrictions 3/22/20

- MS governor overrode local ordinances 3/26/2020
- Anti containment protests 4-17-20 through ____
- Many states started reopening on 5-1-20
- George Floyd Protests in MN 5/26-, spreading to many other cities throughout the week

In [2]:
import numpy as np, pandas as pd, matplotlib.pyplot as plt, seaborn as sns
from datetime import datetime, timedelta
import os

# plotly and cufflinks
import cufflinks as cf
import plotly
from plotly.offline import download_plotlyjs,init_notebook_mode,plot,iplot
init_notebook_mode(connected=True) # inject plotly's JavaScript library (plotly.js) so figures render inside the notebook
cf.go_offline() # make cufflinks go offline

import plotly.graph_objects as go

%matplotlib inline

# Plot international case numbers based on JHU timecourse data

In [4]:
## Load the JHU time-series data

# Data layout introduced by JHU on 3/24/20: one raw .csv per status, hosted on GitHub
confirmed_url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
deaths_url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv'
recovered_url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv'

# Read each file and tag its rows with the status it reports
timecourse_confirmed = pd.read_csv(confirmed_url).assign(Status="Confirmed")
timecourse_deaths = pd.read_csv(deaths_url).assign(Status="Deaths")
timecourse_recovered = pd.read_csv(recovered_url).assign(Status="Recovered")

# Stack the three frames; the "Status" column tells the rows apart
timecourse = pd.concat([timecourse_confirmed, timecourse_deaths, timecourse_recovered])

timecourse.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,7/5/20,7/6/20,7/7/20,7/8/20,7/9/20,7/10/20,7/11/20,7/12/20,7/13/20,Status
0,,Afghanistan,33.0,65.0,0,0,0,0,0,0,...,32951,33190,33384,33594,33908,34194,34366,34451,34455,Confirmed
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,2893,2964,3038,3106,3188,3278,3371,3454,3571,Confirmed
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,15941,16404,16879,17348,17808,18242,18712,19195,19689,Confirmed
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,855,855,855,855,855,855,855,855,858,Confirmed
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,346,346,386,386,396,458,462,506,525,Confirmed


In [4]:
# List every distinct country/region name present in the data
pd.unique(timecourse["Country/Region"])

array(['Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola',
       'Antigua and Barbuda', 'Argentina', 'Armenia', 'Australia',
       'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh',
       'Barbados', 'Belarus', 'Belgium', 'Benin', 'Bhutan', 'Bolivia',
       'Bosnia and Herzegovina', 'Brazil', 'Brunei', 'Bulgaria',
       'Burkina Faso', 'Cabo Verde', 'Cambodia', 'Cameroon', 'Canada',
       'Central African Republic', 'Chad', 'Chile', 'China', 'Colombia',
       'Congo (Brazzaville)', 'Congo (Kinshasa)', 'Costa Rica',
       "Cote d'Ivoire", 'Croatia', 'Diamond Princess', 'Cuba', 'Cyprus',
       'Czechia', 'Denmark', 'Djibouti', 'Dominican Republic', 'Ecuador',
       'Egypt', 'El Salvador', 'Equatorial Guinea', 'Eritrea', 'Estonia',
       'Eswatini', 'Ethiopia', 'Fiji', 'Finland', 'France', 'Gabon',
       'Gambia', 'Georgia', 'Germany', 'Ghana', 'Greece', 'Guatemala',
       'Guinea', 'Guyana', 'Haiti', 'Holy See', 'Honduras', 'Hungary',
       'Iceland', 'India

In [6]:
## Select countries to display and aggregate per-country totals

# Non-date columns that must not be aggregated. "Province/State" is dropped
# explicitly: pandas >= 2.0 no longer silently discards non-numeric columns in
# groupby().sum(), so leaving it in would add a non-date row after the transpose
# and break the later diff()/plot steps.
drop_cols = ["Lat", "Long", "Province/State"]

# select countries to display
countries = ["US", "United Kingdom", "Italy", "France", "China", "Korea, South", "Germany", "Japan", "Sweden",
            "Finland", "Spain"]

# Sum provincial rows into one total per (country, status), then transpose so
# rows are dates and columns are a (Country/Region, Status) MultiIndex.
by_country = (
    timecourse
    .drop(columns=drop_cols)
    .groupby(by=["Country/Region", "Status"])
    .sum()
    .transpose()[countries]
    # Relabel "Korea, South" without inplace=True so the cell is safe to re-run
    .rename(columns={'Korea, South': 'South Korea'})
)

by_country.head()

Country/Region,US,US,US,United Kingdom,United Kingdom,United Kingdom,Italy,Italy,Italy,France,...,Japan,Sweden,Sweden,Sweden,Finland,Finland,Finland,Spain,Spain,Spain
Status,Confirmed,Deaths,Recovered,Confirmed,Deaths,Recovered,Confirmed,Deaths,Recovered,Confirmed,...,Recovered,Confirmed,Deaths,Recovered,Confirmed,Deaths,Recovered,Confirmed,Deaths,Recovered
1/22/20,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1/23/20,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1/24/20,2,0,0,0,0,0,0,0,0,2,...,0,0,0,0,0,0,0,0,0,0
1/25/20,2,0,0,0,0,0,0,0,0,3,...,0,0,0,0,0,0,0,0,0,0
1/26/20,5,0,0,0,0,0,0,0,0,3,...,1,0,0,0,0,0,0,0,0,0


In [7]:
# Cross-section (.xs) on the "Status" column level keeps only the confirmed counts
by_country.xs('Confirmed', axis='columns', level='Status').head()

Country/Region,US,United Kingdom,Italy,France,China,South Korea,Germany,Japan,Sweden,Finland,Spain
1/22/20,1,0,0,0,548,1,0,2,0,0,0
1/23/20,1,0,0,0,643,1,0,2,0,0,0
1/24/20,2,0,0,2,920,2,0,2,0,0,0
1/25/20,2,0,0,3,1406,2,0,2,0,0,0
1/26/20,5,0,0,3,2075,3,0,4,0,0,0


## Create interactive plots of total country numbers
Y-axis on log scale

In [8]:
# Plot cumulative confirmed cases per country on a log y-axis.
# iplot builds and renders its own figure, so the previously created (and never
# used) empty go.Figure() has been removed.
by_country.xs('Confirmed', axis=1, level=1).iplot(
    kind='lines', yaxis_type='log', theme='ggplot',
    title='COVID-19 Confirmed Cases by Country (Updated {})'.format(by_country.index[-1]),
    yaxis_title='Number of cases', xaxis_title='Date')

### Deaths

In [9]:
# Plot cumulative deaths per country on a log y-axis.
# The y-axis label now says "deaths" (it was copy-pasted as "cases"), and the
# unused empty go.Figure() has been removed -- iplot creates its own figure.
by_country.xs('Deaths', axis=1, level=1).iplot(
    kind='lines', yaxis_type='log', theme='ggplot',
    title='COVID-19 Deaths by Country (Updated {})'.format(by_country.index[-1]),
    yaxis_title='Number of deaths', xaxis_title='Date')

### Recovered

In [10]:
# Plot cumulative recoveries per country on a log y-axis.
# The y-axis label now says "recoveries" (it was copy-pasted as "cases"), and the
# unused empty go.Figure() has been removed -- iplot creates its own figure.
by_country.xs('Recovered', axis=1, level=1).iplot(
    kind='lines', yaxis_type='log', theme='ggplot',
    title='COVID-19 Recoveries by Country (Updated {})'.format(by_country.index[-1]),
    yaxis_title='Number of recoveries', xaxis_title='Date')

## Plot new cases per day by country

In [11]:
# Check the dtype and non-null count of every (country, status) column
by_country.info()

<class 'pandas.core.frame.DataFrame'>
Index: 174 entries, 1/22/20 to 7/13/20
Data columns (total 33 columns):
(US, Confirmed)                174 non-null int64
(US, Deaths)                   174 non-null int64
(US, Recovered)                174 non-null int64
(United Kingdom, Confirmed)    174 non-null int64
(United Kingdom, Deaths)       174 non-null int64
(United Kingdom, Recovered)    174 non-null int64
(Italy, Confirmed)             174 non-null int64
(Italy, Deaths)                174 non-null int64
(Italy, Recovered)             174 non-null int64
(France, Confirmed)            174 non-null int64
(France, Deaths)               174 non-null int64
(France, Recovered)            174 non-null int64
(China, Confirmed)             174 non-null int64
(China, Deaths)                174 non-null int64
(China, Recovered)             174 non-null int64
(South Korea, Confirmed)       174 non-null int64
(South Korea, Deaths)          174 non-null int64
(South Korea, Recovered)       174 non-n

In [12]:
# Day-over-day change in the cumulative totals gives the new counts per day.
# Adjustments to the reported totals sometimes make the cumulative number go
# down, which would show up as a negative daily count; clip those to zero.
# (The first row has no previous day and stays NaN.)
country_daily = by_country.diff().clip(lower=0)

### Plot daily case loads

In [18]:
# Line chart of each country's newly confirmed cases per day (linear y-axis)
country_daily.xs('Confirmed', axis=1, level=1).iplot(
    title='Daily New Confirmed Cases (Updated {})'.format(by_country.index[-1]),
    yaxis_type='linear',
)

In [29]:
# Plot a rolling average of daily new confirmed cases on a log y-axis

window_size = 7 # days to average
# min_periods=3: start averaging as soon as 3 days of data fall inside the window
rolling = country_daily.xs('Confirmed', axis=1, level=1).rolling(window=window_size, min_periods=3).mean()
# In cufflinks, `scale` is the image-export resolution (only used with asImage=True),
# not the axis scale, so scale='log' left this chart linear. The log axis is
# requested with yaxis_type, as in the cumulative plots.
daily_cases = rolling.iplot(yaxis_type='log', yaxis_title='Number of cases', xaxis_title='Date',
              title='Daily New Confirmed Cases (Rolling {} Day Average, Updated {})'.format(window_size, by_country.index.tolist()[-1]), asFigure=True)

daily_cases

In [30]:
# Line chart of each country's new deaths per day on a log y-axis.
# In cufflinks, `scale` is the image-export resolution (only used with asImage=True),
# not the axis scale, so scale='log' left this chart linear; yaxis_type sets the axis.
country_daily.xs('Deaths', axis=1, level=1).iplot(yaxis_type='log', title='Daily New Confirmed Deaths')

In [31]:
# Plot a rolling average of daily new deaths

window_size = 7 # days to average
# min_periods=3: start averaging as soon as 3 days of data fall inside the window
rolling = country_daily.xs('Deaths', axis=1, level=1).rolling(window=window_size, min_periods=3).mean()
# The y-axis label now says "deaths"; it was copy-pasted from the cases plot as "cases".
daily_deaths = rolling.iplot(yaxis_title='Number of deaths', xaxis_title='Date',
              title='Daily New Confirmed Deaths (Rolling {} Day Average, Updated {})'.format(window_size, by_country.index.tolist()[-1]), asFigure=True)

daily_deaths

In [32]:
# Export both rolling-average figures as standalone HTML files
for figure, html_path in (
    (daily_cases, 'international-daily-cases-average.html'),
    (daily_deaths, 'international-daily-deaths-average.html'),
):
    plotly.io.write_html(figure, file=html_path)