In [30]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from functools import reduce

In [2]:
# Source: Johns Hopkins CSSE COVID-19 repository, wide-format time series
# (one row per region, one column per date).
# NOTE(review): upstream may have renamed or archived these files since this
# notebook was written -- confirm the URLs still resolve before relying on them.
CSSE_TIME_SERIES_URL = (
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/"
    "csse_covid_19_data/csse_covid_19_time_series/"
)

# All three files are fetched from the same raw-content host so that the
# loads behave consistently (no mix of redirecting and direct URLs).
dfConfirmed = pd.read_csv(CSSE_TIME_SERIES_URL + "time_series_19-covid-Confirmed.csv")
dfDeaths = pd.read_csv(CSSE_TIME_SERIES_URL + "time_series_19-covid-Deaths.csv")
dfRecovered = pd.read_csv(CSSE_TIME_SERIES_URL + "time_series_19-covid-Recovered.csv")

# Confirmed cases

In [19]:
# Countries to preview; add names here to include more rows.
# Other candidates: "Iran", "China", "United Kingdom", "Italy", "Korea, South",
# "Spain", "Germany", "Japan", "Mexico", "Canada"
my_countries = ["US"]

# Select the rows for every listed country in one vectorized pass.
# A per-country loop that reassigns df_temp keeps only the last country's
# rows and leaves df_temp undefined when the list is empty; isin() handles
# zero, one, or many countries uniformly.
df_temp = dfConfirmed[dfConfirmed['Country/Region'].isin(my_countries)]

df_temp

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,3/6/20,3/7/20,3/8/20,3/9/20,3/10/20,3/11/20,3/12/20,3/13/20,3/14/20,3/15/20
99,Washington,US,47.4009,-121.4905,0,0,0,0,0,0,...,0,0,0,0,267,366,442,568,572,643
100,New York,US,42.1657,-74.9481,0,0,0,0,0,0,...,0,0,0,0,173,220,328,421,525,732
101,California,US,36.1162,-119.6816,0,0,0,0,0,0,...,0,0,0,0,144,177,221,282,340,426
102,Massachusetts,US,42.2302,-71.5301,0,0,0,0,0,0,...,0,0,0,0,92,95,108,123,138,164
103,Diamond Princess,US,35.4437,139.6380,0,0,0,0,0,0,...,45,45,45,45,46,46,46,46,46,46
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
401,"New Castle, DE",US,39.5393,-75.6674,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
419,Alabama,US,32.3182,-86.9023,0,0,0,0,0,0,...,0,0,0,0,0,0,0,5,6,12
422,Puerto Rico,US,18.2208,-66.5901,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,3,5
438,"Virgin Islands, U.S.",US,18.3358,-64.8963,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,1


In [33]:
# Keep the first four (identifying) columns plus the final date column,
# discarding every date column in between.
older_date_columns = dfConfirmed.columns[4:-1]
df_confd_latest = dfConfirmed.drop(columns=older_date_columns)

# Relabel the one remaining date column as 'Confirmed'.
df_confd_latest.columns = list(df_confd_latest.columns[:-1]) + ['Confirmed']
df_confd_latest.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,Confirmed
0,,Thailand,15.0,101.0,114
1,,Japan,36.0,138.0,839
2,,Singapore,1.2833,103.8333,226
3,,Nepal,28.1667,84.25,1
4,,Malaysia,2.5,112.5,428


In [25]:
# Reshape each wide table (one column per date) into long form
# (one row per region per date) using melt().
key_columns = dfConfirmed.columns.to_list()[:4]   # identifying columns
date_columns = dfConfirmed.columns.to_list()[4:]  # one column per reporting date


def _melt_time_series(df, value_name, id_vars):
    """Return ``df`` in long form with a 'Date' column and a ``value_name`` column.

    Every column of ``df`` that is not listed in ``id_vars`` is treated as a
    date column. Using each frame's own columns (rather than the date list of
    another frame) avoids a KeyError when the source files do not cover
    exactly the same dates.
    """
    return pd.melt(
        df,
        id_vars=id_vars,
        var_name='Date',
        value_name=value_name,
    )


dfConfirmed_clean = _melt_time_series(dfConfirmed, 'Confirmed', key_columns)
dfDeaths_clean = _melt_time_series(dfDeaths, 'Deaths', key_columns)
dfRecovered_clean = _melt_time_series(dfRecovered, 'Recovered', key_columns)

dfConfirmed_clean.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed
0,,Thailand,15.0,101.0,1/22/20,2
1,,Japan,36.0,138.0,1/22/20,2
2,,Singapore,1.2833,103.8333,1/22/20,0
3,,Nepal,28.1667,84.25,1/22/20,0
4,,Malaysia,2.5,112.5,1/22/20,0


In [34]:
# Combine the three long-format frames into one wide frame, outer-joining on
# the identifying columns plus 'Date'.
def _outer_join(left, right):
    """Outer-merge two frames on the key columns and 'Date'."""
    return pd.merge(left, right, on=key_columns + ['Date'], how='outer')


dfAll = [dfConfirmed_clean, dfDeaths_clean, dfRecovered_clean]
dfWide = reduce(_outer_join, dfAll)

# Reshape again so that each row holds exactly one observation:
# the first five columns identify it, 'type' names the measure, 'Count' is its value.
id_vars = dfWide.columns[:5]
data_type = ['Confirmed', 'Deaths', 'Recovered']
dfFinal = dfWide.melt(
    id_vars=id_vars,
    value_vars=data_type,
    var_name='type',
    value_name='Count',
)

# Parse the m/d/yy date strings; fail loudly on anything malformed.
dfFinal['Date'] = pd.to_datetime(dfFinal['Date'], format='%m/%d/%y', errors='raise')

dfFinal.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,type,Count
0,,Thailand,15.0,101.0,2020-01-22,Confirmed,2
1,,Japan,36.0,138.0,2020-01-22,Confirmed,2
2,,Singapore,1.2833,103.8333,2020-01-22,Confirmed,0
3,,Nepal,28.1667,84.25,2020-01-22,Confirmed,0
4,,Malaysia,2.5,112.5,2020-01-22,Confirmed,0


In [37]:
# Total 'Count' over all rows for each (type, Date) pair, keeping the
# grouping keys as ordinary columns.
grouped_by_type_and_date = dfFinal.groupby(['type', 'Date'], as_index=False)
dfSums = grouped_by_type_and_date['Count'].sum()
dfSums.head()

Unnamed: 0,type,Date,Count
0,Confirmed,2020-01-22,555
1,Confirmed,2020-01-23,653
2,Confirmed,2020-01-24,941
3,Confirmed,2020-01-25,1434
4,Confirmed,2020-01-26,2118


In [38]:
# Plotly Express ships inside the main plotly package as ``plotly.express``;
# the standalone ``plotly_express`` package is only a legacy fallback.
try:
    import plotly.express as px
except ImportError:
    import plotly_express as px


def plot_timeseries(df):
    """Build a line chart of ``Count`` over ``Date``, one line per ``type``.

    Parameters
    ----------
    df : DataFrame
        Must contain the columns 'Date', 'Count' and 'type'.

    Returns
    -------
    The figure created by ``px.line``, using the dark template and a
    horizontal legend. The caller decides when to show it.
    """
    fig = px.line(df, x='Date', y='Count', color='type', template='plotly_dark')
    fig.update_layout(legend_orientation="h")
    return fig


fig = plot_timeseries(dfSums)
fig.show()

ModuleNotFoundError: No module named 'plotly_express'