In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from functools import reduce

In [2]:
dfConfirmed = pd.read_csv("https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv")
dfDeaths = pd.read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Deaths.csv")
dfRecovered = pd.read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Recovered.csv")

# Confirmed cases

In [3]:
my_countries = [
    "US"#, "Iran", "China","United Kingdom", "Italy", "Korea, South", "Spain", "Germany", "Japan", "Mexico", "Canada"
]

for country in my_countries:
    df_temp = dfConfirmed[dfConfirmed['Country/Region'] == country]
    
        
df_temp

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,3/14/20,3/15/20,3/16/20,3/17/20,3/18/20,3/19/20,3/20/20,3/21/20,3/22/20,3/23/20
98,Washington,US,47.4009,-121.4905,0,0,0,0,0,0,...,572,643,904,1076,1014,1376,1524,1793,1996,1996.0
99,New York,US,42.1657,-74.9481,0,0,0,0,0,0,...,525,732,967,1706,2495,5365,8310,11710,15793,15793.0
100,California,US,36.1162,-119.6816,0,0,0,0,0,0,...,340,426,557,698,751,952,1177,1364,1642,1642.0
101,Massachusetts,US,42.2302,-71.5301,0,0,0,0,0,0,...,138,164,197,218,218,328,413,525,646,646.0
102,Diamond Princess,US,35.4437,139.6380,0,0,0,0,0,0,...,46,46,47,47,47,47,49,49,49,49.0
103,Grand Princess,US,37.6489,-122.6655,0,0,0,0,0,0,...,21,21,20,21,21,22,23,23,30,30.0
104,Georgia,US,33.0406,-83.6431,0,0,0,0,0,0,...,66,99,121,146,199,287,420,507,600,600.0
105,Colorado,US,39.0598,-105.3111,0,0,0,0,0,0,...,101,131,160,160,184,277,363,390,476,476.0
106,Florida,US,27.7663,-81.6868,0,0,0,0,0,0,...,76,115,155,216,314,417,563,659,830,830.0
107,New Jersey,US,40.2989,-74.5210,0,0,0,0,0,0,...,69,98,178,267,267,742,890,1327,1914,1914.0


In [4]:
# Get key columns (4:) and last date column only:
df_confd_latest = dfConfirmed.drop(dfConfirmed.columns[4:len(dfConfirmed.columns)-1], axis='columns')
df_confd_latest.columns = [*df_confd_latest.columns[:-1],'Confirmed']
df_confd_latest.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,Confirmed
0,,Thailand,15.0,101.0,599.0
1,,Japan,36.0,138.0,1086.0
2,,Singapore,1.2833,103.8333,455.0
3,,Nepal,28.1667,84.25,2.0
4,,Malaysia,2.5,112.5,1306.0


In [5]:
# Convert "columns" into "rows" using melt()
key_columns = dfConfirmed.columns.to_list()[:4]
date_columns = dfConfirmed.columns.to_list()[4:]

dfConfirmed_clean = pd.melt(
    dfConfirmed
    , id_vars=key_columns
    , value_vars=date_columns
    , var_name='Date'
    , value_name='Confirmed'
)

dfDeaths_clean = pd.melt(
    dfDeaths
    , id_vars=key_columns
    , value_vars=date_columns
    , var_name='Date'
    , value_name='Deaths'
)

dfRecovered_clean = pd.melt(
    dfRecovered
    , id_vars=key_columns
    , value_vars=date_columns
    , var_name='Date'
    , value_name='Recovered'
)

dfConfirmed_clean.head()

AttributeError: 'Index' object has no attribute 'to_list'

In [34]:
# Merge into single dataframe
dfAll = [dfConfirmed_clean, dfDeaths_clean, dfRecovered_clean]          
dfFinal = reduce(lambda left, right: pd.merge(left, right, on = key_columns+['Date'], how='outer'), dfAll)
   
# Each row should only represent one observation
id_vars = dfFinal.columns[:5]
data_type = ['Confirmed', 'Deaths', 'Recovered']
dfFinal = pd.melt(
    dfFinal
    , id_vars=id_vars
    , value_vars=data_type
    , var_name='type'
    , value_name='Count'
)

dfFinal['Date'] = pd.to_datetime(dfFinal['Date'], format='%m/%d/%y', errors='raise')

dfFinal.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,type,Count
0,,Thailand,15.0,101.0,2020-01-22,Confirmed,2
1,,Japan,36.0,138.0,2020-01-22,Confirmed,2
2,,Singapore,1.2833,103.8333,2020-01-22,Confirmed,0
3,,Nepal,28.1667,84.25,2020-01-22,Confirmed,0
4,,Malaysia,2.5,112.5,2020-01-22,Confirmed,0


In [37]:
dfSums = dfFinal.groupby(['type', 'Date'], as_index=False).agg({'Count':'sum'})
dfSums.head()

Unnamed: 0,type,Date,Count
0,Confirmed,2020-01-22,555
1,Confirmed,2020-01-23,653
2,Confirmed,2020-01-24,941
3,Confirmed,2020-01-25,1434
4,Confirmed,2020-01-26,2118


In [38]:
import plotly_express as px
def plot_timeseries(df):
    fig = px.line(df, x='Date', y='Count', color='type',\
             template='plotly_dark')

    fig.update_layout(legend_orientation="h")
    return(fig)

fig = plot_timeseries(dfSums)
fig.show()

ModuleNotFoundError: No module named 'plotly_express'