# COVID-19 Anlaysis and hosting it on dstack.ai

### Importing the libraries

In [50]:
import pandas as pd
import plotly.express as px
from dstack import create_frame
import matplotlib.pyplot as plt
from dstack import create_frame

### Loading data right from the source:

In [44]:
#Importing csv files based on Timeseries considering the deaths, confirmed cases and recovered cases
df = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv')
confirmed_df = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv')
recovered_df = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv')
country_df = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/web-data/data/cases_country.csv')

In [8]:
# this function displays the first 5 rows of the dataframe
df.head() 

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,4/22/20,4/23/20,4/24/20,4/25/20,4/26/20,4/27/20,4/28/20,4/29/20,4/30/20,5/1/20
0,,Afghanistan,33.0,65.0,0,0,0,0,0,0,...,40,42,43,47,50,57,58,60,64,68
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,27,27,27,27,28,28,30,30,31,31
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,402,407,415,419,425,432,437,444,450,453
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,37,37,40,40,40,40,41,42,42,43
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,2,2,2,2,2,2,2,2,2,2


In [45]:
#Checking the confirmed data dataframe
confirmed_df.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,4/24/20,4/25/20,4/26/20,4/27/20,4/28/20,4/29/20,4/30/20,5/1/20,5/2/20,5/3/20
0,,Afghanistan,33.0,65.0,0,0,0,0,0,0,...,1351,1463,1531,1703,1828,1939,2171,2335,2469,2704
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,678,712,726,736,750,766,773,782,789,795
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,3127,3256,3382,3517,3649,3848,4006,4154,4295,4474
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,731,738,738,743,743,743,745,745,747,748
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,25,25,26,27,27,27,27,30,35,35


### Manipulating a dataframe using pandas

In [46]:
cols = [df.columns[1]] + list(df.columns[-2:])
# country + two recent days
last_2_days = df[df["Province/State"].isnull()][cols].copy()
last_2_days # as you might've noticed above, the value of the expression in the end of the code cell is displayed in the output

Unnamed: 0,Country/Region,5/2/20,5/3/20
0,Afghanistan,72,85
1,Albania,31,31
2,Algeria,459,463
3,Andorra,44,45
4,Angola,2,2
...,...,...,...
261,Western Sahara,0,0
262,Sao Tome and Principe,1,1
263,Yemen,2,2
264,Comoros,0,0


### Trying to analyse the growth in last two days

In [52]:
d1 = last_2_days.columns[-1]
d2 = last_2_days.columns[-2]

last_2_days["delta"] = last_2_days[d1] - last_2_days[d2]
last_2_days["delta%"] = last_2_days["delta"] / last_2_days[d2]
last_2_days # displaying the resulting dataframe

Unnamed: 0,Country/Region,5/2/20,5/3/20,delta,delta%
0,Afghanistan,72,85,13,0.180556
1,Albania,31,31,0,0.000000
2,Algeria,459,463,4,0.008715
3,Andorra,44,45,1,0.022727
4,Angola,2,2,0,0.000000
...,...,...,...,...,...
261,Western Sahara,0,0,0,
262,Sao Tome and Principe,1,1,0,0.000000
263,Yemen,2,2,0,0.000000
264,Comoros,0,0,0,


### Analyzing data using matplotlib and plotly and hosting results on dstack

In [54]:
min_cases = 50
# create frame and set stack name
top_speed_frame = create_frame("covid19/speed")
# top countries
sort_by_cols = ["delta", "delta%"]
for col in sort_by_cols:
    top = last_2_days[last_2_days[last_2_days.columns[1]]>min_cases].sort_values(by=[col], ascending=False).head(50)

    top_speed_frame.commit(top, f"Top 50 countries with the fastest growing number of confirmed Covid-19 cases (at least {min_cases})", {"Sort by": col})

top_speed_frame.push()

### Displaying Confirmed Cases growth in SPAIN

In [57]:
cdf = df[(df["Country/Region"]=="Spain") & (df["Province/State"].isnull())][df.columns[4:]].T
cdf = cdf.rename(columns={cdf.columns[0]:"confirmed"}) # set the name of the new column resulted as a transposition of date dataframe columns

In [64]:
fig = px.line(cdf, x=cdf.index, y="confirmed", title='Growth of cases in Spain')
fig.show() # displays the `plotly`'s figure

In [60]:
delta = (cdf.shift(-1) - cdf)
delta.tail() # display the last 5 rows of the dataframe to make sure the operation was correct

Unnamed: 0,confirmed
4/29/20,268.0
4/30/20,0.0
5/1/20,557.0
5/2/20,164.0
5/3/20,


In [63]:
fig = px.line(delta, x=delta.index, y="confirmed", title='Everyday growth of cases in Spain')
fig.show()

### Defining a function to display all the three charts with Country name as input

In [65]:
def plots_by_country(country):
    cdf = df[(df["Country/Region"]==country) & (df["Province/State"].isnull())][df.columns[4:]].T
    cdf = cdf.rename(columns={cdf.columns[0]:"confirmed"})
    cfig = px.line(cdf, x=cdf.index, y="confirmed")
    delta = (cdf.shift(-1) - cdf).rename(columns={"confirmed": "confirmed per day"})
    cdfig = px.line(delta, x=cdf.index, y="confirmed per day")
    delta_p = ((cdf.shift(-1) - cdf) / cdf.shift(-1)).rename(columns={"confirmed": "confirmed per day %"})
    cdpfig = px.line(delta_p, x=cdf.index, y="confirmed per day %")
    return (cfig, cdfig, cdpfig)

### Checking the function by passing US

In [69]:
(fig1, fig2, fig3) = plots_by_country("US")
fig1.show()
fig2.show()
fig3.show()

### Pushing data on dstack.ai (Check the link in the last to see the dashboard)

In [25]:
# get top 30 countries by the number of new cases on the last day
countries = df[df["Province/State"].isnull()].sort_values(by=[df.columns[-1]], ascending=False)[["Country/Region"]].head(30)

# create a frame and iterate over the top countries to commit three plots for every country: new absolute cases, increase in absolute numbers, and increase in percent
frame = create_frame("covid19/speed_by_country")
for c in countries["Country/Region"].tolist():
    print(c)
    (fig1, fig2, fig3) = plots_by_country(c)
    frame.commit(fig1, f"Confirmed cases in {c}", {"Country": c, "Chart": "All cases"})
    frame.commit(fig2, f"New confirmed cases in {c}", {"Country": c, "Chart": "New cases"})
    frame.commit(fig3, f"New confirmed cases in {c} in %", {"Country": c, "Chart": "New cases (%)"})

frame.push()

US
Italy
United Kingdom
France
Spain
Belgium
Germany
Brazil
Iran
Netherlands
Turkey
Sweden
Mexico
Switzerland
Ireland
India
Russia
Peru
Ecuador
Portugal
Indonesia
Romania
Poland
Austria
Philippines
Denmark
Japan
Algeria
Pakistan
Egypt


'https://dstack.ai/jayshil97/covid19/speed_by_country'

In [71]:
# filter US, transpose date dataframe columns into dataframe rows
t1 = df[(df["Country/Region"]=="US") & (df["Province/State"].isnull())][df.columns[4:]].T
# set the new column name
t1 = t1.rename(columns={t1.columns[0]:"confirmed"})
# make the dataframe's index a regular column; we'll later need it to highlight each country with its own color
t1.reset_index()
# add country column
t1["Country/Region"] = "US"
t1.tail() # display the last 5 rows to make sure everything is correct

Unnamed: 0,confirmed,Country/Region
4/29/20,60967,US
4/30/20,62996,US
5/1/20,64943,US
5/2/20,66369,US
5/3/20,67682,US


### Function to return three dataframes: absolute new cases, absolute increase, percent increase

In [74]:
def country_df(country):
    cdf = df[(df["Country/Region"]==country) & (df["Province/State"].isnull())][df.columns[4:]].T
    cdf = cdf.rename(columns={cdf.columns[0]:"confirmed"})
    delta = (cdf.shift(-1) - cdf).rename(columns={"confirmed": "confirmed per day"})
    delta.reset_index()
    delta["Country/Region"] = country
    delta_p = ((cdf.shift(-1) - cdf) / cdf.shift(-1)).rename(columns={"confirmed": "confirmed per day %"})
    delta_p.reset_index()
    delta_p["Country/Region"] = country
    cdf.reset_index()
    cdf["Country/Region"] = country
    return (cdf, delta, delta_p)

### Top 10 countries all time growth graph

In [75]:
top10 = df[df["Province/State"].isnull()].sort_values(by=[df.columns[-1]], ascending=False)[["Country/Region"]].head(10)

# make a single lists of dataframes for all countries
top = []
top_delta = []
top_delta_p = []
for c in top10["Country/Region"].tolist():
    (x, y, z) = country_df(c)
    top.append(x)
    top_delta.append(y)
    top_delta_p.append(z)

test = pd.concat(top) # make a pandas dataframe out for the new cases
# plot the resulted dataframe of new cases to make sure everything is correct
px.line(test, x=test.index, y="confirmed", color='Country/Region').show()

In [31]:

frame = create_frame("covid19/speed_by_country_all")

top10df = pd.concat(top)
fig = px.line(top10df, x=top10df.index, y="confirmed", color='Country/Region')
frame.commit(fig, "Confirmed cases in top 10 countries", {"Country": "Top 10", "Chart": "All cases"})

top10df_delta = pd.concat(top_delta)
fig = px.line(top10df_delta, x=top10df_delta.index, y="confirmed per day", color='Country/Region')
frame.commit(fig, "New confirmed cases in top 10 countries", {"Country": "Top 10", "Chart": "New cases"})

top10df_delta_p = pd.concat(top_delta_p)
fig = px.line(top10df_delta_p, x=top10df_delta_p.index, y="confirmed per day %", color='Country/Region')
frame.commit(fig, "New confirmed cases in top 10 countries in %", {"Country": "Top 10", "Chart": "New cases (%)"})

for c in countries["Country/Region"].tolist():
    print(c)
    (fig1, fig2, fig3) = plots_by_country(c)
    frame.commit(fig1, f"Confirmed cases in {c}", {"Country": c, "Chart": "All cases"})
    frame.commit(fig2, f"New confirmed cases in {c}", {"Country": c, "Chart": "New cases"})
    frame.commit(fig3, f"New confirmed cases in {c} in %", {"Country": c, "Chart": "New cases (%)"})

frame.push()

US
Italy
United Kingdom
France
Spain
Belgium
Germany
Brazil
Iran
Netherlands
Turkey
Sweden
Mexico
Switzerland
Ireland
India
Russia
Peru
Ecuador
Portugal
Indonesia
Romania
Poland
Austria
Philippines
Denmark
Japan
Algeria
Pakistan
Egypt


'https://dstack.ai/jayshil97/covid19/speed_by_country_all'

In [76]:
top10df = pd.concat(top)
fig = px.line(top10df, x=top10df.index, y="confirmed", color='Country/Region', title = 'Top 10 countries all time growth graph')
frame.commit(fig, "Confirmed cases in top 10 countries", {"Country": "Top 10", "Chart": "All cases"})
fig

In [77]:
top10df_delta = pd.concat(top_delta)
fig = px.line(top10df_delta, x=top10df_delta.index, y="confirmed per day", color='Country/Region', title = 'Top 10 countries by daily growth')
frame.commit(fig, "New confirmed cases in top 10 countries", {"Country": "Top 10", "Chart": "New cases"})
fig

In [79]:
top10df_delta_p = pd.concat(top_delta_p)
fig = px.line(top10df_delta_p, x=top10df_delta_p.index, y="confirmed per day %", color='Country/Region', title = 'Top 10 countries by Delta Rate')
frame.commit(fig, "New confirmed cases in top 10 countries in %", {"Country": "Top 10", "Chart": "New cases (%)"})
fig

# THE END