## Covid 19 - Exploratory Data Analysis

### Dataset Understanding

In [1]:
#pip install folium for map visualization

In [2]:
## Imports Package
#px and go plot our data in canvas
import plotly.express as px
import plotly.graph_objects as go
# ff (figure_factory) provides higher-level figure builders, used below for the Gantt chart
import plotly.figure_factory as ff
#for subplots
from plotly.subplots import make_subplots

import folium

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

import math
import random
from datetime import timedelta

import warnings
# Silence every warning for the rest of the session (no category filter is given),
# which also hides deprecation notices raised by the libraries used below.
warnings.filterwarnings('ignore')

# Colour palette (hex codes) reused by the charts below
cnf = '#393e46'  # confirmed cases
dth = '#ff2e63'  # deaths
rec = '#21bf73'  # recovered cases
act = '#fe9801'  # active cases

### Dataset Preparation

In [3]:
import plotly as py
# Initialise plotly's offline notebook mode before any figure is shown.
# NOTE(review): connected=True presumably loads plotly.js from a CDN instead of
# embedding it in the notebook — confirm against the plotly documentation.
py.offline.init_notebook_mode(connected=True)

In [4]:
import os

In [5]:
# The CSV files read by the next cell live inside this directory, so it must
# not be removed here. The previous `rm -rf` deleted the data right before it
# was loaded, and because os.system() reports failure through its return code
# (it does not raise), the bare `except` could never fire.
# Instead, only report when the dataset is missing so the reader knows why the
# following read_csv calls would fail.
dataset_dir = "Covid-19-Preprocessed-Dataset"
if not os.path.isdir(dataset_dir):
    print(f"Dataset directory '{dataset_dir}' does not exist - download it before running the next cell")

In [6]:
# Directory holding the preprocessed CSV files; defined once so the location
# can be changed in a single place instead of in four separate paths.
DATA_DIR = 'Covid-19-Preprocessed-Dataset/preprocessed'

# Location-level records with a Date column parsed as datetime.
df = pd.read_csv(f'{DATA_DIR}/covid_19_data_cleaned.csv', parse_dates=['Date'])

# Per-country-per-day, per-country and per-day aggregates.
country_daywise = pd.read_csv(f'{DATA_DIR}/country_daywise.csv', parse_dates=['Date'])
countrywise = pd.read_csv(f'{DATA_DIR}/countrywise.csv')
daywise = pd.read_csv(f'{DATA_DIR}/daywise.csv', parse_dates=['Date'])



In [7]:
# Replace missing province names with a single space so the column holds no NaN.
df = df.fillna({'Province/State': " "})
df.head()

Unnamed: 0,Date,Province/State,Country,Lat,Long,Confirmed,Recovered,Deaths,Active
0,2020-01-22,,Afghanistan,33.0,65.0,0,0,0,0
1,2020-01-23,,Afghanistan,33.0,65.0,0,0,0,0
2,2020-01-24,,Afghanistan,33.0,65.0,0,0,0,0
3,2020-01-25,,Afghanistan,33.0,65.0,0,0,0,0
4,2020-01-26,,Afghanistan,33.0,65.0,0,0,0,0


In [8]:
# Preview the first rows of the per-country, per-day table.
country_daywise.head()

Unnamed: 0,Date,Country,Confirmed,Deaths,Recovered,Active,New Cases,New Deaths,New Recovered
0,2020-01-22,Afghanistan,0,0,0,0,0,0,0
1,2020-01-22,Albania,0,0,0,0,0,0,0
2,2020-01-22,Algeria,0,0,0,0,0,0,0
3,2020-01-22,Andorra,0,0,0,0,0,0,0
4,2020-01-22,Angola,0,0,0,0,0,0,0


In [9]:
# Preview the first rows of the per-country summary table.
countrywise.head()

Unnamed: 0,Country,Confirmed,Deaths,Recovered,Active,New Cases,Deaths / 100 Cases,Recovered / 100 Cases,Deaths / 100 Recovered,Population,Cases / Million People,Confirmed last week,1 week change,1 week % increase
0,Afghanistan,4033,115,502,3416,255,2.85,12.45,22.91,38928341,104.0,2469,1564,63.35
1,Albania,856,31,627,198,6,3.62,73.25,4.94,2877800,297.0,789,67,8.49
2,Algeria,5558,494,2546,2518,189,8.89,45.81,19.4,43851043,127.0,4295,1263,29.41
3,Andorra,754,48,545,161,2,6.37,72.28,8.81,77265,9759.0,747,7,0.94
4,Angola,43,2,13,28,0,4.65,30.23,15.38,32866268,1.0,35,8,22.86


In [10]:
# Preview the first rows of the per-day summary table.
daywise.head()

Unnamed: 0,Date,Confirmed,Deaths,Recovered,Active,New Cases,Deaths / 100 Cases,Recovered / 100 Cases,Deaths / 100 Recovered,No. of Countries
0,2020-01-22,555,17,28,510,0,3.06,5.05,60.71,6
1,2020-01-23,654,18,30,606,99,2.75,4.59,60.0,8
2,2020-01-24,941,26,36,879,287,2.76,3.83,72.22,9
3,2020-01-25,1434,42,39,1353,493,2.93,2.72,107.69,11
4,2020-01-26,2118,56,52,2010,684,2.64,2.46,107.69,13


In [11]:
# Worldwide totals per date for each metric.
# The metric columns are selected *before* summing, so the text columns
# (Province/State, Country) are never aggregated and the frame is grouped once
# instead of three times.
daily_totals = df.groupby('Date')[['Confirmed', 'Recovered', 'Deaths']].sum()
confirmed = daily_totals['Confirmed'].reset_index()
recovered = daily_totals['Recovered'].reset_index()
deaths = daily_totals['Deaths'].reset_index()
deaths.head()

Unnamed: 0,Date,Deaths
0,2020-01-22,17
1,2020-01-23,18
2,2020-01-24,26
3,2020-01-25,42
4,2020-01-26,56


In [12]:
# Count missing values in every column.
df.isna().sum()

Date              0
Province/State    0
Country           0
Lat               0
Long              0
Confirmed         0
Recovered         0
Deaths            0
Active            0
dtype: int64

In [13]:
# Column dtypes and non-null counts of the main frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29430 entries, 0 to 29429
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   Date            29430 non-null  datetime64[ns]
 1   Province/State  29430 non-null  object        
 2   Country         29430 non-null  object        
 3   Lat             29430 non-null  float64       
 4   Long            29430 non-null  float64       
 5   Confirmed       29430 non-null  int64         
 6   Recovered       29430 non-null  int64         
 7   Deaths          29430 non-null  int64         
 8   Active          29430 non-null  int64         
dtypes: datetime64[ns](1), float64(2), int64(4), object(2)
memory usage: 2.0+ MB


In [14]:
# Show the rows whose Country is exactly "US".
df[df['Country'] == "US"]

Unnamed: 0,Date,Province/State,Country,Lat,Long,Confirmed,Recovered,Deaths,Active
24525,2020-01-22,,US,37.0902,-95.7129,1,0,0,1
24526,2020-01-23,,US,37.0902,-95.7129,1,0,0,1
24527,2020-01-24,,US,37.0902,-95.7129,2,0,0,2
24528,2020-01-25,,US,37.0902,-95.7129,2,0,0,2
24529,2020-01-26,,US,37.0902,-95.7129,5,0,0,5
...,...,...,...,...,...,...,...,...,...
24629,2020-05-05,,US,37.0902,-95.7129,1204351,189791,71064,943496
24630,2020-05-06,,US,37.0902,-95.7129,1229331,189910,73455,965966
24631,2020-05-07,,US,37.0902,-95.7129,1257023,195036,75662,986325
24632,2020-05-08,,US,37.0902,-95.7129,1283929,198993,77180,1007756


### Worldwide Total confirmed, recovered, and deaths

In [15]:
# Most recent worldwide confirmed totals.
confirmed.tail()

Unnamed: 0,Date,Confirmed
104,2020-05-05,3662691
105,2020-05-06,3756069
106,2020-05-07,3845718
107,2020-05-08,3938064
108,2020-05-09,4024009


In [16]:
# Most recent worldwide recovered totals.
recovered.tail()

Unnamed: 0,Date,Recovered
104,2020-05-05,1198832
105,2020-05-06,1245413
106,2020-05-07,1284741
107,2020-05-08,1322050
108,2020-05-09,1375624


In [17]:
# Most recent worldwide death totals.
deaths.tail()

Unnamed: 0,Date,Deaths
104,2020-05-05,257239
105,2020-05-06,263855
106,2020-05-07,269567
107,2020-05-08,274898
108,2020-05-09,279311


In [18]:
# One figure with a line-and-marker trace per worldwide daily series.
fig = go.Figure()

series_specs = [
    (confirmed, 'Confirmed', 'Orange'),
    (recovered, 'Recovered', 'Green'),
    (deaths, 'Deaths', 'Red'),
]
for frame, column, colour in series_specs:
    trace = go.Scatter(
        x=frame['Date'],
        y=frame[column],
        mode='lines+markers',
        name=column,
        line=dict(color=colour, width=2),
    )
    fig.add_trace(trace)

fig.update_layout(
    title='Worldwide Covid-19 Cases',
    xaxis_tickfont_size=14,
    yaxis=dict(title='Number of Cases'),
)

fig.show()

## Cases Density Animation on World Map

In [19]:
# Check the column dtypes before the Date column is converted below.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29430 entries, 0 to 29429
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   Date            29430 non-null  datetime64[ns]
 1   Province/State  29430 non-null  object        
 2   Country         29430 non-null  object        
 3   Lat             29430 non-null  float64       
 4   Long            29430 non-null  float64       
 5   Confirmed       29430 non-null  int64         
 6   Recovered       29430 non-null  int64         
 7   Deaths          29430 non-null  int64         
 8   Active          29430 non-null  int64         
dtypes: datetime64[ns](1), float64(2), int64(4), object(2)
memory usage: 2.0+ MB


In [20]:
# Store dates as strings; the map animation below uses this column as its frame label.
df = df.assign(Date=df['Date'].astype(str))

In [21]:
# Confirm that Date is now stored with object dtype.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29430 entries, 0 to 29429
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Date            29430 non-null  object 
 1   Province/State  29430 non-null  object 
 2   Country         29430 non-null  object 
 3   Lat             29430 non-null  float64
 4   Long            29430 non-null  float64
 5   Confirmed       29430 non-null  int64  
 6   Recovered       29430 non-null  int64  
 7   Deaths          29430 non-null  int64  
 8   Active          29430 non-null  int64  
dtypes: float64(2), int64(4), object(3)
memory usage: 2.0+ MB


In [22]:
# Animated density map with one frame per date.
# Each location is weighted by its confirmed count through `z`. Without `z`
# every point counts equally, and because the same locations are present on
# every date, all animation frames would look identical.
fig = px.density_mapbox(data_frame=df, lat='Lat', lon='Long', z='Confirmed', hover_name='Country',
                        hover_data=['Confirmed', 'Recovered', 'Deaths'], animation_frame='Date',
                        color_continuous_scale='Portland', radius=7, zoom=0, height=700)

fig.update_layout(title='Worldwide Covid-19 cases with time laps')
fig.update_layout(mapbox_style='open-street-map', mapbox_center_lon=0)

fig.show()

## Total Cases on Ships

In [23]:
# Parse the Date column back into datetimes for the date arithmetic and comparisons below.
df = df.assign(Date=pd.to_datetime(df['Date']))
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29430 entries, 0 to 29429
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   Date            29430 non-null  datetime64[ns]
 1   Province/State  29430 non-null  object        
 2   Country         29430 non-null  object        
 3   Lat             29430 non-null  float64       
 4   Long            29430 non-null  float64       
 5   Confirmed       29430 non-null  int64         
 6   Recovered       29430 non-null  int64         
 7   Deaths          29430 non-null  int64         
 8   Active          29430 non-null  int64         
dtypes: datetime64[ns](1), float64(2), int64(4), object(2)
memory usage: 2.0+ MB


In [24]:
# Cruise-ship records show up either in the province column or in the country column.
province_names = df['Province/State']
country_names = df['Country']

ship_rows = province_names.str.contains('Grand Princess') | province_names.str.contains('Diamond Princess')
for ship_name in ('Grand Princess', 'Diamond Princess', 'MS Zaandam'):
    ship_rows = ship_rows | country_names.str.contains(ship_name)

# Keep the ship records separately and drop them from the main frame.
ship = df.loc[ship_rows]

df = df.loc[~ship_rows]

In [25]:
# Ship records for the most recent date available.
latest_ship_date = ship['Date'].max()
ship_latest = ship.loc[ship['Date'] == latest_ship_date]
ship_latest

Unnamed: 0,Date,Province/State,Country,Lat,Long,Confirmed,Recovered,Deaths,Active
4141,2020-05-09,Grand Princess,Canada,37.6489,-122.6655,13,0,0,13
9700,2020-05-09,,Diamond Princess,0.0,0.0,712,645,13,54
25287,2020-05-09,Diamond Princess,Canada,0.0,0.0,1,0,1,0
27576,2020-05-09,,MS Zaandam,0.0,0.0,9,0,2,7


In [26]:
# Display the latest ship records with a background colour gradient from the pandas Styler.
ship_latest.style.background_gradient(cmap='Pastel1_r')

Unnamed: 0,Date,Province/State,Country,Lat,Long,Confirmed,Recovered,Deaths,Active
4141,2020-05-09 00:00:00,Grand Princess,Canada,37.6489,-122.6655,13,0,0,13
9700,2020-05-09 00:00:00,,Diamond Princess,0.0,0.0,712,645,13,54
25287,2020-05-09 00:00:00,Diamond Princess,Canada,0.0,0.0,1,0,1,0
27576,2020-05-09 00:00:00,,MS Zaandam,0.0,0.0,9,0,2,7


## Cases Over the Time with Area Plot

In [27]:
# Worldwide totals per date (ship rows were removed from df above).
# The columns are passed as a list: selecting several groupby columns with a
# bare tuple of names was deprecated in pandas 1.0 and is rejected by pandas 2.x.
temp = df.groupby('Date')[['Confirmed', 'Recovered', 'Deaths', 'Active']].sum().reset_index()
#temp

In [28]:
# Keep only the row for the most recent date.
latest_date = temp['Date'].max()
temp = temp.loc[temp['Date'] == latest_date].reset_index(drop=True)
temp


Unnamed: 0,Date,Confirmed,Recovered,Deaths,Active
0,2020-05-09,4023274,1374979,279295,2369000


In [29]:
# Reshape the latest totals to long form: one row per case type.
tm = pd.melt(temp, id_vars='Date', value_vars=['Active', 'Deaths', 'Recovered'])
tm

Unnamed: 0,Date,variable,value
0,2020-05-09,Active,2369000
1,2020-05-09,Deaths,279295
2,2020-05-09,Recovered,1374979


In [30]:
# Treemap of the latest Active / Deaths / Recovered totals.
# Colours are bound to the category names explicitly. The previous positional
# sequence [act, rec, dth] did not follow the row order of `tm`
# (Active, Deaths, Recovered), so Deaths and Recovered swapped colours.
fig = px.treemap(tm, path=['variable'], values='value', height=250, width=800,
                 color='variable',
                 color_discrete_map={'Active': act, 'Deaths': dth, 'Recovered': rec})

fig.data[0].textinfo = 'label+text+value'
fig.show()

In [31]:
# Daily worldwide totals per case type, reshaped to long form for plotting.
# The columns are passed as a list: selecting several groupby columns with a
# bare tuple of names was deprecated in pandas 1.0 and is rejected by pandas 2.x.
temp = df.groupby('Date')[['Recovered', 'Deaths', 'Active']].sum().reset_index()
temp = temp.melt(id_vars='Date', value_vars=['Recovered', 'Deaths', 'Active'], var_name='Case', value_name='Count')
temp

Unnamed: 0,Date,Case,Count
0,2020-01-22,Recovered,28
1,2020-01-23,Recovered,30
2,2020-01-24,Recovered,36
3,2020-01-25,Recovered,39
4,2020-01-26,Recovered,52
...,...,...,...
322,2020-05-05,Active,2206546
323,2020-05-06,Active,2246727
324,2020-05-07,Active,2291336
325,2020-05-08,Active,2341042


In [32]:
# Area chart of the counts over time, one coloured area per case type.
area_options = dict(
    x='Date',
    y='Count',
    color='Case',
    height=600,
    title='Cases over time',
    color_discrete_sequence=[rec, dth, act],
)
fig = px.area(temp, **area_options)
fig.update_xaxes(rangeslider_visible=True)
fig.show()

## Folium Maps

In [33]:
# Rows for the most recent date in the main frame.
latest_date = df['Date'].max()
temp = df.loc[df['Date'] == latest_date]
temp

Unnamed: 0,Date,Province/State,Country,Lat,Long,Confirmed,Recovered,Deaths,Active
108,2020-05-09,,Afghanistan,33.000000,65.000000,4033,502,115,3416
217,2020-05-09,,Albania,41.153300,20.168300,856,627,31,198
326,2020-05-09,,Algeria,28.033900,1.659600,5558,2546,494,2518
435,2020-05-09,,Andorra,42.506300,1.521800,754,545,48,161
544,2020-05-09,,Angola,-11.202700,17.873900,43,13,2,28
...,...,...,...,...,...,...,...,...,...
28993,2020-05-09,,Tajikistan,38.861034,71.276093,612,0,20,592
29102,2020-05-09,,Canada,56.130400,-106.346800,0,31262,0,-31262
29211,2020-05-09,,Mozambique,-18.665700,35.529600,0,34,0,-34
29320,2020-05-09,,Syria,34.802100,38.996800,0,29,0,-29


In [34]:
# World map with one circle per row of `temp`, sized by the square root of the confirmed count.
m = folium.Map(location=[0, 0], tiles='cartodbpositron', min_zoom=1, max_zoom=4, zoom_start=1)

for row_label, place in temp.iterrows():
    tooltip_html = (
        '<li><bold> Country: ' + str(place['Country'])
        + '<li><bold> Province: ' + str(place['Province/State'])
        + '<li><bold> Confirmed: ' + str(place['Confirmed'])
        + '<li><bold> Deaths: ' + str(place['Deaths'])
    )
    circle = folium.Circle(
        location=[place['Lat'], place['Long']],
        color='crimson',
        fill='crimson',
        tooltip=tooltip_html,
        radius=int(place['Confirmed'])**0.5,
    )
    circle.add_to(m)

m

## Confirmed Cases with Choropleth Map

In [35]:
# Preview the first ten rows of the per-country, per-day table.
country_daywise.head(10)

Unnamed: 0,Date,Country,Confirmed,Deaths,Recovered,Active,New Cases,New Deaths,New Recovered
0,2020-01-22,Afghanistan,0,0,0,0,0,0,0
1,2020-01-22,Albania,0,0,0,0,0,0,0
2,2020-01-22,Algeria,0,0,0,0,0,0,0
3,2020-01-22,Andorra,0,0,0,0,0,0,0
4,2020-01-22,Angola,0,0,0,0,0,0,0
5,2020-01-22,Antigua and Barbuda,0,0,0,0,0,0,0
6,2020-01-22,Argentina,0,0,0,0,0,0,0
7,2020-01-22,Armenia,0,0,0,0,0,0,0
8,2020-01-22,Australia,0,0,0,0,0,0,0
9,2020-01-22,Austria,0,0,0,0,0,0,0


In [36]:
# Animated choropleth coloured by the natural log of the confirmed count.
# Zero counts are masked to NaN before taking the log: log(0) is -inf, which
# previously went unnoticed only because warnings are suppressed globally.
# NOTE(review): rows with a NaN colour are presumably drawn without fill — confirm.
positive_confirmed = country_daywise['Confirmed'].where(country_daywise['Confirmed'] > 0)

fig = px.choropleth(country_daywise, locations='Country', locationmode='country names',
                    color=np.log(positive_confirmed),
                    hover_name='Country', animation_frame=country_daywise['Date'].dt.strftime('%Y-%m-%d'),
                    title='Cases over time', color_continuous_scale=px.colors.sequential.Inferno)

fig.update(layout_coloraxis_showscale = True)
fig.show()

## Confirmed and Death Cases with Static Colormap 

In [37]:
# Left map: log of confirmed cases per country.
log_confirmed = np.log(countrywise['Confirmed'])
fig_c = px.choropleth(
    countrywise,
    locations='Country',
    locationmode='country names',
    color=log_confirmed,
    hover_name='Country',
    hover_data=['Confirmed'],
)

# Right map: log of deaths, restricted to countries with at least one death.
temp = countrywise[countrywise['Deaths'] > 0]
log_deaths = np.log(temp['Deaths'])
fig_d = px.choropleth(
    temp,
    locations='Country',
    locationmode='country names',
    color=log_deaths,
    hover_name='Country',
    hover_data=['Deaths'],
)

# Put the two maps side by side in one figure.
map_cell = {'type': 'choropleth'}
fig = make_subplots(rows=1, cols=2, subplot_titles=['Confirmed', 'Deaths'],
                    specs=[[map_cell, dict(map_cell)]])

for position, source_fig in enumerate((fig_c, fig_d), start=1):
    fig.add_trace(source_fig['data'][0], row=1, col=position)

# Hide the colour bar.
fig.update_layout(coloraxis_showscale=False)

fig.show()

## Deaths and Recoveries per 100 Cases 

In [38]:
# Preview the per-day summary table used by the following charts.
daywise.head()

Unnamed: 0,Date,Confirmed,Deaths,Recovered,Active,New Cases,Deaths / 100 Cases,Recovered / 100 Cases,Deaths / 100 Recovered,No. of Countries
0,2020-01-22,555,17,28,510,0,3.06,5.05,60.71,6
1,2020-01-23,654,18,30,606,99,2.75,4.59,60.0,8
2,2020-01-24,941,26,36,879,287,2.76,3.83,72.22,9
3,2020-01-25,1434,42,39,1353,493,2.93,2.72,107.69,11
4,2020-01-26,2118,56,52,2010,684,2.64,2.46,107.69,13


In [39]:
# Daily bar charts of confirmed cases and deaths, shown side by side.
fig_c = px.bar(daywise, x='Date', y='Confirmed', color_discrete_sequence=[act])
fig_d = px.bar(daywise, x='Date', y='Deaths', color_discrete_sequence=[dth])

panel_titles = ('Confirmed Cases', 'Deaths Cases')
fig = make_subplots(rows=1, cols=2, shared_xaxes=False,
                    horizontal_spacing=0.1, subplot_titles=panel_titles)

for position, source_fig in enumerate((fig_c, fig_d), start=1):
    fig.add_trace(source_fig['data'][0], row=1, col=position)

fig.update_layout(height=400)

fig.show()

In [40]:
# List the columns available in the per-day table.
daywise.columns

Index(['Date', 'Confirmed', 'Deaths', 'Recovered', 'Active', 'New Cases',
       'Deaths / 100 Cases', 'Recovered / 100 Cases', 'Deaths / 100 Recovered',
       'No. of Countries'],
      dtype='object')

In [41]:
# Three line charts of the ratio columns of `daywise`, shown side by side.
ratio_columns = ('Deaths / 100 Cases', 'Recovered / 100 Cases', 'Deaths / 100 Recovered')

fig1 = px.line(daywise, x='Date', y=ratio_columns[0], color_discrete_sequence=[dth])
fig2 = px.line(daywise, x='Date', y=ratio_columns[1], color_discrete_sequence=[rec])
fig3 = px.line(daywise, x='Date', y=ratio_columns[2], color_discrete_sequence=['aqua'])

fig = make_subplots(rows=1, cols=3, shared_xaxes=False, subplot_titles=ratio_columns)

for position, source_fig in enumerate((fig1, fig2, fig3), start=1):
    fig.add_trace(source_fig['data'][0], row=1, col=position)

fig.update_layout(height=400)
fig.show()

### New Cases and No. of Countries

In [42]:
# Left panel: new cases reported per day; right panel: number of countries per day.
# The left panel plots the 'New Cases' column so that it matches its title
# ('No. of New Cases per Day'); it previously plotted the 'Confirmed' column.
fig_c = px.bar(daywise, x = 'Date', y = 'New Cases', color_discrete_sequence=[act])
fig_d = px.bar(daywise, x = 'Date', y = 'No. of Countries', color_discrete_sequence=[dth])

fig = make_subplots(rows = 1, cols = 2, shared_xaxes=False, horizontal_spacing=0.1,
                   subplot_titles=('No. of New Cases per Day', 'No. of Countries'))

fig.add_trace(fig_c['data'][0], row = 1, col = 1)
fig.add_trace(fig_d['data'][0], row = 1, col = 2)

fig.show()

### Top 15 Countries Case Analysis

In [43]:
# List the columns available in the per-country table.
countrywise.columns

Index(['Country', 'Confirmed', 'Deaths', 'Recovered', 'Active', 'New Cases',
       'Deaths / 100 Cases', 'Recovered / 100 Cases', 'Deaths / 100 Recovered',
       'Population', 'Cases / Million People', 'Confirmed last week',
       '1 week change', '1 week % increase'],
      dtype='object')

In [44]:
top = 15


def _top_bar(frame, column, colour):
    """Horizontal bar chart of the `top` rows of `frame` with the largest `column` values."""
    ranked = frame.sort_values(column).tail(top)
    return px.bar(ranked, x=column, y='Country', text=column,
                  orientation='h', color_discrete_sequence=[colour])


# Absolute counts.
fig_c = _top_bar(countrywise, 'Confirmed', cnf)
fig_d = _top_bar(countrywise, 'Deaths', dth)
fig_a = _top_bar(countrywise, 'Active', '#434343')
fig_r = _top_bar(countrywise, 'Recovered', rec)

# Ratios per 100 cases.
fig_dc = _top_bar(countrywise, 'Deaths / 100 Cases', '#f84351')
fig_rc = _top_bar(countrywise, 'Recovered / 100 Cases', '#a45398')

# New cases, and cases per million restricted to populations above one million.
fig_nc = _top_bar(countrywise, 'New Cases', '#f04341')
temp = countrywise[countrywise['Population'] > 1000000]
fig_p = _top_bar(temp, 'Cases / Million People', '#b40398')

# Weekly change, and weekly % increase restricted to countries with more than 100 confirmed cases.
fig_wc = _top_bar(countrywise, '1 week change', '#c04041')
temp = countrywise[countrywise['Confirmed'] > 100]
fig_wi = _top_bar(temp, '1 week % increase', '#b00398')

panel_titles = (
    'Confirmed Cases', 'Deaths Reported',
    'Recovered Cases', 'Active Cases',
    'Deaths / 100 Cases', 'Recovered / 100 Cases',
    'New Cases', 'Cases / Million People',
    '1 week change', '1 week % increase',
)
fig = make_subplots(rows=5, cols=2, shared_xaxes=False, horizontal_spacing=0.2,
                    vertical_spacing=.05, subplot_titles=panel_titles)

# Fill the 5x2 grid row by row, in the same order as the panel titles.
panels = [fig_c, fig_d, fig_r, fig_a, fig_dc, fig_rc, fig_nc, fig_p, fig_wc, fig_wi]
for position, panel in enumerate(panels):
    grid_row, grid_col = divmod(position, 2)
    fig.add_trace(panel['data'][0], row=grid_row + 1, col=grid_col + 1)

fig.update_layout(height=3000)
fig.show()


# Save Static Plots

In [45]:
# install
# conda install -c plotly plotly-orca==1.2.1 psutil requests

In [46]:
# Make sure the output folder for exported figures exists.
# exist_ok=True makes this a single call that is safe to re-run and has no gap
# between checking for the folder and creating it.
os.makedirs('images', exist_ok=True)

In [47]:
#fig.write_image('images/fig.png')

In [48]:
#fig.write_image('images/fig.jpeg')

In [49]:
#fig.write_image('images/fig.pdf')

# Scatter Plot for Deaths vs Confirmed Cases

In [50]:
top = 15

# The `top` countries with the most deaths, plotted on log-log axes.
most_deaths = countrywise.sort_values('Deaths', ascending=False).head(top)
fig = px.scatter(
    most_deaths,
    x='Confirmed',
    y='Deaths',
    color='Country',
    size='Confirmed',
    text='Country',
    height=600,
    log_x=True,
    log_y=True,
    title='Deaths vs Confirmed Cases (Cases are on log10 scale)',
)

fig.update_traces(textposition='top center')
fig.update_layout(showlegend=True, xaxis_rangeslider_visible=True)
fig.show()

# Confirmed, Deaths, New Cases vs Country and Date

## Bar Plot

In [None]:
# Bar chart of confirmed cases per date, coloured by country.
country_palette = px.colors.cyclical.mygbm
fig = px.bar(
    country_daywise,
    x='Date',
    y='Confirmed',
    color='Country',
    title='Confirmed',
    height=600,
    color_discrete_sequence=country_palette,
)
fig.show()

In [None]:
# Bar chart of deaths per date, coloured by country.
country_palette = px.colors.cyclical.mygbm
fig = px.bar(
    country_daywise,
    x='Date',
    y='Deaths',
    color='Country',
    title='Deaths',
    height=600,
    color_discrete_sequence=country_palette,
)
fig.show()

In [None]:
#fig = px.bar(country_daywise, x = 'Date', y = 'Recovered', color = 'Country', height = 600,
#            title='Recovered', color_discrete_sequence=px.colors.cyclical.mygbm)
#fig.show()

In [None]:
#fig = px.bar(country_daywise, x = 'Date', y = 'New Cases', color = 'Country', height = 600,
#            title='New Cases', color_discrete_sequence=px.colors.cyclical.mygbm)
#fig.show()

## Line Plot

In [None]:
# Line chart of confirmed cases over time, one line per country.
country_palette = px.colors.cyclical.mygbm
fig = px.line(
    country_daywise,
    x='Date',
    y='Confirmed',
    color='Country',
    title='Confirmed',
    height=600,
    color_discrete_sequence=country_palette,
)
fig.show()

In [None]:
# Line chart of deaths over time, one line per country.
country_palette = px.colors.cyclical.mygbm
fig = px.line(
    country_daywise,
    x='Date',
    y='Deaths',
    color='Country',
    title='Deaths',
    height=600,
    color_discrete_sequence=country_palette,
)
fig.show()


In [None]:
# Line chart of recoveries over time, one line per country.
country_palette = px.colors.cyclical.mygbm
fig = px.line(
    country_daywise,
    x='Date',
    y='Recovered',
    color='Country',
    title='Recovered',
    height=600,
    color_discrete_sequence=country_palette,
)
fig.show()

## Growth Rate after 100 Cases

In [None]:
# Preview the main frame before computing the growth curves.
df.head()

In [None]:
# Countries that have at least one day with more than 100 confirmed cases.
gt_100 = country_daywise.loc[country_daywise['Confirmed'] > 100, 'Country'].unique()

# Daily confirmed totals per country, keeping only the days above 100.
temp = (
    df[df['Country'].isin(gt_100)]
    .groupby(['Country', 'Date'])['Confirmed']
    .sum()
    .reset_index()
)
temp = temp.loc[temp['Confirmed'] > 100]

# Earliest date above 100 for each country.
min_date = (
    temp.groupby('Country')['Date']
    .min()
    .rename('Min Date')
    .reset_index()
)

In [None]:
# Attach each country's first above-100 date and count the days elapsed since then.
from_100th_case = temp.merge(min_date, on='Country')
elapsed = from_100th_case['Date'] - from_100th_case['Min Date']
from_100th_case['N days'] = elapsed.dt.days
from_100th_case

In [None]:
# Confirmed cases against days elapsed since each country passed 100 cases.
fig = px.line(
    from_100th_case,
    x='N days',
    y='Confirmed',
    color='Country',
    height=600,
    title='N days from 100 case',
)
fig.show()

## Growth Rate after 1000 Cases

In [None]:
# Countries that have at least one day with more than 1000 confirmed cases.
gt_1000 = country_daywise.loc[country_daywise['Confirmed'] > 1000, 'Country'].unique()

# Daily confirmed totals per country, keeping only the days above 1000.
temp = (
    df[df['Country'].isin(gt_1000)]
    .groupby(['Country', 'Date'])['Confirmed']
    .sum()
    .reset_index()
)
temp = temp.loc[temp['Confirmed'] > 1000]

# Earliest date above 1000 for each country.
min_date = (
    temp.groupby('Country')['Date']
    .min()
    .rename('Min Date')
    .reset_index()
)

# Days elapsed since that date, then the growth curves.
from_1000th_case = temp.merge(min_date, on='Country')
elapsed = from_1000th_case['Date'] - from_1000th_case['Min Date']
from_1000th_case['N days'] = elapsed.dt.days

fig = px.line(from_1000th_case, x='N days', y='Confirmed', color='Country',
              height=600, title='N days from 1000 case')
fig.show()

## Growth Rate after 10,000 Cases

In [None]:
# Countries that have at least one day with more than 10000 confirmed cases.
gt_10000 = country_daywise.loc[country_daywise['Confirmed'] > 10000, 'Country'].unique()

# Daily confirmed totals per country, keeping only the days above 10000.
temp = (
    df[df['Country'].isin(gt_10000)]
    .groupby(['Country', 'Date'])['Confirmed']
    .sum()
    .reset_index()
)
temp = temp.loc[temp['Confirmed'] > 10000]

# Earliest date above 10000 for each country.
min_date = (
    temp.groupby('Country')['Date']
    .min()
    .rename('Min Date')
    .reset_index()
)

# Days elapsed since that date, then the growth curves.
from_10000th_case = temp.merge(min_date, on='Country')
elapsed = from_10000th_case['Date'] - from_10000th_case['Min Date']
from_10000th_case['N days'] = elapsed.dt.days

fig = px.line(from_10000th_case, x='N days', y='Confirmed', color='Country',
              height=600, title='N days from 10000 case')
fig.show()

## Growth Rate After 100k Cases

In [None]:
# Countries that have at least one day with more than 100000 confirmed cases.
gt_100000 = country_daywise.loc[country_daywise['Confirmed'] > 100000, 'Country'].unique()

# Daily confirmed totals per country, keeping only the days above 100000.
temp = (
    df[df['Country'].isin(gt_100000)]
    .groupby(['Country', 'Date'])['Confirmed']
    .sum()
    .reset_index()
)
temp = temp.loc[temp['Confirmed'] > 100000]

# Earliest date above 100000 for each country.
min_date = (
    temp.groupby('Country')['Date']
    .min()
    .rename('Min Date')
    .reset_index()
)

# Days elapsed since that date, then the growth curves.
from_100000th_case = temp.merge(min_date, on='Country')
elapsed = from_100000th_case['Date'] - from_100000th_case['Min Date']
from_100000th_case['N days'] = elapsed.dt.days

fig = px.line(from_100000th_case, x='N days', y='Confirmed', color='Country',
              height=600, title='N days from 100000 case')
fig.show()

## Tree Map Analysis

### Confirmed Cases

In [None]:
 # Most recent date present in the main frame.
 df['Date'].max()

In [None]:
# Latest snapshot: rows for the most recent date.
latest_day = df['Date'].max()
full_latest = df.loc[df['Date'] == latest_day]

# Treemap of confirmed cases, nested Country -> Province/State.
by_confirmed = full_latest.sort_values(by='Confirmed', ascending=False).reset_index(drop=True)
fig = px.treemap(
    by_confirmed,
    path=['Country', 'Province/State'],
    values='Confirmed',
    height=700,
    title='Number of Confirmed Cases',
    color_discrete_sequence=px.colors.qualitative.Dark2,
)

fig.update_traces(textinfo='label+text+value')
fig.show()

### Deaths Cases

In [None]:
# Latest snapshot: rows for the most recent date.
latest_day = df['Date'].max()
full_latest = df.loc[df['Date'] == latest_day]

# Treemap of deaths, nested Country -> Province/State.
by_deaths = full_latest.sort_values(by='Deaths', ascending=False).reset_index(drop=True)
fig = px.treemap(
    by_deaths,
    path=['Country', 'Province/State'],
    values='Deaths',
    height=700,
    title='Number of Deaths Cases',
    color_discrete_sequence=px.colors.qualitative.Dark2,
)

fig.update_traces(textinfo='label+text+value')
fig.show()

### First and Last Case Report Time

In [None]:
# First date on which each country reported a confirmed case.
first_date = df[df['Confirmed']>0]
first_date = first_date.groupby('Country')['Date'].agg(['min']).reset_index()


# Day-over-day change of the per-country totals.
# The columns are passed as a list: selecting several groupby columns with a
# bare tuple of names was deprecated in pandas 1.0 and is rejected by pandas 2.x.
last_date = df.groupby(['Country', 'Date'])[['Confirmed', 'Deaths', 'Recovered']]
last_date = last_date.sum().diff().reset_index()

# diff() runs across the whole frame, so the first row of each country holds a
# difference against the previous country; this mask marks those rows.
mask = (last_date['Country'] != last_date['Country'].shift(1))
mask

In [None]:
# Blank out the differences on the rows flagged by `mask`.
last_date.loc[mask, ['Confirmed', 'Deaths', 'Recovered']] = np.nan

# Latest date per country on which the confirmed count increased.
with_increase = last_date.loc[last_date['Confirmed'] > 0]
last_date = with_increase.groupby('Country')['Date'].agg(['max']).reset_index()
last_date

In [None]:
# Combine the first ('min') and last ('max') dates per country.
# The frames are joined on the Country key rather than concatenated by row
# position: positional concatenation pairs dates from different countries
# whenever the two frames do not contain exactly the same countries.
first_last = first_date.merge(last_date, on='Country')
first_last

In [None]:
# Build the Country / Start / Finish / Days / Task table for the Gantt chart.
# One day is added to the last date before the duration is computed.
finish = first_last['max'] + timedelta(days=1)

first_last = pd.DataFrame({
    'Country': first_last['Country'],
    'Start': first_last['min'],
    'Finish': finish,
    'Days': finish - first_last['min'],
    'Task': first_last['Country'],
})

first_last = first_last.sort_values('Days')
first_last

In [None]:
# One random six-digit hex colour per row of first_last.
hex_digits = '0123456789ABCDEF'
colors = []
for row_number in range(len(first_last)):
    digits = [random.choice(hex_digits) for digit_number in range(6)]
    colors.append('#' + ''.join(digits))
colors

In [None]:
# Gantt chart with one bar per country, spanning its Start and Finish dates.
gantt_options = dict(
    index_col='Country',
    colors=colors,
    show_colorbar=False,
    bar_width=0.2,
    showgrid_x=True,
    showgrid_y=True,
    height=2500,
)
fig = ff.create_gantt(first_last, **gantt_options)

fig.show()

## Confirmed Cases Country and Day wise

In [None]:
# Preview the per-country, per-day table used for the per-country subplots.
country_daywise.head()

In [None]:
# Daily confirmed totals per country, limited to the countries listed in gt_10000.
temp = (
    country_daywise
    .groupby(['Country', 'Date'])['Confirmed']
    .sum()
    .reset_index()
)
temp = temp.loc[temp['Country'].isin(gt_10000)]

countries = temp['Country'].unique()
countries

In [None]:
# Grid of bar charts, one subplot per country, three per row.
ncols = 3
nrows = math.ceil(len(countries)/ncols)

fig = make_subplots(rows=nrows, cols = ncols, shared_xaxes= False, subplot_titles=countries)

for ind, country in enumerate(countries):
    # 1-based grid position, filled row by row.
    row = ind // ncols + 1
    col = ind % ncols + 1
    # Take x and y from the same country's rows. Previously x used the Date
    # column of every country while y was filtered to one, so the two only
    # lined up because of the sort order of `temp`.
    country_rows = temp[temp['Country'] == country]
    fig.add_trace(go.Bar(x = country_rows['Date'], y = country_rows['Confirmed'], name = country), row = row, col = col)

fig.update_layout(height=4000, title_text = 'Confirmed Cases in Each Country')
fig.update_layout(showlegend = False)
fig.show()

# Covid-19 vs Other Similar Epidemics

In [None]:
# Display the latest snapshot that supplies the COVID-19 totals below.
full_latest

In [None]:
# Wikipedia Source

# COVID-19 totals come from the latest snapshot; the other figures are fixed values.
covid_confirmed = full_latest['Confirmed'].sum()
covid_deaths = full_latest['Deaths'].sum()

epidemic_records = [
    ('COVID-19', 2019, 2020, covid_confirmed, covid_deaths),
    ('SARS', 2002, 2004, 8422, 813),
    ('EBOLA', 2013, 2016, 28646, 11323),
    ('MERS', 2012, 2020, 2519, 866),
    ('H1N1', 2009, 2010, 6724149, 19654),
]
epidemics = pd.DataFrame(
    epidemic_records,
    columns=['epidemic', 'start_year', 'end_year', 'confirmed', 'deaths'],
)

# Deaths as a percentage of confirmed cases, rounded to two decimals.
epidemics['mortality'] = ((epidemics['deaths']/epidemics['confirmed'])*100).round(2)

epidemics.head()

In [None]:
# Long form: one row per (epidemic, measure) pair for the faceted bar chart.
measures = ['confirmed', 'deaths', 'mortality']
temp = pd.melt(epidemics, id_vars='epidemic', value_vars=measures,
               var_name='Case', value_name='Value')

temp

In [None]:
# One bar-chart facet per measure, comparing the epidemics.
fig = px.bar(
    temp,
    x='epidemic',
    y='Value',
    color='epidemic',
    text='Value',
    facet_col='Case',
    color_discrete_sequence=px.colors.qualitative.Bold,
)

fig.update_traces(textposition='outside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig.update_yaxes(showticklabels=False)

# Unlink the second and third y-axes so each facet gets its own scale.
for facet_axis in (fig.layout.yaxis2, fig.layout.yaxis3):
    facet_axis.update(matches=None)
fig.show()