In [18]:
import math
import random
import warnings
from datetime import timedelta
from pathlib import Path

import folium
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go
from plotly.subplots import make_subplots

%matplotlib inline

warnings.filterwarnings('ignore')

# COLOR PALETTE: hex colours referenced by the plotting cells further down
# (most of which are currently commented out).
cnf = '#393e46'  # not referenced anywhere in the visible cells; presumably "confirmed"
dtf = '#ff2e63'  # passed for death-related series
rec = '#21bf73'  # passed for recovered series
act = '#fe9801'  # passed for active (and the confirmed bar) series

## Dataset Preparation

In [19]:
# Switch Plotly to offline notebook mode so figures render inside the notebook.
# NOTE(review): connected=True presumably loads plotly.js from a CDN rather than
# embedding it in the notebook — confirm against the installed Plotly version.
import plotly as py
py.offline.init_notebook_mode(connected=True)

In [20]:
# Single place to repoint the notebook at the preprocessed CSV folder: edit
# DATA_DIR instead of four separate read_csv calls. The default is the same
# local folder the notebook used before, so behaviour is unchanged.
DATA_DIR = Path(r"C:\Users\HP\Downloads\Data Analysis\New folder\Preprocessed Covid19 Dataset\Preprocessed")

# Row-level data (Date, Province/State, Country, Lat, Long and the case counts).
df = pd.read_csv(DATA_DIR / "covid_19_cleaned_data.csv", parse_dates=['Date'])

# Per-country, per-day counts including the "New ..." daily deltas.
country_daywise = pd.read_csv(DATA_DIR / "country_daywise_data.csv", parse_dates=['Date'])

# One summary row per country (no Date column, hence no parse_dates).
countrywise = pd.read_csv(DATA_DIR / "countrywise_data.csv")

# Worldwide totals per day.
daywise = pd.read_csv(DATA_DIR / "daywise_data.csv", parse_dates=['Date'])

In [21]:
# Replace missing province/state names with an empty string so the column
# contains only strings (the isnull() check below then reports 0 for it).
df['Province/State'] = df['Province/State'].fillna("")
df.head()

Unnamed: 0,Date,Province/State,Country,Lat,Long,Confirmed,Recovered,Deaths,Active
0,2020-01-22,,Afghanistan,33.93911,67.709953,0,0,0,0
1,2020-01-23,,Afghanistan,33.93911,67.709953,0,0,0,0
2,2020-01-24,,Afghanistan,33.93911,67.709953,0,0,0,0
3,2020-01-25,,Afghanistan,33.93911,67.709953,0,0,0,0
4,2020-01-26,,Afghanistan,33.93911,67.709953,0,0,0,0


In [22]:
# Worldwide totals per day. Pick the three count columns *before* aggregating so
# the sum never touches the text columns (Province/State, Country) or Lat/Long,
# and so the groupby runs once instead of three times over the whole frame.
daily_totals = df.groupby('Date')[['Confirmed', 'Recovered', 'Deaths']].sum()

# One two-column frame (Date, <metric>) per metric, same shape as before.
confirmed = daily_totals['Confirmed'].reset_index()
recovered = daily_totals['Recovered'].reset_index()
deaths = daily_totals['Deaths'].reset_index()
deaths.head()

Unnamed: 0,Date,Deaths
0,2020-01-22,17
1,2020-01-23,18
2,2020-01-24,26
3,2020-01-25,42
4,2020-01-26,56


In [23]:
df.isnull().sum()

Date              0
Province/State    0
Country           0
Lat               0
Long              0
Confirmed         0
Recovered         0
Deaths            0
Active            0
dtype: int64

In [24]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 175275 entries, 0 to 175274
Data columns (total 9 columns):
 #   Column          Non-Null Count   Dtype         
---  ------          --------------   -----         
 0   Date            175275 non-null  datetime64[ns]
 1   Province/State  175275 non-null  object        
 2   Country         175275 non-null  object        
 3   Lat             175275 non-null  float64       
 4   Long            175275 non-null  float64       
 5   Confirmed       175275 non-null  int64         
 6   Recovered       175275 non-null  int64         
 7   Deaths          175275 non-null  int64         
 8   Active          175275 non-null  int64         
dtypes: datetime64[ns](1), float64(2), int64(4), object(2)
memory usage: 12.0+ MB


In [25]:
df.query('Country == "US"')

Unnamed: 0,Date,Province/State,Country,Lat,Long,Confirmed,Recovered,Deaths,Active
156210,2020-01-22,,US,40.0,-100.0,1,0,0,1
156211,2020-01-23,,US,40.0,-100.0,1,0,0,1
156212,2020-01-24,,US,40.0,-100.0,2,0,0,2
156213,2020-01-25,,US,40.0,-100.0,2,0,0,2
156214,2020-01-26,,US,40.0,-100.0,5,0,0,5
...,...,...,...,...,...,...,...,...,...
156820,2021-09-23,,US,40.0,-100.0,42675416,0,684573,41990843
156821,2021-09-24,,US,40.0,-100.0,42852871,0,687084,42165787
156822,2021-09-25,,US,40.0,-100.0,42900402,0,687746,42212656
156823,2021-09-26,,US,40.0,-100.0,42931354,0,688032,42243322


## Worldwide Total Confirmed, Recovered and Deaths

In [26]:
confirmed.tail()

Unnamed: 0,Date,Confirmed
610,2021-09-23,230604728
611,2021-09-24,231152493
612,2021-09-25,231515976
613,2021-09-26,231846936
614,2021-09-27,232316272


In [27]:
# NOTE(review): the output below shows Recovered == 0 for the most recent dates,
# so any recovery-based figure for this period should be treated with caution.
recovered.tail()

Unnamed: 0,Date,Recovered
610,2021-09-23,0
611,2021-09-24,0
612,2021-09-25,0
613,2021-09-26,0
614,2021-09-27,0


In [28]:
deaths.tail()

Unnamed: 0,Date,Deaths
610,2021-09-23,4729076
611,2021-09-24,4737904
612,2021-09-25,4743196
613,2021-09-26,4748335
614,2021-09-27,4756251


In [9]:
# fig = go.Figure()

# fig.add_trace(go.Scatter(x = confirmed['Date'], y = confirmed['Confirmed'], mode = 'lines+markers', name = 'Confirmed', line = dict(color = "Orange", width = 2)))
# fig.add_trace(go.Scatter(x = recovered['Date'], y = recovered['Recovered'], mode = 'lines+markers', name = 'Recovered', line = dict(color = "Green", width = 2)))
# fig.add_trace(go.Scatter(x = deaths['Date'], y = deaths['Deaths'], mode = 'lines+markers', name = 'Deaths', line = dict(color = "Red", width = 2)))
# fig.update_layout(title = 'Worldwide COVID-19 Cases', xaxis_tickfont_size = 14, yaxis = dict(title = 'Number of Cases'))
# fig.show()

## Cases Density Animation on World Map

In [30]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 175275 entries, 0 to 175274
Data columns (total 9 columns):
 #   Column          Non-Null Count   Dtype         
---  ------          --------------   -----         
 0   Date            175275 non-null  datetime64[ns]
 1   Province/State  175275 non-null  object        
 2   Country         175275 non-null  object        
 3   Lat             175275 non-null  float64       
 4   Long            175275 non-null  float64       
 5   Confirmed       175275 non-null  int64         
 6   Recovered       175275 non-null  int64         
 7   Deaths          175275 non-null  int64         
 8   Active          175275 non-null  int64         
dtypes: datetime64[ns](1), float64(2), int64(4), object(2)
memory usage: 12.0+ MB


In [31]:
# Cast Date to str — presumably for the density-map animation below, which passes
# 'Date' as animation_frame. The column is converted back to datetime in a later cell.
df['Date'] = df['Date'].astype(str)

In [32]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 175275 entries, 0 to 175274
Data columns (total 9 columns):
 #   Column          Non-Null Count   Dtype  
---  ------          --------------   -----  
 0   Date            175275 non-null  object 
 1   Province/State  175275 non-null  object 
 2   Country         175275 non-null  object 
 3   Lat             175275 non-null  float64
 4   Long            175275 non-null  float64
 5   Confirmed       175275 non-null  int64  
 6   Recovered       175275 non-null  int64  
 7   Deaths          175275 non-null  int64  
 8   Active          175275 non-null  int64  
dtypes: float64(2), int64(4), object(3)
memory usage: 12.0+ MB


In [8]:
# fig = px.density_mapbox(df, lat = 'Lat', lon = 'Long', hover_name = 'Country', hover_data = ['Confirmed', 'Recovered', 'Deaths'], animation_frame = 'Date', color_continuous_scale = 'Portland', radius = 7, zoom = 0, height = 700)
# fig.update_layout(title = 'Worldwide COVID-19 Cases with Time Lapse')
# fig.update_layout(mapbox_style = 'open-street-map', mapbox_center_lon = 0)
# fig.show()

## Total Cases on Ships

In [34]:
# Undo the earlier str cast so Date is datetime64 again for the cells that
# compare or subtract dates; df.info() confirms the dtype.
df['Date'] = pd.to_datetime(df['Date'])
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 175275 entries, 0 to 175274
Data columns (total 9 columns):
 #   Column          Non-Null Count   Dtype         
---  ------          --------------   -----         
 0   Date            175275 non-null  datetime64[ns]
 1   Province/State  175275 non-null  object        
 2   Country         175275 non-null  object        
 3   Lat             175275 non-null  float64       
 4   Long            175275 non-null  float64       
 5   Confirmed       175275 non-null  int64         
 6   Recovered       175275 non-null  int64         
 7   Deaths          175275 non-null  int64         
 8   Active          175275 non-null  int64         
dtypes: datetime64[ns](1), float64(2), int64(4), object(2)
memory usage: 12.0+ MB


In [10]:
# Cruise-ship records appear either as a Province/State value or as their own
# Country entry. Split them out here: the next cell reads `ship`, so this cell
# must be live code for the notebook to run on a fresh kernel.
# Run once per kernel session: re-running after `df` has been filtered would
# leave `ship` empty.
ship_rows = (
    df['Province/State'].str.contains('Grand Princess')
    | df['Province/State'].str.contains('Diamond Princess')
    | df['Country'].str.contains('Grand Princess')
    | df['Country'].str.contains('Diamond Princess')
    | df['Country'].str.contains('MS Zaandam')
)
ship = df[ship_rows]

# From here on `df` holds the non-ship rows only.
df = df[~ship_rows]

In [42]:
# Keep only the ship rows recorded on the most recent date.
latest_ship_date = ship['Date'].max()
ship_latest = ship.loc[ship['Date'].eq(latest_ship_date)]
ship_latest

Unnamed: 0,Date,Province/State,Country,Lat,Long,Confirmed,Recovered,Deaths,Active
25829,2021-09-27,Diamond Princess,Canada,0.0,0.0,0,0,1,-1
26444,2021-09-27,Grand Princess,Canada,0.0,0.0,13,0,0,13
65189,2021-09-27,,Diamond Princess,0.0,0.0,712,0,13,699
107009,2021-09-27,,MS Zaandam,0.0,0.0,9,0,2,7


In [43]:
ship_latest.style.background_gradient(cmap = 'Pastel1_r')

Unnamed: 0,Date,Province/State,Country,Lat,Long,Confirmed,Recovered,Deaths,Active
25829,2021-09-27 00:00:00,Diamond Princess,Canada,0.0,0.0,0,0,1,-1
26444,2021-09-27 00:00:00,Grand Princess,Canada,0.0,0.0,13,0,0,13
65189,2021-09-27 00:00:00,,Diamond Princess,0.0,0.0,712,0,13,699
107009,2021-09-27 00:00:00,,MS Zaandam,0.0,0.0,9,0,2,7


## Cases Over Time with Area Plot

In [7]:
# temp = df.groupby('Date')[['Confirmed', 'Deaths', 'Recovered', 'Active']].sum().reset_index()
# temp = temp[temp['Date'] == max(temp['Date'])].reset_index(drop = True)

# temp
# tm = temp.melt(id_vars = 'Date', value_vars = ['Active', 'Deaths', 'Recovered'])
# fig = px.treemap(tm, path = ['variable'], values = 'value', height = 250, width = 800, color_discrete_sequence = [act,rec, dtf])

# fig.data[0].textinfo = 'label+text+value'
# fig.show()

In [6]:
# temp = df.groupby('Date')[['Recovered', 'Deaths', 'Active']].sum().reset_index()
# temp = temp.melt(id_vars = 'Date', value_vars = ['Recovered', 'Deaths', 'Active'], var_name = 'Case', value_name = 'Count')

# fig = px.area(temp, x = 'Date', y = 'Count', color = 'Case', height = 400, title = 'Cases over time', color_discrete_sequence = [rec, dtf, act])
# fig.update_layout(xaxis_rangeslider_visible = True)
# fig.show()

## Folium Maps

In [61]:
# Worldwide Cases on Folium Maps

In [5]:
# temp = df[df['Date'] == max(df['Date'])]

# m = folium.Map(location=[0, 0], tiles='cartodbpositron', min_zoom = 1, max_zoom = 4, zoom_start = 1)

# for i in range(0, len(temp)):
#     folium.Circle(location=[temp.iloc[i]['Lat'], temp.iloc[i]['Long']], color = 'crimson', fill = 'crimson',
#                   tooltip = '<li><bold> Country: ' + str(temp.iloc[i]['Country'])+
#                             '<li><bold> Province: ' + str(temp.iloc[i]['Province/State'])+
#                             '<li><bold> Confirmed: ' + str(temp.iloc[i]['Confirmed'])+
#                             '<li><bold> Deaths: ' + str(temp.iloc[i]['Deaths']),
#                             radius = int(temp.iloc[i]['Confirmed'])**0.5).add_to(m)
# m

## Confirmed Cases with Choropleth Map

In [68]:
country_daywise.head()

Unnamed: 0,Date,Country,Confirmed,Deaths,Recovered,Active,New Cases,New Recovered,New Deaths
0,2020-01-23,Afghanistan,0,0,0,0,0,0,0
1,2020-01-24,Afghanistan,0,0,0,0,0,0,0
2,2020-01-25,Afghanistan,0,0,0,0,0,0,0
3,2020-01-26,Afghanistan,0,0,0,0,0,0,0
4,2020-01-27,Afghanistan,0,0,0,0,0,0,0


In [11]:
# fig = px.choropleth(country_daywise, locations = 'Country', locationmode = 'country names', color = country_daywise['Confirmed'],
#                    hover_name = 'Country', animation_frame = country_daywise['Date'].dt.strftime("%Y-%m-%d"),
#                    title = 'Cases over time', color_continuous_scale = px.colors.sequential.Inferno)

# fig.update(layout_coloraxis_showscale=True)
# fig.show()

## Deaths and Recoveries per 100 Cases

In [76]:
daywise.head()

Unnamed: 0,Date,Confirmed,Deaths,Recovered,Active,New Cases,Deaths / 100 Cases,Recovered / 100 Cases,Deaths / 100 Recovered,No. of Countries
0,2020-01-23,655,18,32,605,99,2.75,4.89,56.25,8
1,2020-01-24,941,26,39,876,288,2.76,4.14,66.67,9
2,2020-01-25,1434,42,42,1350,495,2.93,2.93,100.0,11
3,2020-01-26,2118,56,56,2006,684,2.64,2.64,100.0,13
4,2020-01-27,2927,82,65,2780,809,2.8,2.22,126.15,16


In [1]:
# fig_c = px.bar(daywise, x = 'Date', y = 'Confirmed', color_discrete_sequence = [act])
# fig_d = px.bar(daywise, x = 'Date', y = 'Deaths', color_discrete_sequence = [dtf])

# fig = make_subplots(rows = 1, cols = 2, shared_xaxes = False, horizontal_spacing = 0.1,
#                    subplot_titles = ('Confirmed Cases', 'Deaths Cases'))

# fig.add_trace(fig_c['data'][0], row = 1, col = 1)
# fig.add_trace(fig_d['data'][0], row = 1, col = 2)

# fig.update_layout(height = 400)

# fig.show()

## Confirmed and Death Cases with Static Colormap

In [2]:
# fig_c = px.choropleth(countrywise, locations = 'Country', locationmode = 'country names', color = np.log(countrywise['Confirmed']), hover_name = 'Country', hover_data = ['Confirmed'])

# temp = countrywise[countrywise['Deaths']>0]

# fig_d = px.choropleth(temp, locations = 'Country', locationmode = 'country names', color = np.log(temp['Deaths']), hover_name = 'Country', hover_data = ['Deaths'])

# fig = make_subplots(rows = 1, cols = 2, subplot_titles = ['Confirmed', 'Deaths'], specs = [[{'type':'choropleth'},{'type':'choropleth'}]])

# fig.add_trace(fig_c['data'][0], row = 1, col = 1)
# fig.add_trace(fig_d['data'][0], row = 1, col = 2)

# fig.update(layout_coloraxis_showscale = False)

# fig.show()

In [99]:
daywise.columns

Index(['Date', 'Confirmed', 'Deaths', 'Recovered', 'Active', 'New Cases',
       'Deaths / 100 Cases', 'Recovered / 100 Cases', 'Deaths / 100 Recovered',
       'No. of Countries'],
      dtype='object')

In [3]:
# fig1 = px.line(daywise, x = 'Date', y = 'Deaths / 100 Cases', color_discrete_sequence=[dtf])
# fig2 = px.line(daywise, x = 'Date', y = 'Recovered / 100 Cases', color_discrete_sequence=[rec])
# fig3 = px.line(daywise, x = 'Date', y = 'Deaths / 100 Recovered', color_discrete_sequence=['aqua'])

# fig = make_subplots(rows = 1, cols = 3, shared_xaxes = False,
#                     subplot_titles = ('Deaths / 100 Cases', 'Recovered / 100 Cases', 'Deaths / 100 Recovered'))

# fig.add_trace(fig1['data'][0], row = 1, col = 1)
# fig.add_trace(fig2['data'][0], row = 1, col = 2)
# fig.add_trace(fig3['data'][0], row = 1, col = 3)

# fig.update_layout(height = 400)

# fig.show()

## New Cases and No. of Countries

In [4]:
# fig_c = px.bar(daywise, x = 'Date', y = 'New Cases', color_discrete_sequence=[act])
# fig_d = px.bar(daywise, x = 'Date', y = 'No. of Countries', color_discrete_sequence=[dtf])

# fig = make_subplots(rows = 1,cols = 2, shared_xaxes = False, horizontal_spacing=0.1,
#                    subplot_titles = ('No. of New Cases per Day', 'No. of Countries'))

# fig.add_trace(fig_c['data'][0], row = 1, col = 1)
# fig.add_trace(fig_d['data'][0], row = 1, col = 2)

## Top 15 Countries Case Analysis

In [107]:
countrywise.columns

Index(['Country', 'Confirmed', 'Deaths', 'Recovered', 'Active', 'New Cases',
       'Deaths / 100 Cases', 'Recovered / 100 Cases', 'Deaths / 100 Recovered',
       'Population', 'Cases / Million People', 'Confirmed last week',
       '1 week change', '1 week % increase'],
      dtype='object')

In [17]:
# top = 15

# fig_c = px.bar(countrywise.sort_values('Confirmed').tail(top), x = 'Confirmed', y = 'Country',
#               text = 'Confirmed', orientation='h', color_discrete_sequence=[act])

# fig_d = px.bar(countrywise.sort_values('Deaths').tail(top), x = 'Deaths', y = 'Country',
#               text = 'Deaths', orientation='h', color_discrete_sequence=[dtf])


# fig_a = px.bar(countrywise.sort_values('Active').tail(top), x = 'Active', y = 'Country',
#               text = 'Active', orientation='h', color_discrete_sequence=['#434343'])

# fig_r = px.bar(countrywise.sort_values('Recovered').tail(top), x = 'Recovered', y = 'Country',
#               text = 'Recovered', orientation='h', color_discrete_sequence=[rec])


# fig_dc = px.bar(countrywise.sort_values('Deaths / 100 Cases').tail(top), x = 'Deaths / 100 Cases', y = 'Country',
#               text = 'Deaths / 100 Cases', orientation='h', color_discrete_sequence=['#f84351'])

# fig_rc = px.bar(countrywise.sort_values('Recovered / 100 Cases').tail(top), x = 'Recovered / 100 Cases', y = 'Country',
#               text = 'Recovered / 100 Cases', orientation='h', color_discrete_sequence=['#a45398'])


# fig_nc = px.bar(countrywise.sort_values('New Cases').tail(top), x = 'New Cases', y = 'Country',
#               text = 'New Cases', orientation='h', color_discrete_sequence=['#f04341'])

# temp = countrywise[countrywise['Population']>1000000]
# fig_p = px.bar(temp.sort_values('Cases / Million People').tail(top), x = 'Cases / Million People', y = 'Country',
#               text = 'Cases / Million People', orientation='h', color_discrete_sequence=['#b40398'])


# fig_wc = px.bar(countrywise.sort_values('1 week change').tail(top), x = '1 week change', y = 'Country',
#               text = '1 week change', orientation='h', color_discrete_sequence=['#c04041'])

# temp = countrywise[countrywise['Confirmed']>100]
# fig_wi = px.bar(temp.sort_values('1 week % increase').tail(top), x = '1 week % increase', y = 'Country',
#               text = '1 week % increase', orientation='h', color_discrete_sequence=['#b00398'])



# fig = make_subplots(rows = 5, cols = 2, shared_xaxes=False, horizontal_spacing=0.2,
#                    vertical_spacing=0.05,
#                    subplot_titles=('Confirmed Cases', 'Deaths Reported', 'Recovered Cases', 'Active Cases',
#                                    'Deaths / 100 Cases', 'Recovered / 100 Cases', 'New Cases', 'Cases / Million People',
#                                     '1 Week Change', '1 week % increase'))

# fig.add_trace(fig_c['data'][0], row = 1, col = 1)
# fig.add_trace(fig_d['data'][0], row = 1, col = 2)

# fig.add_trace(fig_r['data'][0], row = 2, col = 1)
# fig.add_trace(fig_a['data'][0], row = 2, col = 2)

# fig.add_trace(fig_dc['data'][0], row = 3, col = 1)
# fig.add_trace(fig_rc['data'][0], row = 3, col = 2)

# fig.add_trace(fig_nc['data'][0], row = 4, col = 1)
# fig.add_trace(fig_p['data'][0], row = 4, col = 2)

# fig.add_trace(fig_wc['data'][0], row = 5, col = 1)
# fig.add_trace(fig_wi['data'][0], row = 5, col = 2)

# fig.update_layout(height = 3000)

# fig.show()

## Save Static Plots

In [123]:
# import os
# if not os.path.exists('images'):
#     os.mkdir('images')

In [125]:
# fig.write_image('images/fig.png')
# fig.write_image('images/fig.jpeg')
# fig.write_image('images/fig.pdf')

## Scatter Plot for Deaths vs Confirmed Cases

In [16]:
# top = 15
# fig = px.scatter(countrywise.sort_values('Deaths', ascending=False).head(top),
#                 x = 'Confirmed', y = 'Deaths', color = 'Country', size = 'Confirmed',
#                 height = 600, text = 'Country', log_x = True, log_y = True,
#                 title = 'Deaths vs Confirmed Cases (Cases are on log10 scale)')

# fig.update_traces(textposition = 'top center')
# fig.update_layout(showlegend = True)
# fig.update_layout(xaxis_rangeslider_visible = True)
# fig.show()

## Confirmed, Deaths, New Cases vs Country and Date

## Bar Plot

In [15]:
# fig = px.bar(country_daywise, x = 'Date', y = 'Confirmed', color = 'Country', height = 600,
#             title = 'Confirmed', color_discrete_sequence=px.colors.cyclical.mygbm)

# fig.show()

In [14]:
# fig = px.bar(country_daywise, x = 'Date', y = 'Deaths', color = 'Country', height = 600,
#             title = 'Deaths', color_discrete_sequence=px.colors.cyclical.mygbm)

# fig.show()

In [13]:
# fig = px.bar(country_daywise, x = 'Date', y = 'Recovered', color = 'Country', height = 600,
#             title = 'Recovered', color_discrete_sequence=px.colors.cyclical.mygbm)

# fig.show()

In [12]:
# fig = px.bar(country_daywise, x = 'Date', y = 'New Cases', color = 'Country', height = 600,
#             title = 'New Cases', color_discrete_sequence=px.colors.cyclical.mygbm)

# fig.show()

## Line Plot

In [11]:
# fig = px.line(country_daywise, x = 'Date', y = 'Confirmed', color = 'Country', height = 600,
#              title = 'Confirmed', color_discrete_sequence = px.colors.cyclical.mygbm)

# fig.show()

# fig = px.line(country_daywise, x = 'Date', y = 'Deaths', color = 'Country', height = 600,
#              title = 'Deaths', color_discrete_sequence = px.colors.cyclical.mygbm)

# fig.show()

# fig = px.line(country_daywise, x = 'Date', y = 'Recovered', color = 'Country', height = 600,
#              title = 'Recovered', color_discrete_sequence = px.colors.cyclical.mygbm)

# fig.show()

## Growth Rate after 100 Cases

In [139]:
df.head()

Unnamed: 0,Date,Province/State,Country,Lat,Long,Confirmed,Recovered,Deaths,Active
0,2020-01-22,,Afghanistan,33.93911,67.709953,0,0,0,0
1,2020-01-23,,Afghanistan,33.93911,67.709953,0,0,0,0
2,2020-01-24,,Afghanistan,33.93911,67.709953,0,0,0,0
3,2020-01-25,,Afghanistan,33.93911,67.709953,0,0,0,0
4,2020-01-26,,Afghanistan,33.93911,67.709953,0,0,0,0


In [10]:
# gt_100 = country_daywise[country_daywise['Confirmed']>100]['Country'].unique()
# temp = df[df['Country'].isin(gt_100)]

# temp = temp.groupby(['Country', 'Date'])['Confirmed'].sum().reset_index()
# temp = temp[temp['Confirmed']>100]


# min_date = temp.groupby('Country')['Date'].min().reset_index()
# min_date.columns = ['Country', 'Min Date']


# from_100th_case = pd.merge(temp, min_date, on = 'Country')
# from_100th_case['N days'] = (from_100th_case['Date'] - from_100th_case['Min Date']).dt.days


# fig = px.line(from_100th_case, x = 'N days', y = 'Confirmed', color = 'Country',
#              title = 'N days from 100 case', height = 600)

# fig.show()

## Growth Rate after 1000 Cases

In [9]:
# gt_1000 = country_daywise[country_daywise['Confirmed']>1000]['Country'].unique()
# temp = df[df['Country'].isin(gt_1000)]

# temp = temp.groupby(['Country', 'Date'])['Confirmed'].sum().reset_index()
# temp = temp[temp['Confirmed']>1000]


# min_date = temp.groupby('Country')['Date'].min().reset_index()
# min_date.columns = ['Country', 'Min Date']


# from_1000th_case = pd.merge(temp, min_date, on = 'Country')
# from_1000th_case['N days'] = (from_1000th_case['Date'] - from_1000th_case['Min Date']).dt.days


# fig = px.line(from_1000th_case, x = 'N days', y = 'Confirmed', color = 'Country',
#              title = 'N days from 1000 case', height = 600)

# fig.show()

## Growth Rate after 10000 Cases

In [6]:
# gt_10000 = country_daywise[country_daywise['Confirmed']>10000]['Country'].unique()
# temp = df[df['Country'].isin(gt_10000)]

# temp = temp.groupby(['Country', 'Date'])['Confirmed'].sum().reset_index()
# temp = temp[temp['Confirmed']>10000]


# min_date = temp.groupby('Country')['Date'].min().reset_index()
# min_date.columns = ['Country', 'Min Date']


# from_10000th_case = pd.merge(temp, min_date, on = 'Country')
# from_10000th_case['N days'] = (from_10000th_case['Date'] - from_10000th_case['Min Date']).dt.days


# fig = px.line(from_10000th_case, x = 'N days', y = 'Confirmed', color = 'Country',
#              title = 'N days from 10000 case', height = 600)

# fig.show()

## Growth Rate After 100k Cases

In [4]:
# gt_100000 = country_daywise[country_daywise['Confirmed']>100000]['Country'].unique()
# temp = df[df['Country'].isin(gt_100000)]

# temp = temp.groupby(['Country', 'Date'])['Confirmed'].sum().reset_index()
# temp = temp[temp['Confirmed']>100000]


# min_date = temp.groupby('Country')['Date'].min().reset_index()
# min_date.columns = ['Country', 'Min Date']


# from_100000th_case = pd.merge(temp, min_date, on = 'Country')
# from_100000th_case['N days'] = (from_100000th_case['Date'] - from_100000th_case['Min Date']).dt.days


# fig = px.line(from_100000th_case, x = 'N days', y = 'Confirmed', color = 'Country',
#              title = 'N days from 100000 case', height = 600)

# fig.show()

## Tree Map Analysis

### Confirmed Cases

In [5]:
# full_latest = df[df['Date'] == max(df['Date'])]

# fig = px.treemap(full_latest.sort_values(by = 'Confirmed', ascending = False).reset_index(drop = True),
#                 path = ['Country', 'Province/State'], values = 'Confirmed', height = 700,
#                 title = 'Number of Confirmed Cases',
#                 color_discrete_sequence = px.colors.qualitative.Dark2)

# fig.data[0].textinfo = 'label+text+value'
# fig.show()

### Deaths Cases

In [8]:
# full_latest = df[df['Date'] == max(df['Date'])]

# fig = px.treemap(full_latest.sort_values(by = 'Deaths', ascending = False).reset_index(drop = True),
#                 path = ['Country', 'Province/State'], values = 'Deaths', height = 700,
#                 title = 'Number of Deaths Cases',
#                 color_discrete_sequence = px.colors.qualitative.Dark2)

# fig.data[0].textinfo = 'label+text+value'
# fig.show()

## First and Last Case Report Time

In [7]:
# first_date = df[df['Confirmed']>0]
# first_date = first_date.groupby('Country')['Date'].agg(['min']).reset_index()


# last_date = df.groupby(['Country', 'Date'])[['Confirmed', 'Deaths', 'Recovered']]
# last_date = last_date.sum().diff().reset_index()

# mask= last_date['Country'] != last_date['Country'].shift(1)

# last_date.loc[mask, 'Confirmed'] = np.nan
# last_date.loc[mask, 'Deaths'] = np.nan
# last_date.loc[mask, 'Recovered'] = np.nan

# last_date = last_date[last_date['Confirmed']>0]
# last_date = last_date.groupby('Country')['Date'].agg(['max']).reset_index()


# first_last = pd.merge(first_date, last_date, on = 'Country')
# first_last['max'] = first_last['max'] + timedelta(days = 1)

# first_last['Days'] = first_last['max'] - first_last['min']
# first_last['Task'] = first_last['Country']

# first_last.columns = ['Country', 'Start', 'Finish', 'Days', 'Task']

# first_last = first_last.sort_values('Days')

# colors = ['#' + ''.join([random.choice('0123456789ABCDEF') for j in range(6)]) for i in range(len(first_last))]

# fig = ff.create_gantt(first_last, index_col = 'Country', colors = colors, show_colorbar = False,
#                      bar_width = 0.2, showgrid_x = True, showgrid_y = True, height = 2500)

# fig.show()

## Confirmed Cases Country and Day wise

In [171]:
country_daywise.head()

Unnamed: 0,Date,Country,Confirmed,Deaths,Recovered,Active,New Cases,New Recovered,New Deaths
0,2020-01-23,Afghanistan,0,0,0,0,0,0,0
1,2020-01-24,Afghanistan,0,0,0,0,0,0,0
2,2020-01-25,Afghanistan,0,0,0,0,0,0,0
3,2020-01-26,Afghanistan,0,0,0,0,0,0,0
4,2020-01-27,Afghanistan,0,0,0,0,0,0,0


In [174]:
# temp = country_daywise.groupby(['Country', 'Date'])['Confirmed'].sum().reset_index()
# temp = temp[temp['Country'].isin(gt_10000)]

# countries = temp['Country'].unique()

# ncols = 3
# nrows = math.ceil(len(countries)/ncols)

# fig = make_subplots(rows=nrows, cols = ncols, shared_xaxes= False, subplot_titles = countries)

# for ind, country in enumerate(countries):
#     row = (ind // ncols) + 1
#     col = (ind % ncols) + 1
#     country_rows = temp[temp['Country'] == country]
#     fig.add_trace(go.Bar(x = country_rows['Date'], y = country_rows['Confirmed'], name = country), row = row, col = col)
    
# fig.update_layout(height = 4000, title_text = 'Confirmed Cases in Each Country')
# fig.update_layout(showlegend = False)
# fig.show()

## Covid-19 vs Other Similar Epidemics

In [175]:
# `full_latest` (the rows for the most recent date in df) is otherwise assigned
# only in commented-out cells, so define it here to keep this cell and the
# epidemics comparison below runnable on a fresh kernel.
full_latest = df[df['Date'] == df['Date'].max()]
full_latest

Unnamed: 0,Date,Province/State,Country,Lat,Long,Confirmed,Recovered,Deaths,Active
614,2021-09-27,,Afghanistan,33.93911,67.709953,155072,0,7200,147872
1229,2021-09-27,,Albania,41.15330,20.168300,168188,0,2653,165535
1844,2021-09-27,,Algeria,28.03390,1.659600,202877,0,5786,197091
2459,2021-09-27,,Andorra,42.50630,1.521800,15189,0,130,15059
3074,2021-09-27,,Angola,-11.20270,17.873900,55583,0,1513,54070
...,...,...,...,...,...,...,...,...,...
172814,2021-09-27,Hebei,China,37.89570,114.904200,0,0,0,0
173429,2021-09-27,Henan,China,33.88200,113.614000,0,0,0,0
174044,2021-09-27,,Mozambique,-18.66570,35.529600,0,0,0,0
174659,2021-09-27,,Syria,34.80210,38.996800,0,0,0,0


In [179]:
# Source for the non-COVID figures: Wikipedia. The COVID-19 totals are summed
# from `full_latest`.
covid_confirmed = full_latest['Confirmed'].sum()
covid_deaths = full_latest['Deaths'].sum()

epidemics = pd.DataFrame({
    'epidemic': ['COVID-19', 'SARS', 'EBOLA', 'MERS', 'H1N1'],
    'start_year': [2019, 2002, 2013, 2012, 2009],
    'end_year': [2020, 2004, 2016, 2020, 2010],
    'confirmed': [covid_confirmed, 8422, 28646, 2519, 6724149],
    'deaths': [covid_deaths, 813, 11323, 866, 19654],
})

# Deaths as a percentage of confirmed cases, rounded to two decimals.
epidemics['mortality'] = epidemics['deaths'].div(epidemics['confirmed']).mul(100).round(2)

epidemics.head()

Unnamed: 0,epidemic,start_year,end_year,confirmed,deaths,mortality
0,COVID-19,2019,2020,232315538,4756235,2.05
1,SARS,2002,2004,8422,813,9.65
2,EBOLA,2013,2016,28646,11323,39.53
3,MERS,2012,2020,2519,866,34.38
4,H1N1,2009,2010,6724149,19654,0.29


In [185]:
# Long format: one row per (epidemic, Case) so each metric gets its own facet.
temp = epidemics.melt(
    id_vars='epidemic',
    value_vars=['confirmed', 'deaths', 'mortality'],
    var_name='Case',
    value_name='Value',
)

fig = px.bar(
    temp,
    x='epidemic',
    y='Value',
    color='epidemic',
    text='Value',
    facet_col='Case',
    color_discrete_sequence=px.colors.qualitative.Bold,
)

fig.update_traces(textposition='outside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig.update_yaxes(showticklabels=False)

# Unlink the second and third facet y-axes so each panel gets its own scale.
for axis_name in ('yaxis2', 'yaxis3'):
    fig.layout[axis_name].update(matches=None)

fig.show()