# New Cases and No. of Countries

In [None]:
# import
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots

import folium

import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

import math 
import random
from datetime import timedelta

import warnings 
warnings.filterwarnings('ignore')

# color palette: one hex color per case category, reused by the charts below
cnf = '#393e46'  # confirmed
dth = '#ff2e63'  # deaths
rec = '#9ACD32'  # recovered
act = '#fe9801'  # active

In [None]:
import plotly as py
# Set up inline rendering of plotly figures in the notebook. With
# connected=True plotly.js is loaded from the network instead of being
# embedded in the notebook file.
py.offline.init_notebook_mode(connected=True)

In [None]:
import os

In [None]:
# Remove any previous checkout so the clone below starts from a clean state.
# shutil.rmtree is used instead of shelling out to `rm`: a shell command
# reports failure through its exit status and never raises, so an `except`
# around os.system cannot fire, and a mistyped flag silently deletes nothing.
import shutil

try:
    shutil.rmtree("Covid-19-Preprocessed-Dataset")
except FileNotFoundError:
    # Nothing to clean up on a first run.
    print("File does not exist")

In [None]:
!git clone https://github.com/laxmimerit/Covid-19-Preprocessed-Dataset

In [None]:
# Load the four preprocessed tables from the cloned repository. The 'Date'
# column is parsed as datetime for every table that requests it.
df = pd.read_csv(
    'Covid-19-Preprocessed-Dataset/preprocessed/covid_19_data_cleaned.csv',
    parse_dates=['Date'],
)
country_daywise = pd.read_csv(
    'Covid-19-Preprocessed-Dataset/preprocessed/country_daywise.csv',
    parse_dates=['Date'],
)
countrywise = pd.read_csv(
    'Covid-19-Preprocessed-Dataset/preprocessed/countrywise.csv',
)
daywise = pd.read_csv(
    'Covid-19-Preprocessed-Dataset/preprocessed/daywise.csv',
    parse_dates=['Date'],
)

In [None]:
# Make sure 'Date' is a datetime column (read_csv above was already asked to
# parse it), then print the dtypes and non-null counts of every column.
df['Date']= pd.to_datetime(df['Date'])
df.info()

In [None]:
# Replace missing province/state names with an empty string, then preview
# the last rows of the table.
df['Province/State']= df['Province/State'].fillna("")
df.tail()

In [None]:
# Two daily bar charts placed side by side: the 'Confirmed' column on the
# left and the 'No. of Countries' column on the right.
# NOTE(review): the left panel is titled 'No. of New Cases per Day' but plots
# the 'Confirmed' column - confirm which of the two is intended.
fig_c = px.bar(daywise, x='Date', y='Confirmed', color_discrete_sequence=[act])
fig_d = px.bar(daywise, x='Date', y='No. of Countries', color_discrete_sequence=[dth])

fig = make_subplots(
    rows=1,
    cols=2,
    shared_xaxes=False,
    horizontal_spacing=0.1,
    subplot_titles=('No. of New Cases per Day', 'No. of Countries'),
)

# Copy the single trace of each express figure into its grid cell.
for position, panel in enumerate((fig_c, fig_d), start=1):
    fig.add_trace(panel['data'][0], row=1, col=position)

fig.show()

# Top 20 Countries Case Analysis

In [None]:
# List the column names available in `countrywise`.
countrywise.columns

In [None]:
# Number of countries shown in each ranking panel.
top = 20


def _ranked_bar(frame, column, colour):
    """Return a horizontal bar chart of the `top` rows of `frame` ranked by `column`.

    Rows are sorted ascending and the last `top` are kept, so the largest
    value ends up as the top bar. Each bar is labelled with its own value.
    """
    return px.bar(frame.sort_values(column).tail(top), x=column, y='Country',
                  text=column, orientation='h', color_discrete_sequence=[colour])


fig_c = _ranked_bar(countrywise, 'Confirmed', cnf)
fig_d = _ranked_bar(countrywise, 'Deaths', dth)

fig_a = _ranked_bar(countrywise, 'Active', act)
fig_r = _ranked_bar(countrywise, 'Recovered', rec)

fig_dc = _ranked_bar(countrywise, 'Deaths / 100 Cases', '#f84351')
fig_rc = _ranked_bar(countrywise, 'Recovered / 100 Cases', '#a45398')

fig_nc = _ranked_bar(countrywise, 'New Cases', '#f84351')
# Rank per-capita cases only among countries with more than a million
# inhabitants; the filtered frame must be the one that is plotted, otherwise
# very small populations dominate the ranking.
temp = countrywise[countrywise['Population']>1000000]
fig_p = _ranked_bar(temp, 'Cases / Million People', '#b40398')

fig_wc = _ranked_bar(countrywise, '1 week change', '#c04041')
# Likewise, rank the percentage increase only among countries with more than
# 100 confirmed cases, where a percentage is meaningful.
temp = countrywise[countrywise['Confirmed']>100]
fig_wi = _ranked_bar(temp, '1 week % increase', '#b00398')






# Arrange the ten ranking charts in a 5 x 2 grid, filled row by row in the
# same order as the subplot titles.
panel_titles = ('Confirmed Cases', 'Deaths Reported', 'Active Cases', 'Recovered Cases',
                'Deaths / 100 Cases', 'Recovered / 100 Cases', 'New Cases', 'Cases / Million People',
                '1 week change', '1 week % increase')

fig = make_subplots(rows=5, cols=2, shared_xaxes=False, horizontal_spacing=0.2,
                    vertical_spacing=.05, subplot_titles=panel_titles)

panels = (fig_c, fig_d, fig_a, fig_r, fig_dc, fig_rc, fig_nc, fig_p, fig_wc, fig_wi)
for slot, panel in enumerate(panels):
    # divmod maps the running index onto zero-based (row, column) grid coordinates.
    grid_row, grid_col = divmod(slot, 2)
    fig.add_trace(panel['data'][0], row=grid_row + 1, col=grid_col + 1)

fig.update_layout(height=4000)
fig.show()

#Note: the data set does not include the number of recoveries as of August 5, 2021, so recoveries appear as 0 from that date; this is reflected in the graphics and tables.

# Save Static Plots

In [None]:
# install
# conda install -c plotly plotly-orca=1.2.1 psutil requests

In [None]:
# Create the output folder for exported figures. makedirs(exist_ok=True) is
# a single call, so there is no gap between "check" and "create" in which the
# folder could appear and make the creation fail.
os.makedirs('images', exist_ok=True)

In [None]:
# Export the current figure as a PNG file. Static export needs an image
# engine to be installed (see the install note above).
fig.write_image('images/fig.png')

In [None]:
# Export the current figure as a JPEG file.
fig.write_image('images/fig.jpeg')

In [None]:
# Export the current figure as a PDF file.
fig.write_image('images/fig.pdf')

# Scatter Plot for Deaths vs Confirmed Cases

In [None]:
# The fifteen rows of `countrywise` with the highest 'Deaths' values.
countrywise.sort_values('Deaths', ascending=False).head(15)

#Note: the data set does not include the number of recoveries as of August 5, 2021, so recoveries appear as 0 from that date; this is reflected in the graphics and tables.

In [None]:
# Bubble chart of deaths against confirmed cases, both on logarithmic axes,
# for the `top` countries with the most deaths. Bubble size follows the
# confirmed count and every point is labelled with its country.
top = 20
most_deaths = countrywise.sort_values('Deaths', ascending=False).head(top)

fig = px.scatter(
    most_deaths,
    x='Confirmed',
    y='Deaths',
    color='Country',
    size='Confirmed',
    text='Country',
    log_x=True,
    log_y=True,
    height=700,
    title='Deaths vs Confirmed Cases (Cases are on log10 scale)',
)
fig.update_traces(textposition='top center')
# Kept as the last expression so the notebook renders the returned figure.
fig.update_layout(showlegend=True, xaxis_rangeslider_visible=True)

In [None]:
# The fifteen rows of `countrywise` with the highest 'Deaths' values.
countrywise.sort_values('Deaths', ascending=False).iloc[:15]

# Confirmed, Deaths, New Cases vs Country and Date

# Bar Plot

In [None]:
# Daily bars of confirmed cases, coloured by country. The figure is only
# built here; displaying it is left disabled.
fig = px.bar(
    country_daywise,
    x='Date',
    y='Confirmed',
    color='Country',
    title='Confirmed',
    height=600,
    color_discrete_sequence=px.colors.cyclical.mygbm,
)

#fig.show()

In [None]:
# Daily bars of deaths, coloured by country. The figure is only built here;
# displaying it is left disabled.
fig = px.bar(
    country_daywise,
    x='Date',
    y='Deaths',
    color='Country',
    title='Deaths',
    height=600,
    color_discrete_sequence=px.colors.cyclical.mygbm,
)

#fig.show()

In [None]:
# Daily bars of recoveries, coloured by country. The figure is only built
# here; displaying it is left disabled.
fig = px.bar(
    country_daywise,
    x='Date',
    y='Recovered',
    color='Country',
    title='Recovered',
    height=600,
    color_discrete_sequence=px.colors.cyclical.mygbm,
)

#fig.show()

In [None]:
# Daily bars of new cases, coloured by country. The figure is only built
# here; displaying it is left disabled.
fig = px.bar(
    country_daywise,
    x='Date',
    y='New Cases',
    color='Country',
    title='New Cases',
    height=600,
    color_discrete_sequence=px.colors.cyclical.mygbm,
)

#fig.show()

# Line Plot

In [None]:
# One line per country showing confirmed cases over time.
fig = px.line(
    country_daywise,
    x='Date',
    y='Confirmed',
    color='Country',
    title='Confirmed',
    height=600,
    color_discrete_sequence=px.colors.cyclical.mygbm,
)
fig.show()

In [None]:
# One line per country showing deaths over time.
fig = px.line(
    country_daywise,
    x='Date',
    y='Deaths',
    color='Country',
    title='Deaths',
    height=600,
    color_discrete_sequence=px.colors.cyclical.mygbm,
)
fig.show()

In [None]:
# One line per country showing recoveries over time.
fig = px.line(
    country_daywise,
    x='Date',
    y='Recovered',
    color='Country',
    title='Recovered',
    height=600,
    color_discrete_sequence=px.colors.cyclical.mygbm,
)
fig.show()

#Note: the data set does not include the number of recoveries as of August 5, 2021, so recoveries appear as 0 from that date; this is reflected in the graphics and tables.

# Growth Rate after 100 Cases

In [None]:
# Preview the first rows of `df`.
df.head()

In [None]:
# Align every country on the first date its confirmed total exceeded 100 and
# count the days elapsed since that date.
gt_100 = country_daywise.loc[country_daywise['Confirmed'] > 100, 'Country'].unique()

# Per-country daily totals, restricted to the days above the threshold.
temp = (df[df['Country'].isin(gt_100)]
        .groupby(['Country', 'Date'])['Confirmed']
        .sum()
        .reset_index())
temp = temp[temp['Confirmed'] > 100]

# First above-threshold date of each country.
min_date = temp.groupby('Country')['Date'].min().rename('Min Date').reset_index()

from_100th_case = temp.merge(min_date, on='Country')
from_100th_case['N days'] = (from_100th_case['Date'] - from_100th_case['Min Date']).dt.days
from_100th_case

In [None]:
# Boolean mask over the rows of `df`: True where the row's country is in gt_100.
df['Country'].isin(gt_100)

In [None]:
# Growth curves aligned on the first date each country exceeded 100
# confirmed cases.
gt_100 = country_daywise.loc[country_daywise['Confirmed'] > 100, 'Country'].unique()

# Per-country daily totals, restricted to the days above the threshold.
temp = (df[df['Country'].isin(gt_100)]
        .groupby(['Country', 'Date'])['Confirmed']
        .sum()
        .reset_index())
temp = temp[temp['Confirmed'] > 100]

# First above-threshold date of each country.
min_date = temp.groupby('Country')['Date'].min().rename('Min Date').reset_index()

from_100th_case = temp.merge(min_date, on='Country')
from_100th_case['N days'] = (from_100th_case['Date'] - from_100th_case['Min Date']).dt.days

fig = px.line(from_100th_case, x='N days', y='Confirmed', color='Country',
              title='N days from 100 case', height=600)
fig.show()

# Growth Rate after 1000 Cases

In [None]:
# Growth curves aligned on the first date each country exceeded 1000
# confirmed cases.
gt_1000 = country_daywise.loc[country_daywise['Confirmed'] > 1000, 'Country'].unique()

# Per-country daily totals, restricted to the days above the threshold.
temp = (df[df['Country'].isin(gt_1000)]
        .groupby(['Country', 'Date'])['Confirmed']
        .sum()
        .reset_index())
temp = temp[temp['Confirmed'] > 1000]

# First above-threshold date of each country.
min_date = temp.groupby('Country')['Date'].min().rename('Min Date').reset_index()

from_1000th_case = temp.merge(min_date, on='Country')
from_1000th_case['N days'] = (from_1000th_case['Date'] - from_1000th_case['Min Date']).dt.days

fig = px.line(from_1000th_case, x='N days', y='Confirmed', color='Country',
              title='N days from 1000 case', height=600)
fig.show()

# Growth Rate after 10000 Cases

In [None]:
# Growth curves aligned on the first date each country exceeded 10000
# confirmed cases.
gt_10000 = country_daywise.loc[country_daywise['Confirmed'] > 10000, 'Country'].unique()

# Per-country daily totals, restricted to the days above the threshold.
temp = (df[df['Country'].isin(gt_10000)]
        .groupby(['Country', 'Date'])['Confirmed']
        .sum()
        .reset_index())
temp = temp[temp['Confirmed'] > 10000]

# First above-threshold date of each country.
min_date = temp.groupby('Country')['Date'].min().rename('Min Date').reset_index()

from_10000th_case = temp.merge(min_date, on='Country')
from_10000th_case['N days'] = (from_10000th_case['Date'] - from_10000th_case['Min Date']).dt.days

fig = px.line(from_10000th_case, x='N days', y='Confirmed', color='Country',
              title='N days from 10000 case', height=600)
fig.show()

# Growth Rate after 100k Cases

In [None]:
# Growth curves aligned on the first date each country exceeded 100000
# confirmed cases.
gt_100000 = country_daywise.loc[country_daywise['Confirmed'] > 100000, 'Country'].unique()

# Per-country daily totals, restricted to the days above the threshold.
temp = (df[df['Country'].isin(gt_100000)]
        .groupby(['Country', 'Date'])['Confirmed']
        .sum()
        .reset_index())
temp = temp[temp['Confirmed'] > 100000]

# First above-threshold date of each country.
min_date = temp.groupby('Country')['Date'].min().rename('Min Date').reset_index()

from_100000th_case = temp.merge(min_date, on='Country')
from_100000th_case['N days'] = (from_100000th_case['Date'] - from_100000th_case['Min Date']).dt.days

fig = px.line(from_100000th_case, x='N days', y='Confirmed', color='Country',
              title='N days from 100000 case', height=600)
fig.show()

# Tree Map Analysis

# Confirmed Cases

In [None]:
# Rows belonging to the most recent date in the data. Series.max() is
# vectorised; the builtin max() would walk the column one element at a time
# in Python.
full_latest = df[df['Date'] == df['Date'].max()]

# Treemap of confirmed cases: countries at the outer level, their
# provinces/states nested inside, tile area proportional to 'Confirmed'.
fig = px.treemap(full_latest.sort_values(by='Confirmed', ascending=False).reset_index(drop=True),
                 path=['Country', 'Province/State'], values='Confirmed', height=700,
                 title='Number of Confirmed Cases',
                 color_discrete_sequence=px.colors.qualitative.Dark2)

# Show the label, any text and the numeric value on every tile.
fig.data[0].textinfo = 'label+text+value'
fig.show()

# Deaths Cases

In [None]:
# Rows belonging to the most recent date in the data. Series.max() is
# vectorised; the builtin max() would walk the column one element at a time
# in Python.
full_latest = df[df['Date'] == df['Date'].max()]

# Treemap of deaths: countries at the outer level, their provinces/states
# nested inside, tile area proportional to 'Deaths'.
fig = px.treemap(full_latest.sort_values(by='Deaths', ascending=False).reset_index(drop=True),
                 path=['Country', 'Province/State'], values='Deaths', height=700,
                 title='Number of Deaths Cases',
                 color_discrete_sequence=px.colors.qualitative.Dark2)

# Show the label, any text and the numeric value on every tile.
fig.data[0].textinfo = 'label+text+value'
fig.show()

# First and Last Case Report Time

In [None]:
# First date on which each country reported a confirmed case.
first_date = df[df['Confirmed']>0]
first_date = first_date.groupby('Country')['Date'].agg(['min']).reset_index()

# Day-over-day change of the per-country totals. The columns are selected
# with a list: selecting several groupby columns with a bare tuple is
# rejected by current pandas releases.
last_date = df.groupby(['Country', 'Date'])[['Confirmed', 'Deaths', 'Recovered']]
last_date = last_date.sum().diff().reset_index()
last_date

#Note: the data set does not include the number of recoveries as of August 5, 2021, so recoveries appear as 0 from that date; this is reflected in the graphics and tables.

In [None]:
# Gantt chart of the reporting period of every country: from the first date
# with a confirmed case to the day after the last date on which the confirmed
# total increased.

# First date on which each country reported a confirmed case.
first_date = df[df['Confirmed']>0]
first_date = first_date.groupby('Country')['Date'].agg(['min']).reset_index()

# Day-over-day change of the per-country totals. The columns are selected
# with a list: selecting several groupby columns with a bare tuple is
# rejected by current pandas releases.
last_date = df.groupby(['Country', 'Date'])[['Confirmed', 'Deaths', 'Recovered']]
last_date = last_date.sum().diff().reset_index()

# diff() runs straight across country boundaries, so the first row of every
# country holds a difference against the previous country's last row; blank
# those rows out.
mask = (last_date['Country'] != last_date['Country'].shift(1))
last_date.loc[mask, ['Confirmed', 'Deaths', 'Recovered']] = np.nan

# Last date on which each country's confirmed total went up.
last_date = last_date[last_date['Confirmed']> 0]
last_date = last_date.groupby('Country')['Date'].agg(['max']).reset_index()

# Pair first and last dates by country name. Joining on the key (rather than
# gluing the two frames side by side by row position) keeps the dates of a
# country together even when the two frames do not list the same countries;
# countries without a last date are left out.
first_last = first_date.merge(last_date, on='Country')
first_last['max'] = first_last['max'] + timedelta(days=1)

first_last['Days'] = first_last['max'] - first_last['min']
first_last['Task'] = first_last['Country']

first_last.columns = ['Country', 'Start', 'Finish', 'Days', 'Task']

first_last = first_last.sort_values('Days')

# One random hex colour per row.
colors = ['#' + ''.join(random.choice('0123456789ABCDEF') for _ in range(6))
          for _ in range(len(first_last))]

fig = ff.create_gantt(first_last, index_col='Country', colors=colors, show_colorbar=False,
                      bar_width=0.2, showgrid_x=True, showgrid_y=True, height=2500)

fig.show()

# Confirmed Cases Country and Day wise

In [None]:
# Preview the first rows of `country_daywise`.
country_daywise.head()

In [None]:
# One bar chart of confirmed cases per country, for the countries listed in
# gt_10000, laid out in a grid three columns wide.
temp = country_daywise.groupby(['Country', 'Date'])['Confirmed'].sum().reset_index()
temp = temp[temp['Country'].isin(gt_10000)]

countries = temp['Country'].unique()

ncols = 3
# At least one row, so the grid can still be built when no country qualifies.
nrows = max(1, math.ceil(len(countries)/ncols))

fig = make_subplots(rows=nrows, cols=ncols, shared_xaxes=False, subplot_titles=countries)

for ind, country in enumerate(countries):
    # Grid coordinates are 1-based and filled row by row.
    row = ind // ncols + 1
    col = ind % ncols + 1
    # Take dates and counts from the same country's rows so every bar is
    # paired with its own date.
    country_rows = temp[temp['Country'] == country]
    fig.add_trace(go.Bar(x=country_rows['Date'], y=country_rows['Confirmed'], name=country),
                  row=row, col=col)

fig.update_layout(height=4000, title_text='Confirmed Cases in Each Country')
fig.update_layout(showlegend=False)
fig.show()


# Covid-19 vs Other Similar Epidemics

In [None]:
# Display the `full_latest` frame built in the tree map cells.
full_latest

In [None]:
# Wikipedia Source

# Headline figures for COVID-19 and four earlier epidemics. The COVID-19
# numbers are summed from `full_latest`; the others are fixed reference values.
epidemics = pd.DataFrame({
    'epidemic': ['COVID-19', 'SARS', 'EBOLA', 'MERS', 'H1N1'],
    'start_year': [2019, 2002, 2013, 2012, 2009],
    'end_year': [2020, 2004, 2016, 2020, 2010],
    'Confirmed': [full_latest['Confirmed'].sum(), 8422, 28646, 2519, 6724149],
    # The 2013-2016 Ebola epidemic caused 11,323 reported deaths out of
    # 28,646 cases.
    'Deaths': [full_latest['Deaths'].sum(), 813, 11323, 866, 19654],
})

# Deaths as a percentage of confirmed cases, rounded to two decimals.
epidemics['mortality'] = (epidemics['Deaths'] / epidemics['Confirmed'] * 100).round(2)

epidemics.head()

In [None]:
# Reshape the epidemics table to long form (one row per epidemic and measure)
# and draw one bar-chart facet per measure.
temp = epidemics.melt(
    id_vars='epidemic',
    value_vars=['Confirmed', 'Deaths', 'mortality'],
    var_name='Case',
    value_name='Value',
)

fig = px.bar(
    temp,
    x='epidemic',
    y='Value',
    color='epidemic',
    text='Value',
    facet_col='Case',
    color_discrete_sequence=px.colors.qualitative.Bold,
)

fig.update_traces(textposition='outside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig.update_yaxes(showticklabels=False)
# Release the second and third facets from the shared y-axis so each measure
# gets its own scale.
for facet_axis in (fig.layout.yaxis2, fig.layout.yaxis3):
    facet_axis.update(matches=None)
fig.show()