# Libraries

In [None]:
pip install geopandas



In [None]:
# utility libraries
from datetime import timedelta
import math

# storing and analysis
import numpy as np
import pandas as pd

# visualization
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
import plotly.express as px

In [None]:
# network analysis and visualization
import networkx as nx

# to embed HTML objects

# dealing with geographic data
import geopandas as gpd
from geopandas.tools import geocode

# implicitly registered datetime converter for a matplotlib plotting method
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

In [None]:
# offline plotly
from plotly.offline import plot, iplot, init_notebook_mode
init_notebook_mode(connected=True)

### COVID-19 Dataset

In [None]:
# covid_19 dataset
# Load the raw COVID-19 CSV; 'Date' is parsed as datetime so the date
# comparisons and date arithmetic in the following cells work.
covid_19 = pd.read_csv('/content/country_wise_latest.csv',
                       parse_dates=['Date'])

# selecting important columns only
covid_19 = covid_19[['Date', 'Country/Region', 'Confirmed', 'Deaths', 'Recovered']]

# renaming columns to the short names used in the rest of the notebook
covid_19.columns = ['Date', 'Country', 'Cases', 'Deaths', 'Recovered']

# group by date and country: one row per (Date, Country), summing any rows
# that share the same pair.
# The value columns are selected with a list ([[...]]); selecting them with a
# bare tuple of keys is deprecated and raises on newer pandas releases.
covid_19 = covid_19.groupby(['Date', 'Country'])[['Cases', 'Deaths', 'Recovered']]
covid_19 = covid_19.sum().reset_index()


Columns (0,17) have mixed types.Specify dtype option on import or set low_memory=False.


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



In [None]:
# latest: rows belonging to the most recent date in the data.
# reset_index() keeps the previous row labels in an 'index' column.
c_lat = covid_19[covid_19['Date'] == covid_19['Date'].max()].reset_index()

# latest grouped by country
# (columns selected with a list; a tuple of keys is deprecated and raises on
# newer pandas releases)
c_lat_grp = c_lat.groupby('Country')[['Cases', 'Deaths', 'Recovered']].sum().reset_index()

# nth day: whole days elapsed since the earliest date in the data
covid_19['nth_day'] = (covid_19['Date'] - covid_19['Date'].min()).dt.days


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



In [None]:
# day by day: totals over all countries for each date
# (columns selected with a list; a tuple of keys is deprecated and raises on
# newer pandas releases)
c_dbd = covid_19.groupby('Date')[['Cases', 'Deaths', 'Recovered']].sum().reset_index()

# nth day, looked up by date
c_dbd['nth_day'] = c_dbd['Date'].map(covid_19.groupby('Date')['nth_day'].max())

# no. of countries with at least one case on each date.
# Matched to c_dbd by date rather than by position: a date on which no country
# has Cases > 0 has no row in the filtered frame, so a positional assignment
# would fail with a length mismatch. Such dates get 0 instead.
temp = covid_19[covid_19['Cases']>0]
c_dbd['n_countries'] = (c_dbd['Date']
                        .map(temp.groupby('Date')['Country'].size())
                        .fillna(0)
                        .astype('int'))

# difference from the previous date; the first row has no predecessor and is NaN
c_dbd['new_cases'] = c_dbd['Cases'].diff()
c_dbd['new_deaths'] = c_dbd['Deaths'].diff()

# label identifying which epidemic these rows belong to
c_dbd['epidemic'] = 'COVID-19'


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



### Zika Dataset

In [None]:
# ZIKA dataset
# NOTE(review): the sample output of this cell (Guinea, Liberia, Nigeria,
# Sierra Leone, starting August 2014) looks like West African Ebola data rather
# than Zika -- confirm that the file and the 'Zika' labels are what is intended.
Zika = pd.read_csv("/content/zika_2014_2016_clean.csv",
                   parse_dates=['Date'])

# selecting important columns only
Zika = Zika[['Date', 'Country',
             'Cumulative no. of confirmed, probable and suspected cases',
             'Cumulative no. of confirmed, probable and suspected deaths']]

# renaming columns to the short names used in the rest of the notebook
Zika.columns = ['Date', 'Country', 'Cases', 'Deaths']

# group by date and country: one row per (Date, Country).
# The value columns are selected with a list ([[...]]); selecting them with a
# bare tuple of keys is deprecated and raises on newer pandas releases.
Zika = Zika.groupby(['Date', 'Country'])[['Cases', 'Deaths']]
Zika = Zika.sum().reset_index()

# filling missing values with 0, then converting the counts to integers
Zika[['Cases', 'Deaths']] = Zika[['Cases', 'Deaths']].fillna(0).astype('int')

Zika.head()


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



Unnamed: 0,Date,Country,Cases,Deaths
0,2014-08-29,Guinea,648,430
1,2014-08-29,Liberia,1378,694
2,2014-08-29,Nigeria,19,7
3,2014-08-29,Sierra Leone,1026,422
4,2014-09-05,Guinea,812,517


In [None]:
# latest: rows belonging to the most recent date in the data.
# reset_index() keeps the previous row labels in an 'index' column.
e_lat = Zika[Zika['Date'] == Zika['Date'].max()].reset_index()

# latest grouped by country
# (columns selected with a list; a tuple of keys is deprecated and raises on
# newer pandas releases)
e_lat_grp = e_lat.groupby('Country')[['Cases', 'Deaths']].sum().reset_index()

# nth day: whole days elapsed since the earliest date in the data
Zika['nth_day'] = (Zika['Date'] - Zika['Date'].min()).dt.days


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



In [None]:
# day by day: totals over all countries for each date
# (columns selected with a list; a tuple of keys is deprecated and raises on
# newer pandas releases)
e_dbd = Zika.groupby('Date')[['Cases', 'Deaths']].sum().reset_index()

# nth day, looked up by date
e_dbd['nth_day'] = e_dbd['Date'].map(Zika.groupby('Date')['nth_day'].max())

# no. of countries with at least one case on each date.
# Matched to e_dbd by date rather than by position: a date on which no country
# has Cases > 0 has no row in the filtered frame, so a positional assignment
# would fail with a length mismatch. Such dates get 0 instead.
temp = Zika[Zika['Cases']>0]
e_dbd['n_countries'] = (e_dbd['Date']
                        .map(temp.groupby('Date')['Country'].size())
                        .fillna(0)
                        .astype('int'))

# difference from the previous date; the first row has no predecessor and is NaN
e_dbd['new_cases'] = e_dbd['Cases'].diff()
e_dbd['new_deaths'] = e_dbd['Deaths'].diff()

# label identifying which epidemic these rows belong to
e_dbd['epidemic'] = 'ZIKA'


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



### Combined data

In [None]:
# Every list in this section holds one entry per epidemic, in the order given
# by `epidemic`. Only the COVID-19 entries are computed (from the latest
# snapshot, c_lat); the other entries are hard-coded strings.
# NOTE(review): the figures listed under 'Zika' presumably describe the
# 2014-2016 West African Ebola outbreak -- confirm the label.
epidemic = [
    'COVID-19',
    'MERS',
    'SARS',
    'Zika',
    'H1N1',
]

# total confirmed cases per epidemic
confirmed = [
    c_lat['Cases'].sum(),
    '2521',
    '8098',
    '28646',
    '491382',
]

# total reported deaths per epidemic
deaths = [
    c_lat['Deaths'].sum(),
    '866',
    '774',
    '11323',
    '18449',
]

# total recovered cases per epidemic; '' where no figure is recorded
recovered = [
    c_lat['Recovered'].sum(),
    '',
    '',
    '',
    '',
]

In [None]:
# One entry per epidemic, in the order COVID-19, MERS, SARS, Zika, H1N1.

# no. of countries or regions to which the epidemic spread (kept as strings;
# converted to integers once the summary dataframe is built)
no_of_countries_or_region = ['216', '28', '29', '10', '214']
# first reported place
first_reported_place = ['Wuhan, China', 'Jeddah, Saudi Arabia', 'Foshan City, Guangdong, China', 'Meliandou, Guinea', 'Veracruz, Mexico']
# first reported country -- must agree with the country named in
# first_reported_place (H1N1 previously said 'North America', which is a
# continent, while the place is 'Veracruz, Mexico')
first_reported_country = ['China', 'Saudi Arabia', 'China', 'Guinea', 'Mexico']

In [None]:
# One entry per epidemic, in the order COVID-19, MERS, SARS, Zika, H1N1.
#
# Every date string uses the same 'D Month, YYYY' format, with '' where no
# date is recorded. Keeping one format per list matters: newer pandas releases
# infer a single format for a whole column in pd.to_datetime and reject
# entries written differently.

# first reported year
first_reported_year = ['2019', '2012', '2002', '2014', '2009']
# first reported date
first_reported_date = ['8 December, 2019', '13 June, 2012', '16 November, 2002', '23 March, 2014', '18 April, 2009']
# epidemic span
span = ['2019-2020', '2012-Present', '2002-2004', '2014-2016', '2009-2010']

# day on which no. of cases reached 100
date_100th_case = ['', '22 July, 2013', '', '31 March, 2014', '28 April, 2009']
# day on which no. of cases reached 1000
date_1000th_case = ['25 January, 2020', '3 December, 2014', '26 March, 2003', '23 July, 2014', '4 May, 2009']
# day on which no. of cases reached 10000
date_10000th_case = ['1 February, 2020', '', '', '19 October, 2014', '20 May, 2009']
# day on which no. of cases reached 100000
date_100000th_case = ['6 March, 2020', '', '', '', '']

In [None]:
# One entry per epidemic, in the order COVID-19, MERS, SARS, Zika, H1N1.
# Each value is a 'low-high' range kept as text.

# estimated no. of people infected by one infected individual
ro = [
    '2-2.5',
    '0.3-0.8',
    '2-4',
    '1.6-2',
    '1.2-1.6',
]

# case fatality rate
cfr = [
    '1.38-3.4',
    '34.4-43',
    '9.6-11',
    '49-53',
    '0.05-0.1',
]


In [None]:
# create a dataframe
df = pd.DataFrame({'Epidemic' : epidemic, 
                   'Confirmed': confirmed, 
                   'Deaths': deaths, 
                   'Recovered': recovered, 
                   
                   'No. of Countries': no_of_countries_or_region, 
                   'First Reported Place': first_reported_place, 
                   'First Reported Country': first_reported_country, 
                   
                   'First Reported Year': first_reported_year, 
                   'First Reported Date': first_reported_date, 
                   '100th Case On': date_100th_case, 
                   '1000th Case On': date_1000th_case, 
                   '10000th Case On': date_10000th_case, 
                   '100000th Case On': date_100000th_case, 
                   'Ro': ro, 
                   'CFR': cfr, 
                   })
    

In [None]:
# fix numerical values
for col in ['Confirmed', 'Deaths', 'Recovered', 'No. of Countries', 
            'First Reported Year']:
    df[col] = df[col].replace('', 0)
    df[col] = df[col].astype('int')
    
    
# fix datetime values
for col in ['First Reported Date', '100th Case On', '1000th Case On',
            '10000th Case On', '100000th Case On']:
    # df[col] = df[col].replace('', 0)
    df[col] = pd.to_datetime(df[col])
    
    
    
# dataframe
df

Unnamed: 0,Epidemic,Confirmed,Deaths,Recovered,No. of Countries,First Reported Place,First Reported Country,First Reported Year,First Reported Date,100th Case On,1000th Case On,10000th Case On,100000th Case On,Ro,CFR
0,COVID-19,16480485,654036,9468087,216,"Wuhan, China",China,2019,2019-12-08,NaT,2020-01-25,2020-02-01,2020-03-06,2-2.5,1.38-3.4
1,MERS,2521,866,0,28,"Jeddah, Saudi Arabia",Saudi Arabia,2012,2012-06-13,2013-07-22,2014-12-03,NaT,NaT,0.3-0.8,34.4-43
2,SARS,8098,774,0,29,"Foshan City, Guangdong, China",China,2002,2002-11-16,NaT,2003-03-26,NaT,NaT,2-4,9.6-11
3,Zika,28646,11323,0,10,"Meliandou, Guinea",Guinea,2014,2014-03-23,2014-03-31,2014-07-23,2014-10-19,NaT,1.6-2,49-53
4,H1N1,491382,18449,0,214,"Veracruz, Mexico",North America,2009,2009-04-18,2009-04-28,2009-05-04,2009-05-20,NaT,1.2-1.6,0.05-0.1
