# importing libraries

In [223]:
# essential libraries
import json
import random
from urllib.request import urlopen

# storing and analysis

import numpy as np
import pandas as pd

# visualization
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objs as go
import plotly.figure_factory as ff
import calmap
import folium

# color pallette
cnf = '#393e46' # confirmed - grey
dth = '#ff2e63' # death - red
rec = '#21bf73' # recovered - cyan
act = '#fe9801' # active case - yellow

# converter
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

# hide warnings
import warnings
warnings.filterwarnings('ignore')

# html embedding
from IPython.display import Javascript
from IPython.core.display import display
from IPython.core.display import HTML

# importing dataset

In [224]:
full_table=pd.read_csv('covid_19_clean_complete.csv',parse_dates=['Date'])
full_table.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered
0,,Thailand,15.0,101.0,2020-01-22,2,0,0
1,,Japan,36.0,138.0,2020-01-22,2,0,0
2,,Singapore,1.2833,103.8333,2020-01-22,0,0,0
3,,Nepal,28.1667,84.25,2020-01-22,0,0,0
4,,Malaysia,2.5,112.5,2020-01-22,0,0,0


In [225]:
full_table.shape

(11960, 8)

In [226]:
# checking any null values
print(full_table.info())
print(full_table.isna().any())
print(full_table.isna().sum())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11960 entries, 0 to 11959
Data columns (total 8 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   Province/State  5876 non-null   object        
 1   Country/Region  11960 non-null  object        
 2   Lat             11960 non-null  float64       
 3   Long            11960 non-null  float64       
 4   Date            11960 non-null  datetime64[ns]
 5   Confirmed       11960 non-null  int64         
 6   Deaths          11960 non-null  int64         
 7   Recovered       11960 non-null  int64         
dtypes: datetime64[ns](1), float64(2), int64(3), object(2)
memory usage: 747.6+ KB
None
Province/State     True
Country/Region    False
Lat               False
Long              False
Date              False
Confirmed         False
Deaths            False
Recovered         False
dtype: bool
Province/State    6084
Country/Region       0
Lat                  0
Long       

In [227]:
cases=['Confirmed','Deaths','Recovered','Active']

In [228]:
# active case=confirmed-death-recovered
full_table['Active']=full_table['Confirmed']-full_table['Deaths']\
                    -full_table['Recovered']
full_table.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered,Active
0,,Thailand,15.0,101.0,2020-01-22,2,0,0,2
1,,Japan,36.0,138.0,2020-01-22,2,0,0,2
2,,Singapore,1.2833,103.8333,2020-01-22,0,0,0,0
3,,Nepal,28.1667,84.25,2020-01-22,0,0,0,0
4,,Malaysia,2.5,112.5,2020-01-22,0,0,0,0


In [229]:
full_table['Country/Region']=full_table['Country/Region']\
            .replace('Mainland China','China')

In [230]:
# list of all countries
full_table['Country/Region'].unique()

array(['Thailand', 'Japan', 'Singapore', 'Nepal', 'Malaysia', 'Canada',
       'Australia', 'Cambodia', 'Sri Lanka', 'Germany', 'Finland',
       'United Arab Emirates', 'Philippines', 'India', 'Italy', 'Sweden',
       'Spain', 'Belgium', 'Egypt', 'Lebanon', 'Iraq', 'Oman',
       'Afghanistan', 'Bahrain', 'Kuwait', 'Algeria', 'Croatia',
       'Switzerland', 'Austria', 'Israel', 'Pakistan', 'Brazil',
       'Georgia', 'Greece', 'North Macedonia', 'Norway', 'Romania',
       'Estonia', 'Netherlands', 'San Marino', 'Belarus', 'Iceland',
       'Lithuania', 'Mexico', 'New Zealand', 'Nigeria', 'Ireland',
       'Luxembourg', 'Monaco', 'Qatar', 'Ecuador', 'Azerbaijan',
       'Armenia', 'Dominican Republic', 'Indonesia', 'Portugal',
       'Andorra', 'Latvia', 'Morocco', 'Saudi Arabia', 'Senegal',
       'Argentina', 'Chile', 'Jordan', 'Ukraine', 'Hungary',
       'Liechtenstein', 'Poland', 'Tunisia', 'Bosnia and Herzegovina',
       'Slovenia', 'South Africa', 'Bhutan', 'Cameroon', 'Colo

In [231]:
# handling missing values
full_table[['Province/State']]=full_table[['Province/State']]\
.fillna('')
full_table[cases]=full_table[cases].fillna(0)


In [232]:
full_table.isnull().any()

Province/State    False
Country/Region    False
Lat               False
Long              False
Date              False
Confirmed         False
Deaths            False
Recovered         False
Active            False
dtype: bool

In [233]:
# cases in the ships
ship=full_table[full_table['Province/State'].str.contains('Grand Princess')|full_table['Country/Region'].str.contains('Cruise Ship')]
ship

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered,Active
105,Grand Princess,US,37.6489,-122.6655,2020-01-22,0,0,0,0
167,Diamond Princess,Cruise Ship,35.4437,139.6380,2020-01-22,0,0,0,0
226,Grand Princess,Canada,37.6489,-122.6655,2020-01-22,0,0,0,0
335,Grand Princess,US,37.6489,-122.6655,2020-01-23,0,0,0,0
397,Diamond Princess,Cruise Ship,35.4437,139.6380,2020-01-23,0,0,0,0
...,...,...,...,...,...,...,...,...,...
11667,Diamond Princess,Cruise Ship,35.4437,139.6380,2020-03-12,696,7,325,364
11726,Grand Princess,Canada,37.6489,-122.6655,2020-03-12,0,0,0,0
11835,Grand Princess,US,37.6489,-122.6655,2020-03-13,21,0,0,21
11897,Diamond Princess,Cruise Ship,35.4437,139.6380,2020-03-13,696,7,325,364


In [234]:
# china and the row
china=full_table[full_table['Country/Region']=='China']
row=full_table[full_table['Country/Region']!='China']

In [235]:
# latest cases
full_latest=full_table[full_table['Date']==max(full_table['Date'])].reset_index()
china_latest=full_latest[full_latest['Country/Region']=='China']
row_latest=full_latest[full_latest['Country/Region']!='China']
full_latest.head()

Unnamed: 0,index,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered,Active
0,11730,,Thailand,15.0,101.0,2020-03-13,75,1,35,39
1,11731,,Japan,36.0,138.0,2020-03-13,701,19,118,564
2,11732,,Singapore,1.2833,103.8333,2020-03-13,200,0,97,103
3,11733,,Nepal,28.1667,84.25,2020-03-13,1,0,1,0
4,11734,,Malaysia,2.5,112.5,2020-03-13,197,0,26,171


In [236]:
# latest condensed
full_latest_grouped=full_latest.groupby('Country/Region')[cases].sum().reset_index()
china_latest_grouped=china_latest.groupby('Province/State')[cases].sum().reset_index()
row_latest_grouped=row_latest.groupby('Country/Region')[cases].sum().reset_index()
row_latest_grouped.head(),full_latest_grouped.head(),china_latest_grouped.head()

(        Country/Region  Confirmed  Deaths  Recovered  Active
 0          Afghanistan          7       0          0       7
 1              Albania         33       1          0      32
 2              Algeria         26       2          8      16
 3              Andorra          1       0          0       1
 4  Antigua and Barbuda          1       0          0       1,
         Country/Region  Confirmed  Deaths  Recovered  Active
 0          Afghanistan          7       0          0       7
 1              Albania         33       1          0      32
 2              Algeria         26       2          8      16
 3              Andorra          1       0          0       1
 4  Antigua and Barbuda          1       0          0       1,
   Province/State  Confirmed  Deaths  Recovered  Active
 0          Anhui        990       6        984       0
 1        Beijing        436       8        342      86
 2      Chongqing        576       6        566       4
 3         Fujian        296  

In [237]:
# Latest Complete Data
temp=full_table.groupby(['Country/Region','Province/State'])[cases].max()
temp.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Confirmed,Deaths,Recovered,Active
Country/Region,Province/State,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Afghanistan,,7,0,0,7
Albania,,33,1,0,32
Algeria,,26,2,8,20
Andorra,,1,0,1,1
Antigua and Barbuda,,1,0,0,1


In [238]:
# latest condensed data
temp=full_table.groupby('Date')[cases].sum().reset_index()
temp=temp[temp['Date']==max(temp['Date'])].reset_index(drop=True)
temp.style.background_gradient(cmap='Pastel1')

Unnamed: 0,Date,Confirmed,Deaths,Recovered,Active
0,2020-03-13 00:00:00,145193,5404,70251,69538


In [341]:
tm=temp.melt(id_vars='Date',value_vars=['Active','Deaths','Recovered'])
fig=px.treemap(tm,path=['variable'],values='value',height=400,width=600,
              color_discrete_sequence=[rec,act,dth])
fig.show()

In [240]:
# country wise data In each country
temp_f=full_latest_grouped.sort_values(by='Confirmed',ascending=False)
temp_f=temp_f.reset_index(drop=True)
temp_f.style.background_gradient(cmap='Reds')

Unnamed: 0,Country/Region,Confirmed,Deaths,Recovered,Active
0,China,80945,3180,64196,13569
1,Italy,17660,1266,1439,14955
2,Iran,11364,514,2959,7891
3,"Korea, South",7979,66,510,7403
4,Spain,5232,133,193,4906
5,Germany,3675,7,46,3622
6,France,3667,79,12,3576
7,US,2179,47,12,2120
8,Switzerland,1139,11,4,1124
9,Norway,996,0,1,995


In [241]:
# Countries with death Reported
temp_flg=temp_f[temp_f['Deaths']>0][['Country/Region','Deaths']]
temp_flg.sort_values('Deaths',ascending=False).reset_index(drop=True)
temp_flg.style.background_gradient(cmap='Reds')

Unnamed: 0,Country/Region,Deaths
0,China,3180
1,Italy,1266
2,Iran,514
3,"Korea, South",66
4,Spain,133
5,Germany,7
6,France,79
7,US,47
8,Switzerland,11
10,Sweden,1


In [242]:
# countries where no1 is recovered
temp_norecoved=temp_f[temp_f['Recovered']==0][['Country/Region','Confirmed','Deaths','Recovered']]
temp_norecoved.style.background_gradient(cmap='Reds')

Unnamed: 0,Country/Region,Confirmed,Deaths,Recovered
12,Netherlands,804,10,0
18,Qatar,320,0,0
23,Greece,190,1,0
27,Brazil,151,0,0
28,Slovenia,141,0,0
29,Czechia,141,0,0
33,Ireland,90,1,0
38,San Marino,80,5,0
41,Estonia,79,0,0
45,Poland,68,2,0


In [243]:
# countries with all cases died
temp=row_latest_grouped[row_latest_grouped['Confirmed']==row_latest_grouped['Deaths']]
temp=temp[['Country/Region','Confirmed','Deaths']]
temp=temp.sort_values('Confirmed',ascending=True).reset_index(drop=True)
temp.style.background_gradient(cmap='Reds')

Unnamed: 0,Country/Region,Confirmed,Deaths
0,Guyana,1,1
1,Sudan,1,1


In [244]:
# countries with all cases recovered
temp=row_latest_grouped[row_latest_grouped['Confirmed']==row_latest_grouped["Recovered"]]
temp=temp[['Country/Region','Confirmed','Recovered']]
temp.sort_values('Confirmed',ascending=False).reset_index(drop=True)
temp.style.background_gradient(cmap='Reds')

Unnamed: 0,Country/Region,Confirmed,Recovered
64,Jordan,1,1
83,Nepal,1,1


Countries with no affected case anymore

In [245]:
temp=row_latest_grouped[row_latest_grouped['Confirmed']==row_latest_grouped["Recovered"]+row_latest_grouped["Deaths"]]
temp=temp[['Country/Region','Confirmed','Deaths','Recovered']]
temp.sort_values('Confirmed',ascending=False).reset_index(drop=True)
temp.style.background_gradient(cmap='Reds')

Unnamed: 0,Country/Region,Confirmed,Deaths,Recovered
50,Guyana,1,1,0
64,Jordan,1,0,1
83,Nepal,1,0,1
111,Sudan,1,1,0


# Countries with no affected case anymore

In [246]:
temp.style.background_gradient(cmap='Reds')

Unnamed: 0,Country/Region,Confirmed,Deaths,Recovered
50,Guyana,1,1,0
64,Jordan,1,0,1
83,Nepal,1,0,1
111,Sudan,1,1,0


In [247]:
# Chinese province wise data
temp_f=china_latest_grouped[['Province/State','Confirmed','Deaths','Recovered']]
temp_f=temp_f.sort_values('Confirmed',ascending=False).reset_index(drop=True)
temp_f.style.background_gradient(cmap='Reds')

Unnamed: 0,Province/State,Confirmed,Deaths,Recovered
0,Hubei,67786,3062,51553
1,Guangdong,1356,8,1296
2,Henan,1273,22,1249
3,Zhejiang,1215,1,1197
4,Hunan,1018,4,1005
5,Anhui,990,6,984
6,Jiangxi,935,1,934
7,Shandong,760,7,739
8,Jiangsu,631,0,630
9,Chongqing,576,6,566


In [248]:
# Provinces with no cases recovered
temp_f[temp_f["Recovered"]==0]


Unnamed: 0,Province/State,Confirmed,Deaths,Recovered


In [249]:
# province with all cases died
temp_f=temp_f[temp_f["Confirmed"]==temp_f['Deaths']]
temp_f

Unnamed: 0,Province/State,Confirmed,Deaths,Recovered


In [252]:
temp_f=china_latest_grouped[china_latest_grouped["Confirmed"]==china_latest_grouped['Recovered']]
temp_f=temp_f.sort_values("Confirmed",ascending=False)
temp_f.style.background_gradient(cmap='Blues')

Unnamed: 0,Province/State,Confirmed,Deaths,Recovered,Active
26,Shanxi,133,0,133,0
22,Qinghai,18,0,18,0
20,Macau,10,0,10,0
29,Tibet,1,0,1,0


In [255]:
temp_f=china_latest_grouped[china_latest_grouped["Confirmed"]==china_latest_grouped['Recovered']+china_latest_grouped["Deaths"]]
temp_f=temp_f.sort_values("Confirmed",ascending=False)
temp_f.style.background_gradient(cmap='Blues')

Unnamed: 0,Province/State,Confirmed,Deaths,Recovered,Active
0,Anhui,990,6,984,0
17,Jiangxi,935,1,934,0
3,Fujian,296,1,295,0
26,Shanxi,133,0,133,0
30,Xinjiang,76,3,73,0
22,Qinghai,18,0,18,0
20,Macau,10,0,10,0
29,Tibet,1,0,1,0


In [277]:
m = folium.Map(location=[0, 0], tiles='cartodbpositron',
               min_zoom=1, max_zoom=10, zoom_start=1)

for i in range(0, len(full_latest)):
    folium.Circle(
        location=[full_latest.iloc[i]['Lat'], full_latest.iloc[i]['Long']],
        color='gray', 
#         tooltip =   '<li><bold>Country : '+str(full_latest.iloc[i]['Country/Region'])+
#                     '<li><bold>Province : '+str(full_latest.iloc[i]['Province/State'])+
#                     '<li><bold>Confirmed : '+str(full_latest.iloc[i]['Confirmed'])+
#                     '<li><bold>Deaths : '+str(full_latest.iloc[i]['Deaths'])+
#                     '<li><bold>Recovered : '+str(full_latest.iloc[i]['Recovered']),
        radius=int(full_latest.iloc[i]['Confirmed'])**1.1).add_to(m)
m

In [258]:
full_latest

Unnamed: 0,index,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered,Active
0,11730,,Thailand,15.0000,101.0000,2020-03-13,75,1,35,39
1,11731,,Japan,36.0000,138.0000,2020-03-13,701,19,118,564
2,11732,,Singapore,1.2833,103.8333,2020-03-13,200,0,97,103
3,11733,,Nepal,28.1667,84.2500,2020-03-13,1,0,1,0
4,11734,,Malaysia,2.5000,112.5000,2020-03-13,197,0,26,171
...,...,...,...,...,...,...,...,...,...,...
225,11955,,Aruba,12.5211,-69.9683,2020-03-13,2,0,0,2
226,11956,Grand Princess,Canada,37.6489,-122.6655,2020-03-13,2,0,0,2
227,11957,,Kenya,-0.0236,37.9062,2020-03-13,1,0,0,1
228,11958,,Antigua and Barbuda,17.0608,-61.7964,2020-03-13,1,0,0,1


In [279]:
fig = px.choropleth(full_latest_grouped, locations="Country/Region", 
                    locationmode='country names', color="Confirmed", 
                    hover_name="Country/Region", range_color=[1,7000], 
                    color_continuous_scale="aggrnyl", 
                    title='Countries with Confirmed Cases')
fig.update(layout_coloraxis_showscale=False)
fig.show()

In [282]:
fig = px.choropleth(full_latest_grouped[full_latest_grouped['Deaths']>0],
                    locations="Country/Region", 
                    locationmode='country names', color="Deaths", 
                    hover_name="Country/Region", range_color=[1,7000], 
                    color_continuous_scale="aggrnyl", 
                    title='Countries with Death Cases')
fig.update(layout_coloraxis_showscale=False)
fig.show()

In [283]:
formated_gdf = full_table.groupby(['Date', 'Country/Region'])['Confirmed', 'Deaths', 'Recovered'].max()
formated_gdf = formated_gdf.reset_index()
formated_gdf['Date'] = pd.to_datetime(formated_gdf['Date'])
formated_gdf['Date'] = formated_gdf['Date'].dt.strftime('%m/%d/%Y')
formated_gdf['size'] = formated_gdf['Confirmed'].pow(0.3)

fig = px.scatter_geo(formated_gdf, locations="Country/Region", locationmode='country names', 
                     color="Confirmed", size='size', hover_name="Country/Region", 
                     range_color= [0, max(formated_gdf['Confirmed'])+2], 
                     projection="natural earth", animation_frame="Date", 
                     title='Spread over time')
fig.update(layout_coloraxis_showscale=False)
fig.show()

In [307]:
a=full_latest[full_latest['Province/State'].str.contains('Grand Princess')|full_latest['Province/State'].str.contains('Diamond Princess')
           |full_latest['Province/State'].str.contains('Grand Princess')]
a=a[["Province/State","Confirmed","Deaths","Recovered"]]
a
# a.groupby("Province/State").sum().reset_index()

Unnamed: 0,Province/State,Confirmed,Deaths,Recovered
22,From Diamond Princess,0,0,0
104,Diamond Princess,46,0,0
105,Grand Princess,21,0,0
167,Diamond Princess,696,7,325
226,Grand Princess,2,0,0


In [318]:
temp=ship[ship['Date']==max(ship['Date'])]
temp=temp[['Province/State', 'Confirmed', 'Deaths', 'Recovered']].reset_index(drop=True)
temp.style.background_gradient(cmap='Pastel1_r')

Unnamed: 0,Province/State,Confirmed,Deaths,Recovered
0,Grand Princess,21,0,0
1,Diamond Princess,696,7,325
2,Grand Princess,2,0,0


In [352]:
# Cases over time
temp=full_table.groupby('Date')['Recovered','Deaths','Active'].sum().reset_index()
temp=temp.melt(id_vars='Date',value_vars=['Recovered','Deaths','Active'],
              var_name='Case',value_name='Count')
temp.head()
fig=px.area(temp,x='Date',y='Count',color='Case',title='Cases Over Time',
           color_discrete_sequence=[rec,dth,act])
fig.show()

In [432]:
temp=full_table.groupby('Date').sum().reset_index()

# Adding two more columns
temp['No.of Deaths to 100 Confirmed Cases']=round(temp['Deaths']/temp['Confirmed'],3)*100
temp['No. of Recovered to 100 Confirmed Cases'] = round(temp['Recovered']/temp['Confirmed'], 3)*100
temp=temp.melt(id_vars='Date',value_vars=['No.of Deaths to 100 Confirmed Cases','No. of Recovered to 100 Confirmed Cases'],
     var_name='Ratio',value_name='Value')
fig=px.line(temp,x='Date',y='Value',color='Ratio',log_y=True,
           color_discrete_sequence=[dth,rec])
fig.show()

In [431]:
temp

Unnamed: 0,Date,Ratio,Value
0,2020-01-22,No.of Deaths to 100 Confirmed Cases,3.1
1,2020-01-23,No.of Deaths to 100 Confirmed Cases,2.8
2,2020-01-24,No.of Deaths to 100 Confirmed Cases,2.8
3,2020-01-25,No.of Deaths to 100 Confirmed Cases,2.9
4,2020-01-26,No.of Deaths to 100 Confirmed Cases,2.7
5,2020-01-27,No.of Deaths to 100 Confirmed Cases,2.8
6,2020-01-28,No.of Deaths to 100 Confirmed Cases,2.4
7,2020-01-29,No.of Deaths to 100 Confirmed Cases,2.2
8,2020-01-30,No.of Deaths to 100 Confirmed Cases,2.1
9,2020-01-31,No.of Deaths to 100 Confirmed Cases,2.1
