# Load data from Johns Hopkins

In [10]:
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
from IPython.core.display import display, HTML

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import folium
import plotly.graph_objects as go
import seaborn as sns
import ipywidgets as widgets

### Data are updated once a day around 23:59 (UTC), except cases_country with hourly update.

In [11]:
confirmed_url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
death_url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv"
recovered_url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv"
us_confirmed_url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv"
us_death_url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_US.csv"
country_url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/web-data/data/cases_country.csv"

In [12]:
confirmed_df = pd.read_csv(confirmed_url)
confirmed_df.sample(5)

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,3/24/21,3/25/21,3/26/21,3/27/21,3/28/21,3/29/21,3/30/21,3/31/21,4/1/21,4/2/21
199,,Nigeria,9.082,8.6753,0,0,0,0,0,0,...,162178,162275,162388,162489,162593,162641,162762,162891,162997,163063
198,,Niger,17.607789,8.081666,0,0,0,0,0,0,...,4939,4964,4972,4972,4987,4987,5001,5001,5021,5021
153,,Jamaica,18.1096,-77.2975,0,0,0,0,0,0,...,36670,37128,37458,37747,38227,38514,38848,39237,39543,39967
229,,Solomon Islands,-9.6457,160.1562,0,0,0,0,0,0,...,18,18,18,18,18,18,18,19,19,19
148,,Iran,32.427908,53.688046,0,0,0,0,0,0,...,1823317,1830823,1838803,1846923,1855674,1864984,1875234,1885564,1897314,1908974


### Data collection starts at 22.Jan 2020, up to the current day

In [13]:
confirmed_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 274 entries, 0 to 273
Columns: 441 entries, Province/State to 4/2/21
dtypes: float64(2), int64(437), object(2)
memory usage: 944.1+ KB


In [14]:
confirmed_df.describe()

Unnamed: 0,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,...,3/24/21,3/25/21,3/26/21,3/27/21,3/28/21,3/29/21,3/30/21,3/31/21,4/1/21,4/2/21
count,273.0,273.0,274.0,274.0,274.0,274.0,274.0,274.0,274.0,274.0,...,274.0,274.0,274.0,274.0,274.0,274.0,274.0,274.0,274.0,274.0
mean,20.534804,23.028143,2.032847,2.390511,3.434307,5.229927,7.729927,10.682482,20.357664,22.507299,...,455617.7,457994.4,460334.5,462465.4,464182.2,465857.2,467930.6,470423.4,473020.6,475315.7
std,25.194592,73.596166,26.879101,26.977077,33.585238,46.743494,65.324493,88.014971,215.981285,217.304706,...,2162368.0,2169969.0,2177760.0,2184772.0,2189957.0,2195954.0,2202630.0,2210443.0,2219010.0,2226723.0
min,-51.7963,-178.1165,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,5.152149,-19.0208,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1021.0,1021.0,1021.5,1025.25,1031.25,1043.75,1043.75,1043.75,1044.5,1061.25
50%,21.694,20.9394,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,14434.5,14434.5,14434.5,14434.5,14434.5,14434.5,14434.5,14947.0,14947.0,14947.0
75%,41.1129,84.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,191983.2,193694.8,195499.0,197358.0,198381.8,198943.2,199532.5,200056.8,200821.2,201418.5
max,71.7069,178.065,444.0,444.0,549.0,761.0,1058.0,1423.0,3554.0,3554.0,...,30012610.0,30080060.0,30157370.0,30219450.0,30263140.0,30332560.0,30393800.0,30460830.0,30539870.0,30609690.0


In [15]:
death_df = pd.read_csv(death_url)
death_df.sample(5)

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,3/24/21,3/25/21,3/26/21,3/27/21,3/28/21,3/29/21,3/30/21,3/31/21,4/1/21,4/2/21
239,,Syria,34.802075,38.996815,0,0,0,0,0,0,...,1195,1206,1216,1227,1239,1247,1254,1265,1274,1288
113,,Estonia,58.5953,25.0136,0,0,0,0,0,0,...,823,836,847,860,868,879,896,902,908,922
69,Henan,China,37.8957,114.9042,0,0,0,0,1,1,...,22,22,22,22,22,22,22,22,22,22
123,New Caledonia,France,-20.904305,165.618042,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
14,Victoria,Australia,-37.8136,144.9631,0,0,0,0,0,0,...,820,820,820,820,820,820,820,820,820,820


In [16]:
death_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 274 entries, 0 to 273
Columns: 441 entries, Province/State to 4/2/21
dtypes: float64(2), int64(437), object(2)
memory usage: 944.1+ KB


In [17]:
death_df.describe()

Unnamed: 0,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,...,3/24/21,3/25/21,3/26/21,3/27/21,3/28/21,3/29/21,3/30/21,3/31/21,4/1/21,4/2/21
count,273.0,273.0,274.0,274.0,274.0,274.0,274.0,274.0,274.0,274.0,...,274.0,274.0,274.0,274.0,274.0,274.0,274.0,274.0,274.0,274.0
mean,20.534804,23.028143,0.062044,0.065693,0.094891,0.153285,0.20438,0.29927,0.478102,0.485401,...,10014.437956,10056.361314,10100.843066,10136.572993,10160.164234,10189.182482,10231.375912,10276.127737,10319.416058,10356.60219
std,25.194592,73.596166,1.027008,1.028562,1.451957,2.41755,3.142847,4.592341,7.551622,7.552125,...,43234.07499,43406.150157,43584.724894,43731.233417,43812.597722,43906.987548,44077.357752,44258.129471,44434.05198,44581.538159
min,-51.7963,-178.1165,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,5.152149,-19.0208,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,7.0,7.0,7.25,7.25,7.25,7.25,7.25,8.0,8.0,8.25
50%,21.694,20.9394,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,181.0,181.0,181.0,181.0,181.0,181.0,182.0,182.0,182.5,182.5
75%,41.1129,84.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2992.0,2992.0,3000.75,3003.0,3003.0,3003.0,3011.75,3011.75,3012.5,3012.5
max,71.7069,178.065,17.0,17.0,24.0,40.0,52.0,76.0,125.0,125.0,...,545492.0,547051.0,548172.0,548913.0,549420.0,550121.0,550996.0,552072.0,553136.0,554103.0


In [18]:
recovered_df = pd.read_csv(recovered_url)
recovered_df.sample(5)

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,3/24/21,3/25/21,3/26/21,3/27/21,3/28/21,3/29/21,3/30/21,3/31/21,4/1/21,4/2/21
65,Qinghai,China,35.7452,95.9956,0,0,0,0,0,0,...,18,18,18,18,18,18,18,18,18,18
104,French Polynesia,France,-17.6797,149.4068,0,0,0,0,0,0,...,4842,4842,4842,4842,4842,4842,4842,4842,4842,4842
207,,Senegal,14.4974,-14.4524,0,0,0,0,0,0,...,35508,35811,36107,36322,36753,37040,37228,37321,37434,37532
240,British Virgin Islands,United Kingdom,18.4207,-64.64,0,0,0,0,0,0,...,131,131,131,131,131,131,131,131,131,131
85,,Czechia,49.8175,15.473,0,0,0,0,0,0,...,1289924,1295205,1299552,1299930,1315571,1342113,1354424,1366461,1371074,1374823


In [19]:
recovered_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 259 entries, 0 to 258
Columns: 441 entries, Province/State to 4/2/21
dtypes: float64(2), int64(437), object(2)
memory usage: 892.5+ KB


In [20]:
recovered_df.describe()

Unnamed: 0,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,...,3/24/21,3/25/21,3/26/21,3/27/21,3/28/21,3/29/21,3/30/21,3/31/21,4/1/21,4/2/21
count,259.0,259.0,259.0,259.0,259.0,259.0,259.0,259.0,259.0,259.0,...,259.0,259.0,259.0,259.0,259.0,259.0,259.0,259.0,259.0,259.0
mean,19.102739,28.495821,0.11583,0.123552,0.150579,0.162162,0.216216,0.250965,0.416988,0.490347,...,273307.6,274511.5,275919.2,277097.7,278367.1,279542.6,280821.5,282283.0,283597.0,284808.5
std,24.64088,70.936043,1.743788,1.747697,1.940118,2.005022,2.641273,2.839838,5.005023,5.50501,...,1071255.0,1075752.0,1080969.0,1085416.0,1089808.0,1093001.0,1098028.0,1104350.0,1109635.0,1113932.0
min,-51.7963,-178.1165,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,4.715658,-8.826999,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,854.0,856.5,856.5,864.5,864.5,864.5,864.5,865.5,865.5,866.0
50%,19.85627,24.6032,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,9002.0,9330.0,9330.0,9391.0,9454.0,9594.0,9594.0,9668.0,9710.0,9749.0
75%,39.01895,90.39495,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,141897.0,142771.0,143582.5,143963.0,145128.5,146039.0,146821.5,147496.5,148260.0,148873.5
max,71.7069,178.065,28.0,28.0,31.0,32.0,42.0,45.0,80.0,88.0,...,11231650.0,11264640.0,11295020.0,11323760.0,11355990.0,11393020.0,11434300.0,11474680.0,11525040.0,11569240.0


In [21]:
us_confirmed_df = pd.read_csv(us_confirmed_url)
us_confirmed_df.sample(5)

Unnamed: 0,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,...,3/24/21,3/25/21,3/26/21,3/27/21,3/28/21,3/29/21,3/30/21,3/31/21,4/1/21,4/2/21
2621,84047037,US,USA,840,47037.0,Davidson,Tennessee,US,36.170074,-86.786461,...,84612,84787,84917,84917,84917,85234,85331,85496,85672,85672
1067,84021069,US,USA,840,21069.0,Fleming,Kentucky,US,38.367583,-83.696575,...,1108,1109,1112,1112,1114,1116,1117,1119,1120,1120
47,84001095,US,USA,840,1095.0,Marshall,Alabama,US,34.36976,-86.304867,...,11816,11828,11830,11839,11839,11842,11851,11854,11865,11877
92,84002195,US,USA,840,2195.0,Petersburg,Alaska,US,57.139789,-132.9541,...,152,153,153,153,153,153,153,153,159,159
952,84020053,US,USA,840,20053.0,Ellsworth,Kansas,US,38.696777,-98.204628,...,1214,1214,1214,1214,1214,1215,1215,1215,1215,1215


In [22]:
us_death_df = pd.read_csv(us_death_url)
us_death_df.sample(5)

Unnamed: 0,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,...,3/24/21,3/25/21,3/26/21,3/27/21,3/28/21,3/29/21,3/30/21,3/31/21,4/1/21,4/2/21
1429,84027119,US,USA,840,27119.0,Polk,Minnesota,US,47.773725,-96.401346,...,65,65,65,66,66,66,66,66,66,66
1034,84021003,US,USA,840,21003.0,Allen,Kentucky,US,36.751976,-86.194575,...,32,32,32,32,32,32,32,32,33,33
1645,84029510,US,USA,840,29510.0,St. Louis City,Missouri,US,38.635557,-90.243492,...,445,445,445,447,447,448,449,450,452,452
3230,84054087,US,USA,840,54087.0,Roane,West Virginia,US,38.716227,-81.352436,...,8,8,8,8,8,8,8,8,8,8
466,84013113,US,USA,840,13113.0,Fayette,Georgia,US,33.413578,-84.490894,...,154,155,156,156,156,156,156,156,157,157


In [23]:
us_death_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3342 entries, 0 to 3341
Columns: 449 entries, UID to 4/2/21
dtypes: float64(3), int64(440), object(6)
memory usage: 11.4+ MB


In [24]:
us_death_df.describe()

Unnamed: 0,UID,code3,FIPS,Lat,Long_,Population,1/22/20,1/23/20,1/24/20,1/25/20,...,3/24/21,3/25/21,3/26/21,3/27/21,3/28/21,3/29/21,3/30/21,3/31/21,4/1/21,4/2/21
count,3342.0,3342.0,3332.0,3342.0,3342.0,3342.0,3342.0,3342.0,3342.0,3342.0,...,3342.0,3342.0,3342.0,3342.0,3342.0,3342.0,3342.0,3342.0,3342.0,3342.0
mean,83429920.0,834.494913,33043.078932,36.721617,-88.642045,99603.57,0.0,0.0,0.0,0.0,...,163.22322,163.689707,164.025135,164.246858,164.398564,164.608318,164.870138,165.192101,165.510473,165.79982
std,4314076.0,36.487378,18648.808931,9.079322,21.776287,324166.1,0.0,0.0,0.0,0.0,...,650.138376,651.938417,653.198937,654.136463,654.974061,655.58361,656.293257,657.409255,658.745798,660.012395
min,16.0,16.0,60.0,-14.271,-174.1596,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,84018110.0,840.0,19076.5,33.896803,-97.803595,9917.25,0.0,0.0,0.0,0.0,...,14.0,14.0,14.0,14.0,14.0,14.0,14.0,14.0,14.0,14.0
50%,84029210.0,840.0,31012.0,38.00561,-89.488865,24891.5,0.0,0.0,0.0,0.0,...,41.0,41.0,41.0,42.0,42.0,42.0,42.0,42.0,42.0,42.0
75%,84046120.0,840.0,47129.5,41.579255,-82.313398,64975.25,0.0,0.0,0.0,0.0,...,101.0,101.0,101.75,101.75,101.75,102.0,102.0,102.0,102.0,102.0
max,84100000.0,850.0,99999.0,69.314792,145.6739,10039110.0,0.0,0.0,0.0,0.0,...,22965.0,23022.0,23056.0,23080.0,23103.0,23101.0,23111.0,23144.0,23191.0,23236.0


In [25]:
country_df = pd.read_csv(country_url)
country_df.sample(5)

Unnamed: 0,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Incident_Rate,People_Tested,People_Hospitalized,Mortality_Rate,UID,ISO3
33,Central African Republic,2021-04-03 19:20:49,6.6111,20.9394,5245.0,72.0,4957.0,216.0,108.597439,,,1.372736,140,CAF
89,Kazakhstan,2021-04-03 19:20:49,48.0196,66.9237,301818.0,3236.0,271216.0,27366.0,1607.406453,,,1.072169,398,KAZ
157,Slovenia,2021-04-03 19:20:49,46.1512,14.9955,219420.0,4068.0,201446.0,13906.0,10554.45777,,,1.853979,705,SVN
188,West Bank and Gaza,2021-04-03 19:20:49,31.9522,35.2332,248482.0,2681.0,220418.0,25383.0,4870.843703,,,1.078951,275,PSE
42,Cote d'Ivoire,2021-04-03 19:20:49,7.54,-5.5471,44445.0,250.0,41281.0,2914.0,168.490927,,,0.562493,384,CIV


In [26]:
country_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 192 entries, 0 to 191
Data columns (total 14 columns):
Country_Region         192 non-null object
Last_Update            192 non-null object
Lat                    190 non-null float64
Long_                  190 non-null float64
Confirmed              192 non-null float64
Deaths                 192 non-null float64
Recovered              188 non-null float64
Active                 191 non-null float64
Incident_Rate          190 non-null float64
People_Tested          0 non-null float64
People_Hospitalized    0 non-null float64
Mortality_Rate         192 non-null float64
UID                    192 non-null int64
ISO3                   190 non-null object
dtypes: float64(10), int64(1), object(3)
memory usage: 21.1+ KB


In [27]:
country_df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Lat,190.0,19.51585,23.91467,-40.9006,4.643279,17.589241,40.383525,64.9631
Long_,190.0,19.770528,62.02765,-172.1046,-6.706225,20.535638,46.701734,178.065
Confirmed,192.0,680032.21875,2641669.0,1.0,8410.75,81264.5,307879.75,30662900.0
Deaths,192.0,14803.973958,52752.02,0.0,111.5,986.5,6271.75,554717.0
Recovered,188.0,393506.367021,1293791.0,1.0,5305.75,48899.0,222211.25,11569240.0
Active,191.0,123750.586387,516678.2,0.0,383.5,5406.0,33581.5,4401462.0
Incident_Rate,190.0,2726.288197,3279.121,0.673488,158.162413,1338.40771,4803.299634,15756.16
People_Tested,0.0,,,,,,,
People_Hospitalized,0.0,,,,,,,
Mortality_Rate,192.0,2.044371,2.379885,0.0,0.964147,1.66169,2.516426,22.22222


## Conclusion

The first six dataframes contain time series, and we could use them to forecast the evolution of the pandemic. The country_df dataframe contains the last snapshot (last hour) with cumulated data, and we can use it to identify the worst affected countries. 

# Exploratory Data Analysis

### Renaming the df column names to lowercase

In [28]:
country_df.columns = map(str.lower, country_df.columns)
confirmed_df.columns = map(str.lower, confirmed_df.columns)
death_df.columns = map(str.lower, death_df.columns)
recovered_df.columns = map(str.lower, recovered_df.columns)

In [29]:
country_df.head()

Unnamed: 0,country_region,last_update,lat,long_,confirmed,deaths,recovered,active,incident_rate,people_tested,people_hospitalized,mortality_rate,uid,iso3
0,Afghanistan,2021-04-03 19:20:49,33.93911,67.709953,56595.0,2496.0,51798.0,2301.0,145.382512,,,4.410284,4,AFG
1,Albania,2021-04-03 19:20:49,41.1533,20.1683,126183.0,2256.0,93173.0,30754.0,4384.703593,,,1.78788,8,ALB
2,Algeria,2021-04-03 19:20:49,28.0339,1.6596,117429.0,3099.0,81729.0,32601.0,267.790666,,,2.639041,12,DZA
3,Andorra,2021-04-03 19:20:49,42.5063,1.5218,12174.0,117.0,11428.0,629.0,15756.163852,,,0.961065,20,AND
4,Angola,2021-04-03 19:20:49,-11.2027,17.8739,22467.0,538.0,20867.0,1062.0,68.358841,,,2.394623,24,AGO


### Changing province/state to state and country/region to country

In [30]:
confirmed_df = confirmed_df.rename(columns={'province/state': 'state', 'country/region': 'country'})
recovered_df = recovered_df.rename(columns={'province/state': 'state', 'country/region': 'country'})
death_df = death_df.rename(columns={'province/state': 'state', 'country/region': 'country'})
country_df = country_df.rename(columns={'country_region': 'country'})

In [31]:
pd.options.display.float_format = '{:,}'.format

In [32]:
confirmed_df_total = confirmed_df.iloc[:, 4:].sum().map('{:,d}'.format)
confirmed_df_total

1/22/20            557
1/23/20            655
1/24/20            941
1/25/20          1,433
1/26/20          2,118
              ...     
3/29/21    127,644,861
3/30/21    128,212,983
3/31/21    128,896,025
4/1/21     129,607,651
4/2/21     130,236,504
Length: 437, dtype: object

In [33]:
recovered_df_total = recovered_df.iloc[:, 4:].sum().map('{:,d}'.format)
recovered_df_total

1/22/20            30
1/23/20            32
1/24/20            39
1/25/20            42
1/26/20            56
              ...    
3/29/21    72,401,542
3/30/21    72,732,773
3/31/21    73,111,302
4/1/21     73,451,626
4/2/21     73,765,414
Length: 437, dtype: object

In [34]:
death_df_total = death_df.iloc[:, 4:].sum().map('{:,d}'.format)
death_df_total

1/22/20           17
1/23/20           18
1/24/20           26
1/25/20           42
1/26/20           56
             ...    
3/29/21    2,791,836
3/30/21    2,803,397
3/31/21    2,815,659
4/1/21     2,827,520
4/2/21     2,837,709
Length: 437, dtype: object

In [46]:
def my_value(number):
    return ("{:,}".format(number))

In [48]:
confirmed_total = int(country_df['confirmed'].sum())
deaths_total = int(country_df['deaths'].sum())
recovered_total = int(country_df['recovered'].sum())
active_total = int(country_df['active'].sum())
print('confirmed: ', my_value(confirmed_total))
print('deaths: ', my_value(deaths_total))
print('recovered: ', my_value(recovered_total))
print('active: ', my_value(active_total))

confirmed:  130,566,186
deaths:  2,842,363
recovered:  73,979,197
active:  23,636,362


### Color-coding the confirmed/death/recovered columns

In [49]:
fig = go.FigureWidget( layout=go.Layout() )
def highlight_col(x):
    r = 'background-color: red'
    y = 'background-color: purple'
    g = 'background-color: grey'
    df1 = pd.DataFrame('', index=x.index, columns=x.columns)
    df1.iloc[:, 4] = y
    df1.iloc[:, 5] = r
    df1.iloc[:, 6] = g
    
    return df1

In [50]:
def show_latest_cases(n):
    n = int(n)
    return country_df.sort_values('confirmed', ascending= False).head(n).style.apply(highlight_col, axis=None)

## COVID-19 Confirmed/Death/Recovered cases by countries

In [51]:
interact(show_latest_cases, n='10')

ipywLayout = widgets.Layout(border='solid 2px green')
ipywLayout.display='none' # uncomment this, run cell again - then the graph/figure disappears
widgets.VBox([fig], layout=ipywLayout)

interactive(children=(Text(value='10', description='n'), Output()), _dom_classes=('widget-interact',))

VBox(children=(FigureWidget({
    'data': [], 'layout': {'template': '...'}
}),), layout=Layout(border='solid …

## Worst hit countries

In [56]:
sorted_country_df = country_df.sort_values('confirmed', ascending= False)

In [57]:
# # plotting the 20 worst hit countries

def bubble_chart(n):
    fig = px.scatter(sorted_country_df.head(n), x="country", y="confirmed", size="confirmed", color="country",
               hover_name="country", size_max=60)
    fig.update_layout(
    title=str(n) +" Worst hit countries",
    xaxis_title="Countries",
    yaxis_title="Confirmed Cases",
    width = 700
    )
    fig.show();

interact(bubble_chart, n=10)

ipywLayout = widgets.Layout(border='solid 2px green')
ipywLayout.display='none'
widgets.VBox([fig], layout=ipywLayout)

interactive(children=(IntSlider(value=10, description='n', max=30, min=-10), Output()), _dom_classes=('widget-…

VBox(children=(FigureWidget({
    'data': [], 'layout': {'autosize': True, 'template': '...'}
}),), layout=Lay…

## Details for a specific country

In [60]:
def plot_cases_of_a_country(country):
    labels = ['confirmed', 'deaths']
    colors = ['blue', 'red']
    mode_size = [6, 8]
    line_size = [4, 5]
    
    df_list = [confirmed_df, death_df]
    
    fig = go.Figure();
    
    for i, df in enumerate(df_list):
        if country == 'World' or country == 'world':
            x_data = np.array(list(df.iloc[:, 20:].columns))
            y_data = np.sum(np.asarray(df.iloc[:,4:]),axis = 0)
            
        else:    
            x_data = np.array(list(df.iloc[:, 20:].columns))
            y_data = np.sum(np.asarray(df[df['country'] == country].iloc[:,20:]),axis = 0)
        print(i)
        fig.add_trace(go.Scatter(x=x_data, y=y_data, mode='lines+markers',
        name=labels[i],
        line=dict(color=colors[i], width=line_size[i]),
        connectgaps=True,
        text = "Total " + str(labels[i]) +": "+ str(y_data[-1])
        ));
    
    fig.update_layout(
        title="COVID 19 cases of " + country,
        xaxis_title='Date',
        yaxis_title='No. of Confirmed Cases',
        margin=dict(l=20, r=20, t=40, b=20),
        paper_bgcolor="lightgrey",
        width = 800,
        
    );
    
    fig.update_yaxes(type="linear")
    fig.show();

In [61]:
interact(plot_cases_of_a_country, country='World')

ipywLayout = widgets.Layout(border='solid 2px green')
ipywLayout.display='none' # uncomment this, run cell again - then the graph/figure disappears
widgets.VBox([fig], layout=ipywLayout)

interactive(children=(Text(value='World', description='country'), Output()), _dom_classes=('widget-interact',)…

VBox(children=(FigureWidget({
    'data': [], 'layout': {'autosize': True, 'template': '...'}
}),), layout=Lay…

## 10 worst hit countries - Confirmed cases

In [89]:
sorted_country_df = country_df.sort_values('confirmed', ascending= False)

In [62]:
px.bar(
    sorted_country_df.head(10),
    x = "country",
    y = "confirmed",
    title= "Top 10 worst affected countries", # the axis names
    color_discrete_sequence=["pink"], 
    height=500,
    width=800
)

## 10 worst hit countries - Death cases

In [91]:
sorted_country_df = country_df.sort_values('deaths', ascending= False)

In [92]:
px.bar(
    sorted_country_df.head(10),
    x = "country",
    y = "deaths",
    title= "Top 10 worst affected countries", # the axis names
    color_discrete_sequence=["pink"], 
    height=500,
    width=800
)

## Worst hit countries - Recovered cases

In [93]:
sorted_country_df = country_df.sort_values('recovered', ascending= False)

In [94]:
px.bar(
    sorted_country_df.head(10),
    x = "country",
    y = "recovered",
    title= "Top 10 worst affected countries", # the axis names
    color_discrete_sequence=["pink"], 
    height=500,
    width=800
)

## Worst hit countries - Active cases

In [95]:
sorted_country_df = country_df.sort_values('active', ascending= False)

In [96]:
px.bar(
    sorted_country_df.head(10),
    x = "country",
    y = "active",
    title= "Top 10 worst affected countries", # the axis names
    color_discrete_sequence=["pink"], 
    height=500,
    width=800
)

## Worst hit countries - Mortality cases

In [97]:
sorted_country_df = country_df.sort_values('mortality_rate', ascending= False)

In [98]:
px.bar(
    sorted_country_df.head(10),
    x = "country",
    y = "mortality_rate",
    title= "Top 10 worst affected countries", # the axis names
    color_discrete_sequence=["pink"], 
    height=500,
    width=800
)

## Worst hit countries - Incident rate

In [99]:
sorted_country_df = country_df.sort_values('incident_rate', ascending= False)

In [100]:
px.bar(
    sorted_country_df.head(10),
    x = "country",
    y = "incident_rate",
    title= "Top 10 worst affected countries", # the axis names
    color_discrete_sequence=["pink"], 
    height=500,
    width=800
)

In [81]:
confirmed_df[confirmed_df.lat.isnull() | confirmed_df.long.isnull()] 

Unnamed: 0,state,country,lat,long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,3/24/21,3/25/21,3/26/21,3/27/21,3/28/21,3/29/21,3/30/21,3/31/21,4/1/21,4/2/21


In [82]:
death_df[death_df.lat.isnull() | death_df.long.isnull()] 

Unnamed: 0,state,country,lat,long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,3/24/21,3/25/21,3/26/21,3/27/21,3/28/21,3/29/21,3/30/21,3/31/21,4/1/21,4/2/21


In [74]:
confirmed_df=confirmed_df.dropna(subset=['long'])
dconfirmed_df=confirmed_df.dropna(subset=['lat'])
death_df=death_df.dropna(subset=['long'])
death_df=death_df.dropna(subset=['lat'])

In [75]:

world_map = folium.Map(location=[11,0], tiles="cartodbpositron", zoom_start=2, max_zoom = 6, min_zoom = 2)


for i in range(0,len(confirmed_df)):
    folium.Circle(
        location=[confirmed_df.iloc[i]['lat'], confirmed_df.iloc[i]['long']],
        fill=True,
        radius=(int((np.log(confirmed_df.iloc[i,-1]+1.00001)))+0.2)*50000,
        color='red',
        fill_color='indigo',
        tooltip = "<div style='margin: 0; background-color: black; color: white;'>"+
                    "<h4 style='text-align:center;font-weight: bold'>"+confirmed_df.iloc[i]['country'] + "</h4>"
                    "<hr style='margin:10px;color: white;'>"+
                    "<ul style='color: white;;list-style-type:circle;align-item:left;padding-left:20px;padding-right:20px'>"+
                        "<li>Confirmed: "+str(confirmed_df.iloc[i,-1])+"</li>"+
                        "<li>Deaths:   "+str(death_df.iloc[i,-1])+"</li>"+
                        "<li>Death Rate: "+ str(np.round(death_df.iloc[i,-1]/(confirmed_df.iloc[i,-1]+1.00001)*100,2))+ "</li>"+
                    "</ul></div>",
        ).add_to(world_map)

world_map
