In [1]:
# All Imports Required Go Here

import requests
from datetime import datetime
import os
import pandas as pd
import numpy as np
import plotly.express as px

In [2]:
# Download the JHU CSSE CSV snapshots into the local data/ directory.
# Create the directory up front so a fresh checkout does not fail on open().
os.makedirs('data', exist_ok=True)

filenames = ['time_series_covid19_confirmed_global.csv', 'time_series_covid19_deaths_global.csv', 'time_series_covid19_recovered_global.csv']
for filename in filenames:
    url = f'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/{filename}'
    # Bound the wait and fail loudly on HTTP errors; otherwise an error page
    # would be written to disk as if it were a CSV and reported as a success.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    with open(f'data/{filename}', 'wb') as file:
        file.write(response.content)
    print(f'{filename} has been downloaded')

# Country-level summary lives on the web-data branch of the same repository.
# Request first, open second: a failed download must not truncate an existing file.
response = requests.get('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/web-data/data/cases_country.csv', timeout=30)
response.raise_for_status()
with open('data/country_cases.csv', 'wb') as file:
    file.write(response.content)
print('Country wise caases, recoveries, and deaths downloaded')

time_series_covid19_confirmed_global.csv has been downloaded
time_series_covid19_deaths_global.csv has been downloaded
time_series_covid19_recovered_global.csv has been downloaded
Country wise caases, recoveries, and deaths downloaded


In [3]:
# Source: Johns Hopkins University CSSE COVID-19 repository on GitHub
# https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data/csse_covid_19_time_series

# Read the four local CSV files; the order of the paths matches the order of
# the names on the left-hand side.
confirmed_global, deaths_global, recovered_global, country_cases = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/time_series_covid19_confirmed_global.csv',
        'data/time_series_covid19_deaths_global.csv',
        'data/time_series_covid19_recovered_global.csv',
        'data/country_cases.csv',
    )
)

In [4]:
# Global Confirmed Cases
# Preview the first rows of the confirmed-cases frame (head() shows 5 rows by default).
confirmed_global.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,4/26/20,4/27/20,4/28/20,4/29/20,4/30/20,5/1/20,5/2/20,5/3/20,5/4/20,5/5/20
0,,Afghanistan,33.0,65.0,0,0,0,0,0,0,...,1531,1703,1828,1939,2171,2335,2469,2704,2894,3224
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,726,736,750,766,773,782,789,795,803,820
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,3382,3517,3649,3848,4006,4154,4295,4474,4648,4838
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,738,743,743,743,745,745,747,748,750,751
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,26,27,27,27,27,30,35,35,35,36


In [5]:
# Global Deaths
# Preview the first rows of the deaths frame (head() shows 5 rows by default).
deaths_global.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,4/26/20,4/27/20,4/28/20,4/29/20,4/30/20,5/1/20,5/2/20,5/3/20,5/4/20,5/5/20
0,,Afghanistan,33.0,65.0,0,0,0,0,0,0,...,50,57,58,60,64,68,72,85,90,95
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,28,28,30,30,31,31,31,31,31,31
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,425,432,437,444,450,453,459,463,465,470
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,40,40,41,42,42,43,44,45,45,46
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,2,2,2,2,2,2,2,2,2,2


In [6]:
# Global Recoveries
# Preview the first rows of the recoveries frame (head() shows 5 rows by default).
recovered_global.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,4/26/20,4/27/20,4/28/20,4/29/20,4/30/20,5/1/20,5/2/20,5/3/20,5/4/20,5/5/20
0,,Afghanistan,33.0,65.0,0,0,0,0,0,0,...,207,220,228,252,260,310,331,345,397,421
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,410,422,431,455,470,488,519,531,543,570
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,1508,1558,1651,1702,1779,1821,1872,1936,1998,2067
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,344,385,398,423,468,468,472,493,499,514
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,6,6,6,7,7,11,11,11,11,11


In [7]:
# Country Wise Statistics
# Preview the first rows of the per-country summary frame (head() shows 5 rows by default).
country_cases.head()

Unnamed: 0,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Incident_Rate,People_Tested,People_Hospitalized,Mortality_Rate,UID,ISO3
0,Australia,2020-05-06 12:32:27,-25.0,133.0,6875,97,5986,792,27.00346,,,1.410909,36,AUS
1,Austria,2020-05-06 12:32:27,47.5162,14.5501,15684,608,13639,1437,174.142832,,,3.876562,40,AUT
2,Canada,2020-05-06 12:32:27,60.001,-95.001,63215,4190,27006,32019,166.989375,,,6.628174,124,CAN
3,China,2020-05-06 12:32:27,30.5928,114.3055,83968,4637,78900,431,5.977747,,,5.522342,156,CHN
4,Denmark,2020-05-06 12:32:27,56.0,10.0,10136,506,7689,1941,174.993867,,,4.992107,208,DNK


In [8]:
# Simple Data Cleaning - Removing and renaming the Columns

def _clean_time_series(frame):
    """Return a cleaned copy of one of the global time-series frames.

    * Drops 'Province/State', which this analysis does not use.
      errors='ignore' keeps the cell re-runnable once the column is gone.
    * Renames 'Country/Region' to 'country' so the column name matches
      the one used in country_cases.

    drop() and rename() both return new frames; the caller must assign
    the result back for the change to take effect.
    """
    return (
        frame
        .drop(columns='Province/State', errors='ignore')
        .rename(columns={'Country/Region': 'country'})
    )


# NOTE(review): after this cell the time-series frames expose 'country'
# instead of 'Country/Region'; any cell that reads the old label must follow.
confirmed_global = _clean_time_series(confirmed_global)
deaths_global = _clean_time_series(deaths_global)
recovered_global = _clean_time_series(recovered_global)

# Renaming the columns for easier access.
# rename() silently skips keys that are absent, so re-running this cell is safe.
country_cases = country_cases.rename(columns = {
    "Country_Region" : "country",
    "Last_Update": "last",
    "Confirmed": "confirmed",
    "Deaths": "deaths",
    "Recovered" : "recovered",
    "Active" : "active",
    "Mortality_Rate": "mortality"
})
country_cases.columns

Index(['country', 'last', 'Lat', 'Long_', 'confirmed', 'deaths', 'recovered',
       'active', 'Incident_Rate', 'People_Tested', 'People_Hospitalized',
       'mortality', 'UID', 'ISO3'],
      dtype='object')

In [9]:
# Countries with the Highest Number Of Confirmed Cases

def highlight_cols(x):
    """Build a per-cell CSS frame that colours whole columns by position.

    Intended for ``Styler.apply(..., axis=None)``, which passes the full
    DataFrame and expects a same-shaped DataFrame of CSS strings back.

    Parameters
    ----------
    x : pandas.DataFrame
        Frame being styled. It needs at least seven columns, because
        positions 0, 4, 5 and 6 are addressed directly.

    Returns
    -------
    pandas.DataFrame
        Same index and columns as ``x``; every cell is an empty string
        except those in the four coloured columns.
    """
    # (column position, CSS rule). With the country_cases layout used in this
    # notebook these positions are confirmed, deaths, recovered and country.
    column_css = (
        (4, 'background-color: red'),
        (5, 'background-color: purple'),
        (6, 'background-color: grey'),
        (0, 'background-color: yellow'),
    )
    styles = pd.DataFrame('', index = x.index, columns = x.columns)
    for position, css in column_css:
        styles.iloc[:, position] = css
    return styles
    
# Order countries from most to fewest confirmed cases; later cells reuse this frame.
sorted_country_cases = country_cases.sort_values(by = 'confirmed', ascending = False)
# Show the ten leading rows with the column highlighting applied to the whole table.
sorted_country_cases.iloc[:10].style.apply(highlight_cols, axis = None)

Unnamed: 0,country,last,Lat,Long_,confirmed,deaths,recovered,active,Incident_Rate,People_Tested,People_Hospitalized,mortality,UID,ISO3
17,US,2020-05-06 12:32:27,40.0,-100.0,1204475,71078,189791,972538,365.583692,,,5.90116,840,USA
161,Spain,2020-05-06 12:32:27,40.463667,-3.74922,219329,25613,123486,70230,469.104947,,,11.67789,724,ESP
10,Italy,2020-05-06 12:32:27,41.8719,12.5674,213013,29315,85231,98467,352.30989,,,13.762071,380,ITA
16,United Kingdom,2020-05-06 12:32:27,55.0,-3.0,196243,29501,926,165816,289.07726,,,15.032893,826,GBR
6,France,2020-05-06 12:32:27,46.2276,2.2137,170694,25538,52859,92297,261.50577,,,14.961276,250,FRA
7,Germany,2020-05-06 12:32:27,51.1657,10.4515,167007,6993,137400,22614,199.330552,,,4.18725,276,DEU
13,Russia,2020-05-06 12:32:27,61.524,105.3188,165929,1537,21327,143065,113.701041,,,0.9263,643,RUS
174,Turkey,2020-05-06 12:32:27,38.9637,35.2433,129491,3520,73285,52686,153.536202,,,2.718336,792,TUR
39,Brazil,2020-05-06 12:32:27,-14.235,-51.9253,116299,7966,48221,60112,54.713645,,,6.849586,76,BRA
90,Iran,2020-05-06 12:32:27,32.427908,53.688046,101650,6418,81587,13645,121.022058,,,6.313822,364,IRN


In [12]:
# Scatter Plot Showing Countries with the Highest Number of Confirmed Cases
# Marker size and colour both encode the confirmed count of each country.
fig = px.scatter(
    sorted_country_cases.head(10),
    x = 'country',
    y = 'confirmed',
    size = 'confirmed',
    color = 'confirmed',
    hover_name = 'country',
    size_max = 50,
    template = 'plotly_dark',
)
fig.show()