# COVID-19 Outbreak Analysis

## Data Preprocessing

In [1]:
# All Imports Required Go Here

import requests
from datetime import datetime
import os
import pandas as pd
import numpy as np
import plotly.express as px

In [2]:
# Data from the Johns Hopkins University (CSSE) COVID-19 dataset on GitHub
# https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data/csse_covid_19_time_series

# Create the target directory up front so the cell works on a fresh checkout
# (open(..., 'wb') does not create missing parent directories).
os.makedirs('data', exist_ok=True)

filenames = ['time_series_covid19_confirmed_global.csv',
             'time_series_covid19_deaths_global.csv', 'time_series_covid19_recovered_global.csv']
for i in filenames:
    url = f'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/{i}'
    # A timeout stops the cell from hanging forever on a stalled connection.
    response = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of saving an error page as a CSV.
    response.raise_for_status()
    with open(f'data/{i}', 'wb') as file:
        file.write(response.content)
    print(f'{i} has been downloaded')

# Per-country snapshot (cases, recoveries, deaths) from the web-data branch.
# The request is made and checked *before* the file is opened, so a failed
# download never truncates a previously downloaded copy.
response = requests.get('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/web-data/data/cases_country.csv', timeout=30)
response.raise_for_status()
with open('data/country_cases.csv', 'wb') as file:
    file.write(response.content)
print('Country wise caases, recoveries, and deaths downloaded')

time_series_covid19_confirmed_global.csv has been downloaded
time_series_covid19_deaths_global.csv has been downloaded
time_series_covid19_recovered_global.csv has been downloaded
Country wise caases, recoveries, and deaths downloaded


In [3]:
# Load the four downloaded CSV files into DataFrames, in the same order as
# the names on the left-hand side.
confirmed_global, deaths_global, recovered_global, country_cases = map(
    pd.read_csv,
    (
        'data/time_series_covid19_confirmed_global.csv',
        'data/time_series_covid19_deaths_global.csv',
        'data/time_series_covid19_recovered_global.csv',
        'data/country_cases.csv',
    ),
)

## Outbreak Statistics

In [4]:
# Global confirmed cases: preview the first rows of the time series
# (location columns followed by one column per date).
confirmed_global.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,4/27/20,4/28/20,4/29/20,4/30/20,5/1/20,5/2/20,5/3/20,5/4/20,5/5/20,5/6/20
0,,Afghanistan,33.0,65.0,0,0,0,0,0,0,...,1703,1828,1939,2171,2335,2469,2704,2894,3224,3392
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,736,750,766,773,782,789,795,803,820,832
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,3517,3649,3848,4006,4154,4295,4474,4648,4838,4997
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,743,743,743,745,745,747,748,750,751,751
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,27,27,27,27,30,35,35,35,36,36


In [5]:
# Global deaths: preview the first rows of the time series
# (location columns followed by one column per date).
deaths_global.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,4/27/20,4/28/20,4/29/20,4/30/20,5/1/20,5/2/20,5/3/20,5/4/20,5/5/20,5/6/20
0,,Afghanistan,33.0,65.0,0,0,0,0,0,0,...,57,58,60,64,68,72,85,90,95,104
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,28,30,30,31,31,31,31,31,31,31
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,432,437,444,450,453,459,463,465,470,476
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,40,41,42,42,43,44,45,45,46,46
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,2,2,2,2,2,2,2,2,2,2


In [6]:
# Global recoveries: preview the first rows of the time series
# (location columns followed by one column per date).
recovered_global.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,4/27/20,4/28/20,4/29/20,4/30/20,5/1/20,5/2/20,5/3/20,5/4/20,5/5/20,5/6/20
0,,Afghanistan,33.0,65.0,0,0,0,0,0,0,...,220,228,252,260,310,331,345,397,421,458
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,422,431,455,470,488,519,531,543,570,595
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,1558,1651,1702,1779,1821,1872,1936,1998,2067,2197
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,385,398,423,468,468,472,493,499,514,521
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,6,6,7,7,11,11,11,11,11,11


In [7]:
# Country-wise statistics: preview the first rows of the per-country
# snapshot (confirmed, deaths, recovered, active and rate columns).
country_cases.head()

Unnamed: 0,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Incident_Rate,People_Tested,People_Hospitalized,Mortality_Rate,UID,ISO3
0,Australia,2020-05-07 11:32:27,-25.0,133.0,6897,97,6035,765,27.089871,,,1.406409,36,AUS
1,Austria,2020-05-07 11:32:27,47.5162,14.5501,15752,609,13698,1445,174.89785,,,3.866176,40,AUT
2,Canada,2020-05-07 11:32:27,60.001,-95.001,64694,4366,28184,32144,170.896316,,,6.748694,124,CAN
3,China,2020-05-07 11:32:27,30.5928,114.3055,83974,4637,78929,408,5.978174,,,5.521947,156,CHN
4,Denmark,2020-05-07 11:32:27,56.0,10.0,10281,506,7689,2086,177.497232,,,4.9217,208,DNK


In [8]:
# Simple data cleaning: drop unused columns and rename for easier access.

# Drop the Province/State column, which this analysis does not use.
# errors='ignore' keeps the cell re-runnable: without it, running the cell a
# second time raises KeyError because the column has already been removed.
confirmed_global = confirmed_global.drop(columns='Province/State', errors='ignore')
deaths_global = deaths_global.drop(columns='Province/State', errors='ignore')
recovered_global = recovered_global.drop(columns='Province/State', errors='ignore')

# NOTE(review): this cell used to call
#     <frame>.rename(columns={"Country_Region": "country", "Last_Update": "last"})
# on the three time-series frames. Those calls were no-ops: the result was
# neither assigned nor applied in place, and the time-series frames have a
# 'Country/Region' column (and no 'Last_Update' column), so no key matched.
# They are removed rather than made effective so the frames keep the column
# names they have always had; rename 'Country/Region' here explicitly if a
# shorter name is wanted, and update any code that uses the old name.

# Shorter, lower-case names for the columns of the per-country snapshot.
# rename() ignores keys that are absent, so this is safe to re-run.
country_cases = country_cases.rename(columns={
    "Country_Region": "country",
    "Last_Update": "last",
    "Confirmed": "confirmed",
    "Deaths": "deaths",
    "Recovered": "recovered",
    "Active": "active",
    "Mortality_Rate": "mortality",
})
country_cases.columns

Index(['country', 'last', 'Lat', 'Long_', 'confirmed', 'deaths', 'recovered',
       'active', 'Incident_Rate', 'People_Tested', 'People_Hospitalized',
       'mortality', 'UID', 'ISO3'],
      dtype='object')

## Worldwide Outbreak Visualisations

### Highest Number of Confirmed Cases

In [9]:
def highlight_cols(x):
    """Build a frame of CSS strings that colours selected columns of ``x``.

    The result has the same index and columns as ``x``. Every cell is an
    empty string except in the columns at positions 4, 5, 6 and 0, which are
    filled with a red, purple, grey and yellow background rule respectively.
    Columns are addressed by position, not by name.
    """
    # Column position -> CSS rule, applied in this order.
    css_by_position = {
        4: 'background-color: red',
        5: 'background-color: purple',
        6: 'background-color: grey',
        0: 'background-color: yellow',
    }
    styles = pd.DataFrame('', index = x.index, columns = x.columns)
    for position, css in css_by_position.items():
        styles.iloc[:, position] = css
    return styles
    
# Rank countries by confirmed cases (largest first), then display the ten
# highest rows with the column highlighting applied to the whole table.
sorted_country_cases = country_cases.sort_values(by = 'confirmed', ascending = False)
sorted_country_cases.iloc[:10].style.apply(highlight_cols, axis = None)

Unnamed: 0,country,last,Lat,Long_,confirmed,deaths,recovered,active,Incident_Rate,People_Tested,People_Hospitalized,mortality,UID,ISO3
17,US,2020-05-07 11:32:27,40.0,-100.0,1228609,73431,189910,987833,372.908872,,,5.976759,840,USA
161,Spain,2020-05-07 11:32:27,40.463667,-3.74922,220325,25857,126002,68466,471.23521,,,11.735845,724,ESP
10,Italy,2020-05-07 11:32:27,41.8719,12.5674,214457,29684,93245,91528,354.698174,,,13.841469,380,ITA
16,United Kingdom,2020-05-07 11:32:27,55.0,-3.0,202359,30150,957,171252,298.08648,,,14.899263,826,GBR
13,Russia,2020-05-07 11:32:27,61.524,105.3188,177160,1625,23803,151732,121.396961,,,0.91725,643,RUS
6,France,2020-05-07 11:32:27,46.2276,2.2137,174224,25812,54081,94331,266.913783,,,14.81541,250,FRA
7,Germany,2020-05-07 11:32:27,51.1657,10.4515,168162,7275,139900,20987,200.709098,,,4.326185,276,DEU
174,Turkey,2020-05-07 11:32:27,38.9637,35.2433,131744,3584,78202,49958,156.207562,,,2.720427,792,TUR
39,Brazil,2020-05-07 11:32:27,-14.235,-51.9253,126611,8588,51370,66653,59.564994,,,6.782981,76,BRA
90,Iran,2020-05-07 11:32:27,32.427908,53.688046,103135,6486,82744,13905,122.790063,,,6.288845,364,IRN


### Confirmed Cases Graphical
- The US has been excluded because it is an outlier: its case count is so much larger than the others that it made the graph hard to read.

In [11]:
# Bubble chart of the ten countries with the most confirmed cases, leaving
# out the US. The US is excluded by name rather than by position: slicing
# with [1:] only removes the US while it happens to be ranked first, and
# would silently drop a different country otherwise.
top_confirmed_ex_us = sorted_country_cases.head(10).loc[lambda df: df['country'] != 'US']
fig = px.scatter(top_confirmed_ex_us, x = 'country', y = 'confirmed', size = 'confirmed',
                 hover_name = 'country', size_max = 50, template = 'plotly_dark', color = 'confirmed',
                 title = 'Confirmed COVID-19 cases: top 10 countries (US excluded)')
fig.show()