# Setup
This notebook uses data from **https://data.humdata.org/dataset/novel-coronavirus-2019-ncov-cases** to plot confirmed, death, and recovered COVID-19 case counts per country.

In [32]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly
import pandas as pd
import pathlib
import glob
import glob2
import os
import folium
import requests

import warnings
# Suppress every warning for the rest of the session.
# NOTE(review): this also hides pandas warnings (e.g. SettingWithCopyWarning,
# FutureWarning) that may point at real problems further down — consider narrowing.
warnings.filterwarnings('ignore')
# Seed NumPy's global random number generator.
np.random.seed(1234) 
# NOTE(review): this only binds a Python variable called PYTHONHASHSEED; it does not
# set the PYTHONHASHSEED environment variable, so hash randomisation is unaffected —
# confirm the intent.
PYTHONHASHSEED = 0

# Data Import 
Source: https://data.humdata.org/dataset/novel-coronavirus-2019-ncov-cases

## Downloading latest available data

In [2]:
# Directory layout, resolved relative to the notebook's working directory:
#   <cwd>/../data            -> data_dir
#   <cwd>/../data/raw_data   -> raw_data
cwd = pathlib.Path.cwd()
data_dir = cwd / '../data'
raw_data = data_dir / 'raw_data'

In [3]:
# Folder that holds the CSV files downloaded from data.humdata.org.
data = raw_data.joinpath('data.humdata/')

In [4]:
# CSSE time-series files to fetch: confirmed, deaths, recovered (in that order;
# later cells index into this list positionally).
files=['time_series_covid19_confirmed_global.csv', 'time_series_covid19_deaths_global.csv','time_series_covid19_recovered_global.csv']

# Make sure the target folder exists so the notebook also runs on a fresh checkout.
data.mkdir(parents=True, exist_ok=True)

for file in files:
    url = f'https://data.humdata.org/hxlproxy/api/data-preview.csv?url=https%3A%2F%2Fraw.githubusercontent.com%2FCSSEGISandData%2FCOVID-19%2Fmaster%2Fcsse_covid_19_data%2Fcsse_covid_19_time_series%2F{file}&filename={file}'
    # A timeout keeps a stalled connection from hanging the notebook forever.
    r = requests.get(url, allow_redirects=True, timeout=60)
    # Fail loudly on HTTP errors instead of saving an error page as a ".csv".
    r.raise_for_status()
    # write_bytes opens, writes and closes the file (the original left the handle open).
    (data/file).write_bytes(r.content)

In [5]:
# Read the three downloaded time series; `files` is ordered
# confirmed -> deaths -> recovered.
df_conf, df_deaths, df_recover = (
    pd.read_csv(data / csv_name) for csv_name in files[:3]
)

# Show (rows, columns) of each frame as a quick sanity check.
tuple(frame.shape for frame in (df_conf, df_deaths, df_recover))

((258, 76), (258, 76), (244, 76))

# Data Pre-processing

In [6]:
def country_total_cases(df_cases):
    """Collapse a province-level time series into one row per country.

    For every distinct value of ``'Country/Region'`` the result keeps the first
    row of that country (original index label and all original columns) and:

    * replaces ``'Lat'`` / ``'Long'`` with the mean over the country's rows,
    * appends a ``'country_total'`` column holding the sum, over the country's
      rows, of the last column of ``df_cases``.

    Parameters
    ----------
    df_cases : pandas.DataFrame
        Must contain ``'Country/Region'``, ``'Lat'`` and ``'Long'`` columns.
        The last column is the one that gets summed.
        # assumes the last column is the most recent date — TODO confirm for
        # inputs other than the CSSE time-series files.

    Returns
    -------
    pandas.DataFrame
        One row per country, in order of first appearance. Rows whose
        ``'Country/Region'`` is missing are dropped. ``df_cases`` itself is
        not modified.

    Raises
    ------
    KeyError
        If one of the required columns is absent.
    """
    latest_col = df_cases.columns[-1]

    # Rows without a country never matched the original equality filter either.
    valid = df_cases.dropna(subset=['Country/Region'])
    by_country = valid.groupby('Country/Region', sort=False)

    # Positional mask of each country's first row; positional (not label-based)
    # so that a non-unique index on the input cannot break alignment.
    first_mask = (~valid.duplicated(subset='Country/Region', keep='first')).to_numpy()

    # Explicit copy: we assign new columns below and must not touch the input.
    df_total = valid[first_mask].copy()

    # transform() returns one value per input row, in input order, so the mask
    # picks out exactly the aggregate belonging to each kept row.
    df_total['Lat'] = by_country['Lat'].transform('mean').to_numpy()[first_mask]
    df_total['Long'] = by_country['Long'].transform('mean').to_numpy()[first_mask]
    df_total['country_total'] = by_country[latest_col].transform('sum').to_numpy()[first_mask]
    return df_total

In [7]:
# Aggregate each of the three time series down to one row per country.
df_conf_final, df_deaths_final, df_recover_final = map(
    country_total_cases, (df_conf, df_deaths, df_recover)
)

# Preview the per-country confirmed-cases frame.
df_conf_final.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,3/25/20,3/26/20,3/27/20,3/28/20,3/29/20,3/30/20,3/31/20,4/1/20,4/2/20,country_total
0,,Afghanistan,33.0,65.0,0,0,0,0,0,0,...,84,94,110,110,120,170,174,237,273,273
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,146,174,186,197,212,223,243,259,277,277
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,302,367,409,454,511,584,716,847,986,986
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,188,224,267,308,334,370,376,390,428,428
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,3,4,4,5,7,7,7,8,8,8


In [8]:
# Sanity check: for each aggregated frame, print the number of distinct countries
# next to the number of rows. Equal numbers mean every row stands for exactly one
# country and no country appears twice.
for frame in (df_conf_final, df_deaths_final, df_recover_final):
    n_countries = len(frame['Country/Region'].unique())
    print(n_countries, len(frame))

181 181
181 181
181 181


In [23]:
# Worldwide confirmed count = sum of the per-country totals.
print('Total confirmed cases:', df_conf_final.loc[:, 'country_total'].sum())

Total confirmed cases: 1013157


# Plotting

## Folium

In [31]:
# Base map with folium's default tiles. The original call passed `titles='ABC'`,
# which is not a named folium.Map parameter (presumably a typo for `tiles`, and
# 'ABC' is not a tileset name), so it is dropped here.
world_map = folium.Map()
incidents = folium.map.FeatureGroup()

# Loop-invariant: every marker radius is the country's total relative to the mean.
mean_confirmed = df_conf_final['country_total'].mean()

# Index the sibling frames by country once instead of re-filtering them on every
# iteration. A country missing from either frame raises KeyError rather than
# silently showing a made-up number.
deaths_by_country = df_deaths_final.set_index('Country/Region')['country_total']
recovered_by_country = df_recover_final.set_index('Country/Region')['country_total']

# df_conf_final holds one row per country, so iterate its rows directly and work
# with scalars (the original passed one-row Series to int() and to folium).
for cntry, lat, long_, conf in zip(
    df_conf_final['Country/Region'],
    df_conf_final['Lat'],
    df_conf_final['Long'],
    df_conf_final['country_total'],
):
    # NOTE(review): popups are rendered as HTML, where '\n' presumably collapses to
    # a space — switch to '<br>' if separate lines are wanted. Text kept as-is here.
    popup_text = (
        str(cntry).upper() + '\nConfirmed:' + str(int(conf)) +
        '\nDeaths:' + str(int(deaths_by_country[cntry])) +
        '\nRecovered:' + str(int(recovered_by_country[cntry]))
    )

    incidents.add_child(
        folium.CircleMarker(
            [float(lat), float(long_)],
            radius=int(conf) / mean_confirmed,
            popup=popup_text,
            tooltip='Click Here',
            # A plain colour string; the original wrapped it in a one-element list.
            color='#3186cc',
            fill=True,
            fill_color='#3186cc',
            fill_opacity=0.5
        )
    )

# Last expression of the cell: attaches the markers and displays the map.
world_map.add_child(incidents)