## COVID-19 Open Research Dataset Challenge (CORD-19)
El objetivo de este notebook es analizar las estadísticas de los pacientes confirmados, recuperados y fallecidos a causa del COVID-19.
Los datos son extraídos del repositorio del "Center for Systems Science and Engineering (CSSE)" de la Universidad de Johns Hopkins.

In [2]:
# Fuente: CORD-19 https://www.kaggle.com/tarunkr/covid-19-case-study-analysis-viz-comparisons
#https://github.com/tarunk04/COVID-19-CaseStudy-and-Predictions/blob/master/covid-19-case-study-analysis-viz-comparisons%20v35.ipynb
#Extracción de datos: https://github.com/CSSEGISandData/COVID-19

In [3]:
#Instalación de librerías
!pip install pycountry_convert #permite una estandarización de nombres, códigos de países y continentes
!pip install folium #visualiza datos de forma interactiva procesados con python.
!pip install calmap #permite visualizar mapas de calor

In [4]:
#Carga de librerías
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import ticker 
import pycountry_convert as pc
import folium
import branca
from datetime import datetime, timedelta,date
from scipy.interpolate import make_interp_spline, BSpline
import json, requests
import calmap
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

In [5]:
# Download the data.
# `error_bad_lines=False` was deprecated in pandas 1.3 and removed in 2.0;
# `on_bad_lines="skip"` is its documented replacement (requires pandas >= 1.3).
CSSE_TIME_SERIES_URL = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/'
df_confirmados = pd.read_csv(CSSE_TIME_SERIES_URL + 'time_series_covid19_confirmed_global.csv', on_bad_lines="skip")
df_fallecidos = pd.read_csv(CSSE_TIME_SERIES_URL + 'time_series_covid19_deaths_global.csv', on_bad_lines="skip")
df_recuperados = pd.read_csv(CSSE_TIME_SERIES_URL + 'time_series_covid19_recovered_global.csv', on_bad_lines="skip")
# Per-country snapshot (latest totals), published on the repository's web-data branch.
df_covid_country = pd.read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/web-data/data/cases_country.csv")

In [6]:
# Preview the first two rows of the confirmed-cases time series.
df_confirmados.head(2)

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,2/1/20,2/2/20,2/3/20,2/4/20,2/5/20,2/6/20,2/7/20,2/8/20,2/9/20,2/10/20,2/11/20,2/12/20,2/13/20,2/14/20,2/15/20,2/16/20,2/17/20,2/18/20,2/19/20,2/20/20,2/21/20,2/22/20,2/23/20,2/24/20,2/25/20,2/26/20,...,3/27/20,3/28/20,3/29/20,3/30/20,3/31/20,4/1/20,4/2/20,4/3/20,4/4/20,4/5/20,4/6/20,4/7/20,4/8/20,4/9/20,4/10/20,4/11/20,4/12/20,4/13/20,4/14/20,4/15/20,4/16/20,4/17/20,4/18/20,4/19/20,4/20/20,4/21/20,4/22/20,4/23/20,4/24/20,4/25/20,4/26/20,4/27/20,4/28/20,4/29/20,4/30/20,5/1/20,5/2/20,5/3/20,5/4/20,5/5/20
0,,Afghanistan,33.0,65.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,...,110,110,120,170,174,237,273,281,299,349,367,423,444,484,521,555,607,665,714,784,840,906,933,996,1026,1092,1176,1279,1351,1463,1531,1703,1828,1939,2171,2335,2469,2704,2894,3224
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,186,197,212,223,243,259,277,304,333,361,377,383,400,409,416,433,446,467,475,494,518,539,548,562,584,609,634,663,678,712,726,736,750,766,773,782,789,795,803,820


In [7]:
# Preview the first two rows of the deaths time series.
df_fallecidos.head(2)


Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,2/1/20,2/2/20,2/3/20,2/4/20,2/5/20,2/6/20,2/7/20,2/8/20,2/9/20,2/10/20,2/11/20,2/12/20,2/13/20,2/14/20,2/15/20,2/16/20,2/17/20,2/18/20,2/19/20,2/20/20,2/21/20,2/22/20,2/23/20,2/24/20,2/25/20,2/26/20,...,3/27/20,3/28/20,3/29/20,3/30/20,3/31/20,4/1/20,4/2/20,4/3/20,4/4/20,4/5/20,4/6/20,4/7/20,4/8/20,4/9/20,4/10/20,4/11/20,4/12/20,4/13/20,4/14/20,4/15/20,4/16/20,4/17/20,4/18/20,4/19/20,4/20/20,4/21/20,4/22/20,4/23/20,4/24/20,4/25/20,4/26/20,4/27/20,4/28/20,4/29/20,4/30/20,5/1/20,5/2/20,5/3/20,5/4/20,5/5/20
0,,Afghanistan,33.0,65.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,4,4,4,4,4,4,6,6,7,7,11,14,14,15,15,18,18,21,23,25,30,30,30,33,36,36,40,42,43,47,50,57,58,60,64,68,72,85,90,95
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,8,10,10,11,15,15,16,17,20,20,21,22,22,23,23,23,23,23,24,25,26,26,26,26,26,26,27,27,27,27,28,28,30,30,31,31,31,31,31,31


In [8]:
# Preview the first two rows of the per-country snapshot table.
df_covid_country.head(2)

Unnamed: 0,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Incident_Rate,People_Tested,People_Hospitalized,Mortality_Rate,UID,ISO3
0,Australia,2020-05-07 00:32:28,-25.0,133.0,6894,97,6039,758,27.078088,,,1.407021,36,AUS
1,Austria,2020-05-07 00:32:28,47.5162,14.5501,15684,608,13639,1437,174.142832,,,3.876562,40,AUT


In [9]:
# Preprocessing: give the location columns short names and recompute active cases.
_location_renames = {"Province/State": "state", "Country/Region": "country"}
df_confirmados = df_confirmados.rename(columns=_location_renames)
df_fallecidos = df_fallecidos.rename(columns=_location_renames)
df_recuperados = df_recuperados.rename(columns=_location_renames)

df_covid_country = df_covid_country.rename(columns={"Country_Region": "country"})
# Active = Confirmed - Recovered - Deaths (overwrites the column shipped in the CSV).
df_covid_country["Active"] = (
    df_covid_country["Confirmed"]
    .sub(df_covid_country["Recovered"])
    .sub(df_covid_country["Deaths"])
)

In [10]:
# Rename countries (not continents) to the spellings used with the
# pycountry_convert library. A single mapping applied to every frame replaces
# the previous 36 copy-pasted assignments, so a new rename is added in one place.
COUNTRY_RENAMES = {
    "US": "USA",
    "Korea, South": "South Korea",
    "Taiwan*": "Taiwan",
    "Congo (Kinshasa)": "Democratic Republic of the Congo",
    "Cote d'Ivoire": "Côte d'Ivoire",
    "Reunion": "Réunion",
    "Congo (Brazzaville)": "Republic of the Congo",
    "Bahamas, The": "Bahamas",
    "Gambia, The": "Gambia",
}

# Exact-value replacement; names that are not in the mapping are left untouched.
for _frame in (df_confirmados, df_fallecidos, df_covid_country, df_recuperados):
    _frame["country"] = _frame["country"].replace(COUNTRY_RENAMES)

# Collect the country name of every row as NumPy arrays:
# `countries` from the confirmed-cases time series, `countries1` from the
# per-country snapshot table.
countries = np.asarray(df_confirmados["country"])
countries1 = np.asarray(df_covid_country["country"])
# Continent code -> display name. The 'na' key is the fallback code returned by
# country_to_continent_code for names it cannot resolve.
# NOTE(review): 'OC' (presumably Oceania) is displayed as 'Australia', so every
# country with that code is grouped under this label; confirm this is intended.
continents = {
    'NA': 'North America',
    'SA': 'South America', 
    'AS': 'Asia',
    'OC': 'Australia',
    'AF': 'Africa',
    'EU' : 'Europe',
    'na' : 'Others'
}

# Function to obtain the continent code of a country
def country_to_continent_code(country):
    """Return the continent code for a country name, or 'na' when unresolved.

    The name is first converted to an alpha-2 country code and that code is
    then converted to a continent code, both through pycountry_convert.

    Parameters
    ----------
    country : str
        Country name as it appears in the data frames.

    Returns
    -------
    str
        The continent code, or the fallback 'na' if either lookup fails.
    """
    try:
        return pc.country_alpha2_to_continent_code(pc.country_name_to_country_alpha2(country))
    except Exception:
        # Deliberate best effort: any failed lookup falls back to 'na'.
        # `Exception` (instead of a bare `except`) lets KeyboardInterrupt and
        # SystemExit propagate.
        return 'na'

# Add a "continent" column to each frame, derived from that frame's OWN
# "country" column. Previously df_fallecidos was labelled from df_confirmados'
# country array, which is only correct while both frames share the same row order.
# The membership check makes the cell safe to re-run: DataFrame.insert raises
# if the column already exists.
for _frame, _position in ((df_confirmados, 2), (df_fallecidos, 2), (df_covid_country, 1)):
    if "continent" not in _frame.columns:
        _frame.insert(
            _position,
            "continent",
            [continents[country_to_continent_code(name)] for name in _frame["country"]],
        )

In [11]:
# Rows of the deaths table whose continent is the fallback label 'Others'.
df_fallecidos.loc[df_fallecidos["continent"].eq('Others')]

Unnamed: 0,state,country,continent,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,2/1/20,2/2/20,2/3/20,2/4/20,2/5/20,2/6/20,2/7/20,2/8/20,2/9/20,2/10/20,2/11/20,2/12/20,2/13/20,2/14/20,2/15/20,2/16/20,2/17/20,2/18/20,2/19/20,2/20/20,2/21/20,2/22/20,2/23/20,2/24/20,2/25/20,...,3/27/20,3/28/20,3/29/20,3/30/20,3/31/20,4/1/20,4/2/20,4/3/20,4/4/20,4/5/20,4/6/20,4/7/20,4/8/20,4/9/20,4/10/20,4/11/20,4/12/20,4/13/20,4/14/20,4/15/20,4/16/20,4/17/20,4/18/20,4/19/20,4/20/20,4/21/20,4/22/20,4/23/20,4/24/20,4/25/20,4/26/20,4/27/20,4/28/20,4/29/20,4/30/20,5/1/20,5/2/20,5/3/20,5/4/20,5/5/20
88,,Diamond Princess,Others,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,3,3,3,...,10,10,10,10,10,11,11,11,11,11,11,11,11,11,11,11,11,11,12,12,12,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13
127,,Holy See,Others,41.9029,12.4534,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
236,,Timor-Leste,Others,-8.874217,125.727539,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
241,,West Bank and Gaza,Others,31.9522,35.2332,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,3,3,4,4,4,4,2,2,2,2,2,2,2,2,2,2,2
247,,Kosovo,Others,42.602636,20.902977,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,1,1,1,1,1,1,1,1,1,1,1,4,5,5,7,7,7,7,8,8,11,12,12,12,15,18,18,19,19,20,21,22,22,22,22,22,22,22,26,26
248,,Burma,Others,21.9162,95.956,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,1,1,1,1,1,1,1,3,3,3,3,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6
252,,MS Zaandam,Others,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2
261,,Western Sahara,Others,24.2155,-12.8858,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [12]:
# Fill missing province/state names with an empty string.
# Only the textual "state" column is filled: a frame-wide replacement of NaN by ''
# would turn any numeric column containing NaN (e.g. coordinates) into object dtype.
df_confirmados = df_confirmados.fillna({"state": ""})
df_fallecidos = df_fallecidos.fillna({"state": ""})
df_recuperados = df_recuperados.fillna({"state": ""})

In [13]:
#Definición de funciones para visualización
def plot_params(ax,axis_label= None, plt_title = None,label_size=15, axis_fsize = 15, title_fsize = 20, scale = 'linear' ):
    """Apply the notebook's common styling to a matplotlib Axes.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        Axes to style. All changes are applied to this object rather than to
        pyplot's "current" axes.
    axis_label : sequence of two str, optional
        ``[x_label, y_label]``. When None, the axis labels are left unchanged.
    plt_title : str, optional
        Title of the plot.
    label_size : int
        Font size of the tick labels.
    axis_fsize : int
        Font size of the axis labels.
    title_fsize : int
        Font size of the title.
    scale : str
        Y-axis scale name passed to ``set_yscale`` (for example 'linear' or 'log').
    """
    # Tick parameters
    ax.xaxis.set_minor_locator(ticker.AutoMinorLocator())
    ax.yaxis.set_minor_locator(ticker.AutoMinorLocator())
    ax.tick_params(which='both', width=1,labelsize=label_size)
    ax.tick_params(which='major', length=6)
    ax.tick_params(which='minor', length=3, color='0.8')

    # Grid: darker lines on major ticks, lighter lines on minor ticks
    ax.grid(lw = 1, ls = '-', c = "0.7", which = 'major')
    ax.grid(lw = 1, ls = '-', c = "0.9", which = 'minor')

    # Plot title
    ax.set_title(plt_title, {'fontsize':title_fsize})

    # Y-axis scale; minor ticks are switched on after the scale is set
    ax.set_yscale(scale)
    ax.minorticks_on()

    # Axis labels (skipped when none were given, instead of failing on None[0])
    if axis_label is not None:
        ax.set_xlabel(axis_label[0],fontsize = axis_fsize)
        ax.set_ylabel(axis_label[1],fontsize = axis_fsize)
    
def visualize_covid_cases(confirmed, deaths, continent=None , country = None , state = None, period = None, figure = None, scale = "linear"):
    """Plot cumulative confirmed cases and deaths over time with a summary box.

    Parameters
    ----------
    confirmed, deaths : pandas.DataFrame
        Time-series tables whose columns from position 5 onwards hold one
        cumulative count per date (they are sliced with ``iloc[:, 5:]``).
    continent : str, optional
        Value of the "continent" column to keep. The special value "All"
        selects every row. Takes precedence over ``country``.
    country : str, optional
        Value of the "country" column to keep (used when ``continent`` is None).
    state, period : optional
        Not used by this function; kept so existing calls remain valid.
    figure : sequence, optional
        ``[fig, nrows, ncols, index]``: draw into a subplot of an existing
        figure. When None, a new 10x10 figure is created and shown.
    scale : str
        Y-axis scale forwarded to ``plot_params``.
    """
    if figure is None:
        f = plt.figure(figsize=(10,10))
        ax = f.add_subplot(111)
    else:
        f = figure[0]
        ax = f.add_subplot(figure[1],figure[2],figure[3])
    # plot_params and tight_layout act on pyplot's current axes/figure, so make
    # sure that is the subplot created above.
    plt.sca(ax)

    plt.tight_layout(pad=10, w_pad=5, h_pad=5)

    if continent is not None:
        params = ["continent",continent]
    elif country is not None:
        params = ["country",country]
    else:
        params = ["All", "All"]

    def _daily_totals(frame):
        # Column-wise sum of the date columns, restricted to the selected rows.
        # Each frame is filtered with its OWN mask; the previous code masked the
        # confirmed frame with a mask built from the deaths frame.
        if params[1] != "All":
            frame = frame[frame[params[0]] == params[1]]
        return np.sum(np.asarray(frame.iloc[:,5:]),axis = 0)

    def _increase(series, days):
        # Growth over the last `days` columns; when the series is shorter than
        # that, the growth since the start of the series.
        baseline = series[-(days + 1)] if series.shape[0] > days else 0
        return series[-1] - baseline

    Total_confirmed = _daily_totals(confirmed)
    Total_deaths = _daily_totals(deaths)

    marker_style = dict(linewidth=3, linestyle='-', marker='o',markersize=4, markerfacecolor='#ffffff')
    curves = (("Confirmed", "darkcyan", Total_confirmed), ("Deaths", "crimson", Total_deaths))
    for curve_label, curve_color, cases in curves:
        days = np.arange(1,cases.shape[0]+1)
        ax.plot(days,cases,label = curve_label+" (Total : "+str(cases[-1])+")",color=curve_color,**marker_style)

    # Mortality rate truncated to two decimals; 0.0 when there are no confirmed
    # cases (avoids int(nan) raising ValueError).
    if Total_confirmed[-1] > 0:
        mortality = int(Total_deaths[-1]/(Total_confirmed[-1])*10000)/100
    else:
        mortality = 0.0

    text = "Desde "+confirmed.columns[5]+" hasta "+confirmed.columns[-1]+"\n"
    text += "Tasa de mortalidad : "+ str(mortality)+"\n"
    text += "Últimos 5 días:\n"
    text += "Confirmados : " + str(_increase(Total_confirmed, 5))+"\n"
    text += "Fallecidos : " + str(_increase(Total_deaths, 5))+"\n"
    text += "Últimas 24 horas:\n"
    text += "Confirmados : " + str(_increase(Total_confirmed, 1))+"\n"
    text += "Fallecidos : " + str(_increase(Total_deaths, 1))+"\n"

    ax.text(0.02, 0.78, text, fontsize=15, horizontalalignment='left', verticalalignment='top', transform=ax.transAxes,bbox=dict(facecolor='white', alpha=0.4))

    # Axis labels are built from the frame that was passed in, not from a global.
    axis_label = ["Días ("+confirmed.columns[5]+" - "+confirmed.columns[-1]+")","No. de casos"]

    # Common styling
    plot_params(ax,axis_label,scale = scale)

    # Title (set after plot_params, which also sets a title)
    if params[1] == "All" :
        ax.set_title("Casos de COVID-19 en el mundo",{'fontsize':25})
    else:
        ax.set_title("Casos de COVID-19 para "+params[1] ,{'fontsize':25})

    # Legend
    ax.legend(loc= "best",fontsize = 15)

    # Only show immediately when this function created the figure itself.
    if figure is None:
        plt.show()
        
def get_total_cases(cases, country = "All"):
    """Return the total of the last date column, optionally for one country.

    Parameters
    ----------
    cases : pandas.DataFrame
        Time-series table whose columns from position 5 onwards hold one count
        per date.
    country : str
        Value of the "country" column to keep; "All" keeps every row.

    Returns
    -------
    The sum of the last column over the selected rows.
    """
    selected = cases if country == "All" else cases[cases["country"] == country]
    per_date_totals = np.asarray(selected.iloc[:, 5:]).sum(axis=0)
    return per_date_totals[-1]
    
def get_mortality_rate(confirmed,deaths, continent = None, country = None):
    """Return the mortality rate (deaths / confirmed, in percent) for every date.

    Parameters
    ----------
    confirmed, deaths : pandas.DataFrame
        Time-series tables whose columns from position 5 onwards hold one
        cumulative count per date.
    continent : str, optional
        Value of the "continent" column to keep; "All" keeps every row.
        Takes precedence over ``country``.
    country : str, optional
        Value of the "country" column to keep (used when ``continent`` is None).

    Returns
    -------
    numpy.ndarray
        One rate per date, rounded to two decimals. Dates with zero confirmed
        cases yield 0.0.
    """
    if continent is not None:
        column, value = "continent", continent
    elif country is not None:
        column, value = "country", country
    else:
        column, value = "All", "All"

    if value != "All":
        confirmed = confirmed[confirmed[column] == value]
        deaths = deaths[deaths[column] == value]

    total_confirmed = np.sum(np.asarray(confirmed.iloc[:,5:]),axis = 0).astype(float)
    total_deaths = np.sum(np.asarray(deaths.iloc[:,5:]),axis = 0).astype(float)

    # Divide only where there are confirmed cases. This avoids the RuntimeWarning
    # and the NaN/inf results of a plain division (nan_to_num would turn inf into
    # a huge finite number).
    ratio = np.divide(
        total_deaths,
        total_confirmed,
        out=np.zeros_like(total_deaths),
        where=total_confirmed != 0,
    )
    mortality_rate = np.round(ratio*100,2)

    return np.nan_to_num(mortality_rate)
def dd(date1,date2):
    """Return the signed number of days from date2 to date1.

    Both arguments are strings in '%m/%d/%y' format (for example '1/22/20').
    """
    date_format = '%m/%d/%y'
    first = datetime.strptime(date1, date_format)
    second = datetime.strptime(date2, date_format)
    elapsed = first - second
    return elapsed.days


# Relative path prefix "output/". It is not referenced in the visible part of
# the notebook; presumably later cells use it when saving files (verify).
out = "output/"

In [14]:
# General analysis of the data.
# Per-country table indexed by country name (location/bookkeeping columns removed).
df_countries_cases = (
    df_covid_country
    .drop(columns=['Lat','Long_','continent','Last_Update'])
    .set_index("country")
)

# Per-continent totals. numeric_only=True keeps text columns such as ISO3 out of
# the aggregation (recent pandas would otherwise concatenate the strings).
# NOTE: rate columns and UID are summed as well, so those totals are not meaningful.
df_continents_cases = (
    df_covid_country
    .drop(columns=['Lat','Long_','country','Last_Update'])
    .groupby(["continent"])
    .sum(numeric_only=True)
)

In [15]:
# Global report: total confirmed, deaths, recovered and active cases worldwide.
# numeric_only=True excludes text columns such as ISO3 from the sum.
# NOTE: the summed rate columns and UID are not meaningful totals.
df_countries_cases.sum(numeric_only=True).to_frame().transpose().style.background_gradient(cmap='Wistia',axis=1)

Unnamed: 0,Confirmed,Deaths,Recovered,Active,Incident_Rate,People_Tested,People_Hospitalized,Mortality_Rate,UID
0,3751070.0,263346,1241850.0,2245870.0,16511,0,0,778.969,97103


In [16]:
# Report by continent: display the per-continent totals with a 'Wistia' colour
# gradient applied as cell background.
df_continents_cases.style.background_gradient(cmap='Wistia')

Unnamed: 0_level_0,Confirmed,Deaths,Recovered,Active,Incident_Rate,People_Tested,People_Hospitalized,Mortality_Rate,UID
continent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Africa,51171,2006,17629,31536,498.349,0,0,202.54,24338
Asia,602505,20798,326882,254825,2656.2,0,0,102.905,18454
Australia,8408,118,7377,913,60.0325,0,0,2.81831,1430
Europe,1492020,146662,560494,784864,10065.8,0,0,238.953,18538
North America,1341365,80911,239880,1020574,1077.8,0,0,141.802,8292
Others,2154,49,1386,719,1541.05,0,0,31.3469,21343
South America,253446,12802,88206,152438,611.798,0,0,58.6033,4708


In [17]:
# Per-country table sorted by confirmed cases (largest first), displayed with a
# 'Wistia' colour gradient applied as cell background.
df_countries_cases.sort_values('Confirmed', ascending= False).style.background_gradient(cmap='Wistia')

Unnamed: 0_level_0,Confirmed,Deaths,Recovered,Active,Incident_Rate,People_Tested,People_Hospitalized,Mortality_Rate,UID,ISO3
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
USA,1228177,73207,189910,965060,372.778,,,5.96062,840,USA
Spain,220325,25857,126002,68466,471.235,,,11.7358,724,ESP
Italy,214457,29684,93245,91528,354.698,,,13.8415,380,ITA
United Kingdom,202359,30150,934,171275,298.086,,,14.8993,826,GBR
France,174224,25812,54078,94334,266.914,,,14.8154,250,FRA
Germany,168162,7275,137696,23191,200.709,,,4.32619,276,DEU
Russia,165929,1537,21327,143065,113.701,,,0.9263,643,RUS
Turkey,131744,3584,78202,49958,156.208,,,2.72043,792,TUR
Brazil,126148,8566,51370,66212,59.3472,,,6.79044,76,BRA
Iran,101650,6418,81587,13645,121.022,,,6.31382,364,IRN


In [18]:
# Top 10 countries by confirmed cases.
# Sort once; drop NaN first because NaN sorts last and would take top-10 slots.
top_confirmed = df_countries_cases["Confirmed"].dropna().sort_values().tail(10)

f = plt.figure(figsize=(10,5))
# A single axes: calling plt.axes() after add_subplot() stacked a second axes on the figure.
ax = f.add_subplot(111)
ax.set_axisbelow(True)
ax.barh(top_confirmed.index, top_confirmed.values, color="darkcyan")
ax.tick_params(size=5,labelsize = 13)
ax.set_xlabel("Casos confirmados",fontsize=18)
ax.set_title("Top 10 de países (casos confirmados)",fontsize=20)
ax.grid(alpha=0.3)

In [19]:
# Top 10 countries by deaths.
# Sort once; drop NaN first because NaN sorts last and would take top-10 slots.
top_deaths = df_countries_cases["Deaths"].dropna().sort_values().tail(10)

f = plt.figure(figsize=(10,5))
# A single axes: calling plt.axes() after add_subplot() stacked a second axes on the figure.
ax = f.add_subplot(111)
ax.set_axisbelow(True)
ax.barh(top_deaths.index, top_deaths.values, color="crimson")
ax.tick_params(size=5,labelsize = 13)
ax.set_xlabel("Fallecimientos",fontsize=18)
ax.set_title("Top 10 de países (fallecimientos)",fontsize=20)
ax.grid(alpha=0.3,which='both')

In [20]:
# Top 10 countries by active cases.
# Sort once; drop NaN first because NaN sorts last and would take top-10 slots.
top_active = df_countries_cases["Active"].dropna().sort_values().tail(10)

f = plt.figure(figsize=(10,5))
# A single axes: calling plt.axes() after add_subplot() stacked a second axes on the figure.
ax = f.add_subplot(111)
ax.set_axisbelow(True)
ax.barh(top_active.index, top_active.values, color="darkorange")
ax.tick_params(size=5,labelsize = 13)
ax.set_xlabel("Casos activos",fontsize=18)
ax.set_title("Top 10 de países (casos activos)",fontsize=20)
ax.grid(alpha=0.3,which='both')

In [21]:

# Top 10 countries by recovered cases.
# Sort once; drop NaN first because NaN sorts last and would take top-10 slots.
top_recovered = df_countries_cases["Recovered"].dropna().sort_values().tail(10)

f = plt.figure(figsize=(10,5))
# A single axes: calling plt.axes() after add_subplot() stacked a second axes on the figure.
ax = f.add_subplot(111)
ax.set_axisbelow(True)
ax.barh(top_recovered.index, top_recovered.values, color="limegreen")
ax.tick_params(size=5,labelsize = 13)
ax.set_xlabel("Casos recuperados",fontsize=18)
ax.set_title("Top 10 de países (casos recuperados)",fontsize=20)
ax.grid(alpha=0.3,which='both')

In [22]:
# Correlation analysis.
# Restrict to numeric columns first: corr() on a frame that still contains the
# text column ISO3 raises on recent pandas versions.
df_countries_cases.select_dtypes(include="number").corr().style.background_gradient(cmap='Reds')

Unnamed: 0,Confirmed,Deaths,Recovered,Active,Incident_Rate,People_Tested,People_Hospitalized,Mortality_Rate,UID
Confirmed,1.0,0.932654,0.798303,0.979939,0.173458,,,0.131575,0.0144228
Deaths,0.932654,1.0,0.780194,0.890618,0.210393,,,0.254866,0.0108705
Recovered,0.798303,0.780194,1.0,0.663902,0.193219,,,0.163615,-0.0120269
Active,0.979939,0.890618,0.663902,1.0,0.148118,,,0.0981508,0.0218416
Incident_Rate,0.173458,0.210393,0.193219,0.148118,1.0,,,0.0693607,0.0300597
People_Tested,,,,,,,,,
People_Hospitalized,,,,,,,,,
Mortality_Rate,0.131575,0.254866,0.163615,0.0981508,0.0693607,,,1.0,0.158878
UID,0.0144228,0.0108705,-0.0120269,0.0218416,0.0300597,,,0.158878,1.0


In [23]:
# Visualisation: interactive world map with one circle per row of the
# confirmed-cases table.
world_map = folium.Map(location=[10,0], tiles="cartodbpositron", zoom_start=2,max_zoom=6,min_zoom=2)
for i in range(len(df_confirmados)):
    row = df_confirmados.iloc[i]

    # Skip rows without usable coordinates (NaN or non-numeric values);
    # folium.Circle cannot be placed without a location.
    lat = pd.to_numeric(row['Lat'], errors='coerce')
    lon = pd.to_numeric(row['Long'], errors='coerce')
    if pd.isna(lat) or pd.isna(lon):
        continue

    confirmed_total = df_confirmados.iloc[i,-1]
    # NOTE(review): deaths are looked up by row position, which assumes that
    # df_fallecidos has the same row order as df_confirmados - confirm.
    deaths_total = df_fallecidos.iloc[i,-1]
    # Plain deaths/confirmed ratio, guarded against zero confirmed cases
    # (the previous "+1.00001" in the denominator biased the displayed rate).
    if confirmed_total > 0:
        mortality_rate = np.round(deaths_total/confirmed_total*100,2)
    else:
        mortality_rate = 0.0

    folium.Circle(
        location=[lat, lon],
        tooltip = "<h5 style='text-align:center;font-weight: bold'>"+row['country']+"</h5>"+
                    "<div style='text-align:center;'>"+str(np.nan_to_num(row['state']))+"</div>"+
                    "<hr style='margin:10px;'>"+
                    "<ul style='color: #444;list-style-type:circle;align-item:left;padding-left:20px;padding-right:20px'>"+
        "<li>Confirmed: "+str(confirmed_total)+"</li>"+
        "<li>Deaths:   "+str(deaths_total)+"</li>"+
        "<li>Mortality Rate:   "+str(mortality_rate)+"</li>"+
        "</ul>"
        ,
        # Radius grows with the logarithm of the confirmed count; the small
        # offset keeps log() defined when the count is zero.
        radius=(int((np.log(confirmed_total+1.00001)))+0.2)*50000,
        color='#ff6600',
        fill_color='#ff8533',
        fill=True).add_to(world_map)

world_map

In [24]:
!pip install plotly

In [25]:
# Global confirmed-cases heat map
import plotly.express as px  # interactive charts
temp_df = pd.DataFrame(df_countries_cases['Confirmed'])
temp_df = temp_df.reset_index()
fig = px.choropleth(temp_df, locations="country",
                    # log10 of the confirmed count; clipping at 1 maps a zero count
                    # to 0 instead of -inf and leaves every count >= 1 unchanged
                    color=np.log10(temp_df["Confirmed"].clip(lower=1)),
                    hover_name="country", # column shown as the hover title
                    hover_data=["Confirmed"],
                    color_continuous_scale=px.colors.sequential.Plasma,locationmode="country names")
fig.update_geos(fitbounds="locations", visible=False)
fig.update_layout(title_text="Mapa de calor de casos confirmados (escala logarítmica)")
# The colour scale is switched to "Reds" here, replacing the Plasma scale set above.
fig.update_coloraxes(colorbar_title="Casos confirmados (escala logarítmica)",colorscale="Reds")
fig.show()

In [26]:
# Global deaths heat map

temp_df = df_countries_cases[['Deaths']].reset_index()
fig = px.choropleth(
    temp_df,
    locations="country",
    locationmode="country names",
    # log10(deaths + 1): the +1 keeps countries with zero deaths at 0
    color=np.log10(temp_df['Deaths'] + 1),
    hover_name="country",
    hover_data=["Deaths"],
    color_continuous_scale=px.colors.sequential.Plasma,
)
fig.update_geos(fitbounds="locations", visible=False)
fig.update_layout(title_text="Mapa de calor de fallecimientos (escala logarítmica)")
fig.update_coloraxes(colorbar_title="Fallecimientos (escala logarítmica)",colorscale="Reds")
fig.show()

In [27]:
# Number of affected countries/regions since the start of the pandemic.
# numeric_only=True keeps text columns (state, continent) out of the sum, and
# errors='ignore' tolerates Lat/Long being absent from the numeric result.
_country_totals = (
    df_confirmados.groupby("country").sum(numeric_only=True)
    .drop(columns=['Lat','Long'], errors='ignore')
)
# For every date: how many countries have at least one confirmed case.
case_nums_country = _country_totals.apply(lambda x: x[x > 0].count(), axis =0)
d = [datetime.strptime(col,'%m/%d/%y').strftime("%d %b") for col in case_nums_country.index]

f = plt.figure(figsize=(15,8))
ax = f.add_subplot(111)
marker_style = dict(c="crimson",linewidth=6, linestyle='-', marker='o',markersize=8, markerfacecolor='#ffffff')
# Plot against numeric positions: repeated "%d %b" labels would be merged into
# one category if the strings were used as x values.
ax.plot(np.arange(len(d)), case_nums_country.values,**marker_style)
ax.tick_params(labelsize = 14)

# About five evenly spaced ticks plus the last date. Positions and labels are
# built together so their lengths always match.
_step = max(1, len(d)//5)
_tick_positions = list(range(0, len(d)-1, _step))
if len(_tick_positions) > 1 and (len(d)-1 - _tick_positions[-1]) < _step/2:
    _tick_positions.pop()  # avoid a label overlapping the final date
_tick_positions.append(len(d)-1)
ax.set_xticks(_tick_positions)
ax.set_xticklabels([d[p] for p in _tick_positions])

# Labels
ax.set_xlabel("Fecha",fontsize=18)
ax.set_ylabel("Número de países / regiones",fontsize=18)
ax.grid(alpha = 0.3)
plt.show()
plt.close()

In [28]:
# Global spread trend of the pandemic: a single panel covering every row.
cols, rows = 1, 1
f = plt.figure(figsize=(10, 10 * rows))
visualize_covid_cases(
    df_confirmados,
    df_fallecidos,
    continent="All",
    figure=[f, rows, cols, 1],
)
plt.show()

In [29]:
# Spread trend by continent.
# numeric_only=True keeps the text columns (state, country) out of the sum.
df_continents= df_confirmados.groupby(["continent"]).sum(numeric_only=True)
# Continents ordered by the latest cumulative count, largest first.
# NOTE: this rebinds `continents`, which previously held the code -> name dict.
continents = df_continents.sort_values(df_continents.columns[-1],ascending = False).index

# One panel per continent, laid out on a two-column grid.
cols =2
rows = int(np.ceil(continents.shape[0]/cols))
f = plt.figure(figsize=(20,10*rows))
for i,continent in enumerate(continents):
    visualize_covid_cases(df_confirmados, df_fallecidos, continent = continent,figure = [f,rows,cols, i+1])

plt.show()

In [30]:
# Spread trend in the most affected countries.
# numeric_only=True keeps the text columns (state, continent) out of the sum;
# recent pandas versions would otherwise concatenate their strings.
df_countries = df_confirmados.groupby(["country"]).sum(numeric_only=True)
# Order countries by the latest cumulative count, largest first.
df_countries = df_countries.sort_values(df_countries.columns[-1],ascending = False)

In [31]:
# Preview the first rows of the per-country totals (sorted in the previous cell).
df_countries.head()

Unnamed: 0_level_0,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,2/1/20,2/2/20,2/3/20,2/4/20,2/5/20,2/6/20,2/7/20,2/8/20,2/9/20,2/10/20,2/11/20,2/12/20,2/13/20,2/14/20,2/15/20,2/16/20,2/17/20,2/18/20,2/19/20,2/20/20,2/21/20,2/22/20,2/23/20,2/24/20,2/25/20,2/26/20,2/27/20,2/28/20,...,3/27/20,3/28/20,3/29/20,3/30/20,3/31/20,4/1/20,4/2/20,4/3/20,4/4/20,4/5/20,4/6/20,4/7/20,4/8/20,4/9/20,4/10/20,4/11/20,4/12/20,4/13/20,4/14/20,4/15/20,4/16/20,4/17/20,4/18/20,4/19/20,4/20/20,4/21/20,4/22/20,4/23/20,4/24/20,4/25/20,4/26/20,4/27/20,4/28/20,4/29/20,4/30/20,5/1/20,5/2/20,5/3/20,5/4/20,5/5/20
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
USA,37.0902,-95.7129,1,1,2,2,5,5,5,5,5,7,8,8,11,11,11,11,11,11,11,11,12,12,13,13,13,13,13,13,13,13,15,15,15,51,51,57,58,60,...,101657,121465,140909,161831,188172,213242,243622,275367,308650,336802,366317,397121,428654,462780,496535,526396,555313,580619,607670,636350,667592,699706,732197,758809,784326,811865,840351,869170,905358,938154,965785,988197,1012582,1039909,1069424,1103461,1132539,1158040,1180375,1204351
Spain,40.0,-4.0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,6,13,15,32,...,65719,73235,80110,87956,95923,104118,112065,119199,126168,131646,136675,141942,148220,153222,158273,163027,166831,170099,172541,177644,184948,190839,191726,198674,200210,204178,208389,213024,202990,205905,207634,209465,210773,212917,213435,213435,216582,217466,218011,219329
Italy,43.0,12.0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,20,62,155,229,322,453,655,888,...,86498,92472,97689,101739,105792,110574,115242,119827,124632,128948,132547,135586,139422,143626,147577,152271,156363,159516,162488,165155,168941,172434,175925,178972,181228,183957,187327,189973,192994,195351,197675,199414,201505,203591,205463,207428,209328,210717,211938,213013
United Kingdom,270.0299,-482.9247,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,2,3,3,3,8,8,9,9,9,9,9,9,9,9,9,9,9,9,13,13,13,15,20,...,14745,17312,19780,22453,25481,29865,34173,38689,42477,48436,52279,55949,61474,65872,74605,79874,85206,89570,94845,99483,104145,109769,115314,121172,125856,130172,134638,139246,144640,149569,154037,158348,162350,166441,172481,178685,183500,187842,191832,196243
France,91.3624,59.7192,0,0,2,3,3,3,4,5,5,5,6,6,6,6,6,6,6,11,11,11,11,11,11,11,12,12,12,12,12,12,12,12,12,12,14,18,38,57,...,33402,38105,40708,45170,52827,57749,59929,65202,69500,71412,75343,79163,83057,87366,91738,94863,121712,125394,130365,133585,146075,148084,148086,153011,155393,158168,155980,158303,159952,161644,162220,165963,169053,166543,167299,167305,168518,168925,169583,170687
