# Visualizando datos de COVID-19, Exceso de Mortalidad y Movilidad

Actualización al 24 de marzo de 2021

In [1]:
# Dependencies
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import math
from scipy.stats import chisquare
#from benfordslaw import benfordslaw
from scipy.stats import chi2
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.stattools import grangercausalitytests

import warnings
warnings.filterwarnings('ignore')

## Datos COVID-19

In [2]:
# 

Covid19 = pd.read_csv('Series_Casos_23.03.2021.csv', encoding = 'utf-8-sig')

Covid19 = Covid19[['Fecha', 'Confirmados', 'Defunciones_Sint', 'Defunciones_Muer']]

Covid19.head()

Unnamed: 0,Fecha,Confirmados,Defunciones_Sint,Defunciones_Muer
0,2020-01-01,,,
1,2020-01-02,,,
2,2020-01-03,,,
3,2020-01-04,,,
4,2020-01-05,,,


In [3]:
# Nombres de las columnas
Covid19.columns

Index(['Fecha', 'Confirmados', 'Defunciones_Sint', 'Defunciones_Muer'], dtype='object')

## Datos Mortalidad - Defunciones

In [4]:
# 

Defunciones = pd.read_csv('Series_Defunciones.csv', encoding = 'utf-8-sig')

Defunciones.head()

Unnamed: 0,Fecha,Defunciones,Defunciones_Reg
0,2020-01-01,2436.0,199
1,2020-01-02,2481.0,1476
2,2020-01-03,2226.0,1796
3,2020-01-04,2271.0,1327
4,2020-01-05,2335.0,1081


In [5]:
# Nombres de las columnas
Defunciones.columns

Index(['Fecha', 'Defunciones', 'Defunciones_Reg'], dtype='object')

## Datos Mobility

In [59]:
# 

Mobility = pd.read_csv('Series_Mobility.csv', encoding = 'utf-8-sig')

Mobility = Mobility[['Fecha', 'residential_percent_change_from_baseline', 
                     'retail_and_recreation_percent_change_from_baseline']].rename( 
    columns = {'residential_percent_change_from_baseline': 'Residential', 
               'retail_and_recreation_percent_change_from_baseline': 'Recreation'} )

Mobility.head()

Unnamed: 0,Fecha,Residential,Recreation
0,2020-02-15,-1.0,6.0
1,2020-02-16,-1.0,7.0
2,2020-02-17,-2.0,2.0
3,2020-02-18,-2.0,0.0
4,2020-02-19,-1.0,0.0


In [60]:
# Nombres de las columnas
Mobility.columns

Index(['Fecha', 'Residential', 'Recreation'], dtype='object')

## Uniendo las tablas

In [64]:
#

#Datos = pd.merge(Covid19, Defunciones, how = 'outer', on = 'Fecha')

#Datos = pd.merge(Datos, Mobility, how = 'outer', on = 'Fecha')

Datos = pd.merge(Covid19, Mobility, how = 'outer', on = 'Fecha')

Datos.head()

Unnamed: 0,Fecha,Confirmados,Defunciones_Sint,Defunciones_Muer,Residential,Recreation
0,2020-01-01,,,,,
1,2020-01-02,,,,,
2,2020-01-03,,,,,
3,2020-01-04,,,,,
4,2020-01-05,,,,,


## Pruebas de Raíces Unitarias -- ADF

In [66]:
# Seleccionamos columnas sin NaN's y en una fecha posterior al los últimos días de marzo:

DF = Datos[(Datos['Fecha'] > '2020-03-20')].dropna()

# Calculamos valores en logaritmos

DF['LConfirmados'] = np.log(DF['Confirmados'])

DF['LDefunciones_Sint'] = np.log(DF['Defunciones_Sint'])

DF['LDefunciones_Muer'] = np.log(DF['Defunciones_Muer'])

#DF['LDefunciones'] = np.log(DF['Defunciones'])

#DF['LResidential'] = np.log(DF['Residential'])

#DF['LRecreation'] = np.log(DF['Recreation'])

# Calculamos valores en diferencias logaritmicas

DF['DLConfirmados'] = DF['LConfirmados'].diff(7)

DF['DLDefunciones_Sint'] = DF['LDefunciones_Sint'].diff(7)

DF['DLDefunciones_Muer'] = DF['LDefunciones_Muer'].diff(7)

#DF['DLDefunciones'] = DF['LDefunciones'].diff(7)

DF['DResidential'] = DF['Residential'].diff(7)

DF['DRecreation'] = DF['Recreation'].diff(7)

DF = DF.dropna()

DF.head()

Unnamed: 0,Fecha,Confirmados,Defunciones_Sint,Defunciones_Muer,Residential,Recreation,LConfirmados,LDefunciones_Sint,LDefunciones_Muer,DLConfirmados,DLDefunciones_Sint,DLDefunciones_Muer,DResidential,DRecreation
88,2020-03-29,290.0,62.0,9.0,11.0,-45.0,5.669881,4.127134,2.197225,0.516589,0.831298,1.504077,3.0,-8.0
89,2020-03-30,418.0,75.0,12.0,15.0,-35.0,6.035481,4.317488,2.484907,0.502092,0.556288,2.484907,3.0,-4.0
90,2020-03-31,325.0,69.0,22.0,16.0,-35.0,5.783825,4.234107,3.091042,0.48052,0.707746,2.397895,2.0,0.0
91,2020-04-01,513.0,102.0,20.0,18.0,-37.0,6.240276,4.624973,2.995732,0.776444,0.863773,2.302585,2.0,0.0
92,2020-04-02,469.0,102.0,27.0,19.0,-47.0,6.150603,4.624973,3.295837,0.498114,0.693147,1.216395,3.0,-10.0


In [71]:
# Confirmados

result = adfuller(DF['DLConfirmados'], 
                  regression = 'c', # “c” : constant only; “ct” : constant and trend; “nc” : no constant, no trend
                  autolag='AIC',)

print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Use lags: %i' % result[2])
print('Obs.: %i' % result[3])
print('Critical Values:')
for key, value in result[4].items():
    print('\t%s: %.3f' % (key, value))

ADF Statistic: -0.208874
p-value: 0.937456
Use lags: 17
Obs.: 340
Critical Values:
	1%: -3.450
	5%: -2.870
	10%: -2.571


In [72]:
# Defunciones_Sint

result = adfuller(DF['DLDefunciones_Sint'], 
                  regression = 'c', # “c” : constant only; “ct” : constant and trend; “nc” : no constant, no trend
                  autolag='AIC',)

print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Use lags: %i' % result[2])
print('Obs.: %i' % result[3])
print('Critical Values:')
for key, value in result[4].items():
    print('\t%s: %.3f' % (key, value))

ADF Statistic: 2.181258
p-value: 0.998867
Use lags: 14
Obs.: 343
Critical Values:
	1%: -3.450
	5%: -2.870
	10%: -2.571


In [73]:
# Defunciones_Muer

result = adfuller(DF['DLDefunciones_Muer'], 
                  regression = 'ct', # “c” : constant only; “ct” : constant and trend; “nc” : no constant, no trend
                  autolag='AIC',)

print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Use lags: %i' % result[2])
print('Obs.: %i' % result[3])
print('Critical Values:')
for key, value in result[4].items():
    print('\t%s: %.3f' % (key, value))

ADF Statistic: -3.395547
p-value: 0.051986
Use lags: 17
Obs.: 340
Critical Values:
	1%: -3.986
	5%: -3.423
	10%: -3.135


In [75]:
# Residential

result = adfuller(DF['DResidential'], 
                  regression = 'ct', # “c” : constant only; “ct” : constant and trend; “nc” : no constant, no trend
                  autolag='AIC',)

print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Use lags: %i' % result[2])
print('Obs.: %i' % result[3])
print('Critical Values:')
for key, value in result[4].items():
    print('\t%s: %.3f' % (key, value))

ADF Statistic: -6.051597
p-value: 0.000002
Use lags: 14
Obs.: 343
Critical Values:
	1%: -3.985
	5%: -3.423
	10%: -3.135


## Granger causality

In [57]:
Data = DF[['DLConfirmados', 'DLResidential']] 

grangercausalitytests(Data, [15], addconst = True, verbose = True)


Granger Causality
number of lags (no zero) 15
ssr based F test:         F=0.8905  , p=0.5755  , df_denom=234, df_num=15
ssr based chi2 test:   chi2=15.1266 , p=0.4423  , df=15
likelihood ratio test: chi2=14.7106 , p=0.4725  , df=15
parameter F test:         F=0.8905  , p=0.5755  , df_denom=234, df_num=15


{15: ({'ssr_ftest': (0.8904713880913613, 0.5755307844822487, 234.0, 15),
   'ssr_chi2test': (15.126597297705816, 0.44233719698993224, 15),
   'lrtest': (14.710629175040367, 0.4724549815034105, 15),
   'params_ftest': (0.8904713880913618, 0.5755307844822487, 234.0, 15.0)},
  [<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x7fca5e6a5190>,
   <statsmodels.regression.linear_model.RegressionResultsWrapper at 0x7fca5e689090>,
   array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 1., 0., 0., 0., 0., 0.

In [20]:
# Seleccionamos columnas sin NaN's y en una fecha posterior al los últimos días de marzo:

DF_MA = Datos[(Datos['Fecha'] > '2020-03-20')].dropna()

# Calculamos valores en logaritmos

DF_MA['LConfirmados'] = np.log(DF_MA['Confirmados'].rolling(window = 7).mean())

DF_MA['LDefunciones_Sint'] = np.log(DF_MA['Defunciones_Sint'].rolling(window = 7).mean())

DF_MA['LDefunciones_Muer'] = np.log(DF_MA['Defunciones_Muer'].rolling(window = 7).mean())

DF_MA['LDefunciones'] = np.log(DF_MA['Defunciones'].rolling(window = 7).mean())

DF_MA['LResidential'] = np.log(DF_MA['Residential'].rolling(window = 7).mean())

# Calculamos valores en diferencias logaritmicas

DF_MA['DLConfirmados'] = DF_MA['LConfirmados'].diff(1)

DF_MA['DLDefunciones_Sint'] = DF_MA['LDefunciones_Sint'].diff(1)

DF_MA['DLDefunciones_Muer'] = DF_MA['LDefunciones_Muer'].diff(1)

DF_MA['DLDefunciones'] = DF_MA['LDefunciones'].diff(1)

DF_MA['DLResidential'] = DF_MA['LResidential'].diff(1)

DF_MA = DF_MA.dropna()

DF_MA.head()

Unnamed: 0,Fecha,Confirmados,Defunciones_Sint,Defunciones_Muer,Defunciones,Defunciones_Reg,Residential,LConfirmados,LDefunciones_Sint,LDefunciones_Muer,LDefunciones,LResidential,DLConfirmados,DLDefunciones_Sint,DLDefunciones_Muer,DLDefunciones,DLResidential
88,2020-03-29,290.0,62.0,9.0,1954.0,1029.0,11.0,5.588853,3.874173,1.860752,7.571621,2.639057,0.064539,0.109656,0.169076,0.00494,0.031091
89,2020-03-30,418.0,75.0,12.0,1983.0,2358.0,15.0,5.673323,3.964886,2.079442,7.573531,2.66921,0.084471,0.090714,0.218689,0.001911,0.030153
90,2020-03-31,325.0,69.0,22.0,1874.0,1970.0,16.0,5.732416,4.055505,2.384823,7.570149,2.688819,0.059093,0.090618,0.305382,-0.003383,0.019608
91,2020-04-01,513.0,102.0,20.0,1956.0,2005.0,18.0,5.853023,4.191817,2.597385,7.574192,2.70805,0.120607,0.136312,0.212561,0.004043,0.019231
92,2020-04-02,469.0,102.0,27.0,1901.0,1889.0,19.0,5.925783,4.296313,2.781478,7.566755,2.736221,0.072759,0.104496,0.184093,-0.007437,0.028171


In [29]:
# Confirmados

result = adfuller(DF_MA['DLConfirmados'], 
                  regression = 'c', # “c” : constant only; “ct” : constant and trend; “nc” : no constant, no trend
                  autolag='AIC',)

print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Use lags: %i' % result[2])
print('Obs.: %i' % result[3])
print('Critical Values:')
for key, value in result[4].items():
    print('\t%s: %.3f' % (key, value))

ADF Statistic: -3.304511
p-value: 0.014678
Use lags: 16
Obs.: 263
Critical Values:
	1%: -3.455
	5%: -2.873
	10%: -2.573


In [34]:
# Defunciones_Sint

result = adfuller(DF_MA['DLDefunciones_Sint'], 
                  regression = 'c', # “c” : constant only; “ct” : constant and trend; “nc” : no constant, no trend
                  autolag='AIC',)

print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Use lags: %i' % result[2])
print('Obs.: %i' % result[3])
print('Critical Values:')
for key, value in result[4].items():
    print('\t%s: %.3f' % (key, value))

ADF Statistic: -3.594402
p-value: 0.005869
Use lags: 16
Obs.: 263
Critical Values:
	1%: -3.455
	5%: -2.873
	10%: -2.573


In [32]:
# Defunciones_Muer

result = adfuller(DF_MA['DLDefunciones_Muer'], 
                  regression = 'ct', # “c” : constant only; “ct” : constant and trend; “nc” : no constant, no trend
                  autolag='AIC',)

print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Use lags: %i' % result[2])
print('Obs.: %i' % result[3])
print('Critical Values:')
for key, value in result[4].items():
    print('\t%s: %.3f' % (key, value))

ADF Statistic: -4.115034
p-value: 0.005994
Use lags: 16
Obs.: 263
Critical Values:
	1%: -3.994
	5%: -3.427
	10%: -3.137


In [38]:
# Defunciones

result = adfuller(DF_MA['DLDefunciones'], 
                  regression = 'ct', # “c” : constant only; “ct” : constant and trend; “nc” : no constant, no trend
                  autolag='AIC',)

print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Use lags: %i' % result[2])
print('Obs.: %i' % result[3])
print('Critical Values:')
for key, value in result[4].items():
    print('\t%s: %.3f' % (key, value))

ADF Statistic: -1.692488
p-value: 0.754157
Use lags: 14
Obs.: 265
Critical Values:
	1%: -3.993
	5%: -3.427
	10%: -3.137


In [41]:
# Residential

result = adfuller(DF_MA['DLResidential'], 
                  regression = 'ct', # “c” : constant only; “ct” : constant and trend; “nc” : no constant, no trend
                  autolag='AIC',)

print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Use lags: %i' % result[2])
print('Obs.: %i' % result[3])
print('Critical Values:')
for key, value in result[4].items():
    print('\t%s: %.3f' % (key, value))

ADF Statistic: -7.194162
p-value: 0.000000
Use lags: 6
Obs.: 273
Critical Values:
	1%: -3.992
	5%: -3.427
	10%: -3.137
