# Visualizando datos de COVID-19, Exceso de Mortalidad y Movilidad

Actualización al 24 de marzo de 2021

In [57]:
# Dependencies
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import math
from scipy.stats import chisquare
#from benfordslaw import benfordslaw
from scipy.stats import chi2
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.stattools import grangercausalitytests

import warnings
# Suppress every warning for the rest of the session (no category or module filter is given).
# NOTE(review): this also hides numerical warnings such as the log of a non-positive value,
# and library deprecation notices — consider narrowing the filter.
warnings.filterwarnings('ignore')

## Datos COVID-19

In [15]:
# Read the COVID-19 series file and keep only the date column plus the three
# case/death columns selected here.
Covid19 = (
    pd.read_csv('Series_Casos_23.03.2021.csv', encoding = 'utf-8-sig')
      .loc[:, ['Fecha', 'Confirmados', 'Defunciones_Sint', 'Defunciones_Muer']]
)

# Preview the first rows.
Covid19.head()

Unnamed: 0,Fecha,Confirmados,Defunciones_Sint,Defunciones_Muer
0,2020-01-01,,,
1,2020-01-02,,,
2,2020-01-03,,,
3,2020-01-04,,,
4,2020-01-05,,,


In [16]:
# Column names of the COVID-19 table after the column selection
Covid19.columns

Index(['Fecha', 'Confirmados', 'Defunciones_Sint', 'Defunciones_Muer'], dtype='object')

## Datos Mortalidad - Defunciones

In [17]:
# Read the mortality (deaths) series file as-is.
Defunciones = pd.read_csv(
    'Series_Defunciones.csv',
    encoding = 'utf-8-sig',
)

# Preview the first five rows (the default of head()).
Defunciones.head(5)

Unnamed: 0,Fecha,Defunciones
0,2020-01-01,2436
1,2020-01-02,2481
2,2020-01-03,2226
3,2020-01-04,2271
4,2020-01-05,2335


In [18]:
# Column names of the deaths table
Defunciones.columns

Index(['Fecha', 'Defunciones'], dtype='object')

## Datos de Movilidad (Mobility)

In [69]:
# Read the mobility file, keep the date and the residential indicator,
# and give the indicator the shorter name 'Residential'.
Mobility = (
    pd.read_csv('Series_Mobility.csv', encoding = 'utf-8-sig')
      .loc[:, ['Fecha', 'residential_percent_change_from_baseline']]
      .rename(columns = {'residential_percent_change_from_baseline': 'Residential'})
)

# Preview the first rows.
Mobility.head()

Unnamed: 0,Fecha,Residential
0,2020-02-15,-1
1,2020-02-16,-1
2,2020-02-17,-2
3,2020-02-18,-2
4,2020-02-19,-1


In [70]:
# Column names of the mobility table after selection and renaming
Mobility.columns

Index(['Fecha', 'Residential'], dtype='object')

## Uniendo las tablas

In [71]:
# Outer-join the three tables on 'Fecha': a date present in any table is kept,
# and columns from tables that lack that date are left as NaN.
Datos = (
    Covid19
      .merge(Defunciones, how = 'outer', on = 'Fecha')
      .merge(Mobility, how = 'outer', on = 'Fecha')
)

# Preview the first rows of the combined table.
Datos.head()

Unnamed: 0,Fecha,Confirmados,Defunciones_Sint,Defunciones_Muer,Defunciones,Residential
0,2020-01-01,,,,2436.0,
1,2020-01-02,,,,2481.0,
2,2020-01-03,,,,2226.0,
3,2020-01-04,,,,2271.0,
4,2020-01-05,,,,2335.0,


## Pruebas de Raíces Unitarias -- ADF

In [85]:
# Keep rows dated after 2020-03-20 that have no NaN in any column.
# 'Fecha' is compared as text; the rows displayed earlier use 'YYYY-MM-DD', for which
# text order equals date order.
# .copy() makes DF an independent frame, so the column assignments below never write
# into a slice of Datos.
DF = Datos[Datos['Fecha'] > '2020-03-20'].dropna().copy()

series_cols = ['Confirmados', 'Defunciones_Sint', 'Defunciones_Muer', 'Defunciones', 'Residential']

# Natural logarithm of every series, stored as 'L<name>'.
# NOTE(review): 'Residential' is a percent change from baseline and can be zero or
# negative (the first rows of the mobility table are -1 and -2) — confirm that a log
# transform is intended for it.
for col in series_cols:
    DF['L' + col] = np.log(DF[col])

# First difference of each log series, stored as 'DL<name>'.
# diff(1) subtracts the previous row, so where the dropna above removed dates the
# difference spans more than one calendar day.
for col in series_cols:
    DF['DL' + col] = DF['L' + col].diff(1)

# np.log yields -inf for 0 and NaN for negative inputs. dropna() alone would keep the
# infinite values and pass them on to the tests below, so convert them to NaN first and
# then drop every incomplete row (this also removes the first row, whose differences
# are undefined).
DF = DF.replace([np.inf, -np.inf], np.nan).dropna()

DF.head()

Unnamed: 0,Fecha,Confirmados,Defunciones_Sint,Defunciones_Muer,Defunciones,Residential,LConfirmados,LDefunciones_Sint,LDefunciones_Muer,LDefunciones,LResidential,DLConfirmados,DLDefunciones_Sint,DLDefunciones_Muer,DLDefunciones,DLResidential
82,2020-03-23,253.0,43.0,1.0,1957.0,12.0,5.533389,3.7612,0.0,7.579168,2.484907,0.380098,0.465363,-0.693147,0.036424,0.405465
83,2020-03-24,201.0,34.0,2.0,1920.0,14.0,5.303305,3.526361,0.693147,7.56008,2.639057,-0.230085,-0.23484,0.693147,-0.019088,0.154151
84,2020-03-25,236.0,43.0,2.0,1901.0,16.0,5.463832,3.7612,0.693147,7.550135,2.772589,0.160527,0.23484,0.0,-0.009945,0.133531
85,2020-03-26,285.0,51.0,8.0,2002.0,16.0,5.652489,3.931826,2.079442,7.601902,2.772589,0.188657,0.170626,1.386294,0.051767,0.0
86,2020-03-27,308.0,49.0,8.0,1960.0,17.0,5.7301,3.89182,2.079442,7.5807,2.833213,0.077611,-0.040005,0.0,-0.021202,0.060625


In [86]:
# Augmented Dickey-Fuller unit-root test on the log of confirmed cases.
result = adfuller(
    DF['LConfirmados'],
    regression = 'c',   # constant only, no trend term
    autolag = 'AIC',
)

# The first five entries of the returned tuple are the ones reported below.
adf_stat, p_value, used_lags, n_obs, critical_values = result[:5]

print('ADF Statistic: %f' % adf_stat)
print('p-value: %f' % p_value)
print('Use lags: %i' % used_lags)
print('Obs.: %i' % n_obs)
print('Critical Values:')
for level, threshold in critical_values.items():
    print('\t%s: %.3f' % (level, threshold))

ADF Statistic: -2.971234
p-value: 0.037669
Use lags: 15
Obs.: 268
Critical Values:
	1%: -3.455
	5%: -2.872
	10%: -2.573


In [92]:
# Augmented Dickey-Fuller unit-root test on the log of 'Defunciones_Sint'.
result = adfuller(
    DF['LDefunciones_Sint'],
    regression = 'c',   # constant only, no trend term
    autolag = 'AIC',
)

# The first five entries of the returned tuple are the ones reported below.
adf_stat, p_value, used_lags, n_obs, critical_values = result[:5]

print('ADF Statistic: %f' % adf_stat)
print('p-value: %f' % p_value)
print('Use lags: %i' % used_lags)
print('Obs.: %i' % n_obs)
print('Critical Values:')
for level, threshold in critical_values.items():
    print('\t%s: %.3f' % (level, threshold))

ADF Statistic: -2.053946
p-value: 0.263432
Use lags: 15
Obs.: 268
Critical Values:
	1%: -3.455
	5%: -2.872
	10%: -2.573


In [93]:
# Augmented Dickey-Fuller unit-root test on the log of 'Defunciones_Muer'.
result = adfuller(
    DF['LDefunciones_Muer'],
    regression = 'c',   # constant only, no trend term
    autolag = 'AIC',
)

# The first five entries of the returned tuple are the ones reported below.
adf_stat, p_value, used_lags, n_obs, critical_values = result[:5]

print('ADF Statistic: %f' % adf_stat)
print('p-value: %f' % p_value)
print('Use lags: %i' % used_lags)
print('Obs.: %i' % n_obs)
print('Critical Values:')
for level, threshold in critical_values.items():
    print('\t%s: %.3f' % (level, threshold))

ADF Statistic: -2.222993
p-value: 0.198006
Use lags: 16
Obs.: 267
Critical Values:
	1%: -3.455
	5%: -2.872
	10%: -2.573


In [94]:
# Augmented Dickey-Fuller unit-root test on the log of total deaths ('Defunciones').
result = adfuller(
    DF['LDefunciones'],
    regression = 'c',   # constant only, no trend term
    autolag = 'AIC',
)

# The first five entries of the returned tuple are the ones reported below.
adf_stat, p_value, used_lags, n_obs, critical_values = result[:5]

print('ADF Statistic: %f' % adf_stat)
print('p-value: %f' % p_value)
print('Use lags: %i' % used_lags)
print('Obs.: %i' % n_obs)
print('Critical Values:')
for level, threshold in critical_values.items():
    print('\t%s: %.3f' % (level, threshold))

ADF Statistic: -2.983003
p-value: 0.036513
Use lags: 16
Obs.: 267
Critical Values:
	1%: -3.455
	5%: -2.872
	10%: -2.573


In [91]:
# Augmented Dickey-Fuller unit-root test on the log of the residential mobility
# indicator, with neither a constant nor a trend in the test regression.
#
# 'n' is the current spelling of the "no constant, no trend" option; the older alias
# 'nc' was deprecated and is rejected by recent statsmodels releases.
# NOTE(review): 'n' needs a statsmodels version that accepts it (believed to be 0.13+ —
# confirm against the environment this notebook is pinned to).
result = adfuller(DF['LResidential'],
                  regression = 'n', # 'c': constant only; 'ct': constant and trend; 'n': no constant, no trend
                  autolag='AIC',)

print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Use lags: %i' % result[2])
print('Obs.: %i' % result[3])
print('Critical Values:')
for key, value in result[4].items():
    print('\t%s: %.3f' % (key, value))

ADF Statistic: -1.223731
p-value: 0.202798
Use lags: 15
Obs.: 268
Critical Values:
	1%: -2.574
	5%: -1.942
	10%: -1.616


## Causalidad de Granger

In [98]:
# Two-column input for the test, in the order (LConfirmados, LResidential).
# Per the statsmodels documentation, the null hypothesis is that the second column
# does NOT Granger-cause the first.
Data = DF.loc[:, ['LConfirmados', 'LResidential']]

# Run the tests for lags 1 through 4 with a constant, printing each lag's statistics.
# NOTE(review): both series enter in log levels, yet the ADF cell above does not reject
# a unit root for LResidential — confirm whether the differenced 'DL*' columns were
# intended here.
# NOTE(review): the saved output of this cell ends in "KeyError: 0"; the cause is not
# visible from this code — re-run on a fresh kernel to check.
grangercausalitytests(Data, maxlag = 4, addconst = True, verbose = True)


Granger Causality
number of lags (no zero) 1
ssr based F test:         F=23.5476 , p=0.0000  , df_denom=280, df_num=1
ssr based chi2 test:   chi2=23.7999 , p=0.0000  , df=1
likelihood ratio test: chi2=22.8519 , p=0.0000  , df=1
parameter F test:         F=23.5476 , p=0.0000  , df_denom=280, df_num=1

Granger Causality
number of lags (no zero) 2
ssr based F test:         F=9.5188  , p=0.0001  , df_denom=277, df_num=2
ssr based chi2 test:   chi2=19.3812 , p=0.0001  , df=2
likelihood ratio test: chi2=18.7442 , p=0.0001  , df=2
parameter F test:         F=9.5188  , p=0.0001  , df_denom=277, df_num=2

Granger Causality
number of lags (no zero) 3
ssr based F test:         F=10.9825 , p=0.0000  , df_denom=274, df_num=3
ssr based chi2 test:   chi2=33.7892 , p=0.0000  , df=3
likelihood ratio test: chi2=31.9071 , p=0.0000  , df=3
parameter F test:         F=10.9825 , p=0.0000  , df_denom=274, df_num=3

Granger Causality
number of lags (no zero) 4
ssr based F test:         F=7.0879  , p=0.0000  

KeyError: 0

In [90]:
# Line chart of the log residential-mobility series ('LResidential') over time.
fig = go.Figure()

# The trace label says "log" because the plotted column is the log transform,
# not the raw 'Residential' value.
fig.add_trace( go.Scatter( x = DF['Fecha'],
                           y = DF['LResidential'],
                           name = 'log(Residential)',
                           line_color = 'darkblue' ) )

# A complete title and axis titles let the figure stand on its own; the original
# title was the unfinished string 'Mexico: '.
fig.update_layout(template = 'plotly_white', # "plotly", "plotly_white", "ggplot2", "seaborn", "simple_white", "none"
                  title = 'Mexico: log of residential mobility (percent change from baseline)',
                  xaxis_title = 'Fecha',
                  yaxis_title = 'log(Residential)')

# Show tick labels as full ISO dates.
fig.update_xaxes(tickformat = '%Y-%m-%d')

fig.show()

In [54]:
# Write Series_Casos to 'Series_Casos_<Fecha>.csv' without the index column,
# encoded as UTF-8 with a byte-order mark.
# NOTE(review): neither Series_Casos nor Fecha is defined in the cells visible here;
# on a fresh kernel this cell would fail unless they are defined elsewhere — confirm.
Series_Casos.to_csv(
    'Series_Casos_' + Fecha + '.csv',
    encoding = 'utf-8-sig',
    index = False,
)