<a href="https://colab.research.google.com/github/amontenegrot/COVID19/blob/main/COVID19_mundial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Importar datos

Datos oficiales:

[Mundo](https://ourworldindata.org/coronavirus-source-data)

[Colombia](https://www.ins.gov.co/Noticias/Paginas/Coronavirus.aspx)

## Librerías

Lectura de datos y ajustes de visualización

In [1]:
#%load_ext google.colab.data_table #Visualizar el DataFrame de forma más eficiente en la pantalla

In [2]:
!pip install cufflinks --upgrade
!pip install pycountry

Requirement already up-to-date: cufflinks in /usr/local/lib/python3.6/dist-packages (0.17.3)


In [3]:
import pandas as pd
import numpy as np
import cufflinks as cf
import pycountry

import plotly.express as px
import plotly.graph_objects as go
import plotly.offline as py
from plotly.subplots import make_subplots
from pylab import *
from scipy.optimize import curve_fit
from scipy.integrate import odeint

# Cumulative confirmed cases: one row per date, one column per location.
dfWorld = pd.read_csv(
    filepath_or_buffer='https://covid.ourworldindata.org/data/ecdc/total_cases.csv'
)
# Companion file with total deaths (presumably the same layout -- confirm).
dfMuertes = pd.read_csv(
    filepath_or_buffer='https://covid.ourworldindata.org/data/ecdc/total_deaths.csv'
)

## Funciones

In [4]:
def get_alph_3(location):
    """Return the alpha-3 code that pycountry holds for a country name.

    Parameters
    ----------
    location : str
        Country name, looked up with ``pycountry.countries.get(name=...)``.

    Returns
    -------
    str or None
        The ``alpha_3`` attribute of the matching record, or ``None`` when
        no usable record is found (for instance an aggregate label such as
        'World').
    """
    try:
        country = pycountry.countries.get(name=location)
    except (LookupError, AttributeError, TypeError):
        # Some pycountry releases raise on a miss or on non-string input
        # instead of returning None; keep the best-effort contract without
        # also swallowing KeyboardInterrupt/SystemExit like a bare except.
        return None
    # getattr covers a None result as well as a record without alpha_3.
    return getattr(country, 'alpha_3', None)

In [5]:
def get_alph_3_dept(location):
    """Return the subdivision code of a Colombian department or district.

    The lookup fetches every subdivision registered for Colombia and then
    matches on the name. ``pycountry.subdivisions.get`` accepts a single
    criterion, so passing ``name`` and ``country_code`` together raises and
    the function could never return a match. Subdivisions are also keyed by
    the alpha-2 country code ('CO'), not the alpha-3 one ('COL').

    Parameters
    ----------
    location : str
        Subdivision name exactly as pycountry spells it.

    Returns
    -------
    str or None
        The ``code`` attribute of the matching subdivision (presumably an
        ISO 3166-2 code such as 'CO-ANT' -- confirm against pycountry), or
        ``None`` when nothing matches.
    """
    try:
        subdivisions = pycountry.subdivisions.get(country_code='CO')
    except (LookupError, AttributeError, TypeError):
        # Best-effort lookup: an unusable database yields "no code".
        return None
    for subdivision in subdivisions or ():
        if subdivision.name == location:
            return subdivision.code
    return None

In [6]:
# Named colour constants, each an 'rgb(r, g, b)' string.
color1, color2, color3, color4, color5 = (
    'rgb(34, 124, 114)',
    'rgb(15, 103, 94)',
    'rgb(1, 82, 73)',
    'rgb(0, 59, 53)',
    'rgb(0, 34, 30)',
)
color6, color7, color8, color9, color10 = (
    'rgb(0, 30, 27)',
    'rgb(55, 164, 117)',
    'rgb(27, 148, 96)',
    'rgb(184, 237, 214)',
    'rgb(131, 214, 178)',
)

# Análisis

Preprocesamiento de los datos

In [7]:
#pd.set_option('max_rows', 200) # Uncomment to avoid the "..." truncation when printing
# Seed the first Colombia cell with 0 (presumably it starts as NaN) so the
# forward fill below has a value to propagate from the first row on.
dfWorld.at[0, 'Colombia'] = 0
# Fill every NaN with the previous value in its column. DataFrame.ffill()
# replaces fillna(method='ffill'), whose `method` argument is deprecated in
# recent pandas releases.
dfWorld = dfWorld.ffill()

print(dfWorld.head())
#print(dfWorld['Colombia'].head())

         date  World  Afghanistan  ...  Yemen  Zambia  Zimbabwe
0  2019-12-31     27          NaN  ...    NaN     NaN       NaN
1  2020-01-01     27          NaN  ...    NaN     NaN       NaN
2  2020-01-02     27          NaN  ...    NaN     NaN       NaN
3  2020-01-03     44          NaN  ...    NaN     NaN       NaN
4  2020-01-04     44          NaN  ...    NaN     NaN       NaN

[5 rows x 216 columns]


Procesamiento de los datos

In [8]:
# One row per location, one column per day offset (0, 1, 2, ...).
# 'date' is dropped BEFORE transposing: transposing with the date strings
# still in the frame turns every resulting column into object dtype, so
# 'Total' would not be numeric for the plots and filters that follow.
dfWorld2 = dfWorld.drop(columns='date').T
# Label of the last day column; it becomes the 'Total' (latest cumulative) column.
columna = dfWorld2.columns[-1]
# reset_index() moves the location names into a column called 'index'.
dfWorld2 = dfWorld2.reset_index().rename(
    columns={'index': 'Pais', int(columna): 'Total'}
)

dfWorld2.head()

Unnamed: 0,Pais,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,...,295,296,297,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313,314,315,316,317,318,319,320,321,322,323,324,325,326,327,328,329,330,331,332,333,Total
0,World,27.0,27.0,27.0,44.0,44.0,59.0,59.0,59.0,59.0,59.0,59.0,59.0,59.0,60.0,61.0,62.0,62.0,67.0,84.0,220.0,240.0,393.0,535.0,632.0,898.0,1351.0,2024.0,2821.0,4588.0,6068.0,7825.0,9826.0,11947.0,14555.0,17373.0,20616.0,24525.0,28276.0,31494.0,...,40915670,41367912,41840416,42329189,42788799,43227729,43643644,44123016,44636477,45184578,45726736,46184735,46671967,47142461,47647460,48217351,48829738,49435759,50029033,50557870,51060158,51627093,52250604,52898103,53533932,54128402,54642940,55187421,55770400,56396170,57037140,57716898,58288941,58807799,59341326,59903030,60552696,61102596,61718735,62271031
1,Afghanistan,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,40375,40510,40626,40687,40768,40833,41032,41145,41145,41268,41425,41501,41633,41728,41814,41935,41975,42033,42159,42239,42463,42463,42609,42969,43035,43240,43403,43468,43851,43851,44133,44365,44519,44771,45017,45202,45402,45402,45616,45844
2,Albania,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,17651,17948,18250,18556,18858,19157,19445,19729,20040,20315,20634,20875,21202,21523,21904,22300,22721,23210,23705,24206,24731,25294,25801,26211,26701,27233,27830,28432,29126,29837,30623,31459,32196,32761,33556,34300,34944,35600,36245,36790
3,Algeria,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,54839,55091,55357,55630,55880,56143,56419,56706,57026,57332,57651,57942,58272,58574,58979,59527,60169,60800,61381,62051,62693,63446,64257,65108,65975,66819,67679,68589,69591,70629,71652,72755,73774,74862,75867,77000,78025,79110,79110,81212
4,Andorra,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,3623,3811,3811,4038,4038,4038,4325,4410,4517,4567,4665,4756,4825,4888,4910,5045,5135,5135,5319,5383,5437,5477,5567,5616,5725,5725,5872,5914,5951,6018,6066,6142,6207,6256,6304,6351,6428,6534,6610,6670


Total de casos según el país

In [9]:
# Line chart of the latest cumulative total ('Total') for every entry in 'Pais'.
fig = px.line(
    data_frame=dfWorld2,
    x='Pais',
    y='Total',
)
fig.show()

#fig.write_html('CasosMundo.html')

Comparación entre países

In [10]:
# Countries to compare; add or remove names as needed. Each name must match
# a column header of dfWorld.
cols_plot = ['United States', 'China', 'Italy', 'Venezuela', 'Ecuador', 'Colombia']
fig = make_subplots(rows=len(cols_plot), cols=1, start_cell="bottom-left")
# One subplot row per country. Dates go on the x axis and cumulative cases on
# the y axis (they were swapped before, which drew time vertically).
for row, col in enumerate(cols_plot, start=1):
    trace = go.Scatter(x=dfWorld['date'], y=dfWorld[col], mode='lines', name=col)
    # add_trace(row=, col=) is the supported replacement for append_trace.
    fig.add_trace(trace, row=row, col=1)
fig.show()

fig.write_html('ContrastePaises.html')

Búsqueda de países con un número n de casos

In [11]:
# Keep the locations whose latest total lies in [1,000,000, 1,300,000]
# (both bounds inclusive), then flip the frame so each location is a column.
dfWorld3 = dfWorld2[dfWorld2['Total'].between(1000000, 1300000)].T.reset_index()

# After the transpose, row 0 holds the labels: 'Pais' followed by the location names.
nombres = dfWorld3.iloc[0]
dfWorld3 = dfWorld3[1:]  # drop that label row from the data
dfWorld3.columns = nombres  # and use it as the header instead
dfWorld3 = dfWorld3.drop(columns='Pais')  # this column only held the old column labels

# Names of the locations that satisfy the condition.
paises = list(dfWorld3.columns)

print(paises)

['Colombia', 'Germany', 'Mexico']


Gráfica de países que cumplen la condición

In [12]:
# Build the figure explicitly so that the object shown and the object saved
# are the same. Previously iplot received a bare list of traces, so `fig`
# still pointed at the figure from an earlier cell and write_html exported
# that one to 'NumeroCasos.html'.
fig = go.Figure(data=[
    go.Scatter(x=dfWorld3.index, y=dfWorld3[col], name=col)
    for col in dfWorld3.columns
])
py.iplot(fig, filename='cufflinks/multiple-lines-on-same-chart')

fig.write_html('NumeroCasos.html')

## Mapas

In [13]:
# Show the per-location frame as the cell's rich output (pandas truncates long frames with "...").
dfWorld2

Unnamed: 0,Pais,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,...,295,296,297,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313,314,315,316,317,318,319,320,321,322,323,324,325,326,327,328,329,330,331,332,333,Total
0,World,27,27,27,44,44,59,59,59,59,59,59,59,59,60,61,62,62,67,84,220,240,393,535,632,898,1351,2024,2821,4588,6068,7825,9826,11947,14555,17373,20616,24525,28276,31494,...,40915670,41367912,41840416,42329189,42788799,43227729,43643644,44123016,44636477,45184578,45726736,46184735,46671967,47142461,47647460,48217351,48829738,49435759,50029033,50557870,51060158,51627093,52250604,52898103,53533932,54128402,54642940,55187421,55770400,56396170,57037140,57716898,58288941,58807799,59341326,59903030,60552696,61102596,61718735,62271031
1,Afghanistan,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,40375,40510,40626,40687,40768,40833,41032,41145,41145,41268,41425,41501,41633,41728,41814,41935,41975,42033,42159,42239,42463,42463,42609,42969,43035,43240,43403,43468,43851,43851,44133,44365,44519,44771,45017,45202,45402,45402,45616,45844
2,Albania,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,17651,17948,18250,18556,18858,19157,19445,19729,20040,20315,20634,20875,21202,21523,21904,22300,22721,23210,23705,24206,24731,25294,25801,26211,26701,27233,27830,28432,29126,29837,30623,31459,32196,32761,33556,34300,34944,35600,36245,36790
3,Algeria,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,54839,55091,55357,55630,55880,56143,56419,56706,57026,57332,57651,57942,58272,58574,58979,59527,60169,60800,61381,62051,62693,63446,64257,65108,65975,66819,67679,68589,69591,70629,71652,72755,73774,74862,75867,77000,78025,79110,79110,81212
4,Andorra,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,3623,3811,3811,4038,4038,4038,4325,4410,4517,4567,4665,4756,4825,4888,4910,5045,5135,5135,5319,5383,5437,5477,5567,5616,5725,5725,5872,5914,5951,6018,6066,6142,6207,6256,6304,6351,6428,6534,6610,6670
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
210,Wallis and Futuna,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,3,3,3,3
211,Western Sahara,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,766,766,766,766,766,766,766,766,766,766,766,766,766,766,766,766,766,766,766,766,766,766,766,766,766,766,766,766,766,766,766,766,766,766,766,766,766,766,766,766
212,Yemen,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,2060,2061,2061,2061,2063,2063,2063,2063,2063,2063,2063,2063,2063,2063,2067,2067,2067,2070,2070,2070,2070,2070,2070,2070,2072,2072,2072,2072,2072,2083,2086,2090,2093,2099,2107,2114,2124,2137,2148,2160
213,Zambia,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,15982,16000,16035,16095,16117,16117,16200,16243,16325,16325,16415,16432,16480,16543,16661,16698,16770,16819,16908,16954,16971,16997,17036,17036,17093,17097,17123,17187,17243,17280,17350,17373,17394,17424,17454,17466,17535,17553,17569,17589


In [14]:
# New 'Codigo' column: the alpha-3 code returned by get_alph_3 for each name
# in 'Pais' (None when the name is not resolved).
dfWorld2['Codigo'] = dfWorld2['Pais'].apply(get_alph_3)

print(dfWorld2.head())

          Pais    0    1    2  ...       332       333     Total Codigo
0        World   27   27   27  ...  61102596  61718735  62271031   None
1  Afghanistan  NaN  NaN  NaN  ...     45402     45616     45844    AFG
2      Albania  NaN  NaN  NaN  ...     35600     36245     36790    ALB
3      Algeria  NaN  NaN  NaN  ...     79110     79110     81212    DZA
4      Andorra  NaN  NaN  NaN  ...      6534      6610      6670    AND

[5 rows x 337 columns]


In [15]:
# World map: locations are matched through 'Codigo' and coloured by 'Total';
# the hover label shows the name stored in 'Pais'.
fig = px.choropleth(
    data_frame=dfWorld2,
    locations='Codigo',
    hover_name='Pais',
    color='Total',
    color_continuous_scale=px.colors.sequential.Plasma,
)

fig.show()

fig.write_html('CasosMundial.html')

## Series de tiempo

In [16]:
# Time-indexed copy of the case table.
dfWorld4 = (
    dfWorld
    # Fill NaNs with the previous value in each column. ffill() replaces
    # fillna(method='ffill'), whose `method` argument is deprecated in
    # recent pandas releases.
    .ffill()
    # Parse the 'date' strings into datetimes so resampling works.
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    # Use the dates as the index of the frame.
    .set_index('date')
)

In [17]:
# Weekly summary for Colombia. 'PSemanal' is the mean of the cumulative case
# count within each weekly bin, rounded to 2 decimals. With 'W-MON' the bins
# END on Monday (the printed labels are Mondays).
resumen = (
    dfWorld4['Colombia']
    .resample('W-MON')
    .mean()
    .round(2)
    .to_frame(name='PSemanal')
)
print(resumen)

              PSemanal
date                  
2020-01-06        0.00
2020-01-13        0.00
2020-01-20        0.00
2020-01-27        0.00
2020-02-03        0.00
2020-02-10        0.00
2020-02-17        0.00
2020-02-24        0.00
2020-03-02        0.00
2020-03-09        0.43
2020-03-16       17.00
2020-03-23      136.43
2020-03-30      499.14
2020-04-06     1155.43
2020-04-13     2227.71
2020-04-20     3288.71
2020-04-27     4635.00
2020-05-04     6603.29
2020-05-11     9515.71
2020-05-18    13593.43
2020-05-25    18532.86
2020-06-01    25537.29
2020-06-08    34956.86
2020-06-15    45243.43
2020-06-22    60402.57
2020-06-29    81038.43
2020-07-06   105858.86
2020-07-13   134852.71
2020-07-20   174659.71
2020-07-27   226165.14
2020-08-03   286557.29
2020-08-10   356828.57
2020-08-17   433504.57
2020-08-24   511152.43
2020-08-31   580926.86
2020-09-07   641312.71
2020-09-14   694321.71
2020-09-21   743535.57
2020-09-28   791496.29
2020-10-05   835999.00
2020-10-12   886313.00
2020-10-19 

In [18]:
# 'PSemanalMin': lowest cumulative count observed in each weekly bin (bins end on Monday).
resumen = resumen.assign(
    PSemanalMin=dfWorld4['Colombia'].resample('W-MON').min()
)
print(resumen)

              PSemanal  PSemanalMin
date                               
2020-01-06        0.00          0.0
2020-01-13        0.00          0.0
2020-01-20        0.00          0.0
2020-01-27        0.00          0.0
2020-02-03        0.00          0.0
2020-02-10        0.00          0.0
2020-02-17        0.00          0.0
2020-02-24        0.00          0.0
2020-03-02        0.00          0.0
2020-03-09        0.43          0.0
2020-03-16       17.00          3.0
2020-03-23      136.43         57.0
2020-03-30      499.14        306.0
2020-04-06     1155.43        798.0
2020-04-13     2227.71       1579.0
2020-04-20     3288.71       2852.0
2020-04-27     4635.00       3977.0
2020-05-04     6603.29       5597.0
2020-05-11     9515.71       7973.0
2020-05-18    13593.43      11613.0
2020-05-25    18532.86      16295.0
2020-06-01    25537.29      21981.0
2020-06-08    34956.86      30493.0
2020-06-15    45243.43      40719.0
2020-06-22    60402.57      53063.0
2020-06-29    81038.43      

In [19]:
# 'PSemanalMax': highest cumulative count observed in each weekly bin (bins end on Monday).
resumen = resumen.assign(
    PSemanalMax=dfWorld4['Colombia'].resample('W-MON').max()
)
print(resumen)

              PSemanal  PSemanalMin  PSemanalMax
date                                            
2020-01-06        0.00          0.0          0.0
2020-01-13        0.00          0.0          0.0
2020-01-20        0.00          0.0          0.0
2020-01-27        0.00          0.0          0.0
2020-02-03        0.00          0.0          0.0
2020-02-10        0.00          0.0          0.0
2020-02-17        0.00          0.0          0.0
2020-02-24        0.00          0.0          0.0
2020-03-02        0.00          0.0          0.0
2020-03-09        0.43          0.0          1.0
2020-03-16       17.00          3.0         45.0
2020-03-23      136.43         57.0        235.0
2020-03-30      499.14        306.0        702.0
2020-04-06     1155.43        798.0       1485.0
2020-04-13     2227.71       1579.0       2776.0
2020-04-20     3288.71       2852.0       3792.0
2020-04-27     4635.00       3977.0       5379.0
2020-05-04     6603.29       5597.0       7668.0
2020-05-11     9515.

In [20]:
# 'AcumFinSemana': cumulative count on the last available day of each weekly
# bin (bins end on Monday).
resumen = resumen.assign(
    AcumFinSemana=dfWorld4['Colombia'].resample('W-MON').last()
)
print(resumen)

              PSemanal  PSemanalMin  PSemanalMax  AcumFinSemana
date                                                           
2020-01-06        0.00          0.0          0.0            0.0
2020-01-13        0.00          0.0          0.0            0.0
2020-01-20        0.00          0.0          0.0            0.0
2020-01-27        0.00          0.0          0.0            0.0
2020-02-03        0.00          0.0          0.0            0.0
2020-02-10        0.00          0.0          0.0            0.0
2020-02-17        0.00          0.0          0.0            0.0
2020-02-24        0.00          0.0          0.0            0.0
2020-03-02        0.00          0.0          0.0            0.0
2020-03-09        0.43          0.0          1.0            1.0
2020-03-16       17.00          3.0         45.0           45.0
2020-03-23      136.43         57.0        235.0          235.0
2020-03-30      499.14        306.0        702.0          702.0
2020-04-06     1155.43        798.0     

### Contraste mensual (país)

In [21]:
# Narrow the time-indexed frame to the countries being contrasted.
# filter(items=...) keeps only the listed columns that exist.
dfWorld4 = dfWorld4.filter(
    items=[
        'Colombia',
        'United States',
        'Spain',
        'Italy',
        'Venezuela',
        'Ecuador',
    ],
    axis='columns',
)

In [22]:
# Print frames with all their columns on a single, unwrapped line.
pd.set_option('display.max_columns', None)
pd.set_option('display.expand_frame_repr', False)

# Monthly summary: the last available value of each month for every column.
# (The previous empty pd.DataFrame() initialisation was overwritten
# immediately, so it has been removed.)
resumen2 = dfWorld4.resample('M').last()
print(resumen2)

             Colombia  United States      Spain      Italy  Venezuela   Ecuador
date                                                                           
2019-12-31        0.0            NaN        NaN        NaN        NaN       NaN
2020-01-31        0.0            6.0        NaN        3.0        NaN       NaN
2020-02-29        0.0           66.0       54.0      888.0        NaN       NaN
2020-03-31      798.0       164620.0   104267.0   101739.0      135.0    1966.0
2020-04-30     6211.0      1039909.0   215183.0   203591.0      331.0   24675.0
2020-05-31    28236.0      1770384.0   239429.0   232664.0     1459.0   38571.0
2020-06-30    95043.0      2590552.0   249271.0   240436.0     5530.0   55665.0
2020-07-31   286020.0      4495014.0   288522.0   247158.0    17859.0   84370.0
2020-08-31   607938.0      5997163.0   462858.0   268218.0    45868.0  113648.0
2020-09-30   824042.0      7191061.0   769188.0   313011.0    74363.0  135749.0
2020-10-31  1063151.0      9047427.0  11

In [23]:
# Row-to-row change of each column, taken as an absolute value
# (so any decrease shows up as a positive number). The first row is NaN.
diferencias = dfWorld4.diff().abs()
print(diferencias)

            Colombia  United States    Spain    Italy  Venezuela  Ecuador
date                                                                     
2019-12-31       NaN            NaN      NaN      NaN        NaN      NaN
2020-01-01       0.0            NaN      NaN      NaN        NaN      NaN
2020-01-02       0.0            NaN      NaN      NaN        NaN      NaN
2020-01-03       0.0            NaN      NaN      NaN        NaN      NaN
2020-01-04       0.0            NaN      NaN      NaN        NaN      NaN
...              ...            ...      ...      ...        ...      ...
2020-11-25    7515.0       170293.0  10222.0  23227.0      355.0    492.0
2020-11-26    8497.0       186589.0  12289.0  25852.0      319.0    794.0
2020-11-27    9496.0       106091.0  10853.0  29001.0        0.0    908.0
2020-11-28   10023.0       207913.0      0.0  28342.0      398.0   1396.0
2020-11-29    9103.0       154893.0      0.0  26315.0      309.0   1375.0

[335 rows x 6 columns]


In [24]:
# Countries to draw; each name must be a column of `diferencias`.
cols_plot = ['Colombia', 'United States', 'Spain', 'Italy', 'Venezuela', 'Ecuador']
fig = make_subplots(rows=len(cols_plot), cols=1, start_cell="bottom-left")
# One subplot row per country: the frame's index on x, its values on y.
for row, col in enumerate(cols_plot, start=1):
    trace = go.Scatter(x=diferencias.index, y=diferencias[col], mode='lines', name=col)
    # add_trace(row=, col=) is the supported replacement for append_trace.
    fig.add_trace(trace, row=row, col=1)
fig.show()

fig.write_html('MitigacionCasos.html')