In [1]:
import pandas as pd
import numpy as np
from scipy import stats
import math  
from datetime import datetime

# Carga de archivos

In [2]:
df_dep_mun = pd.read_csv('Data\_DepartamentosMunicipios.csv', encoding='utf-8-sig')

df_year = pd.read_csv('Data\_DengueAnual.csv', encoding='utf-8-sig')
df_year = df_year[(df_year['ANO'] != 2006) & (df_year['ANO'] != 2021)]

df_semanal = pd.read_csv('Data\_DengueSemanal.csv', encoding='utf-8-sig')
df_semanal = df_semanal[(df_semanal['ANO'] != 2006) & (df_semanal['ANO'] != 2021)]

## Organización de los DF

In [3]:
df_year['ANO'] = df_year['ANO'].astype(int)
df_year['COD_DPTO'] = df_year['COD_DPTO'].astype(int)
df_year['COD_MUNICIPIO'] = df_year['COD_MUNICIPIO'].astype(int)

df_dep_mun['COD_DPTO'] = df_dep_mun['COD_DPTO'].astype(int)
df_dep_mun['COD_MUNICIPIO'] = df_dep_mun['COD_MUNICIPIO'].astype(int)

df_year = df_year.join(df_dep_mun[['DEPARTAMENTO', 'MUNICIPIO', 'COD_MUNICIPIO']].set_index('COD_MUNICIPIO'), on='COD_MUNICIPIO', rsuffix='_')
df_year = df_year[~df_year['DEPARTAMENTO'].isin(['EXTERIOR', 'PROCEDENCIA DESCONOCIDA'])]
df_year = df_year[~df_year['MUNICIPIO'].isin(['MUNICIPIO DESCONOCIDO'])]
df_year

Unnamed: 0,ANO,COD_MUNICIPIO,COD_DPTO,DENGUE,DENGUE GRAVE,MORTALIDAD POR DENGUE,POBLACION,DEPARTAMENTO,MUNICIPIO
1091,2007,5000,5,49,11,0,,ANTIOQUIA,* ANTIOQUIA. MUNICIPIO DESCONOCIDO
1092,2007,5001,5,4,1,0,2265244.0,ANTIOQUIA,MEDELLIN
1093,2007,5002,5,0,0,0,20022.0,ANTIOQUIA,ABEJORRAL
1094,2007,5004,5,0,0,0,2570.0,ANTIOQUIA,ABRIAQUI
1095,2007,5021,5,0,0,0,3747.0,ANTIOQUIA,ALEJANDRIA
...,...,...,...,...,...,...,...,...,...
16360,2020,99000,99,0,0,0,,VICHADA,* VICHADA. MUNICIPIO DESCONOCIDO
16361,2020,99001,99,1,0,0,17031.0,VICHADA,PUERTO CARREÑO
16362,2020,99524,99,2,0,0,18248.0,VICHADA,LA PRIMAVERA
16363,2020,99624,99,0,0,0,4310.0,VICHADA,SANTA ROSALIA


In [4]:
df_year.set_index(['ANO', 'COD_MUNICIPIO'], inplace=True)

In [5]:
df_semanal.set_index(['ANO', 'COD_MUNICIPIO'], inplace=True)
df_semanal = df_semanal.join(df_year[['POBLACION', 'DEPARTAMENTO', 'MUNICIPIO']], on=None, rsuffix='_')
df_year.reset_index(inplace = True)
df_semanal.reset_index(inplace = True)

## Cálculo de los indicadores anuales por municipio

In [6]:
df_year = df_year[~df_year['POBLACION'].isna()]
df_year['INCIDENCIA DENGUE'] = df_year['DENGUE'] * 100000 / df_year['POBLACION']
df_year['INCIDENCIA DENGUE GRAVE'] = df_year['DENGUE GRAVE'] * 100000 / df_year['POBLACION']
df_year['LETALIDAD'] = df_year['MORTALIDAD POR DENGUE'] * 100 / df_year['DENGUE GRAVE']
df_year

Unnamed: 0,ANO,COD_MUNICIPIO,COD_DPTO,DENGUE,DENGUE GRAVE,MORTALIDAD POR DENGUE,POBLACION,DEPARTAMENTO,MUNICIPIO,INCIDENCIA DENGUE,INCIDENCIA DENGUE GRAVE,LETALIDAD
1,2007,5001,5,4,1,0,2265244.0,ANTIOQUIA,MEDELLIN,0.176581,0.044145,0.0
2,2007,5002,5,0,0,0,20022.0,ANTIOQUIA,ABEJORRAL,0.000000,0.000000,
3,2007,5004,5,0,0,0,2570.0,ANTIOQUIA,ABRIAQUI,0.000000,0.000000,
4,2007,5021,5,0,0,0,3747.0,ANTIOQUIA,ALEJANDRIA,0.000000,0.000000,
5,2007,5030,5,0,0,0,27709.0,ANTIOQUIA,AMAGA,0.000000,0.000000,
...,...,...,...,...,...,...,...,...,...,...,...,...
15268,2020,97889,97,0,0,0,1218.0,VAUPES,YAVARATE (CD),0.000000,0.000000,
15270,2020,99001,99,1,0,0,17031.0,VICHADA,PUERTO CARREÑO,5.871646,0.000000,
15271,2020,99524,99,2,0,0,18248.0,VICHADA,LA PRIMAVERA,10.960105,0.000000,
15272,2020,99624,99,0,0,0,4310.0,VICHADA,SANTA ROSALIA,0.000000,0.000000,


## Cálculo de los indicadores semanales por municipio

In [7]:
df_semanal = df_semanal[~df_semanal['POBLACION'].isna()]
df_semanal['INCIDENCIA DENGUE'] = df_semanal['DENGUE'] * 100000 / df_semanal['POBLACION']
df_semanal['INCIDENCIA DENGUE GRAVE'] = df_semanal['DENGUE GRAVE'] * 100000 / df_semanal['POBLACION']
df_semanal['LETALIDAD'] = df_semanal['MORTALIDAD POR DENGUE'] * 100 / df_semanal['DENGUE GRAVE']
df_semanal.reset_index(inplace = True, drop=True)
df_semanal

Unnamed: 0,ANO,COD_MUNICIPIO,FECHA,SEMANA,COD_DPTO,DENGUE,DENGUE GRAVE,MORTALIDAD POR DENGUE,POBLACION,DEPARTAMENTO,MUNICIPIO,INCIDENCIA DENGUE,INCIDENCIA DENGUE GRAVE,LETALIDAD
0,2007,5001,2006-12-31,1,5,1,0,0,2265244.0,ANTIOQUIA,MEDELLIN,0.044145,0.0,
1,2007,5001,2007-01-07,2,5,0,0,0,2265244.0,ANTIOQUIA,MEDELLIN,0.000000,0.0,
2,2007,5001,2007-01-14,3,5,0,0,0,2265244.0,ANTIOQUIA,MEDELLIN,0.000000,0.0,
3,2007,5001,2007-01-21,4,5,0,0,0,2265244.0,ANTIOQUIA,MEDELLIN,0.000000,0.0,
4,2007,5001,2007-01-28,5,5,0,0,0,2265244.0,ANTIOQUIA,MEDELLIN,0.000000,0.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
773393,2020,99773,2020-11-29,49,99,0,0,0,41459.0,VICHADA,CUMARIBO,0.000000,0.0,
773394,2020,99773,2020-12-06,50,99,0,0,0,41459.0,VICHADA,CUMARIBO,0.000000,0.0,
773395,2020,99773,2020-12-13,51,99,0,0,0,41459.0,VICHADA,CUMARIBO,0.000000,0.0,
773396,2020,99773,2020-12-20,52,99,0,0,0,41459.0,VICHADA,CUMARIBO,0.000000,0.0,


# Construcción de las gráficas semanales por municipio

In [8]:
%%time
df_semanal['P25'] = np.nan
df_semanal['MED'] = np.nan
df_semanal['P75'] = np.nan

df_semanal['lower_limit'] = np.nan
df_semanal['upper_limit'] = np.nan
df_semanal['observed_reason'] = np.nan
df_semanal['expected_reason'] = 1

df_semanal['lower_limit_IC95'] = np.nan
df_semanal['upper_limit_IC95'] = np.nan
df_semanal['threshold_IC95'] = np.nan

for i in range(len(df_semanal)):

    df_temp_past = df_semanal[(df_semanal['COD_MUNICIPIO'] == df_semanal.iloc[i]['COD_MUNICIPIO']) & 
            (df_semanal['ANO'] < df_semanal.iloc[i]['ANO']) &
            (df_semanal['SEMANA'] == df_semanal.iloc[i]['SEMANA'])]['DENGUE']

    df_semanal.iloc[i, df_semanal.columns.get_loc('P25')] = df_temp_past.quantile(0.25)
    df_semanal.iloc[i, df_semanal.columns.get_loc('MED')] = df_temp_past.median()
    df_semanal.iloc[i, df_semanal.columns.get_loc('P75')] = df_temp_past.quantile(0.75)



    if df_semanal.iloc[i]['SEMANA'] == 1:
        df_temp = df_semanal[
            (df_semanal['COD_MUNICIPIO'] == df_semanal.iloc[i]['COD_MUNICIPIO']) & 
            (df_semanal['ANO'] < df_semanal.iloc[i]['ANO']) & 
            ((df_semanal['SEMANA'] == df_semanal.iloc[i]['SEMANA']) | 
            (df_semanal['SEMANA'] == df_semanal.iloc[i]['SEMANA'] + 1)) |
            ((df_semanal['COD_MUNICIPIO'] == df_semanal.iloc[i]['COD_MUNICIPIO']) & 
            (df_semanal['ANO'] < df_semanal.iloc[i]['ANO'] - 1) & 
            (df_semanal['SEMANA'] == 12))
        ]
    elif df_semanal.iloc[i]['SEMANA'] == 12:
        df_temp = df_semanal[
            (df_semanal['COD_MUNICIPIO'] == df_semanal.iloc[i]['COD_MUNICIPIO']) & 
            (df_semanal['ANO'] < df_semanal.iloc[i]['ANO']) & 
            ((df_semanal['SEMANA'] == df_semanal.iloc[i]['SEMANA']) | 
            (df_semanal['SEMANA'] == df_semanal.iloc[i]['SEMANA'] - 1)) |
            ((df_semanal['COD_MUNICIPIO'] == df_semanal.iloc[i]['COD_MUNICIPIO']) & 
            (df_semanal['ANO'] < df_semanal.iloc[i]['ANO'] + 1) & 
            (df_semanal['SEMANA'] == 1))
        ]
    else:
        df_temp = df_semanal[
            (df_semanal['COD_MUNICIPIO'] == df_semanal.iloc[i]['COD_MUNICIPIO']) & 
            (df_semanal['ANO'] < df_semanal.iloc[i]['ANO']) & 
            ((df_semanal['SEMANA'] == df_semanal.iloc[i]['SEMANA']) | 
            (df_semanal['SEMANA'] == df_semanal.iloc[i]['SEMANA'] + 1) | 
            (df_semanal['SEMANA'] == df_semanal.iloc[i]['SEMANA'] - 1))]


    expected_number = df_temp['DENGUE'].mean()
    standar_dev = df_temp['DENGUE'].std()
    CV = standar_dev / expected_number
    df_semanal.iloc[i, df_semanal.columns.get_loc('lower_limit')] = 1 - (1.96 * CV)
    df_semanal.iloc[i, df_semanal.columns.get_loc('upper_limit')] = 1 + (1.96 * CV)
    df_semanal.iloc[i, df_semanal.columns.get_loc('observed_reason')] = df_semanal.iloc[i]['DENGUE'] / expected_number



    geometric_mean = stats.gmean(df_temp_past)
    standar_dev1 = df_temp_past.std()
    standar_error = standar_dev1 / math.sqrt(5)
    deg_freedom = stats.t.ppf((1 - 0.025), (df_temp_past.count() - 1))

    df_semanal.iloc[i, df_semanal.columns.get_loc('lower_limit_IC95')] = geometric_mean - (deg_freedom * standar_error)
    df_semanal.iloc[i, df_semanal.columns.get_loc('upper_limit_IC95')] = geometric_mean + (deg_freedom * standar_error)
    df_semanal.iloc[i, df_semanal.columns.get_loc('threshold_IC95')] = df_semanal[(df_semanal['COD_MUNICIPIO'] == df_semanal.iloc[i]['COD_MUNICIPIO']) & 
            (df_semanal['ANO'] < df_semanal.iloc[i]['ANO'])]['DENGUE'].mean()

    if i % 10000 == 0:
        now = datetime.now()
        current_time = now.strftime("%H:%M:%S")
        print('Iteration: ', i, 'Time: ', current_time)

Iteration:  0 Time:  10:31:37
Iteration:  10000 Time:  10:50:26
Iteration:  20000 Time:  11:10:05
Iteration:  30000 Time:  11:27:15
Iteration:  40000 Time:  11:42:37
Iteration:  50000 Time:  11:59:19
Iteration:  60000 Time:  12:16:49
Iteration:  70000 Time:  12:34:32
Iteration:  80000 Time:  12:52:30
Iteration:  90000 Time:  13:10:06
Iteration:  100000 Time:  13:25:36
Iteration:  110000 Time:  13:40:49
Iteration:  120000 Time:  13:55:55
Iteration:  130000 Time:  14:11:11
Iteration:  140000 Time:  14:30:14
Iteration:  150000 Time:  14:49:04
Iteration:  160000 Time:  15:08:18
Iteration:  170000 Time:  15:27:30
Iteration:  180000 Time:  15:47:22
Iteration:  190000 Time:  16:10:59
Iteration:  200000 Time:  16:31:08
Iteration:  210000 Time:  16:48:49
Iteration:  220000 Time:  17:08:34
Iteration:  230000 Time:  17:27:45
Iteration:  240000 Time:  17:47:21
Iteration:  250000 Time:  18:06:25
Iteration:  260000 Time:  18:24:59
Iteration:  270000 Time:  18:42:11
Iteration:  280000 Time:  18:59:19

In [9]:
%%time
i = df_semanal[(df_semanal['COD_MUNICIPIO'] == 5001) & 
            (df_semanal['ANO'] == 2019) &
            (df_semanal['SEMANA'] == 12)].index[0]

df_temp_past = df_semanal[(df_semanal['COD_MUNICIPIO'] == df_semanal.iloc[i]['COD_MUNICIPIO']) & 
        (df_semanal['ANO'] < df_semanal.iloc[i]['ANO']) &
        (df_semanal['SEMANA'] == df_semanal.iloc[i]['SEMANA'])]['DENGUE']

df_semanal.iloc[i, df_semanal.columns.get_loc('P25')] = df_temp_past.quantile(0.25)
df_semanal.iloc[i, df_semanal.columns.get_loc('MED')] = df_temp_past.median()
df_semanal.iloc[i, df_semanal.columns.get_loc('P75')] = df_temp_past.quantile(0.75)

if df_semanal.iloc[i]['SEMANA'] == 1:
    df_temp = df_semanal[
        (df_semanal['COD_MUNICIPIO'] == df_semanal.iloc[i]['COD_MUNICIPIO']) & 
        (df_semanal['ANO'] < df_semanal.iloc[i]['ANO']) & 
        ((df_semanal['SEMANA'] == df_semanal.iloc[i]['SEMANA']) | 
        (df_semanal['SEMANA'] == df_semanal.iloc[i]['SEMANA'] + 1)) |
        ((df_semanal['COD_MUNICIPIO'] == df_semanal.iloc[i]['COD_MUNICIPIO']) & 
        (df_semanal['ANO'] < df_semanal.iloc[i]['ANO'] - 1) & 
        (df_semanal['SEMANA'] == 12))
    ]
elif df_semanal.iloc[i]['SEMANA'] == 12:
    df_temp = df_semanal[
        (df_semanal['COD_MUNICIPIO'] == df_semanal.iloc[i]['COD_MUNICIPIO']) & 
        (df_semanal['ANO'] < df_semanal.iloc[i]['ANO']) & 
        ((df_semanal['SEMANA'] == df_semanal.iloc[i]['SEMANA']) | 
        (df_semanal['SEMANA'] == df_semanal.iloc[i]['SEMANA'] - 1)) |
        ((df_semanal['COD_MUNICIPIO'] == df_semanal.iloc[i]['COD_MUNICIPIO']) & 
        (df_semanal['ANO'] < df_semanal.iloc[i]['ANO'] + 1) & 
        (df_semanal['SEMANA'] == 1))
    ]
else:
    df_temp = df_semanal[
        (df_semanal['COD_MUNICIPIO'] == df_semanal.iloc[i]['COD_MUNICIPIO']) & 
        (df_semanal['ANO'] < df_semanal.iloc[i]['ANO']) & 
        ((df_semanal['SEMANA'] == df_semanal.iloc[i]['SEMANA']) | 
        (df_semanal['SEMANA'] == df_semanal.iloc[i]['SEMANA'] + 1) | 
        (df_semanal['SEMANA'] == df_semanal.iloc[i]['SEMANA'] - 1))]

expected_number = df_temp['DENGUE'].mean()
standar_dev = df_temp['DENGUE'].std()
CV = standar_dev / expected_number
df_semanal.iloc[i, df_semanal.columns.get_loc('lower_limit')] = 1 - (1.96 * CV)
df_semanal.iloc[i, df_semanal.columns.get_loc('upper_limit')] = 1 + (1.96 * CV)
df_semanal.iloc[i, df_semanal.columns.get_loc('observed_reason')] = df_semanal.iloc[i]['DENGUE'] / expected_number

geometric_mean = stats.gmean(df_temp_past)
standar_dev1 = df_temp_past.std()
standar_error = standar_dev1 / math.sqrt(5)
deg_freedom = stats.t.ppf((1 - 0.025), (df_temp_past.count() - 1))

df_semanal.iloc[i, df_semanal.columns.get_loc('lower_limit_IC95')] = geometric_mean - (deg_freedom * standar_error)
df_semanal.iloc[i, df_semanal.columns.get_loc('upper_limit_IC95')] = geometric_mean + (deg_freedom * standar_error)
df_semanal.iloc[i, df_semanal.columns.get_loc('threshold_IC95')] = df_semanal[(df_semanal['COD_MUNICIPIO'] == df_semanal.iloc[i]['COD_MUNICIPIO']) & 
        (df_semanal['ANO'] < df_semanal.iloc[i]['ANO'])]['DENGUE'].mean()

df_semanal[(df_semanal['COD_MUNICIPIO'] == 5001) & 
            (df_semanal['ANO'] == 2019) &
            (df_semanal['SEMANA'] == 12)]

Wall time: 141 ms


Unnamed: 0,ANO,COD_MUNICIPIO,FECHA,SEMANA,COD_DPTO,DENGUE,DENGUE GRAVE,MORTALIDAD POR DENGUE,POBLACION,DEPARTAMENTO,...,P25,MED,P75,lower_limit,upper_limit,observed_reason,expected_reason,lower_limit_IC95,upper_limit_IC95,threshold_IC95
662319,2019,5001,2019-03-17,12,5,11,1,0,2549537.0,ANTIOQUIA,...,12.75,28.0,44.0,-1.633344,3.633344,0.217997,1,-76.213743,76.213743,76.704473


------

# Para abajo contando con los resultados de la información semanal por municipio

In [2]:
df_semanal = pd.read_csv('Data\dengue_semanal_cities.csv', encoding='utf-8-sig')
# df_semanal = df_semanal[~((df_semanal['SEMANA'] > 16) & (df_semanal['ANO'] == 2020))]
df_semanal.to_csv('Data/JSON/dengue_semanal_cities.csv', encoding='utf-8-sig', index = False)

In [3]:
df_semanal.head()

Unnamed: 0,ANO,COD_MUNICIPIO,FECHA,SEMANA,COD_DPTO,DENGUE,DENGUE GRAVE,MORTALIDAD POR DENGUE,POBLACION,DEPARTAMENTO,...,P25,MED,P75,lower_limit,upper_limit,observed_reason,expected_reason,lower_limit_IC95,upper_limit_IC95,threshold_IC95
0,2007,5001,2006-12-31,1,5,1,0,0,2265244.0,ANTIOQUIA,...,,,,,,,1,,,
1,2007,5001,2007-01-07,2,5,0,0,0,2265244.0,ANTIOQUIA,...,,,,,,,1,,,
2,2007,5001,2007-01-14,3,5,0,0,0,2265244.0,ANTIOQUIA,...,,,,,,,1,,,
3,2007,5001,2007-01-21,4,5,0,0,0,2265244.0,ANTIOQUIA,...,,,,,,,1,,,
4,2007,5001,2007-01-28,5,5,0,0,0,2265244.0,ANTIOQUIA,...,,,,,,,1,,,


In [4]:
df_semanal.columns

Index(['ANO', 'COD_MUNICIPIO', 'FECHA', 'SEMANA', 'COD_DPTO', 'DENGUE',
       'DENGUE GRAVE', 'MORTALIDAD POR DENGUE', 'POBLACION', 'DEPARTAMENTO',
       'MUNICIPIO', 'INCIDENCIA DENGUE', 'INCIDENCIA DENGUE GRAVE',
       'LETALIDAD', 'P25', 'MED', 'P75', 'lower_limit', 'upper_limit',
       'observed_reason', 'expected_reason', 'lower_limit_IC95',
       'upper_limit_IC95', 'threshold_IC95'],
      dtype='object')

# Construcción de las gráficas semanales por departamento

In [5]:
df_semanal_dpto = df_semanal[['ANO', 'FECHA', 'SEMANA', 'COD_DPTO', 'DENGUE',
       'DENGUE GRAVE', 'MORTALIDAD POR DENGUE', 'POBLACION', 'DEPARTAMENTO']].groupby(['ANO', 'FECHA', 'SEMANA', 'COD_DPTO', 'DEPARTAMENTO']).sum()
df_semanal_dpto.reset_index(inplace = True)
df_semanal_dpto

Unnamed: 0,ANO,FECHA,SEMANA,COD_DPTO,DEPARTAMENTO,DENGUE,DENGUE GRAVE,MORTALIDAD POR DENGUE,POBLACION
0,2007,2006-12-31,1,5,ANTIOQUIA,1,0,0,5834865.0
1,2007,2006-12-31,1,8,ATLANTICO,79,0,0,2225481.0
2,2007,2006-12-31,1,11,BOGOTA,0,0,0,7050228.0
3,2007,2006-12-31,1,13,BOLIVAR,28,3,0,1917345.0
4,2007,2006-12-31,1,15,BOYACA,3,1,0,1009633.0
...,...,...,...,...,...,...,...,...,...
24118,2020,2020-12-27,53,91,AMAZONAS,0,0,0,76787.0
24119,2020,2020-12-27,53,94,GUAINIA,0,0,0,43349.0
24120,2020,2020-12-27,53,95,GUAVIARE,0,0,0,119214.0
24121,2020,2020-12-27,53,97,VAUPES,0,0,0,44997.0


In [6]:
df_semanal_dpto = df_semanal_dpto[~df_semanal_dpto['POBLACION'].isna()]
df_semanal_dpto['INCIDENCIA DENGUE'] = df_semanal_dpto['DENGUE'] * 100000 / df_semanal_dpto['POBLACION']
df_semanal_dpto['INCIDENCIA DENGUE GRAVE'] = df_semanal_dpto['DENGUE GRAVE'] * 100000 / df_semanal_dpto['POBLACION']
df_semanal_dpto['LETALIDAD'] = df_semanal_dpto['MORTALIDAD POR DENGUE'] * 100 / df_semanal_dpto['DENGUE GRAVE']
df_semanal_dpto.reset_index(inplace = True, drop=True)
df_semanal_dpto

Unnamed: 0,ANO,FECHA,SEMANA,COD_DPTO,DEPARTAMENTO,DENGUE,DENGUE GRAVE,MORTALIDAD POR DENGUE,POBLACION,INCIDENCIA DENGUE,INCIDENCIA DENGUE GRAVE,LETALIDAD
0,2007,2006-12-31,1,5,ANTIOQUIA,1,0,0,5834865.0,0.017138,0.000000,
1,2007,2006-12-31,1,8,ATLANTICO,79,0,0,2225481.0,3.549794,0.000000,
2,2007,2006-12-31,1,11,BOGOTA,0,0,0,7050228.0,0.000000,0.000000,
3,2007,2006-12-31,1,13,BOLIVAR,28,3,0,1917345.0,1.460353,0.156466,0.0
4,2007,2006-12-31,1,15,BOYACA,3,1,0,1009633.0,0.297138,0.099046,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
24118,2020,2020-12-27,53,91,AMAZONAS,0,0,0,76787.0,0.000000,0.000000,
24119,2020,2020-12-27,53,94,GUAINIA,0,0,0,43349.0,0.000000,0.000000,
24120,2020,2020-12-27,53,95,GUAVIARE,0,0,0,119214.0,0.000000,0.000000,
24121,2020,2020-12-27,53,97,VAUPES,0,0,0,44997.0,0.000000,0.000000,


In [7]:
%%time
df_semanal_dpto['P25'] = np.nan
df_semanal_dpto['MED'] = np.nan
df_semanal_dpto['P75'] = np.nan

df_semanal_dpto['lower_limit'] = np.nan
df_semanal_dpto['upper_limit'] = np.nan
df_semanal_dpto['observed_reason'] = np.nan
df_semanal_dpto['expected_reason'] = 1

df_semanal_dpto['lower_limit_IC95'] = np.nan
df_semanal_dpto['upper_limit_IC95'] = np.nan
df_semanal_dpto['threshold_IC95'] = np.nan

for i in range(len(df_semanal_dpto)):

    df_temp_past = df_semanal_dpto[(df_semanal_dpto['COD_DPTO'] == df_semanal_dpto.iloc[i]['COD_DPTO']) & 
            (df_semanal_dpto['ANO'] < df_semanal_dpto.iloc[i]['ANO']) &
            (df_semanal_dpto['SEMANA'] == df_semanal_dpto.iloc[i]['SEMANA'])]['DENGUE']

    df_semanal_dpto.iloc[i, df_semanal_dpto.columns.get_loc('P25')] = df_temp_past.quantile(0.25)
    df_semanal_dpto.iloc[i, df_semanal_dpto.columns.get_loc('MED')] = df_temp_past.median()
    df_semanal_dpto.iloc[i, df_semanal_dpto.columns.get_loc('P75')] = df_temp_past.quantile(0.75)



    if df_semanal_dpto.iloc[i]['SEMANA'] == 1:
        df_temp = df_semanal_dpto[
            (df_semanal_dpto['COD_DPTO'] == df_semanal_dpto.iloc[i]['COD_DPTO']) & 
            (df_semanal_dpto['ANO'] < df_semanal_dpto.iloc[i]['ANO']) & 
            ((df_semanal_dpto['SEMANA'] == df_semanal_dpto.iloc[i]['SEMANA']) | 
            (df_semanal_dpto['SEMANA'] == df_semanal_dpto.iloc[i]['SEMANA'] + 1)) |
            ((df_semanal_dpto['COD_DPTO'] == df_semanal_dpto.iloc[i]['COD_DPTO']) & 
            (df_semanal_dpto['ANO'] < df_semanal_dpto.iloc[i]['ANO'] - 1) & 
            (df_semanal_dpto['SEMANA'] == 12))
        ]
    elif df_semanal_dpto.iloc[i]['SEMANA'] == 12:
        df_temp = df_semanal_dpto[
            (df_semanal_dpto['COD_DPTO'] == df_semanal_dpto.iloc[i]['COD_DPTO']) & 
            (df_semanal_dpto['ANO'] < df_semanal_dpto.iloc[i]['ANO']) & 
            ((df_semanal_dpto['SEMANA'] == df_semanal_dpto.iloc[i]['SEMANA']) | 
            (df_semanal_dpto['SEMANA'] == df_semanal_dpto.iloc[i]['SEMANA'] - 1)) |
            ((df_semanal_dpto['COD_DPTO'] == df_semanal_dpto.iloc[i]['COD_DPTO']) & 
            (df_semanal_dpto['ANO'] < df_semanal_dpto.iloc[i]['ANO'] + 1) & 
            (df_semanal_dpto['SEMANA'] == 1))
        ]
    else:
        df_temp = df_semanal_dpto[
            (df_semanal_dpto['COD_DPTO'] == df_semanal_dpto.iloc[i]['COD_DPTO']) & 
            (df_semanal_dpto['ANO'] < df_semanal_dpto.iloc[i]['ANO']) & 
            ((df_semanal_dpto['SEMANA'] == df_semanal_dpto.iloc[i]['SEMANA']) | 
            (df_semanal_dpto['SEMANA'] == df_semanal_dpto.iloc[i]['SEMANA'] + 1) | 
            (df_semanal_dpto['SEMANA'] == df_semanal_dpto.iloc[i]['SEMANA'] - 1))]


    expected_number = df_temp['DENGUE'].mean()
    standar_dev = df_temp['DENGUE'].std()
    CV = standar_dev / expected_number
    df_semanal_dpto.iloc[i, df_semanal_dpto.columns.get_loc('lower_limit')] = 1 - (1.96 * CV)
    df_semanal_dpto.iloc[i, df_semanal_dpto.columns.get_loc('upper_limit')] = 1 + (1.96 * CV)
    df_semanal_dpto.iloc[i, df_semanal_dpto.columns.get_loc('observed_reason')] = df_semanal_dpto.iloc[i]['DENGUE'] / expected_number



    geometric_mean = stats.gmean(df_temp_past)
    standar_dev1 = df_temp_past.std()
    standar_error = standar_dev1 / math.sqrt(5)
    deg_freedom = stats.t.ppf((1 - 0.025), (df_temp_past.count() - 1))

    df_semanal_dpto.iloc[i, df_semanal_dpto.columns.get_loc('lower_limit_IC95')] = geometric_mean - (deg_freedom * standar_error)
    df_semanal_dpto.iloc[i, df_semanal_dpto.columns.get_loc('upper_limit_IC95')] = geometric_mean + (deg_freedom * standar_error)
    df_semanal_dpto.iloc[i, df_semanal_dpto.columns.get_loc('threshold_IC95')] = df_semanal_dpto[(df_semanal_dpto['COD_DPTO'] == df_semanal_dpto.iloc[i]['COD_DPTO']) & 
            (df_semanal_dpto['ANO'] < df_semanal_dpto.iloc[i]['ANO'])]['DENGUE'].mean()

    if i % 1000 == 0:
        now = datetime.now()
        current_time = now.strftime("%H:%M:%S")
        print('Iteration: ', i, 'Time: ', current_time)

Iteration:  0 Time:  11:06:19
Iteration:  1000 Time:  11:06:52
Iteration:  2000 Time:  11:07:27
Iteration:  3000 Time:  11:08:01
Iteration:  4000 Time:  11:08:34
Iteration:  5000 Time:  11:09:09
Iteration:  6000 Time:  11:09:43
Iteration:  7000 Time:  11:10:16
Iteration:  8000 Time:  11:10:50
Iteration:  9000 Time:  11:11:24
Iteration:  10000 Time:  11:11:59
Iteration:  11000 Time:  11:12:34
Iteration:  12000 Time:  11:13:10
Iteration:  13000 Time:  11:13:41
Iteration:  14000 Time:  11:14:14
Iteration:  15000 Time:  11:14:44
Iteration:  16000 Time:  11:15:17
Iteration:  17000 Time:  11:15:48
Iteration:  18000 Time:  11:16:21
Iteration:  19000 Time:  11:16:56
Iteration:  20000 Time:  11:17:31
Iteration:  21000 Time:  11:18:04
Iteration:  22000 Time:  11:18:38
Iteration:  23000 Time:  11:19:15
Iteration:  24000 Time:  11:19:47
Wall time: 13min 31s


In [8]:
df_semanal_dpto.to_csv('Data/JSON/dengue_semanal_dpto.csv', encoding='utf-8-sig', index = False)

In [9]:
df_semanal_dpto.head()

Unnamed: 0,ANO,FECHA,SEMANA,COD_DPTO,DEPARTAMENTO,DENGUE,DENGUE GRAVE,MORTALIDAD POR DENGUE,POBLACION,INCIDENCIA DENGUE,...,P25,MED,P75,lower_limit,upper_limit,observed_reason,expected_reason,lower_limit_IC95,upper_limit_IC95,threshold_IC95
0,2007,2006-12-31,1,5,ANTIOQUIA,1,0,0,5834865.0,0.017138,...,,,,,,,1,,,
1,2007,2006-12-31,1,8,ATLANTICO,79,0,0,2225481.0,3.549794,...,,,,,,,1,,,
2,2007,2006-12-31,1,11,BOGOTA,0,0,0,7050228.0,0.0,...,,,,,,,1,,,
3,2007,2006-12-31,1,13,BOLIVAR,28,3,0,1917345.0,1.460353,...,,,,,,,1,,,
4,2007,2006-12-31,1,15,BOYACA,3,1,0,1009633.0,0.297138,...,,,,,,,1,,,


In [10]:
df_semanal_dpto.columns

Index(['ANO', 'FECHA', 'SEMANA', 'COD_DPTO', 'DEPARTAMENTO', 'DENGUE',
       'DENGUE GRAVE', 'MORTALIDAD POR DENGUE', 'POBLACION',
       'INCIDENCIA DENGUE', 'INCIDENCIA DENGUE GRAVE', 'LETALIDAD', 'P25',
       'MED', 'P75', 'lower_limit', 'upper_limit', 'observed_reason',
       'expected_reason', 'lower_limit_IC95', 'upper_limit_IC95',
       'threshold_IC95'],
      dtype='object')

# Construcción de las gráficas semanales para Colombia

In [11]:
df_semanal_col = df_semanal_dpto[['ANO', 'FECHA', 'SEMANA', 'DENGUE', 'DENGUE GRAVE', 'MORTALIDAD POR DENGUE', 'POBLACION']].groupby(['ANO', 'FECHA', 'SEMANA']).sum()
df_semanal_col.reset_index(inplace = True)
df_semanal_col

Unnamed: 0,ANO,FECHA,SEMANA,DENGUE,DENGUE GRAVE,MORTALIDAD POR DENGUE,POBLACION
0,2007,2006-12-31,1,554,107,1,43514726.0
1,2007,2007-01-07,2,382,58,0,43514726.0
2,2007,2007-01-14,3,403,87,0,43514726.0
3,2007,2007-01-21,4,503,81,1,43514726.0
4,2007,2007-01-28,5,507,82,0,43514726.0
...,...,...,...,...,...,...,...
726,2020,2020-11-29,49,0,0,0,50492854.0
727,2020,2020-12-06,50,0,0,0,50492854.0
728,2020,2020-12-13,51,0,0,0,50492854.0
729,2020,2020-12-20,52,0,0,0,50492854.0


In [12]:
df_semanal_col = df_semanal_col[~df_semanal_col['POBLACION'].isna()]
df_semanal_col['INCIDENCIA DENGUE'] = df_semanal_col['DENGUE'] * 100000 / df_semanal_col['POBLACION']
df_semanal_col['INCIDENCIA DENGUE GRAVE'] = df_semanal_col['DENGUE GRAVE'] * 100000 / df_semanal_col['POBLACION']
df_semanal_col['LETALIDAD'] = df_semanal_col['MORTALIDAD POR DENGUE'] * 100 / df_semanal_col['DENGUE GRAVE']
df_semanal_col.reset_index(inplace = True, drop=True)
df_semanal_col

Unnamed: 0,ANO,FECHA,SEMANA,DENGUE,DENGUE GRAVE,MORTALIDAD POR DENGUE,POBLACION,INCIDENCIA DENGUE,INCIDENCIA DENGUE GRAVE,LETALIDAD
0,2007,2006-12-31,1,554,107,1,43514726.0,1.273132,0.245894,0.934579
1,2007,2007-01-07,2,382,58,0,43514726.0,0.877864,0.133288,0.000000
2,2007,2007-01-14,3,403,87,0,43514726.0,0.926123,0.199932,0.000000
3,2007,2007-01-21,4,503,81,1,43514726.0,1.155931,0.186144,1.234568
4,2007,2007-01-28,5,507,82,0,43514726.0,1.165123,0.188442,0.000000
...,...,...,...,...,...,...,...,...,...,...
726,2020,2020-11-29,49,0,0,0,50492854.0,0.000000,0.000000,
727,2020,2020-12-06,50,0,0,0,50492854.0,0.000000,0.000000,
728,2020,2020-12-13,51,0,0,0,50492854.0,0.000000,0.000000,
729,2020,2020-12-20,52,0,0,0,50492854.0,0.000000,0.000000,


In [13]:
%%time
df_semanal_col['P25'] = np.nan
df_semanal_col['MED'] = np.nan
df_semanal_col['P75'] = np.nan

df_semanal_col['lower_limit'] = np.nan
df_semanal_col['upper_limit'] = np.nan
df_semanal_col['observed_reason'] = np.nan
df_semanal_col['expected_reason'] = 1

df_semanal_col['lower_limit_IC95'] = np.nan
df_semanal_col['upper_limit_IC95'] = np.nan
df_semanal_col['threshold_IC95'] = np.nan

for i in range(len(df_semanal_col)):

    df_temp_past = df_semanal_col[(df_semanal_col['ANO'] < df_semanal_col.iloc[i]['ANO']) &
            (df_semanal_col['SEMANA'] == df_semanal_col.iloc[i]['SEMANA'])]['DENGUE']

    df_semanal_col.iloc[i, df_semanal_col.columns.get_loc('P25')] = df_temp_past.quantile(0.25)
    df_semanal_col.iloc[i, df_semanal_col.columns.get_loc('MED')] = df_temp_past.median()
    df_semanal_col.iloc[i, df_semanal_col.columns.get_loc('P75')] = df_temp_past.quantile(0.75)



    if df_semanal_col.iloc[i]['SEMANA'] == 1:
        df_temp = df_semanal_col[
            (df_semanal_col['ANO'] < df_semanal_col.iloc[i]['ANO']) & 
            ((df_semanal_col['SEMANA'] == df_semanal_col.iloc[i]['SEMANA']) | 
            (df_semanal_col['SEMANA'] == df_semanal_col.iloc[i]['SEMANA'] + 1)) |
            ((df_semanal_col['ANO'] < df_semanal_col.iloc[i]['ANO'] - 1) & 
            (df_semanal_col['SEMANA'] == 12))
        ]
    elif df_semanal_col.iloc[i]['SEMANA'] == 12:
        df_temp = df_semanal_col[
            (df_semanal_col['ANO'] < df_semanal_col.iloc[i]['ANO']) & 
            ((df_semanal_col['SEMANA'] == df_semanal_col.iloc[i]['SEMANA']) | 
            (df_semanal_col['SEMANA'] == df_semanal_col.iloc[i]['SEMANA'] - 1)) |
            ((df_semanal_col['ANO'] < df_semanal_col.iloc[i]['ANO'] + 1) & 
            (df_semanal_col['SEMANA'] == 1))
        ]
    else:
        df_temp = df_semanal_col[
            (df_semanal_col['ANO'] < df_semanal_col.iloc[i]['ANO']) & 
            ((df_semanal_col['SEMANA'] == df_semanal_col.iloc[i]['SEMANA']) | 
            (df_semanal_col['SEMANA'] == df_semanal_col.iloc[i]['SEMANA'] + 1) | 
            (df_semanal_col['SEMANA'] == df_semanal_col.iloc[i]['SEMANA'] - 1))]


    expected_number = df_temp['DENGUE'].mean()
    standar_dev = df_temp['DENGUE'].std()
    CV = standar_dev / expected_number
    df_semanal_col.iloc[i, df_semanal_col.columns.get_loc('lower_limit')] = 1 - (1.96 * CV)
    df_semanal_col.iloc[i, df_semanal_col.columns.get_loc('upper_limit')] = 1 + (1.96 * CV)
    df_semanal_col.iloc[i, df_semanal_col.columns.get_loc('observed_reason')] = df_semanal_col.iloc[i]['DENGUE'] / expected_number



    geometric_mean = stats.gmean(df_temp_past)
    standar_dev1 = df_temp_past.std()
    standar_error = standar_dev1 / math.sqrt(5)
    deg_freedom = stats.t.ppf((1 - 0.025), (df_temp_past.count() - 1))

    df_semanal_col.iloc[i, df_semanal_col.columns.get_loc('lower_limit_IC95')] = geometric_mean - (deg_freedom * standar_error)
    df_semanal_col.iloc[i, df_semanal_col.columns.get_loc('upper_limit_IC95')] = geometric_mean + (deg_freedom * standar_error)
    df_semanal_col.iloc[i, df_semanal_col.columns.get_loc('threshold_IC95')] = df_semanal_col[(df_semanal_col['ANO'] < df_semanal_col.iloc[i]['ANO'])]['DENGUE'].mean()

    if i % 100 == 0:
        now = datetime.now()
        current_time = now.strftime("%H:%M:%S")
        print('Iteration: ', i, 'Time: ', current_time)

Iteration:  0 Time:  11:19:52
Iteration:  100 Time:  11:19:54
Iteration:  200 Time:  11:19:56
Iteration:  300 Time:  11:19:58
Iteration:  400 Time:  11:20:00
Iteration:  500 Time:  11:20:02
Iteration:  600 Time:  11:20:04
Iteration:  700 Time:  11:20:06
Wall time: 15.5 s


In [14]:
df_semanal_col.to_csv('Data/JSON/dengue_semanal_col.csv', encoding='utf-8-sig', index = False)

# Construcción de las gráficas anuales por municipio

In [15]:
df_semanal.columns

Index(['ANO', 'COD_MUNICIPIO', 'FECHA', 'SEMANA', 'COD_DPTO', 'DENGUE',
       'DENGUE GRAVE', 'MORTALIDAD POR DENGUE', 'POBLACION', 'DEPARTAMENTO',
       'MUNICIPIO', 'INCIDENCIA DENGUE', 'INCIDENCIA DENGUE GRAVE',
       'LETALIDAD', 'P25', 'MED', 'P75', 'lower_limit', 'upper_limit',
       'observed_reason', 'expected_reason', 'lower_limit_IC95',
       'upper_limit_IC95', 'threshold_IC95'],
      dtype='object')

In [16]:
df_anual = df_semanal[['ANO', 'COD_MUNICIPIO', 'DENGUE', 'MUNICIPIO', 'DEPARTAMENTO', 
       'DENGUE GRAVE', 'MORTALIDAD POR DENGUE', 'POBLACION']].groupby(['ANO', 'COD_MUNICIPIO', 'MUNICIPIO', 'DEPARTAMENTO']).agg({'DENGUE' : 'sum', 'DENGUE GRAVE' : 'sum', 'MORTALIDAD POR DENGUE' : 'sum', 'POBLACION' : 'mean'})
df_anual.reset_index(inplace = True)
df_anual

Unnamed: 0,ANO,COD_MUNICIPIO,MUNICIPIO,DEPARTAMENTO,DENGUE,DENGUE GRAVE,MORTALIDAD POR DENGUE,POBLACION
0,2007,5001,MEDELLIN,ANTIOQUIA,4,1,0,2265244.0
1,2007,5002,ABEJORRAL,ANTIOQUIA,0,0,0,20022.0
2,2007,5004,ABRIAQUI,ANTIOQUIA,0,0,0,2570.0
3,2007,5021,ALEJANDRIA,ANTIOQUIA,0,0,0,3747.0
4,2007,5030,AMAGA,ANTIOQUIA,0,0,0,27709.0
...,...,...,...,...,...,...,...,...
14807,2020,97889,YAVARATE (CD),VAUPES,0,0,0,1218.0
14808,2020,99001,PUERTO CARREÑO,VICHADA,1,0,0,17031.0
14809,2020,99524,LA PRIMAVERA,VICHADA,4,0,0,18248.0
14810,2020,99624,SANTA ROSALIA,VICHADA,0,0,0,4310.0


In [17]:
df_anual = df_anual[~df_anual['POBLACION'].isna()]
df_anual['INCIDENCIA DENGUE'] = df_anual['DENGUE'] * 100000 / df_anual['POBLACION']
df_anual['INCIDENCIA DENGUE GRAVE'] = df_anual['DENGUE GRAVE'] * 100000 / df_anual['POBLACION']
df_anual['LETALIDAD'] = df_anual['MORTALIDAD POR DENGUE'] * 100 / df_anual['DENGUE GRAVE']
df_anual['MORTALITY RATE'] = df_anual['MORTALIDAD POR DENGUE'] * 100 / df_anual['POBLACION']

lista = []

for i in df_anual['COD_MUNICIPIO'].unique():
    df_temp = df_anual[df_anual['COD_MUNICIPIO'] == i]

    df_temp['PCT CHANGE DENGUE'] = df_temp['DENGUE'].pct_change() * 100
    df_temp['PCT CHANGE DENGUE GRAVE'] = df_temp['DENGUE GRAVE'].pct_change() * 100
    df_temp['PCT CHANGE MORTALIDAD POR DENGUE'] = df_temp['MORTALIDAD POR DENGUE'].pct_change() * 100
    df_temp['PCT CHANGE INCIDENCIA DENGUE'] = df_temp['INCIDENCIA DENGUE'].pct_change() * 100
    df_temp['PCT CHANGE INCIDENCIA DENGUE GRAVE'] = df_temp['INCIDENCIA DENGUE GRAVE'].pct_change() * 100
    df_temp['PCT CHANGE MORTALITY RATE'] = df_temp['MORTALITY RATE'].pct_change() * 100
    lista.append(df_temp)

df_anual = pd.concat(lista)
df_anual.fillna(0, inplace = True)
df_anual.reset_index(inplace = True, drop=True)
df_anual

Unnamed: 0,ANO,COD_MUNICIPIO,MUNICIPIO,DEPARTAMENTO,DENGUE,DENGUE GRAVE,MORTALIDAD POR DENGUE,POBLACION,INCIDENCIA DENGUE,INCIDENCIA DENGUE GRAVE,LETALIDAD,MORTALITY RATE,PCT CHANGE DENGUE,PCT CHANGE DENGUE GRAVE,PCT CHANGE MORTALIDAD POR DENGUE,PCT CHANGE INCIDENCIA DENGUE,PCT CHANGE INCIDENCIA DENGUE GRAVE,PCT CHANGE MORTALITY RATE
0,2007,5001,MEDELLIN,ANTIOQUIA,4,1,0,2265244.0,0.176581,0.044145,0.000000,0.000000,0.000000,0.00,0.0,0.000000,0.000000e+00,0.0
1,2008,5001,MEDELLIN,ANTIOQUIA,888,20,0,2291378.0,38.753973,0.872837,0.000000,0.000000,22100.000000,1900.00,0.0,21846.800921,1.877189e+03,0.0
2,2009,5001,MEDELLIN,ANTIOQUIA,412,1,0,2317336.0,17.779036,0.043153,0.000000,0.000000,-53.603604,-95.00,0.0,-54.123320,-9.505601e+01,0.0
3,2010,5001,MEDELLIN,ANTIOQUIA,15458,112,13,2343049.0,659.738657,4.780096,11.607143,0.000555,3651.941748,11100.00,inf,3610.767330,1.097709e+04,inf
4,2011,5001,MEDELLIN,ANTIOQUIA,781,21,0,2368282.0,32.977492,0.886719,0.000000,0.000000,-94.947600,-81.25,-100.0,-95.001431,-8.144977e+01,-100.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14807,2016,99773,CUMARIBO,VICHADA,53,2,0,37740.0,140.434552,5.299417,0.000000,0.000000,1.923077,inf,0.0,-0.434603,inf,0.0
14808,2017,99773,CUMARIBO,VICHADA,16,0,0,38634.0,41.414298,0.000000,0.000000,0.000000,-69.811321,-100.00,0.0,-70.509894,-1.000000e+02,0.0
14809,2018,99773,CUMARIBO,VICHADA,6,0,0,39549.0,15.171054,0.000000,0.000000,0.000000,-62.500000,0.00,0.0,-63.367595,0.000000e+00,0.0
14810,2019,99773,CUMARIBO,VICHADA,38,1,0,40490.0,93.850333,2.469746,0.000000,0.000000,533.333333,inf,0.0,518.614473,inf,0.0


In [18]:
df_anual.to_csv('Data/JSON/dengue_anual.csv', encoding='utf-8-sig', index = False)

# Construcción de las gráficas anuales por departamento

In [19]:
df_semanal_dpto.columns

Index(['ANO', 'FECHA', 'SEMANA', 'COD_DPTO', 'DEPARTAMENTO', 'DENGUE',
       'DENGUE GRAVE', 'MORTALIDAD POR DENGUE', 'POBLACION',
       'INCIDENCIA DENGUE', 'INCIDENCIA DENGUE GRAVE', 'LETALIDAD', 'P25',
       'MED', 'P75', 'lower_limit', 'upper_limit', 'observed_reason',
       'expected_reason', 'lower_limit_IC95', 'upper_limit_IC95',
       'threshold_IC95'],
      dtype='object')

In [20]:
df_anual_dpto = df_semanal_dpto[['ANO', 'COD_DPTO', 'DENGUE', 'DEPARTAMENTO', 
       'DENGUE GRAVE', 'MORTALIDAD POR DENGUE', 'POBLACION']].groupby(['ANO', 'COD_DPTO', 'DEPARTAMENTO']).agg({'DENGUE' : 'sum', 'DENGUE GRAVE' : 'sum', 'MORTALIDAD POR DENGUE' : 'sum', 'POBLACION' : 'mean'})
df_anual_dpto.reset_index(inplace = True)
df_anual_dpto

Unnamed: 0,ANO,COD_DPTO,DEPARTAMENTO,DENGUE,DENGUE GRAVE,MORTALIDAD POR DENGUE,POBLACION
0,2007,5,ANTIOQUIA,49,18,0,5834865.0
1,2007,8,ATLANTICO,3232,88,0,2225481.0
2,2007,11,BOGOTA,0,0,1,7050228.0
3,2007,13,BOLIVAR,635,93,0,1917345.0
4,2007,15,BOYACA,331,19,0,1009633.0
...,...,...,...,...,...,...,...
457,2020,91,AMAZONAS,134,0,0,76787.0
458,2020,94,GUAINIA,27,1,0,43349.0
459,2020,95,GUAVIARE,72,1,0,119214.0
460,2020,97,VAUPES,48,0,0,44997.0


In [21]:
df_anual_dpto = df_anual_dpto[~df_anual_dpto['POBLACION'].isna()]
df_anual_dpto['INCIDENCIA DENGUE'] = df_anual_dpto['DENGUE'] * 100000 / df_anual_dpto['POBLACION']
df_anual_dpto['INCIDENCIA DENGUE GRAVE'] = df_anual_dpto['DENGUE GRAVE'] * 100000 / df_anual_dpto['POBLACION']
df_anual_dpto['LETALIDAD'] = df_anual_dpto['MORTALIDAD POR DENGUE'] * 100 / df_anual_dpto['DENGUE GRAVE']
df_anual_dpto['MORTALITY RATE'] = df_anual_dpto['MORTALIDAD POR DENGUE'] * 100 / df_anual['POBLACION']

lista = []

for i in df_anual_dpto['COD_DPTO'].unique():
    df_temp = df_anual_dpto[df_anual_dpto['COD_DPTO'] == i]

    df_temp['PCT CHANGE DENGUE'] = df_temp['DENGUE'].pct_change() * 100
    df_temp['PCT CHANGE DENGUE GRAVE'] = df_temp['DENGUE GRAVE'].pct_change() * 100
    df_temp['PCT CHANGE MORTALIDAD POR DENGUE'] = df_temp['MORTALIDAD POR DENGUE'].pct_change() * 100
    df_temp['PCT CHANGE INCIDENCIA DENGUE'] = df_temp['INCIDENCIA DENGUE'].pct_change() * 100
    df_temp['PCT CHANGE INCIDENCIA DENGUE GRAVE'] = df_temp['INCIDENCIA DENGUE GRAVE'].pct_change() * 100
    df_temp['PCT CHANGE MORTALITY RATE'] = df_temp['MORTALITY RATE'].pct_change() * 100
    lista.append(df_temp)

df_anual_dpto = pd.concat(lista)
df_anual_dpto.fillna(0, inplace = True)
df_anual_dpto.reset_index(inplace = True, drop=True)
df_anual_dpto

Unnamed: 0,ANO,COD_DPTO,DEPARTAMENTO,DENGUE,DENGUE GRAVE,MORTALIDAD POR DENGUE,POBLACION,INCIDENCIA DENGUE,INCIDENCIA DENGUE GRAVE,LETALIDAD,MORTALITY RATE,PCT CHANGE DENGUE,PCT CHANGE DENGUE GRAVE,PCT CHANGE MORTALIDAD POR DENGUE,PCT CHANGE INCIDENCIA DENGUE,PCT CHANGE INCIDENCIA DENGUE GRAVE,PCT CHANGE MORTALITY RATE
0,2007,5,ANTIOQUIA,49,18,0,5834865.0,0.839779,0.308490,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000
1,2008,5,ANTIOQUIA,2474,180,1,5911399.0,41.851345,3.044964,0.555556,0.043668,4948.979592,900.000000,inf,4883.611207,887.053149,inf
2,2009,5,ANTIOQUIA,1297,108,1,5988552.0,21.657990,1.803441,0.925926,0.003336,-47.574778,-40.000000,0.0,-48.250194,-40.773005,-92.361574
3,2010,5,ANTIOQUIA,25661,476,17,6066003.0,423.029794,7.847012,3.571429,0.211601,1878.488820,340.740741,1600.0,1853.227386,335.113343,6243.788897
4,2011,5,ANTIOQUIA,1921,59,0,6143809.0,31.267248,0.960316,0.000000,0.000000,-92.513932,-87.605042,-100.0,-92.608736,-87.762013,-100.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
457,2016,99,VICHADA,166,7,0,73702.0,225.231337,9.497707,0.000000,0.000000,39.495798,inf,0.0,36.225212,inf,0.000000
458,2017,99,VICHADA,30,0,0,75468.0,39.751948,0.000000,0.000000,0.000000,-81.927711,-100.000000,0.0,-82.350614,-100.000000,0.000000
459,2018,99,VICHADA,14,0,0,77276.0,18.116880,0.000000,0.000000,0.000000,-53.333333,0.000000,0.0,-54.425177,0.000000,0.000000
460,2019,99,VICHADA,375,1,0,79134.0,473.879748,1.263679,0.000000,0.000000,2578.571429,inf,0.0,2515.680816,inf,0.000000


In [22]:
df_anual_dpto.to_csv('Data/JSON/dengue_anual_dpto.csv', encoding='utf-8-sig', index = False)

# Construcción de las gráficas anuales para Colombia

In [23]:
df_semanal_col.columns

Index(['ANO', 'FECHA', 'SEMANA', 'DENGUE', 'DENGUE GRAVE',
       'MORTALIDAD POR DENGUE', 'POBLACION', 'INCIDENCIA DENGUE',
       'INCIDENCIA DENGUE GRAVE', 'LETALIDAD', 'P25', 'MED', 'P75',
       'lower_limit', 'upper_limit', 'observed_reason', 'expected_reason',
       'lower_limit_IC95', 'upper_limit_IC95', 'threshold_IC95'],
      dtype='object')

In [24]:
df_anual_col = df_semanal_col[['ANO', 'DENGUE', 'DENGUE GRAVE', 'MORTALIDAD POR DENGUE', 'POBLACION']].groupby(['ANO']).agg({'DENGUE' : 'sum', 'DENGUE GRAVE' : 'sum', 'MORTALIDAD POR DENGUE' : 'sum', 'POBLACION' : 'mean'})
df_anual_col.reset_index(inplace = True)
df_anual_col

Unnamed: 0,ANO,DENGUE,DENGUE GRAVE,MORTALIDAD POR DENGUE,POBLACION
0,2007,34227,6747,24,43514726.0
1,2008,32276,4217,20,44038418.0
2,2009,44896,7050,48,44565643.0
3,2010,145672,9583,275,45095941.0
4,2011,28597,1281,40,45630615.0
5,2012,52094,1395,82,46167367.0
6,2013,121609,3084,180,46706149.0
7,2014,104647,2592,166,47246426.0
8,2015,94433,1408,152,47787526.0
9,2016,99088,888,123,48331333.0


In [25]:
df_anual_col = df_anual_col[~df_anual_col['POBLACION'].isna()]
df_anual_col['INCIDENCIA DENGUE'] = df_anual_col['DENGUE'] * 100000 / df_anual_col['POBLACION']
df_anual_col['INCIDENCIA DENGUE GRAVE'] = df_anual_col['DENGUE GRAVE'] * 100000 / df_anual_col['POBLACION']
df_anual_col['LETALIDAD'] = df_anual_col['MORTALIDAD POR DENGUE'] * 100 / df_anual_col['DENGUE GRAVE']
df_anual_col['MORTALITY RATE'] = df_anual_col['MORTALIDAD POR DENGUE'] * 100 / df_anual['POBLACION']
df_anual_col['PCT CHANGE DENGUE'] = df_anual_col['DENGUE'].pct_change() * 100
df_anual_col['PCT CHANGE DENGUE GRAVE'] = df_anual_col['DENGUE GRAVE'].pct_change() * 100
df_anual_col['PCT CHANGE MORTALIDAD POR DENGUE'] = df_anual_col['MORTALIDAD POR DENGUE'].pct_change() * 100
df_anual_col['PCT CHANGE INCIDENCIA DENGUE'] = df_anual_col['INCIDENCIA DENGUE'].pct_change() * 100
df_anual_col['PCT CHANGE INCIDENCIA DENGUE GRAVE'] = df_anual_col['INCIDENCIA DENGUE GRAVE'].pct_change() * 100
df_anual_col['PCT CHANGE MORTALITY RATE'] = df_anual_col['MORTALITY RATE'].pct_change() * 100
df_anual_col.reset_index(inplace = True, drop=True)
df_anual_col

Unnamed: 0,ANO,DENGUE,DENGUE GRAVE,MORTALIDAD POR DENGUE,POBLACION,INCIDENCIA DENGUE,INCIDENCIA DENGUE GRAVE,LETALIDAD,MORTALITY RATE,PCT CHANGE DENGUE,PCT CHANGE DENGUE GRAVE,PCT CHANGE MORTALIDAD POR DENGUE,PCT CHANGE INCIDENCIA DENGUE,PCT CHANGE INCIDENCIA DENGUE GRAVE,PCT CHANGE MORTALITY RATE
0,2007,34227,6747,24,43514726.0,78.656131,15.505096,0.355714,0.001059,,,,,,
1,2008,32276,4217,20,44038418.0,73.290553,9.57573,0.474271,0.000873,-5.700178,-37.498147,-16.666667,-6.821564,-38.241401,-17.617114
2,2009,44896,7050,48,44565643.0,100.741282,15.819361,0.680851,0.002071,39.10026,67.18046,140.0,37.454662,65.202665,137.311603
3,2010,145672,9583,275,45095941.0,323.026855,21.25025,2.869665,0.011737,224.465431,35.929078,472.916667,220.649936,34.330643,466.629386
4,2011,28597,1281,40,45630615.0,62.670643,2.807326,3.12256,0.001689,-80.368911,-86.632579,-85.454545,-80.598937,-86.78921,-85.609521
5,2012,52094,1395,82,46167367.0,112.837277,3.021615,5.878136,0.003427,82.165961,8.899297,105.0,80.04806,7.63321,102.881562
6,2013,121609,3084,180,46706149.0,260.370428,6.602985,5.836576,0.007446,133.441471,121.075269,119.512195,130.748591,118.52504,117.304292
7,2014,104647,2592,166,47246426.0,221.491886,5.486129,6.404321,0.0068,-13.947981,-15.953307,-7.777778,-14.932011,-16.914406,-8.676833
8,2015,94433,1408,152,47787526.0,197.610146,2.946376,10.795455,0.006168,-9.760433,-45.679012,-8.433735,-10.782219,-46.294091,-9.295735
9,2016,99088,888,123,48331333.0,205.01814,1.837317,13.851351,0.004946,4.929421,-36.931818,-19.078947,3.748792,-37.641439,-19.807904


In [26]:
df_anual_col.to_csv('Data/JSON/dengue_anual_col.csv', encoding='utf-8-sig', index = False)

In [27]:
df_anual_col[df_anual_dpto['ANO'] == 2020]

Unnamed: 0,ANO,DENGUE,DENGUE GRAVE,MORTALIDAD POR DENGUE,POBLACION,INCIDENCIA DENGUE,INCIDENCIA DENGUE GRAVE,LETALIDAD,MORTALITY RATE,PCT CHANGE DENGUE,PCT CHANGE DENGUE GRAVE,PCT CHANGE MORTALIDAD POR DENGUE,PCT CHANGE INCIDENCIA DENGUE,PCT CHANGE INCIDENCIA DENGUE GRAVE,PCT CHANGE MORTALITY RATE
13,2020,42351,465,15,50492854.0,83.875235,0.920922,3.225806,0.000584,-63.271731,-62.348178,-78.26087,-63.661943,-62.748202,-78.425626


In [28]:
df_anual_dpto[df_anual_dpto['COD_DPTO'] == 5]

Unnamed: 0,ANO,COD_DPTO,DEPARTAMENTO,DENGUE,DENGUE GRAVE,MORTALIDAD POR DENGUE,POBLACION,INCIDENCIA DENGUE,INCIDENCIA DENGUE GRAVE,LETALIDAD,MORTALITY RATE,PCT CHANGE DENGUE,PCT CHANGE DENGUE GRAVE,PCT CHANGE MORTALIDAD POR DENGUE,PCT CHANGE INCIDENCIA DENGUE,PCT CHANGE INCIDENCIA DENGUE GRAVE,PCT CHANGE MORTALITY RATE
0,2007,5,ANTIOQUIA,49,18,0,5834865.0,0.839779,0.30849,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2008,5,ANTIOQUIA,2474,180,1,5911399.0,41.851345,3.044964,0.555556,0.043668,4948.979592,900.0,inf,4883.611207,887.053149,inf
2,2009,5,ANTIOQUIA,1297,108,1,5988552.0,21.65799,1.803441,0.925926,0.003336,-47.574778,-40.0,0.0,-48.250194,-40.773005,-92.361574
3,2010,5,ANTIOQUIA,25661,476,17,6066003.0,423.029794,7.847012,3.571429,0.211601,1878.48882,340.740741,1600.0,1853.227386,335.113343,6243.788897
4,2011,5,ANTIOQUIA,1921,59,0,6143809.0,31.267248,0.960316,0.0,0.0,-92.513932,-87.605042,-100.0,-92.608736,-87.762013,-100.0
5,2012,5,ANTIOQUIA,1978,85,1,6221817.0,31.791356,1.36616,1.176471,0.013156,2.967205,44.067797,inf,1.676221,42.261501,inf
6,2013,5,ANTIOQUIA,5631,124,4,6299990.0,89.381094,1.968257,3.225806,0.042048,184.681496,45.882353,300.0,181.149045,44.072182,219.604751
7,2014,5,ANTIOQUIA,6741,177,5,6378132.0,105.689252,2.775107,2.824859,0.010147,19.712307,42.741935,25.0,18.245646,40.993126,-75.86709
8,2015,5,ANTIOQUIA,6729,63,4,6456299.0,104.223798,0.975791,6.349206,0.000814,-0.178015,-64.40678,-20.0,-1.386569,-64.83771,-91.974624
9,2016,5,ANTIOQUIA,27275,110,14,6534857.0,417.377151,1.683281,12.727273,0.050543,305.335117,74.603175,250.0,300.462429,72.504203,6106.494819


In [29]:
df_anual[df_anual['COD_MUNICIPIO'] == 5001]

Unnamed: 0,ANO,COD_MUNICIPIO,MUNICIPIO,DEPARTAMENTO,DENGUE,DENGUE GRAVE,MORTALIDAD POR DENGUE,POBLACION,INCIDENCIA DENGUE,INCIDENCIA DENGUE GRAVE,LETALIDAD,MORTALITY RATE,PCT CHANGE DENGUE,PCT CHANGE DENGUE GRAVE,PCT CHANGE MORTALIDAD POR DENGUE,PCT CHANGE INCIDENCIA DENGUE,PCT CHANGE INCIDENCIA DENGUE GRAVE,PCT CHANGE MORTALITY RATE
0,2007,5001,MEDELLIN,ANTIOQUIA,4,1,0,2265244.0,0.176581,0.044145,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2008,5001,MEDELLIN,ANTIOQUIA,888,20,0,2291378.0,38.753973,0.872837,0.0,0.0,22100.0,1900.0,0.0,21846.800921,1877.189272,0.0
2,2009,5001,MEDELLIN,ANTIOQUIA,412,1,0,2317336.0,17.779036,0.043153,0.0,0.0,-53.603604,-95.0,0.0,-54.12332,-95.056008,0.0
3,2010,5001,MEDELLIN,ANTIOQUIA,15458,112,13,2343049.0,659.738657,4.780096,11.607143,0.000555,3651.941748,11100.0,inf,3610.76733,10977.089382,inf
4,2011,5001,MEDELLIN,ANTIOQUIA,781,21,0,2368282.0,32.977492,0.886719,0.0,0.0,-94.9476,-81.25,-100.0,-95.001431,-81.449773,-100.0
5,2012,5001,MEDELLIN,ANTIOQUIA,701,12,0,2393011.0,29.293639,0.50146,0.0,0.0,-10.243278,-42.857143,0.0,-11.17081,-43.447648,0.0
6,2013,5001,MEDELLIN,ANTIOQUIA,2297,45,0,2417325.0,95.02239,1.861562,0.0,0.0,227.67475,275.0,0.0,224.378924,271.228165,0.0
7,2014,5001,MEDELLIN,ANTIOQUIA,3223,50,0,2441123.0,132.029398,2.048238,0.0,0.0,40.313452,11.111111,0.0,38.945566,10.027912,0.0
8,2015,5001,MEDELLIN,ANTIOQUIA,3748,37,3,2464322.0,152.090514,1.501427,8.108108,0.000122,16.289172,-26.0,inf,15.194431,-26.696632,inf
9,2016,5001,MEDELLIN,ANTIOQUIA,17242,64,7,2486723.0,693.362309,2.573668,10.9375,0.000281,360.032017,72.972973,133.333333,355.887938,71.414791,131.23141
