In [1]:
import pandas as pd
import numpy as np
import glob
from tqdm import tqdm, trange

In [2]:
def to_number(val):
    """Convert ``val`` to ``int``, falling back to ``0`` when conversion fails.

    Parameters
    ----------
    val : object
        Value handed to ``int()`` (e.g. a numeric string, a float, ``None``).

    Returns
    -------
    int
        ``int(val)`` when the conversion succeeds, otherwise ``0``.
    """
    try:
        return int(val)
    # Only conversion failures map to 0 (None, NaN, inf, non-numeric text).
    # A bare ``except`` would also swallow KeyboardInterrupt/SystemExit, which
    # makes a long-running ``Series.apply(to_number)`` impossible to interrupt.
    except (TypeError, ValueError, OverflowError):
        return 0

### Reading resources

In [3]:
# Morbilidad B2 (morbidity dataset "B2")
# Reads every file under src/morbilidad_b2/, stacks them into one frame,
# renames the columns to English, converts the served-people count to int,
# adds a DATE column built from YEAR and MONTH, and pickles the result.

# Sort the paths so the row order of the result does not depend on the
# (unspecified) order in which glob returns directory entries.
files = sorted(glob.glob("src/morbilidad_b2/*"))
if not files:
    # Fail with a clear message instead of a KeyError further down.
    raise FileNotFoundError("No input files found in src/morbilidad_b2/")

# Explicit dtypes passed to read_csv for the listed columns.
types = {'ANHO': int,
         'MES': int,
         'UBIGEO': int,
         'DEPARTAMENTO': str,
         'PROVINCIA': str,
         'DISTRITO': str,
         'SECTOR': str,
         'CATEGORIA': str,
         'CO_IPRESS': str,
         'RAZON_SOC': str,
         'SEXO': str,
         'EDAD': str,
         'DIAGNOSTICO': str,
         'NU_TOTAL_ATENDIDOS': str,}

# Read all files first and concatenate once. Calling pd.concat inside the loop
# copies every previously accumulated row again on each iteration (quadratic).
frames = [pd.read_csv(p, encoding='latin1', dtype=types) for p in tqdm(files)]
object_df = pd.concat(frames)

# Source column name -> English column name.
object_df = object_df.rename(columns={'ANHO': 'YEAR',
                                      'MES': 'MONTH',
                                      'UBIGEO': 'UBIGEO',
                                      'DEPARTAMENTO': 'STATE',
                                      'PROVINCIA': 'PROVINCE',
                                      'DISTRITO': 'DISTRICT',
                                      'SECTOR': 'SECTOR',
                                      'CATEGORIA': 'CATEGORY',
                                      'CO_IPRESS': 'CO_IPRESS',
                                      'RAZON_SOC': 'NAME',
                                      'SEXO': 'SEX',
                                      'EDAD': 'AGE',
                                      'DIAGNOSTICO': 'DIAGNOSIS',
                                      'NU_TOTAL_ATENDIDOS': 'QTY_PEOPLE_SERVED'})

# The count is read as text; values that int() rejects become 0 (see to_number).
object_df['QTY_PEOPLE_SERVED'] = object_df['QTY_PEOPLE_SERVED'].apply(to_number)

# Build "YEAR-MONTH" strings and parse them into datetimes.
object_df['DATE'] = pd.to_datetime(
    object_df['YEAR'].astype(str) + '-' + object_df['MONTH'].astype(str))

# NOTE(review): the file is written in pickle format despite the ".csv"
# extension, so it must be read back with pd.read_pickle. The path is kept
# unchanged because readers of this file are not visible from here.
object_df.to_pickle('data/morbilidad_b2.csv')

100%|██████████| 45/45 [05:10<00:00,  6.89s/it]


In [4]:
# Morbilidad C2 (morbidity dataset "C2")
# Reads every file under src/morbilidad_c2/, stacks them into one frame,
# renames the columns to English, converts the served-people count to int,
# adds a DATE column built from YEAR and MONTH, and pickles the result.

# Sort the paths so the row order of the result does not depend on the
# (unspecified) order in which glob returns directory entries.
files = sorted(glob.glob("src/morbilidad_c2/*"))
if not files:
    # Fail with a clear message instead of a KeyError further down.
    raise FileNotFoundError("No input files found in src/morbilidad_c2/")

# Explicit dtypes passed to read_csv for the listed columns.
types = {'ANHO': int,
         'MES': int,
         'UBIGEO': int,
         'DEPARTAMENTO': str,
         'PROVINCIA': str,
         'DISTRITO': str,
         'SECTOR': str,
         'CATEGORIA': str,
         'CO_IPRESS': str,
         'RAZON_SOC': str,
         'SEXO': str,
         'EDAD': str,
         'ID_DIGNOSTICO': str,
         'DIAGNOSTICO': str,
         'NU_TOTAL_ATENDIDOS': str,}

# Read all files first and concatenate once. Calling pd.concat inside the loop
# copies every previously accumulated row again on each iteration (quadratic).
frames = [pd.read_csv(p, encoding='latin1', dtype=types) for p in tqdm(files)]
object_df = pd.concat(frames)

# Source column name -> English column name.
object_df = object_df.rename(columns={'ANHO': 'YEAR',
                                      'MES': 'MONTH',
                                      'UBIGEO': 'UBIGEO',
                                      'DEPARTAMENTO': 'STATE',
                                      'PROVINCIA': 'PROVINCE',
                                      'DISTRITO': 'DISTRICT',
                                      'SECTOR': 'SECTOR',
                                      'CATEGORIA': 'CATEGORY',
                                      'CO_IPRESS': 'CO_IPRESS',
                                      'RAZON_SOC': 'NAME',
                                      'SEXO': 'SEX',
                                      'EDAD': 'AGE',
                                      'ID_DIGNOSTICO': 'DIAGNOSIS_ID',
                                      'DIAGNOSTICO': 'DIAGNOSIS',
                                      'NU_TOTAL_ATENDIDOS': 'QTY_PEOPLE_SERVED'})

# The count is read as text; values that int() rejects become 0 (see to_number).
object_df['QTY_PEOPLE_SERVED'] = object_df['QTY_PEOPLE_SERVED'].apply(to_number)

# Build "YEAR-MONTH" strings and parse them into datetimes.
object_df['DATE'] = pd.to_datetime(
    object_df['YEAR'].astype(str) + '-' + object_df['MONTH'].astype(str))

# NOTE(review): the file is written in pickle format despite the ".csv"
# extension, so it must be read back with pd.read_pickle. The path is kept
# unchanged because readers of this file are not visible from here.
object_df.to_pickle('data/morbilidad_c2.csv')

100%|██████████| 49/49 [01:29<00:00,  1.83s/it]


In [5]:
# Morbilidad D2 (morbidity dataset "D2")
# Reads every file under src/morbilidad_d2/, renames the columns to English,
# stacks the files into one frame, converts the served-people count to int,
# adds a DATE column built from YEAR and MONTH, and pickles the result.

# Sort the paths so the row order of the result does not depend on the
# (unspecified) order in which glob returns directory entries.
files = sorted(glob.glob("src/morbilidad_d2/*"))
if not files:
    # Fail with a clear message instead of a KeyError further down.
    raise FileNotFoundError("No input files found in src/morbilidad_d2/")

# Explicit dtypes passed to read_csv for the listed columns.
# The diagnosis column was spelled 'DIGNOSTICO' here but 'DIAGNOSTICO' in the
# rename below, so one of the two was silently skipped. The actual header
# spelling is not visible from this notebook, so both spellings are listed.
types = {'ANHO': int,
         'MES': int,
         'UBIGEO': int,
         'DEPARTAMENTO': str,
         'PROVINCIA': str,
         'DISTRITO': str,
         'SECTOR': str,
         'CATEGORIA': str,
         'CO_IPRESS': str,
         'RAZON_SOC': str,
         'SEXO': str,
         'EDAD': str,
         'ID_DIGNOSTICO': str,
         'DIGNOSTICO': str,
         'DIAGNOSTICO': str,
         'NU_TOTAL_ATENDIDOS': str,}

# Source column name -> English column name (both diagnosis spellings map to
# the same target so the output schema matches the other morbidity cells).
column_names = {'ANHO': 'YEAR',
                'MES': 'MONTH',
                'UBIGEO': 'UBIGEO',
                'DEPARTAMENTO': 'STATE',
                'PROVINCIA': 'PROVINCE',
                'DISTRITO': 'DISTRICT',
                'SECTOR': 'SECTOR',
                'CATEGORIA': 'CATEGORY',
                'CO_IPRESS': 'CO_IPRESS',
                'RAZON_SOC': 'NAME',
                'SEXO': 'SEX',
                'EDAD': 'AGE',
                'ID_DIGNOSTICO': 'DIAGNOSIS_ID',
                'DIGNOSTICO': 'DIAGNOSIS',
                'DIAGNOSTICO': 'DIAGNOSIS',
                'NU_TOTAL_ATENDIDOS': 'QTY_PEOPLE_SERVED'}

# Read all files first and concatenate once. Calling pd.concat inside the loop
# copies every previously accumulated row again on each iteration (quadratic).
# Renaming per file (before concatenating) keeps a single DIAGNOSIS column even
# if the header spelling were to differ between files.
frames = [pd.read_csv(p, encoding='latin1', dtype=types).rename(columns=column_names)
          for p in tqdm(files)]
object_df = pd.concat(frames)

# The count is read as text; values that int() rejects become 0 (see to_number).
object_df['QTY_PEOPLE_SERVED'] = object_df['QTY_PEOPLE_SERVED'].apply(to_number)

# Build "YEAR-MONTH" strings and parse them into datetimes.
object_df['DATE'] = pd.to_datetime(
    object_df['YEAR'].astype(str) + '-' + object_df['MONTH'].astype(str))

# NOTE(review): the file is written in pickle format despite the ".csv"
# extension, so it must be read back with pd.read_pickle. The path is kept
# unchanged because readers of this file are not visible from here.
object_df.to_pickle('data/morbilidad_d2.csv')

100%|██████████| 8/8 [00:18<00:00,  2.37s/it]


In [6]:
# Procedimientos (procedures dataset)
# Reads every file under src/procedimientos/, stacks them into one frame,
# renames the columns to English, adds a DATE column built from YEAR and
# MONTH, and pickles the result.

# Sort the paths so the row order of the result does not depend on the
# (unspecified) order in which glob returns directory entries.
files = sorted(glob.glob("src/procedimientos/*"))
if not files:
    # Fail with a clear message instead of a KeyError further down.
    raise FileNotFoundError("No input files found in src/procedimientos/")

# Explicit dtypes passed to read_csv for the listed columns.
# NOTE(review): CO_IPRESS is read as int here but as str in the morbidity
# cells, and TOTAL stays a string (no numeric conversion is applied below).
# Both are kept as they were; confirm whether that is intended.
types = {'ANHO': int,
         'MES': int,
         'UBIGEO': int,
         'DEPARTAMENTO': str,
         'PROVINCIA': str,
         'DISTRITO': str,
         'SECTOR': str,
         'CATEGORIA': str,
         'CO_IPRESS': int,
         'RAZON_SOC': str,
         'ID_PROCEDIMIENTO': str,
         'PROCEDIMIENTO': str,
         'TOTAL': str,
         'ID_CODIGO': str,
         'DES_DESCRIPCION': str,}

# Read all files first and concatenate once. Calling pd.concat inside the loop
# copies every previously accumulated row again on each iteration (quadratic).
frames = [pd.read_csv(p, encoding='latin1', dtype=types) for p in tqdm(files)]
object_df = pd.concat(frames)

# Source column name -> English column name.
object_df = object_df.rename(columns={'ANHO': 'YEAR',
                                      'MES': 'MONTH',
                                      'UBIGEO': 'UBIGEO',
                                      'DEPARTAMENTO': 'STATE',
                                      'PROVINCIA': 'PROVINCE',
                                      'DISTRITO': 'DISTRICT',
                                      'SECTOR': 'SECTOR',
                                      'CATEGORIA': 'CATEGORY',
                                      'CO_IPRESS': 'CO_IPRESS',
                                      'RAZON_SOC': 'NAME',
                                      'ID_PROCEDIMIENTO': 'PROCEDURE_ID',
                                      'PROCEDIMIENTO': 'PROCEDURE',
                                      'TOTAL': 'TOTAL',
                                      'ID_CODIGO': 'DESCRIPTION_ID',
                                      'DES_DESCRIPCION': 'DESCRIPTION'})

# Build "YEAR-MONTH" strings and parse them into datetimes.
object_df['DATE'] = pd.to_datetime(
    object_df['YEAR'].astype(str) + '-' + object_df['MONTH'].astype(str))

# NOTE(review): the file is written in pickle format despite the ".csv"
# extension, so it must be read back with pd.read_pickle. The path is kept
# unchanged because readers of this file are not visible from here.
object_df.to_pickle('data/procedimientos.csv')

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
100%|██████████| 12/12 [10:34<00:00, 52.88s/it] 


In [7]:
# Modificar nombres de columns y fechas como DT