In [1]:
import pandas as pd
import numpy as np
import glob
from tqdm import tqdm, trange

In [2]:
# Diagnosis catalogue: one row per diagnosis code with an upper-cased description.
# Later cells left-merge it on DIAGNOSIS_ID to add a DISEASE column. The frame
# keeps the `CO_IPRESS_df` name because those cells refer to it by that name.
#
# An Excel catalogue ('src/CIE10-VOLUMEN-2018-EXCEL.xlsx') used to be loaded here
# first, but its result was overwritten unconditionally by the CSV load below,
# so that dead (and slow) read has been removed.
CO_IPRESS_df = pd.read_csv('src/diagnosis.csv', encoding='utf_8')
# `.str.upper()` upper-cases exactly like the previous per-row lambda, but a
# missing description stays NaN instead of raising AttributeError.
CO_IPRESS_df['DISEASE'] = CO_IPRESS_df['ShortDescription'].str.upper()
CO_IPRESS_df = CO_IPRESS_df[['CodeWithSeparator', 'DISEASE']]
CO_IPRESS_df = CO_IPRESS_df.rename(columns={'CodeWithSeparator': 'DIAGNOSIS_ID'})

In [3]:
# Lookup dictionary read by to_CO_IPRESS: each 'CodeWithSeparator' value maps to
# the list of the remaining column values of its catalogue row.
#
# An Excel-based dictionary used to be built here first, but it was overwritten
# unconditionally by the CSV-based one below, so that dead load has been removed.
CO_IPRESS_dict = pd.read_csv('src/diagnosis.csv', encoding='utf_8')
# Missing descriptions stay NaN here instead of raising AttributeError.
CO_IPRESS_dict['DESCRIPCION'] = CO_IPRESS_dict['ShortDescription'].str.upper()
# set_index + transpose + to_dict('list') yields {code: [values in column order]}.
CO_IPRESS_dict = CO_IPRESS_dict.set_index('CodeWithSeparator').T.to_dict('list')

In [4]:
def to_CO_IPRESS(val):
    """Return the value stored at position 6 for ``val`` in ``CO_IPRESS_dict``.

    Parameters
    ----------
    val : hashable
        Key to look up in the module-level ``CO_IPRESS_dict``.

    Returns
    -------
    object
        Element 6 of the list stored under ``val``, or the string
        ``'NO IDENTIFICADO'`` when the key is absent, the stored list is
        shorter than seven elements, or ``val`` cannot be used as a key.
    """
    try:
        # NOTE(review): index 6 is presumably the description column of the
        # catalogue row -- confirm against the column order of the source file.
        return CO_IPRESS_dict[val][6]
    except (KeyError, IndexError, TypeError):
        # Only genuine lookup failures fall back to the placeholder. Unrelated
        # problems (an undefined CO_IPRESS_dict, KeyboardInterrupt, ...) are no
        # longer swallowed the way the previous bare ``except`` did.
        return 'NO IDENTIFICADO'

def to_number(val):
    """Convert ``val`` to ``int``, falling back to 0 when it is not convertible.

    Parameters
    ----------
    val : object
        Value passed to ``int()``; typically a string read from a CSV cell.

    Returns
    -------
    int
        ``int(val)``, or 0 when ``int()`` rejects the value (``None``, NaN,
        infinity, empty or non-integer text such as ``'12.5'``).
    """
    try:
        return int(val)
    except (TypeError, ValueError, OverflowError):
        # TypeError: None / unsupported type; ValueError: bad text or NaN;
        # OverflowError: +/- infinity. Anything else is a real bug and is no
        # longer hidden by a bare ``except``.
        return 0


def to_correct_district(val):
    """Replace the mis-decoded district name with ``'BREÑA'``.

    Any value other than the exact garbled spelling is returned unchanged.
    """
    # The garbled literal is 'BREÑA' with its 'Ñ' shown as two characters.
    return 'BREÑA' if val == 'BREÃ\x91A' else val


### Reading resources

In [5]:
# Morbilidad B2: read every file under src/morbilidad_b2/, normalise the column
# names, attach the DISEASE description and pickle the combined frame.
files = glob.glob("src/morbilidad_b2/*")
if not files:
    # Fail early with a clear message instead of a KeyError further down.
    raise FileNotFoundError("No input files found in src/morbilidad_b2/")

types = {'ANHO': int,
         'MES': int,
         'UBIGEO': int,
         'DEPARTAMENTO': str,
         'PROVINCIA': str,
         'DISTRITO': str,
         'SECTOR': str,
         'CATEGORIA': str,
         'CO_IPRESS': str,
         'RAZON_SOC': str,
         'SEXO': str,
         'EDAD': str,
         'DIAGNOSTICO': str,
         'NU_TOTAL_ATENDIDOS': str,}

# Collect the per-file frames and concatenate once: concatenating inside the
# loop re-copies the whole accumulated frame on every iteration.
frames = []
for p in tqdm(files):
    try:
        data_temp = pd.read_csv(p, encoding='utf_8', dtype=types)
    except UnicodeDecodeError:
        # Files that are not valid UTF-8 are retried as Latin-1. Any other read
        # error (bad dtype, malformed CSV) now surfaces instead of being retried.
        data_temp = pd.read_csv(p, encoding='latin', dtype=types)
    frames.append(data_temp)

object_df = pd.concat(frames)

# Source (Spanish) column names -> names used by the rest of the notebook.
# In this dataset the DIAGNOSTICO column is used as the diagnosis code.
object_df = object_df.rename(columns={'ANHO': 'YEAR',
                                      'MES': 'MONTH',
                                      'UBIGEO': 'UBIGEO',
                                      'DEPARTAMENTO': 'STATE',
                                      'PROVINCIA': 'PROVINCE',
                                      'DISTRITO': 'DISTRICT',
                                      'SECTOR': 'SECTOR',
                                      'CATEGORIA': 'CATEGORY',
                                      'CO_IPRESS': 'CO_IPRESS',
                                      'RAZON_SOC': 'NAME',
                                      'SEXO': 'SEX',
                                      'EDAD': 'AGE',
                                      'DIAGNOSTICO': 'DIAGNOSIS_ID',
                                      'NU_TOTAL_ATENDIDOS': 'QTY_PEOPLE_SERVED'})

# Counts arrive as text; to_number turns unparseable values into 0.
object_df['QTY_PEOPLE_SERVED'] = object_df['QTY_PEOPLE_SERVED'].apply(to_number)
object_df['DISTRICT'] = object_df['DISTRICT'].apply(to_correct_district)
# Left merge keeps every record; codes absent from the catalogue get NaN DISEASE.
object_df = pd.merge(object_df, CO_IPRESS_df, how='left', on='DIAGNOSIS_ID')
# Build a 'YEAR-MONTH' string and parse it into a timestamp.
object_df['DATE'] = object_df['YEAR'].astype(str) + '-' + object_df['MONTH'].astype(str)
object_df['DATE'] = pd.to_datetime(object_df['DATE'])
print('Guardando archivo...')
# NOTE(review): the output is a pickle despite the '.csv' suffix; the path is
# left unchanged so that existing readers of this file keep working.
object_df.to_pickle('data/morbilidad_b2.csv')

100%|██████████| 45/45 [05:38<00:00,  7.52s/it]


Guardando archivo...


In [6]:
# Morbilidad C2: read every file under src/morbilidad_c2/, normalise the column
# names, attach the DISEASE description and pickle the combined frame.
files = glob.glob("src/morbilidad_c2/*")
if not files:
    # Fail early with a clear message instead of a KeyError further down.
    raise FileNotFoundError("No input files found in src/morbilidad_c2/")

types = {'ANHO': int,
         'MES': int,
         'UBIGEO': int,
         'DEPARTAMENTO': str,
         'PROVINCIA': str,
         'DISTRITO': str,
         'SECTOR': str,
         'CATEGORIA': str,
         'CO_IPRESS': str,
         'RAZON_SOC': str,
         'SEXO': str,
         'EDAD': str,
         'ID_DIGNOSTICO': str,
         'DIAGNOSTICO': str,
         'NU_TOTAL_ATENDIDOS': str,}

# Collect the per-file frames and concatenate once: concatenating inside the
# loop re-copies the whole accumulated frame on every iteration.
frames = []
for p in tqdm(files):
    try:
        data_temp = pd.read_csv(p, encoding='utf_8', dtype=types)
    except UnicodeDecodeError:
        # Files that are not valid UTF-8 are retried as Latin-1. Any other read
        # error (bad dtype, malformed CSV) now surfaces instead of being retried.
        data_temp = pd.read_csv(p, encoding='latin', dtype=types)
    frames.append(data_temp)

object_df = pd.concat(frames)

# Source (Spanish) column names -> names used by the rest of the notebook.
# 'ID_DIGNOSTICO' (sic) is the spelling used for the diagnosis-code column.
object_df = object_df.rename(columns={'ANHO': 'YEAR',
                                      'MES': 'MONTH',
                                      'UBIGEO': 'UBIGEO',
                                      'DEPARTAMENTO': 'STATE',
                                      'PROVINCIA': 'PROVINCE',
                                      'DISTRITO': 'DISTRICT',
                                      'SECTOR': 'SECTOR',
                                      'CATEGORIA': 'CATEGORY',
                                      'CO_IPRESS': 'CO_IPRESS',
                                      'RAZON_SOC': 'NAME',
                                      'SEXO': 'SEX',
                                      'EDAD': 'AGE',
                                      'ID_DIGNOSTICO': 'DIAGNOSIS_ID',
                                      'DIAGNOSTICO': 'DIAGNOSIS',
                                      'NU_TOTAL_ATENDIDOS': 'QTY_PEOPLE_SERVED'})

# Counts arrive as text; to_number turns unparseable values into 0.
object_df['QTY_PEOPLE_SERVED'] = object_df['QTY_PEOPLE_SERVED'].apply(to_number)
object_df['DISTRICT'] = object_df['DISTRICT'].apply(to_correct_district)
# Left merge keeps every record; codes absent from the catalogue get NaN DISEASE.
object_df = pd.merge(object_df, CO_IPRESS_df, how='left', on='DIAGNOSIS_ID')
# Build a 'YEAR-MONTH' string and parse it into a timestamp.
object_df['DATE'] = object_df['YEAR'].astype(str) + '-' + object_df['MONTH'].astype(str)
object_df['DATE'] = pd.to_datetime(object_df['DATE'])
print('Guardando archivo...')

# NOTE(review): the output is a pickle despite the '.csv' suffix; the path is
# left unchanged so that existing readers of this file keep working.
object_df.to_pickle('data/morbilidad_c2.csv')

100%|██████████| 49/49 [02:40<00:00,  3.28s/it]


Guardando archivo...


In [7]:
# Morbilidad D2: read every file under src/morbilidad_d2/, normalise the column
# names, attach the DISEASE description and pickle the combined frame.
files = glob.glob("src/morbilidad_d2/*")
if not files:
    # Fail early with a clear message instead of a KeyError further down.
    raise FileNotFoundError("No input files found in src/morbilidad_d2/")

# The dtype map originally listed 'DIGNOSTICO' while the rename map listed
# 'DIAGNOSTICO', so one of the two never matched the files. Both spellings are
# declared here; read_csv ignores dtype keys for columns that are not present.
types = {'ANHO': int,
         'MES': int,
         'UBIGEO': int,
         'DEPARTAMENTO': str,
         'PROVINCIA': str,
         'DISTRITO': str,
         'SECTOR': str,
         'CATEGORIA': str,
         'CO_IPRESS': str,
         'RAZON_SOC': str,
         'SEXO': str,
         'EDAD': str,
         'ID_DIGNOSTICO': str,
         'DIGNOSTICO': str,
         'DIAGNOSTICO': str,
         'NU_TOTAL_ATENDIDOS': str,}

# Collect the per-file frames and concatenate once: concatenating inside the
# loop re-copies the whole accumulated frame on every iteration.
frames = []
for p in tqdm(files):
    try:
        data_temp = pd.read_csv(p, encoding='utf_8', dtype=types)
    except UnicodeDecodeError:
        # Files that are not valid UTF-8 are retried as Latin-1. Any other read
        # error (bad dtype, malformed CSV) now surfaces instead of being retried.
        data_temp = pd.read_csv(p, encoding='latin', dtype=types)
    # Normalise the misspelt header per file so the rename below always finds
    # the description column under a single name.
    # NOTE(review): confirm which spelling the D2 files actually use.
    frames.append(data_temp.rename(columns={'DIGNOSTICO': 'DIAGNOSTICO'}))

object_df = pd.concat(frames)

# Source (Spanish) column names -> names used by the rest of the notebook.
object_df = object_df.rename(columns={'ANHO': 'YEAR',
                                      'MES': 'MONTH',
                                      'UBIGEO': 'UBIGEO',
                                      'DEPARTAMENTO': 'STATE',
                                      'PROVINCIA': 'PROVINCE',
                                      'DISTRITO': 'DISTRICT',
                                      'SECTOR': 'SECTOR',
                                      'CATEGORIA': 'CATEGORY',
                                      'CO_IPRESS': 'CO_IPRESS',
                                      'RAZON_SOC': 'NAME',
                                      'SEXO': 'SEX',
                                      'EDAD': 'AGE',
                                      'ID_DIGNOSTICO': 'DIAGNOSIS_ID',
                                      'DIAGNOSTICO': 'DIAGNOSIS',
                                      'NU_TOTAL_ATENDIDOS': 'QTY_PEOPLE_SERVED'})

# Counts arrive as text; to_number turns unparseable values into 0.
object_df['QTY_PEOPLE_SERVED'] = object_df['QTY_PEOPLE_SERVED'].apply(to_number)
object_df['DISTRICT'] = object_df['DISTRICT'].apply(to_correct_district)
# Left merge keeps every record; codes absent from the catalogue get NaN DISEASE.
object_df = pd.merge(object_df, CO_IPRESS_df, how='left', on='DIAGNOSIS_ID')
# Build a 'YEAR-MONTH' string and parse it into a timestamp.
object_df['DATE'] = object_df['YEAR'].astype(str) + '-' + object_df['MONTH'].astype(str)
object_df['DATE'] = pd.to_datetime(object_df['DATE'])
print('Guardando archivo...')

# NOTE(review): the output is a pickle despite the '.csv' suffix; the path is
# left unchanged so that existing readers of this file keep working.
object_df.to_pickle('data/morbilidad_d2.csv')

100%|██████████| 8/8 [00:21<00:00,  2.74s/it]


Guardando archivo...


In [8]:
# Procedimientos: read every file under src/procedimientos/, normalise the
# column names, add a DATE column and pickle the combined frame.
files = glob.glob("src/procedimientos/*")
if not files:
    # Fail early with a clear message instead of a KeyError further down.
    raise FileNotFoundError("No input files found in src/procedimientos/")

# NOTE(review): CO_IPRESS is read as int here but as str in the morbilidad
# cells; if the codes can carry leading zeros they are lost and will not match
# across datasets. Left as-is because downstream code may rely on the int dtype.
types = {'ANHO': int,
         'MES': int,
         'UBIGEO': int,
         'DEPARTAMENTO': str,
         'PROVINCIA': str,
         'DISTRITO': str,
         'SECTOR': str,
         'CATEGORIA': str,
         'CO_IPRESS': int,
         'RAZON_SOC': str,
         'ID_PROCEDIMIENTO': str,
         'PROCEDIMIENTO': str,
         'TOTAL': str,
         'ID_CODIGO': str,
         'DES_DESCRIPCION': str,}

# Collect the per-file frames and concatenate once: concatenating inside the
# loop re-copies the whole accumulated frame on every iteration.
frames = []
for p in tqdm(files):
    try:
        data_temp = pd.read_csv(p, encoding='utf_8', dtype=types)
    except UnicodeDecodeError:
        # Files that are not valid UTF-8 are retried as Latin-1. Any other read
        # error (bad dtype, malformed CSV) now surfaces instead of being retried.
        data_temp = pd.read_csv(p, encoding='latin', dtype=types)
    frames.append(data_temp)

# The per-file row index is kept (no ignore_index), matching the previous output.
object_df = pd.concat(frames)

# Source (Spanish) column names -> names used by the rest of the notebook.
object_df = object_df.rename(columns={'ANHO': 'YEAR',
                                      'MES': 'MONTH',
                                      'UBIGEO': 'UBIGEO',
                                      'DEPARTAMENTO': 'STATE',
                                      'PROVINCIA': 'PROVINCE',
                                      'DISTRITO': 'DISTRICT',
                                      'SECTOR': 'SECTOR',
                                      'CATEGORIA': 'CATEGORY',
                                      'CO_IPRESS': 'CO_IPRESS',
                                      'RAZON_SOC': 'NAME',
                                      'ID_PROCEDIMIENTO': 'PROCEDURE_ID',
                                      'PROCEDIMIENTO': 'PROCEDURE',
                                      'TOTAL': 'TOTAL',
                                      'ID_CODIGO': 'DESCRIPTION_ID',
                                      'DES_DESCRIPCION': 'DESCRIPTION'})

object_df['DISTRICT'] = object_df['DISTRICT'].apply(to_correct_district)
# Build a 'YEAR-MONTH' string and parse it into a timestamp.
object_df['DATE'] = object_df['YEAR'].astype(str) + '-' + object_df['MONTH'].astype(str)
object_df['DATE'] = pd.to_datetime(object_df['DATE'])
print('Guardando archivo...')

# NOTE(review): the output is a pickle despite the '.csv' suffix; the path is
# left unchanged so that existing readers of this file keep working.
object_df.to_pickle('data/procedimientos.csv')

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
100%|██████████| 12/12 [11:37<00:00, 58.11s/it] 


Guardando archivo...


In [9]:
# Modificar nombres de columns y fechas como DT