In [3]:
import os
import sys
sys.path.append("..")
sys.path.append(os.path.join("..", "..", "linkage-saude"))

In [4]:
import numpy as np
import pandas as pd
import datetime as dt
from simpledbf import Dbf5

PyTables is not installed. No support for HDF output.


In [5]:
from linkage_saude.matching import PLinkage

In [6]:
from injectsus import WarehouseSUS
from injectsus.process_layer import ProcessBase, ProcessSivep

## Inject data

In [7]:
# Resolve the local data directory. HOMEPATH is only defined on Windows, so fall
# back to the user's home directory instead of raising KeyError when it is unset.
basepath = os.path.join(os.environ.get("HOMEPATH", os.path.expanduser("~")), "Documents", "data")
siveppath = os.path.join(basepath, "SIVEP-GRIPE", "MILLENA_14JUN2023")
fname = "SRAGHOSPITALIZADO1930520_00.dbf"

# Read the DBF file (latin codec) into a DataFrame.
sivep_df = Dbf5(os.path.join(siveppath, fname), codec="latin").to_dataframe()
sample_df = sivep_df.copy()
#sample_df = sivep_df.sample(n=500, random_state=1)

# Work on a copy and parse the dd/mm/YYYY date strings; values that do not
# match the format are coerced to NaT rather than raising.
sample_df1 = sample_df.copy()
for date_col in ("DT_NASC", "DT_NOTIFIC"):
    sample_df1[date_col] = pd.to_datetime(sample_df1[date_col], format="%d/%m/%Y", errors="coerce")
print(sample_df1.shape)
sample_df1.sample(n=4)

(5305, 221)


Unnamed: 0,DT_RES_AN,RES_AN,LAB_AN,CO_LAB_AN,POS_AN_FLU,TP_FLU_AN,POS_AN_OUT,AN_SARS2,AN_VSR,AN_PARA1,...,PAC_DSCBO,OUT_ANIM,DOR_ABD,FADIGA,PERD_OLFT,PERD_PALA,TOMO_RES,TOMO_OUT,DT_TOMO,TP_TES_AN
407,,5.0,,,,,,,,,...,,,,1.0,,,0.0,,,0.0
2940,,,,,,,,,,,...,,,2.0,2.0,2.0,2.0,0.0,,,0.0
3221,,5.0,,,,,,,,,...,TRABALHADOR VOLANTE DA AGRICULTURA,,,,,,0.0,,,0.0
1677,,,,,,,,,,,...,,,,,,,0.0,,,0.0


In [8]:
# -- Paths
# HOMEPATH is only defined on Windows; fall back to the user's home directory
# so this cell does not raise KeyError when the variable is unset.
basepath = os.path.join(os.environ.get("HOMEPATH", os.path.expanduser("~")), "Documents", "data")
suspath = os.path.join(basepath, "DATASUS_WAREHOUSE", "datasus_pessoas.db")
engine_url = f"sqlite:///{suspath}"

# Notebook-level warehouse handle; later cells (and search_person) use `warehouse`.
warehouse = WarehouseSUS(engine_url)
engine = warehouse.db_init()

# Uncomment if you want to reset table
# NOTE(review): the auth key below is embedded in the notebook — consider reading it from the environment.
#warehouse.delete_table('sivep_gripe', is_sure=True, authkey="###!Y!.")
#warehouse.db_init()

# Uncomment to (re)insert the parsed SIVEP records in batches.
#warehouse.insert('sivep_gripe', sample_df1, batchsize=50, verbose=True)

In [1]:
#warehouse.query_where('sivep_gripe', value="2526638", colname="CNES")

## Create API functions

## Searching

In [42]:
def app(table_name):
    '''
    Load every row of ``table_name`` from the local warehouse and standardize it.

    Opens the SQLite database ``<home>/Documents/data/DATASUS_WAREHOUSE/datasus_pessoas.db``,
    initializes it with ``db_init()``, reads the whole table through ``query_all`` and
    runs the ``ProcessSivep`` basic and specific standardization steps on the result.

    Args:
        table_name: Name of the warehouse table to read.

    Returns:
        pandas.DataFrame: A copy of the processed data held by ``ProcessSivep``
        (records are keyed by ``ID_SIVEP``).
    '''
    # HOMEPATH is only defined on Windows; fall back to the user's home directory
    # instead of raising KeyError when it is unset.
    home_dir = os.environ.get("HOMEPATH", os.path.expanduser("~"))
    dbpath = os.path.join(home_dir, "Documents", "data", "DATASUS_WAREHOUSE", "datasus_pessoas.db")
    dbsus = WarehouseSUS(f"sqlite:///{dbpath}")
    dbsus.db_init()  # the returned engine is not needed here

    qdata = pd.DataFrame(dbsus.query_all(table_name))
    objdata = ProcessSivep(qdata, field_id="ID_SIVEP")
    objdata.basic_standardize().specific_standardize()

    return objdata.data.copy()

def search_person(record, table_name, not_period, field_id="ID"):
    '''
    Search the warehouse table for records that may match the given person(s).

    The input is standardized with ``ProcessBase``, linked against the processed
    contents of ``table_name`` (see ``app``) with ``PLinkage``, and every candidate
    whose summed comparison score is positive is fetched back from the warehouse.

    Relies on the notebook-level ``warehouse`` object for the final row lookup.

    Args:
        record: Data accepted by ``pandas.DataFrame(...)``; must provide the columns
            ``NOME_PACIENTE``, ``DATA_NASCIMENTO`` and ``NOME_MAE`` plus ``field_id``.
        table_name: Warehouse table to search in.
        not_period: Currently unused; kept for interface compatibility.
        field_id: Name of the identifier column in ``record``.

    Returns:
        pandas.DataFrame: The matching warehouse rows without the ``CRIADO_EM`` and
        ``ATUALIZADO_EM`` columns; an empty frame when nothing matched.

    Raises:
        ValueError: If any of the required columns is missing from ``record``.
    '''
    record_df = pd.DataFrame(record).reset_index()
    base_fields = ["NOME_PACIENTE", "DATA_NASCIMENTO", "NOME_MAE"]
    missing = [name for name in base_fields if name not in record_df.columns]
    if missing:
        # ValueError is still an Exception, so existing `except Exception` callers keep working.
        raise ValueError(f"record is missing required field(s): {missing}")

    objdata = ProcessBase(record_df, field_id=field_id)
    objdata.basic_standardize().specific_standardize()
    record_df = objdata.data.copy()

    searchbase = app(table_name)

    linkage = PLinkage(left_df=record_df, right_df=searchbase, left_id=field_id, right_id='ID_SIVEP', env_folder=None)

    # Exact comparison on the birth-date parts, string similarity on the name parts.
    map_compare = {
        "nascimento_dia": ["exact"], "nascimento_mes": ["exact"], "nascimento_ano": ["exact"],
        "primeiro_nome_mae": ["string", None], "complemento_nome_mae": ["string", None],
        "primeiro_nome": ["string", None], "complemento_nome": ["string", None],
    }
    linkage.set_linkage(map_compare, string_method="damerau_levenshtein")
    linkage.perform_linkage("FONETICA_N", window=3, threshold=0.75)

    # Row-wise total of all comparison scores; any positive total keeps the pair.
    comp_matrix = linkage.comparison_matrix.copy()
    comp_matrix["SOMA"] = comp_matrix.apply(sum, axis=1)
    candidates = comp_matrix[comp_matrix["SOMA"] > 0].reset_index()

    # Fetch the stored rows for every candidate from the same table that was searched.
    collect = []
    for cur_id in candidates["ID_SIVEP"]:
        collect += warehouse.query_where(table_name, value=cur_id, colname='ID_SIVEP')

    # errors="ignore": with no candidates the frame is empty and has no columns to drop.
    return pd.DataFrame(collect).drop(columns=["CRIADO_EM", "ATUALIZADO_EM"], errors="ignore")


In [43]:
data = app('sivep_gripe')

In [44]:
# Two example people to look up; every key holds one value per person, in the same order.
record = {
    "ID": ["HGR1", "FRC2"],
    "NOME_PACIENTE": [
        "HIGOR DA SILVA MONTEIRO",
        "FRANCISCA SOARES BARBOSA",
    ],
    "DATA_NASCIMENTO": [
        dt.datetime(1996, 7, 26),
        dt.datetime(1931, 10, 7),
    ],
    "NOME_MAE": [
        "DANIELA DA SILVA CAXILE",
        "MARIA RDIGUES SOARES",
    ],
}

# Search the SIVEP table for candidates matching the example people.
out = search_person(record, table_name='sivep_gripe', not_period=None, field_id='ID')

Number of pairs: 6


In [45]:
out

Unnamed: 0,ID_SIVEP,DATA_NOTIFICACAO,NOME_PACIENTE,DATA_NASCIMENTO,SEXO,NOME_MAE,LOGRADOURO,LOGRADOURO_NUMERO,BAIRRO_RESIDENCIA,MUNICIPIO_RESIDENCIA,CEP,CNS,CPF,CNES
0,31682003799083,2023-04-19,FRANCISCA DE PONTES BARRETO,1930-08-25,F,MARIA DE PONTES BARRETO,BEIRA MAR,1020,PRAIA DE IRACEMA,230440,60000000,,1819283372,3047091
1,31684265426043,2023-05-16,FRANCISCA SOARES BARBOSA,1931-10-08,F,MARIA RODRIGUES SOARES,FELINO BARROSO,281,FATIMA,230440,60050130,,32413670378,3242587
2,31685731946870,2023-05-09,FRANCISCA SOARES BARBOSA,1931-10-08,F,MARIA RODRIGUES SOARES,FELINO BARROSO,281,FATIMA,230440,60050130,,32413670378,3242587
3,31684946006940,2023-05-24,FRANCISCA DJANICE OLIVEIRA ARAUJO,1973-05-25,F,TEREZA ALVES ARAUJO,CLAUDIO CAMELO TIMBO 1130,160,CENTRO,230520,62270970,,72875356372,3242587


In [30]:
import duckdb

In [30]:
warehouse.query_where('sivep_gripe', value='31677160640902', colname="ID_SIVEP")

[('31677160640902', datetime.datetime(2023, 2, 23, 0, 0), 'HILDA HOLANDA DOS SANTOS', datetime.datetime(1924, 6, 12, 0, 0), 'F', 'MARIA NAZARETH DE HOLANDA', 'SANTA LIGIA', '221', 'PARANGABA', '230440', '60720720', None, '15516121353', '3242587', datetime.datetime(2023, 8, 2, 14, 52, 15, 279306), datetime.datetime(2023, 8, 2, 14, 52, 15, 279306))]

In [7]:
# Query notifications dated from 2023-04-01 onwards (open-ended upper bound).
# `dbsus` only exists as a local inside app(); use the notebook-level `warehouse`
# handle so this cell runs on a fresh kernel.
qres = warehouse.query('sivep_gripe', date_col="DATA_NOTIFICACAO", period=[dt.datetime(2023, 4, 1), None])

In [8]:
qdata = pd.DataFrame(qres)
qdata

Unnamed: 0,ID_SIVEP,DATA_NOTIFICACAO,NOME_PACIENTE,DATA_NASCIMENTO,SEXO,NOME_MAE,MUNICIPIO_RESIDENCIA,BAIRRO_RESIDENCIA,LOGRADOURO,LOGRADOURO_NUMERO,CEP,CNS,CPF,CRIADO_EM,ATUALIZADO_EM
0,31680347753465,2023-04-01,FRANCISCO DHAVY ARANDA REIS,2023-02-23,M,NAYADY ARANDA ROCHA,230440,GENIBAU,RUA DAS PEDRINHAS,633,,,12727638388,2023-07-31 16:47:55.255447,2023-07-31 16:47:55.255447
1,31680358132567,2023-04-01,JOSE CARLOS SOUSA SILVA,2015-05-24,M,MARIA JOSE SOUSA DE FARIAS,230440,PARQUE ARAXA,RUA CARVALHO MOTA,467,,,11281335398,2023-07-31 16:47:55.287414,2023-07-31 16:47:55.287414
2,31680546435594,2023-04-01,MARIA ALYCE SILVA MOREIRA,2020-08-28,F,FRANCISCA DAS CHAGAS MEIRELANE SILVA MOREIRA,230440,PARQUE DOIS IRMAOS,DAS OLIMPIADAS,505,60761135,,11455973351,2023-07-31 16:47:55.407413,2023-07-31 16:47:55.407413
3,31680552994265,2023-04-02,DYLAN PEREIRA VIEIRA,2021-01-27,M,ALINE REGINA PEREIRA,230765,LUZARDO VIANA,ALMIR FREITAS DUTRA,78,61910065,,11680150308,2023-07-31 16:47:55.275412,2023-07-31 16:47:55.275412
4,31680410614381,2023-04-02,HELENA VICTORIA FELIX SOUSA,2020-03-01,F,CAROLINA BARROSO FELIX,230440,JARDIM AMERICA,RUA COMENDADOR MACHADO,538,,898006194502845,,2023-07-31 16:47:55.318412,2023-07-31 16:47:55.318412
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
296,31686605179575,2023-06-12,AURORA OLIVEIRA DE SOUSA,2019-01-10,F,JANIELE OLIVEIRA DE HOLANDA,230440,ITAPERI,VEREADORA ZELIA CORREIA DE SOUSA,678,60000000,600000000000000,,2023-07-31 16:47:55.352423,2023-07-31 16:47:55.352423
297,31686753171124,2023-06-13,MARCOS ANTONIO DOS SANTOS ALVES,1967-10-28,M,MARIA EUNICE DOS SANTOS ALVES,230440,RODOLFO TEOFILO,RUA CORONEL NUNES DE MELO,1342,60000000,,32059086353,2023-07-31 16:47:55.287414,2023-07-31 16:47:55.287414
298,31686704429298,2023-06-13,RONALD EMANUEL ALVES COSTA,2016-12-16,M,NATALIA ALVES DE OLIVEIRA,230440,HENRIQUE JORGE,RUA TEREZINHA,406,,,63091774350,2023-07-31 16:47:55.330441,2023-07-31 16:47:55.330441
299,31686702251809,2023-06-13,MIQUEIAS DEOLINO DE SOUSA,2021-06-16,M,PRISCILA DEOLINO DAMASCENO RAMOS,230440,NOVA METROPOLE,AVENIDA C,35,,,11870689348,2023-07-31 16:47:55.362439,2023-07-31 16:47:55.362439


In [10]:
obj = ProcessSivep(qdata, field_id="ID_SIVEP")

In [11]:
obj.basic_standardize()

In [12]:
obj.specific_standardize()

In [18]:
ex = obj.data.copy()

In [None]:
# Draft payload holding one person under the "data" key, posted to `url` below.
# NOTE(review): the key "DATA DE NASCIMENTO" differs from the "DATA_NASCIMENTO"
# column that search_person requires — confirm which name the endpoint expects.
record = {
    "data": [
    {"NOME_PACIENTE": "HIGOR DA SILVA MONTEIRO", "DATA DE NASCIMENTO": "26/07/1996",
    "NOME_MAE": "DANIELA DA SILVA CAXILE"}
    ]          
}

# NOTE(review): `requests` and `url` are not defined anywhere in the visible notebook —
# import/define them before running this cell.
# NOTE(review): `data=` form-encodes the payload; a nested structure like this is
# usually sent with `json=` — confirm against the receiving endpoint.
s = requests.post(url, data=record)

In [None]:
# Create class to perform comparison between records in the same database
class Compare:
    """Placeholder for comparing records within the same database (not implemented yet)."""

    def __init__(self):
        """Create the comparer; there is no state to set up yet."""
        return None

    def test(self):
        """Stub hook: does nothing and returns ``None``."""
        return None



In [None]:
r