In [33]:
from tqdm import tqdm
from pathlib import Path
from pysus import SIH
import pandas as pd
import sys
sys.path.append("..")

from pyopensus.storage.whandler_sus import HandlerSIH


In [39]:
def load_SIH_file(data_filename):
    """Load one SIH parquet file, tag its rows with the source and parse dates.

    The file stem, with every occurrence of the substrings ``RD`` and ``CE``
    removed, is written to a new ``FONTE`` column (``RDCE2501.parquet`` gives
    ``"2501"``). Blanks inside ``SEQUENCIA`` are replaced by ``"0"`` and the
    columns ``NASC``, ``DT_INTER`` and ``DT_SAIDA`` are converted to datetimes.

    Parameters
    ----------
    data_filename : str or pathlib.Path
        Path to the parquet file to load. The file must contain the columns
        ``SEQUENCIA``, ``NASC``, ``DT_INTER`` and ``DT_SAIDA``.

    Returns
    -------
    pandas.DataFrame
        The loaded frame with the added ``FONTE`` column, the normalised
        ``SEQUENCIA`` column and the three parsed date columns. Date values
        that do not match ``%Y%m%d`` become ``NaT``.

    Raises
    ------
    KeyError
        If one of the required columns is missing from the file.
    """
    # NOTE(review): only the literal substrings 'RD' and 'CE' are stripped, so
    # a stem such as 'RDPE2501' would yield 'PE2501' - confirm this is intended
    # before loading files other than RDCE*.
    source_tag = Path(data_filename).stem.replace("RD", "").replace("CE", "")

    df = pd.read_parquet(data_filename, engine="fastparquet")

    # A scalar is broadcast to every row; no per-row Python list is needed.
    df["FONTE"] = source_tag

    # 'SEQUENCIA' has repeated values. Blanks are replaced by zeros; the
    # pattern is a plain character, hence regex=False.
    df["SEQUENCIA"] = df["SEQUENCIA"].str.replace(" ", "0", regex=False)

    # Dates are stored as YYYYMMDD strings; invalid entries are coerced to NaT
    # rather than raising.
    for col in ("NASC", "DT_INTER", "DT_SAIDA"):
        df[col] = pd.to_datetime(df[col], format="%Y%m%d", errors="coerce")
    return df

In [41]:
# Folder layout, all rooted at ~/Workspace/pyopensus/data.
data_folder = Path.home() / "Workspace" / "pyopensus" / "data"
base_folder = data_folder / "sihsus"
warehouse_location = data_folder / "opendatasus"

# HandlerSIH receives the warehouse folder and the database file name.
warehouse_name = "SIHSUS_NORDESTE_NO_SERVICE.db"
warehouse_injector = HandlerSIH(warehouse_location, warehouse_name)

# Parquet files are looked up directly in the SIH base folder.
parquet_location = base_folder

In [31]:
# Arguments forwarded to SIH.get_files below.
prefix_list = ["RD"]
uf_list = ["CE"]
year_list = [2025]

sih = SIH().load()
files = sih.get_files(prefix_list, uf=uf_list, year=year_list)

# For each listed file, swap "dbc" for "parquet" in its string form and
# resolve the result inside the local parquet folder.
filenames = [
    parquet_location / str(remote_file).replace("dbc", "parquet")
    for remote_file in files
]
filenames

[PosixPath('/home/humberto/Workspace/pyopensus/data/sihsus/RDCE2501.parquet'),
 PosixPath('/home/humberto/Workspace/pyopensus/data/sihsus/RDCE2502.parquet'),
 PosixPath('/home/humberto/Workspace/pyopensus/data/sihsus/RDCE2503.parquet'),
 PosixPath('/home/humberto/Workspace/pyopensus/data/sihsus/RDCE2504.parquet'),
 PosixPath('/home/humberto/Workspace/pyopensus/data/sihsus/RDCE2505.parquet'),
 PosixPath('/home/humberto/Workspace/pyopensus/data/sihsus/RDCE2506.parquet'),
 PosixPath('/home/humberto/Workspace/pyopensus/data/sihsus/RDCE2507.parquet'),
 PosixPath('/home/humberto/Workspace/pyopensus/data/sihsus/RDCE2508.parquet')]

In [None]:
# Only the first file is ingested. Slicing the list (instead of ending the
# loop body with an unconditional `break`) keeps the tqdm progress bar
# accurate: it completes at 1/1 rather than staying stuck at 0/8.
# Drop the `[:1]` to ingest every file in `filenames`.
for current_file in tqdm(filenames[:1]):
    fname = current_file.stem
    prefix = fname[:2]  # first two characters of the stem, e.g. "RD"
    cur_df = load_SIH_file(current_file)
    warehouse_injector.insert_sih(cur_df, fname, prefix)
    

  0%|          | 0/8 [00:00<?, ?it/s]

RD


Unnamed: 0,UF_ZI,ANO_CMPT,MES_CMPT,ESPEC,CGC_HOSP,N_AIH,IDENT,CEP,MUNIC_RES,NASC,...,TPDISEC1,TPDISEC2,TPDISEC3,TPDISEC4,TPDISEC5,TPDISEC6,TPDISEC7,TPDISEC8,TPDISEC9,FONTE
0,230440,2025,1,3,4885197000225,2325107135714,1,60766120,230440,2000-10-20,...,0,0,0,0,0,0,0,0,0,2501


  return bound(*args, **kwds)
  0%|          | 0/8 [00:14<?, ?it/s]
