# 🧃 Excel Preprocessing

In [67]:
import pandas as pd
import json

In [68]:
# Load the four sheets of the RIPS workbook into one DataFrame each.
# The workbook is opened a single time and every sheet is read from that
# handle.
data_path = 'RIPS-FEV-Procedures.xlsx'
with pd.ExcelFile(data_path) as workbook:
    transaction_df, users_df, appointments_df, procedures_df = (
        pd.read_excel(workbook, sheet_name=sheet)
        for sheet in ('FACTURA', 'USUARIO', 'CONSULTA', 'PROCEDIMIENTOS')
    )

In [70]:
def process_datetime(timestamp):
    """Format a datetime-like value as a string.

    Args:
        timestamp: Object exposing ``hour``, ``minute``, ``second`` and
            ``strftime`` (e.g. ``datetime.datetime`` or ``pandas.Timestamp``).

    Returns:
        ``'YYYY-MM-DD'`` when the time of day is exactly midnight, otherwise
        ``'YYYY-MM-DD HH:MM:SS'``.
    """
    # Seconds are part of the check so that a value such as 00:00:30 is not
    # mistaken for a date-only value and silently truncated.
    has_time = any((timestamp.hour, timestamp.minute, timestamp.second))
    date_format = '%Y-%m-%d %H:%M:%S' if has_time else '%Y-%m-%d'
    return timestamp.strftime(date_format)


def create_transaction(transaction_df):
    """Build the top-level transaction dict from the row labelled 0.

    Null cells are replaced by empty strings. If the row cannot be read
    (``KeyError``/``ValueError``, e.g. the frame has no row labelled 0),
    every column is mapped to an empty string instead.

    Args:
        transaction_df: DataFrame holding the transaction sheet.

    Returns:
        A dict keyed by column name.
    """
    try:
        first_row = transaction_df.loc[0].to_dict()
        transaction = {
            field: "" if pd.isnull(value) else value
            for field, value in first_row.items()
        }
    except (ValueError, KeyError):
        transaction = dict.fromkeys(transaction_df.columns, "")
    return transaction


def _row_to_record(frame, index):
    """Convert the row labelled ``index`` of ``frame`` into a plain dict.

    Null cells become empty strings; non-null cells whose column name
    contains 'fecha' (case-insensitive) are formatted by ``process_datetime``.
    """
    record = frame.loc[index].to_dict()
    # Only values of existing keys are reassigned, so updating the dict
    # while iterating over it is safe.
    for key, value in record.items():
        if pd.isnull(value):
            record[key] = ""
        elif 'fecha' in key.lower():
            record[key] = process_datetime(value)
    return record


def create_user(users, index=0):
    """Return the user row labelled ``index`` as a dict.

    Args:
        users: DataFrame holding the users sheet.
        index: Row label to read (defaults to 0).

    Returns:
        A dict keyed by column name, with nulls replaced by ``""`` and
        'fecha' columns rendered as strings.

    Raises:
        KeyError: If ``index`` is not a row label of ``users``.
    """
    return _row_to_record(users, index)


def create_appointment(appointments, index=0):
    """Return the appointment row labelled ``index`` as a dict.

    Args:
        appointments: DataFrame holding the appointments sheet.
        index: Row label to read (defaults to 0).

    Returns:
        A dict keyed by column name, with nulls replaced by ``""`` and
        'fecha' columns rendered as strings.

    Raises:
        KeyError: If ``index`` is not a row label of ``appointments``.
    """
    return _row_to_record(appointments, index)


def create_procedure(procedures, index=0):
    """Return the procedure row labelled ``index`` as a dict.

    Args:
        procedures: DataFrame holding the procedures sheet.
        index: Row label to read (defaults to 0).

    Returns:
        A dict keyed by column name, with nulls replaced by ``""`` and
        'fecha' columns rendered as strings.

    Raises:
        KeyError: If ``index`` is not a row label of ``procedures``.
    """
    return _row_to_record(procedures, index)

In [71]:
# Start the output document from the transaction row, then add one entry
# per user row, each with an (initially empty) 'servicios' mapping.
main_dict = create_transaction(transaction_df)
main_dict['usuarios'] = []

for i in range(len(users_df)):
    user = {**create_user(users_df, i), 'servicios': {}}
    main_dict['usuarios'].append(user)

In [72]:
# Column that carries the user's document number in the service sheets,
# and the matching field name inside each user dict.
id_key = 'DocumentoIdentificacionUS'
id_map = {id_key: 'numDocumentoIdentificacion'}

# Attach each appointment to the first user whose document number matches.
# Appointments with no matching user are left out of the output.
for i in range(len(appointments_df)):
    appointment = create_appointment(appointments_df, i)
    # The linking column is removed from the record once it has been read.
    user_id = appointment.pop(id_key)

    for user in main_dict['usuarios']:
        if user[id_map[id_key]] != user_id:
            continue
        user['servicios'].setdefault('consultas', []).append(appointment)
        break

In [73]:
# Attach each procedure to the first user whose document number matches.
# Procedures with no matching user are left out of the output.
for i in range(len(procedures_df)):
    # Build the record with the procedure-specific builder rather than the
    # appointment one, so the two sheets stay independent if either
    # builder changes.
    procedure = create_procedure(procedures_df, i)
    # The linking column is removed from the record once it has been read.
    user_id = procedure.pop(id_key)

    for user in main_dict['usuarios']:
        if user[id_map[id_key]] == user_id:
            user['servicios'].setdefault('procedimientos', []).append(procedure)
            break

In [74]:
# Show the assembled document as indented JSON for a quick visual check.
preview = json.dumps(main_dict, indent=2)
print(preview)

{
  "numDocumentoIdObligado": "",
  "numFactura": "",
  "tipoNota": "",
  "numNota": "",
  "usuarios": [
    {
      "tipoDocumentoIdentificacion": "CC",
      "numDocumentoIdentificacion": 1065628313,
      "tipoUsuario": 5,
      "fechaNacimiento": "1991-04-19",
      "codSexo": "F",
      "codPaisResidencia": 170,
      "codMunicipioResidencia": 11001,
      "codZonaTerritorialResidencia": 2,
      "incapacidad": "NO",
      "consecutivo": 1,
      "codPaisOrigen": 170,
      "servicios": {
        "consultas": [
          {
            "codPrestador": 110010895701,
            "fechaInicioAtencion": "2024-02-02 11:45:00",
            "numAutorizacion": "",
            "codConsulta": 890307,
            "modalidadGrupoServicioTecSal": 4,
            "grupoServicios": 1,
            "codServicio": 337,
            "finalidadTecnologiaSalud": 15,
            "causaMotivoAtencion": 38,
            "codDiagnosticoPrincipal": "H522",
            "codDiagnosticoRelacionado1": "",
        

In [75]:
# Persist the assembled document to disk as indented JSON.
with open('output.json', 'w') as f:
    f.write(json.dumps(main_dict, indent=4))