Leemos los archivos CSV y se convierten a JSON

In [3]:
import csv
import json

def csv2json(archivo_csv, archivo_json):
    datos = []
    with open(archivo_csv, 'r') as csv_file:
        csv_reader = csv.DictReader(csv_file)
        for fila in csv_reader:
            datos.append(fila)

    with open(archivo_json, 'w') as json_file:
        json_file.write(json.dumps(datos, indent=4))




In [2]:
# Utiliza la función csv_a_json con los nombres de tus archivos CSV y JSON
csv2json('patients.csv', 'patient.json')
csv2json('admissions.csv', 'admissions.json')
csv2json('transfers.csv', 'transfers.json')

Combinamos los datos para el recurso del paciente

In [10]:
# Función para cargar datos de un archivo JSON
def load_json(filename):
    with open(filename) as f:
      return json.load(f)

In [7]:
# Cargamos los recursos FHIR de MIMIC para seleccionar únicamente los pacientes que existen
import json

# Lee el archivo y carga los datos JSON
with open('MimicEncounter2.ndjson') as file:
    data = file.readlines()

# Inicializa una lista para almacenar los valores de "identifier": "value"
identifier_values = []

# Itera sobre cada línea del archivo
for line in data:
    # Parsea la línea como JSON
    record = json.loads(line)
    # Obtiene el valor de "identifier": "value" y lo añade a la lista
    identifier_value = record.get("identifier", [{}])[0].get("value", None)
    identifier_values.append(int(identifier_value))

# Imprime la lista de valores de "identifier": "value"


In [8]:
17242912 in identifier_values

True

In [12]:
import json
import pandas as pd

# Crear un diccionario para almacenar la información combinada
combined_data = {}

# Construir un diccionario de pacientes para una búsqueda eficiente
patients_dict = {}

transfers_ordered = pd.read_json('transfers_ordered.json')

for patient in load_json('patient.json'):
    if(int(patient['subject_id']) in identifier_values):
        patients_dict[patient['subject_id']] = patient
        row = transfers_ordered.loc[transfers_ordered['subject_id'] == int(patient['subject_id'])]
        patients_dict[patient['subject_id']]['intime'] = row['intime'].values[0]



# Iterar sobre las admisiones médicas y combinar la información del paciente
for admission in load_json('admissions.json'):
    subject_id = admission['subject_id']
    patient = patients_dict.get(subject_id)
    # Comprobar si el paciente está presente en el JSON de pacientes
    if patient:
        # Si el paciente ya está en el diccionario combinado, agregar la admisión médica a su lista
        if subject_id in combined_data:
            combined_data[subject_id]['admissions'].append(admission)
        # Si el paciente no está en el diccionario combinado, crear una nueva entrada
        else:
            combined_data[subject_id] = patient.copy()
            combined_data[subject_id]['admissions'] = [admission]

# Guardar la información combinada como un archivo JSON
with open('combined_data.json', 'w') as json_file:
    json.dump(list(combined_data.values()), json_file, indent=2)


Redactamos el texto de cada paciente

In [16]:
import json

# Función para generar texto a partir de una instancia del JSON
def generate_text_from_instance(instance):
    text_by_id = {}
    for patient_data in instance:
        patient_id = patient_data["subject_id"]
        patient_text = "Patient Info:\n"
        for key, value in patient_data.items():
            patient_text += f"The {key} of the patient is {value}. "
        for admission in patient_data['admissions']:
            for key, value in admission.items():
                patient_text += f"The {key} is {value}. "
            patient_text += "\n"
        patient_text += "\n"
        # Almacenar la descripción de texto por ID
        text_by_id[patient_id] = patient_text
    return text_by_id


# Función para generar texto a partir de una instancia del JSON
def generate_text_from_instance_2(instance):
    text_by_id = {}
    for patient_data in instance:
        patient_id = int(patient_data["subject_id"])
        patient_text = ["Patient Info:"]
        for key, value in patient_data.items():
            if key != 'admissions':
                patient_text.append(f"The {key} of the patient is {value}.")
        for admission in patient_data.get('admissions', []):
            admission_text = []
            for key, value in admission.items():
                admission_text.append(f"The {key} is {value}.")
            patient_text.append(" ".join(admission_text))
        # Almacenar la descripción de texto por ID
        text_by_id[patient_id] = "\n".join(patient_text)
    return text_by_id




In [17]:
# Leer los datos del archivo JSON
with open('combined_data.json') as f:
    data = json.load(f)

Buscamos un paciente concreto

In [18]:
# Generar texto para el JSON y almacenarlo por ID
text_dict = generate_text_from_instance_2(data)
# Guardar la información combinada como un archivo JSON
with open('serialized_patient.json', 'w') as json_file:
    json.dump(list(text_dict.values()), json_file, indent=2)

In [20]:
# Función para buscar la información por ID
def search_by_id(patient_id):
    return text_dict.get(patient_id, "ID not found")

# Ejemplo de búsqueda
patient_id_to_search = "10322234"
print(search_by_id(patient_id_to_search))

Patient Info:
The subject_id of the patient is 10322234. The gender of the patient is F. The anchor_age of the patient is 53. The anchor_year of the patient is 2122. The anchor_year_group of the patient is 2008 - 2010. The dod of the patient is . The admissions of the patient is [{'subject_id': '10322234', 'hadm_id': '29789116', 'admittime': '2122-02-25 01:06:00', 'dischtime': '2122-02-25 12:42:00', 'deathtime': '', 'admission_type': 'EU OBSERVATION', 'admission_location': 'EMERGENCY ROOM', 'discharge_location': '', 'insurance': 'Other', 'language': '?', 'marital_status': 'MARRIED', 'ethnicity': 'ASIAN', 'edregtime': '2122-02-24 21:30:00', 'edouttime': '2122-02-25 12:42:00', 'hospital_expire_flag': '0'}]. The subject_id is 10322234. The hadm_id is 29789116. The admittime is 2122-02-25 01:06:00. The dischtime is 2122-02-25 12:42:00. The deathtime is . The admission_type is EU OBSERVATION. The admission_location is EMERGENCY ROOM. The discharge_location is . The insurance is Other. The l

# Transfers + Admissions

In [9]:
# Utiliza la función csv_a_json con los nombres de tus archivos CSV y JSON
csv2json('transfers.csv', 'transfers.json')

In [30]:
# Crear un diccionario para almacenar la información combinada
combined_data = {}

# Construir un diccionario de pacientes para una búsqueda eficiente
admission_dict = {}

for admission in load_json('admissions.json'):
    admission_dict[admission['hadm_id']] = admission

# Iterar sobre las admisiones médicas y combinar la información del paciente
for transfer in load_json('transfers.json'):
    hadm_id = transfer['hadm_id']
    admission = admission.get(hadm_id)
    # Comprobar si el paciente está presente en el JSON de pacientes
    if admission:
        # Si el paciente ya está en el diccionario combinado, agregar la admisión médica a su lista
        if hadm_id in combined_data:
            combined_data[hadm_id]['transfers'].append(transfer)
        # Si el paciente no está en el diccionario combinado, crear una nueva entrada
        else:
            combined_data[hadm_id] = transfer.copy()
            combined_data[hadm_id]['transfers'] = [transfer]

# Guardar la información combinada como un archivo JSON
with open('combined_data_TA.json', 'w') as json_file:
    json.dump(list(combined_data.values()), json_file, indent=2)


AttributeError: 'NoneType' object has no attribute 'get'

In [22]:
import json

# Función para cargar datos desde archivos CSV
def load_data_from_csv(file_path):
    data = {}
    with open(file_path, 'r') as file:
        next(file)  # Saltar la primera línea que contiene los encabezados
        for line in file:
            fields = line.strip().split(',')
            subject_id = fields[0]
            hadm_id = fields[1]
            transfer_id = fields[2]
            eventtype = fields[3]
            careunit = fields[4]
            intime = fields[5]
            outtime = fields[6]
            admission_data = {
                "hadm_id": hadm_id,
                "transfer_id": transfer_id,
                "eventtype": eventtype,
                "careunit": careunit,
                "intime": intime,
                "outtime": outtime
            }
            if subject_id not in data:
                data[subject_id] = {}
            if hadm_id not in data[subject_id]:
                data[subject_id][hadm_id] = []
            data[subject_id][hadm_id].append(admission_data)
    return data

# Cargar datos de los archivos CSV
transfer_data = load_data_from_csv('transfers.csv')
admission_data = load_data_from_csv('admissions.csv')

# Combinar los datos de transfer y admission
combined_data = {}
for subject_id, transfers in transfer_data.items():
    combined_data[f"subject_id_{subject_id}"] = {}
    for hadm_id, admissions in admission_data.get(subject_id, {}).items():
        combined_data[f"subject_id_{subject_id}"][f"hamd_id_{hadm_id}"] = {
            "admissions": admissions,
            "transfers": transfers.get(hadm_id, [])
        }


# Guardar datos combinados en un archivo JSON
with open('combined_data_TA_2.json', 'w') as json_file:
    json.dump(combined_data, json_file, indent=4)

print("Datos combinados guardados en combined_data.json")


Datos combinados guardados en combined_data.json


In [51]:
import json

def combine_data(transfers, admissions):
    combined_data = {}
    
    # Iterar sobre los datos de Transfer
    for transfer in transfers:
        subject_id = transfer['subject_id']
        if subject_id not in combined_data:
            combined_data[subject_id] = {'transfers': []}
        
        # Agregar el transfer a la lista de transfers asociados al hadm_id
        combined_data[subject_id]['transfers'].append(transfer)
    
    # Iterar sobre los datos de Admission
    for admission in admissions:
        hadm_id = admission['hadm_id']
        subject_id = admission['subject_id']
        if hadm_id in combined_data[subject_id]['transfers']:
            # Asignar la información de admisión al hadm_id correspondiente
            combined_data[subject_id]['transfers'].append(admission)
    
    return combined_data

In [48]:

# Guardar los datos combinados en un archivo JSON
with open('combined_data_prueba.json', 'w') as f:
    json.dump(combined_data, f, indent=4)


In [21]:
# Leer los datos del archivo JSON
with open('combined_data_TA.json') as f:
    data = json.load(f)

In [22]:
# Generar texto para el JSON y almacenarlo por ID
text_dict_TA = generate_text_from_instance(data)

# Guardar los datos combinados en un archivo JSON
with open('serialized_text_TA.json', 'w') as json_file:
    json.dump(list(text_dict.values()), json_file, indent=2)

In [27]:
# Función para buscar la información por ID
def search_by_id(patient_id):
    return text_dict_TA.get(patient_id, "ID not found")

# Ejemplo de búsqueda
patient_id_to_search = "13588367"
print(search_by_id(patient_id_to_search))

Patient Info:
The subject_id of the patient is 13588367. The hadm_id of the patient is 27078906. The transfer_id of the patient is 30984755. The eventtype of the patient is transfer. The careunit of the patient is Medicine. The intime of the patient is 2156-04-01 18:11:03. The outtime of the patient is 2156-04-05 13:05:39. The admissions of the patient is [{'subject_id': '13588367', 'hadm_id': '27078906', 'admittime': '2156-03-26 19:48:00', 'dischtime': '2156-04-05 13:05:00', 'deathtime': '', 'admission_type': 'EW EMER.', 'admission_location': 'EMERGENCY ROOM', 'discharge_location': 'HOME', 'insurance': 'Medicare', 'language': 'ENGLISH', 'marital_status': 'SINGLE', 'ethnicity': 'WHITE', 'edregtime': '2156-03-26 17:42:00', 'edouttime': '2156-03-26 20:11:00', 'hospital_expire_flag': '0'}]. The subject_id is 13588367. The hadm_id is 27078906. The admittime is 2156-03-26 19:48:00. The dischtime is 2156-04-05 13:05:00. The deathtime is . The admission_type is EW EMER.. The admission_locatio