Dentro de este Notebook se realizará la lógica necesaria para la generación de datos aleatorios necesarios para posteriores desarrollos

In [0]:
# librerias utilizadas

import copy
import numpy as np
import pandas as pd
from google.colab import drive
from random import uniform

In [32]:
#nos conectamos a las plantillas generadas en csv

drive.mount('/gdrive')
analitica_template = pd.read_excel('/gdrive/My Drive/Pathologies Study/COVID-Study/templates/analitica_plantilla.xlsx')
analitica_range = pd.read_excel('/gdrive/My Drive/Pathologies Study/COVID-Study/templates/analitica_rangos.xlsx')
anamnesis_template = pd.read_excel('/gdrive/My Drive/Pathologies Study/COVID-Study/templates/anamnesis_plantilla.xlsx')
anamnesis_range = pd.read_excel('/gdrive/My Drive/Pathologies Study/COVID-Study/templates/anamnesis_rangos.xlsx')

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


In [0]:
#definimos los rangos de sobreinfección y transtorno de la coagulación

sobreinfec = pd.DataFrame([["procalcitonina", 1, 2]],columns= ["Parámetro", "minimo", "maximo"])
trans_coa = pd.DataFrame([["d_dímero", 1000, 1500], ["nt-probnp", 125, 200]],columns= ["Parámetro", "minimo", "maximo"])

In [0]:
# definimos los diferentes escenarios para generar los valores aleatorios

clusters = ["Inflamación Intensa", "Inflamación Moderada", "Inflamación Leve"]
patron_adiccional = ["Ninguno", "SobreInfección", "Transtorno Coagulación"]

In [0]:
# definimos una función para generar valores aleatorios (establecemos un 20% de probabilidades de estar dentro de sobreinfección y un 20% de probabilidades de tener transtorno de coagulación)

def generate_random_values(cluster):
    template = copy.deepcopy(analitica_range)

    #definimos los aleatorios a realizar en función del cluster introducido
    if cluster == "Inflamación Intensa":
        template['Value'] = template.apply(lambda x: round(uniform(x.Mínimo_1, x.Máximo_1), 2), axis = 1)
    elif cluster == "Inflamación Moderada":
        template['Value'] = template.apply(lambda x: round(uniform(x.Mínimo_2, x.Máximo_2), 2), axis = 1)
    elif cluster == "Inflamación Leve":
        template['Value'] = template.apply(lambda x: round(uniform(x.Mínimo_3, x.Máximo_3), 2), axis = 1)
    else:
        raise Exception('El cluster introducido no es válido, debe de estar dentro del rango: Inflamación Intensa, Inflamación Moderada, Inflamación Leve')


    #eliminamos las columnas que ya no son necesarias
    template.drop(['Mínimo_1', 'Máximo_1', 'Mínimo_2', 'Máximo_2', 'Mínimo_3', 'Máximo_3'], axis = 1, inplace = True)

    #establecemos un 20% de probabilidades de tener sobreinfección
    proba_sobr = uniform(0, 1)
    if proba_sobr >= 0.8:
        template['Value'] = template.apply(lambda x: round(uniform(sobreinfec['minimo'][0], sobreinfec['maximo'][0]), 2) if x.Parámetro == sobreinfec['Parámetro'][0] else x.Value, axis = 1)

    #establecemos un 20% de probabilidades de tener transtorno de coagulación
    proba_coag = uniform(0, 1)
    if proba_coag >= 0.8:
        for i in range(trans_coa.shape[0]):
          template['Value'] = template.apply(lambda x: round(uniform(trans_coa['minimo'][i], trans_coa['maximo'][i]), 2) if x.Parámetro == trans_coa['Parámetro'][i] else x.Value, axis = 1)

    #modificamos los datos obtenidos para poder introducirlos en el sistema
    template = template.set_index(['Parámetro']).transpose().reset_index().drop(['index'], axis = 1)
    template.index.names = [""]
    template['Cluster'] = cluster
    return template

In [0]:
#ejecutamos por primera vez el generador de datos para obtener el DataFrame base

data = generate_random_values(clusters[0])

In [0]:
#generamos 200 muestras de datos para cada uno de los cluster

for j in range(len(clusters)):
  for i in range(200):
      data = data.append(generate_random_values(clusters[j]))

In [38]:
data

Parámetro,leucocitos,hematies,hemoglobina,hematocrito,plaquetas,neutrofilos,linfocitos,monocitos,eosinófilos,actividad_de_protrombina,inr,fibrinogeno_derivado,tiempo_de_cefalina,ferritina,d_dímero,glucosa_en_suero,creatinina_en_suero,filtrado_glomerular(ckd-epi),sodio_en_suero,potasio_en_suero,cloro_en_suero,asat/got,alat/gpt,ggt,proteína_c_reactiva,procalcitonina,interleuquina-6,interleuqiona-1,proteinas_totales_en_suero,nt-probnp,ph_sangre_arterial,po2 _sangre_arterial,pco2_sangre_arterial,lactato,bicarbonato_sangre_arterial,bicarbonato_std_sangre_arterial,exceso_de_bases_standard,calcio_ionizado,calcio_ionizado_corregido_ph_7.40,anion_gap,Cluster
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
0,4.32,4.73,16.29,40.31,325.66,67.92,37.28,5.01,2.97,109.27,1.01,215.76,32.06,3160.46,330.55,97.10,0.90,271.72,139.29,4.29,105.32,17.28,2.16,4.36,28.52,0.82,4.78,21.87,7.00,110.28,7.36,88.23,37.71,0.63,23.95,24.92,0.96,1.26,1.20,12.57,Inflamación Intensa
0,8.27,5.17,17.04,42.72,301.52,69.25,41.19,5.07,1.78,101.31,1.01,179.83,7.92,3605.29,1330.63,86.25,1.21,519.27,137.95,4.80,105.31,37.79,30.03,9.24,10.35,0.24,4.39,19.03,8.11,126.41,7.39,106.92,44.80,1.33,25.98,22.07,0.99,1.26,1.24,12.23,Inflamación Intensa
0,7.24,4.11,13.63,42.46,333.06,70.35,35.02,5.57,3.82,88.87,1.02,339.85,18.11,4047.72,1008.55,79.34,1.09,756.94,137.08,4.56,108.44,36.65,19.11,13.94,22.34,0.62,4.93,19.26,7.77,164.41,7.36,87.08,45.08,1.58,25.72,23.57,1.48,1.22,1.24,9.85,Inflamación Intensa
0,5.61,4.17,13.43,38.20,345.84,55.60,33.23,6.54,4.97,77.01,1.03,416.22,34.02,4425.47,441.21,103.21,1.20,232.74,144.30,3.53,101.66,16.54,29.87,61.49,17.87,0.25,4.19,24.64,6.84,65.33,7.43,89.44,40.38,0.88,22.38,24.50,0.54,1.27,1.20,14.84,Inflamación Intensa
0,3.99,5.16,15.58,42.46,308.91,46.74,39.62,3.88,4.34,89.06,0.99,356.06,24.16,3148.77,51.59,96.67,1.02,718.21,138.09,4.97,106.56,4.61,4.37,55.99,18.06,1.29,4.81,21.28,7.16,47.48,7.36,101.30,44.51,1.40,23.52,22.53,0.19,1.20,1.16,14.74,Inflamación Intensa
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,3.80,5.39,15.69,44.47,197.30,51.17,42.98,6.04,4.92,78.73,1.16,223.21,80.32,5192.19,407.85,75.91,1.09,97.70,139.08,3.77,106.53,11.09,12.09,43.04,5.06,0.89,2.65,20.75,6.73,103.20,7.42,90.78,40.54,1.17,22.06,23.54,1.31,1.22,1.29,11.06,Inflamación Leve
0,10.49,5.10,15.73,44.43,351.89,67.68,25.92,5.87,3.79,110.29,0.90,253.34,22.12,3061.99,70.07,84.57,1.04,303.60,141.92,4.03,102.41,35.56,16.83,56.01,2.90,0.45,0.41,19.83,8.23,8.33,7.40,95.21,41.22,0.72,24.45,25.76,0.53,1.19,1.27,10.84,Inflamación Leve
0,7.51,4.73,13.53,40.63,248.91,58.63,30.79,5.75,2.75,70.91,0.96,194.24,98.41,4794.68,469.86,101.86,1.11,58.38,139.01,3.94,108.01,36.51,26.53,63.97,0.82,0.48,2.69,18.51,7.55,50.71,7.45,101.94,40.95,0.54,22.23,24.22,0.28,1.16,1.24,15.44,Inflamación Leve


**En los siguientes DataFrames podemos visualizar diferentes muestras de los datos aleatorios generados**

Muestra de pacientes con Inflamación Intensa

In [39]:
data[data['Cluster'] == "Inflamación Intensa"].head()

Parámetro,leucocitos,hematies,hemoglobina,hematocrito,plaquetas,neutrofilos,linfocitos,monocitos,eosinófilos,actividad_de_protrombina,inr,fibrinogeno_derivado,tiempo_de_cefalina,ferritina,d_dímero,glucosa_en_suero,creatinina_en_suero,filtrado_glomerular(ckd-epi),sodio_en_suero,potasio_en_suero,cloro_en_suero,asat/got,alat/gpt,ggt,proteína_c_reactiva,procalcitonina,interleuquina-6,interleuqiona-1,proteinas_totales_en_suero,nt-probnp,ph_sangre_arterial,po2 _sangre_arterial,pco2_sangre_arterial,lactato,bicarbonato_sangre_arterial,bicarbonato_std_sangre_arterial,exceso_de_bases_standard,calcio_ionizado,calcio_ionizado_corregido_ph_7.40,anion_gap,Cluster
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
0.0,4.32,4.73,16.29,40.31,325.66,67.92,37.28,5.01,2.97,109.27,1.01,215.76,32.06,3160.46,330.55,97.1,0.9,271.72,139.29,4.29,105.32,17.28,2.16,4.36,28.52,0.82,4.78,21.87,7.0,110.28,7.36,88.23,37.71,0.63,23.95,24.92,0.96,1.26,1.2,12.57,Inflamación Intensa
0.0,8.27,5.17,17.04,42.72,301.52,69.25,41.19,5.07,1.78,101.31,1.01,179.83,7.92,3605.29,1330.63,86.25,1.21,519.27,137.95,4.8,105.31,37.79,30.03,9.24,10.35,0.24,4.39,19.03,8.11,126.41,7.39,106.92,44.8,1.33,25.98,22.07,0.99,1.26,1.24,12.23,Inflamación Intensa
0.0,7.24,4.11,13.63,42.46,333.06,70.35,35.02,5.57,3.82,88.87,1.02,339.85,18.11,4047.72,1008.55,79.34,1.09,756.94,137.08,4.56,108.44,36.65,19.11,13.94,22.34,0.62,4.93,19.26,7.77,164.41,7.36,87.08,45.08,1.58,25.72,23.57,1.48,1.22,1.24,9.85,Inflamación Intensa
0.0,5.61,4.17,13.43,38.2,345.84,55.6,33.23,6.54,4.97,77.01,1.03,416.22,34.02,4425.47,441.21,103.21,1.2,232.74,144.3,3.53,101.66,16.54,29.87,61.49,17.87,0.25,4.19,24.64,6.84,65.33,7.43,89.44,40.38,0.88,22.38,24.5,0.54,1.27,1.2,14.84,Inflamación Intensa
0.0,3.99,5.16,15.58,42.46,308.91,46.74,39.62,3.88,4.34,89.06,0.99,356.06,24.16,3148.77,51.59,96.67,1.02,718.21,138.09,4.97,106.56,4.61,4.37,55.99,18.06,1.29,4.81,21.28,7.16,47.48,7.36,101.3,44.51,1.4,23.52,22.53,0.19,1.2,1.16,14.74,Inflamación Intensa


Muestra de pacientes con Inflamación Moderada

In [40]:
data[data['Cluster'] == "Inflamación Moderada"].head()

Parámetro,leucocitos,hematies,hemoglobina,hematocrito,plaquetas,neutrofilos,linfocitos,monocitos,eosinófilos,actividad_de_protrombina,inr,fibrinogeno_derivado,tiempo_de_cefalina,ferritina,d_dímero,glucosa_en_suero,creatinina_en_suero,filtrado_glomerular(ckd-epi),sodio_en_suero,potasio_en_suero,cloro_en_suero,asat/got,alat/gpt,ggt,proteína_c_reactiva,procalcitonina,interleuquina-6,interleuqiona-1,proteinas_totales_en_suero,nt-probnp,ph_sangre_arterial,po2 _sangre_arterial,pco2_sangre_arterial,lactato,bicarbonato_sangre_arterial,bicarbonato_std_sangre_arterial,exceso_de_bases_standard,calcio_ionizado,calcio_ionizado_corregido_ph_7.40,anion_gap,Cluster
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
0.0,7.43,5.61,13.47,37.16,300.17,49.18,43.44,4.23,1.86,117.65,0.86,310.12,38.68,1763.47,401.0,78.76,0.84,483.52,142.2,4.27,106.24,16.07,13.33,62.43,18.13,0.17,4.27,22.43,7.0,32.55,7.39,96.53,40.93,0.53,25.82,24.42,0.82,1.16,1.19,10.24,Inflamación Moderada
0.0,8.52,5.64,13.5,39.04,185.3,52.25,27.25,5.27,2.49,104.95,0.87,190.56,29.81,2332.67,1355.66,104.58,1.12,319.12,139.93,5.09,99.56,10.49,21.72,4.58,11.13,0.02,3.63,21.88,7.7,175.92,7.41,92.87,38.74,0.87,23.65,22.42,1.55,1.26,1.22,14.83,Inflamación Moderada
0.0,7.25,4.08,15.97,45.76,222.27,65.74,30.55,3.31,1.92,118.48,0.81,376.21,82.55,1911.98,105.16,77.35,0.99,69.53,141.49,3.97,105.21,12.25,33.86,44.69,28.49,0.96,4.17,23.91,7.8,13.03,7.36,95.98,43.1,0.79,23.66,23.32,0.32,1.2,1.24,11.66,Inflamación Moderada
0.0,6.58,4.76,14.65,39.82,279.1,46.46,27.87,4.99,1.7,74.3,1.12,288.78,30.06,2556.52,450.0,93.07,0.71,288.51,144.18,4.85,99.59,19.82,5.92,29.66,18.26,0.52,4.35,21.86,6.64,75.22,7.44,83.81,40.99,1.35,22.46,25.97,1.19,1.28,1.23,11.37,Inflamación Moderada
0.0,8.61,4.21,17.11,44.27,307.26,55.99,30.69,7.66,1.57,96.57,1.1,195.02,42.29,2830.14,255.2,83.91,1.1,598.88,137.05,3.71,102.7,11.13,8.92,23.55,13.76,0.51,3.81,24.65,6.62,29.51,7.36,93.01,40.73,1.3,25.62,25.25,1.83,1.23,1.25,15.92,Inflamación Moderada


Muestra de pacientes con Inflamación Leve

In [41]:
data[data['Cluster'] == "Inflamación Leve"].head()

Parámetro,leucocitos,hematies,hemoglobina,hematocrito,plaquetas,neutrofilos,linfocitos,monocitos,eosinófilos,actividad_de_protrombina,inr,fibrinogeno_derivado,tiempo_de_cefalina,ferritina,d_dímero,glucosa_en_suero,creatinina_en_suero,filtrado_glomerular(ckd-epi),sodio_en_suero,potasio_en_suero,cloro_en_suero,asat/got,alat/gpt,ggt,proteína_c_reactiva,procalcitonina,interleuquina-6,interleuqiona-1,proteinas_totales_en_suero,nt-probnp,ph_sangre_arterial,po2 _sangre_arterial,pco2_sangre_arterial,lactato,bicarbonato_sangre_arterial,bicarbonato_std_sangre_arterial,exceso_de_bases_standard,calcio_ionizado,calcio_ionizado_corregido_ph_7.40,anion_gap,Cluster
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
0.0,7.15,4.98,16.7,47.6,194.27,49.82,20.94,5.79,1.58,96.92,0.91,222.53,85.74,5431.73,486.1,105.45,0.82,889.25,144.88,4.17,103.66,12.69,23.0,65.21,8.77,0.93,0.82,18.03,7.8,4.25,7.41,95.03,38.26,1.2,25.95,23.86,0.61,1.16,1.24,12.59,Inflamación Leve
0.0,6.76,5.09,14.26,40.5,276.73,48.78,42.35,8.76,0.81,72.89,1.01,311.62,9.06,3849.02,335.78,89.5,1.25,326.82,136.59,4.5,107.79,34.72,32.14,58.64,7.4,0.22,1.58,22.46,8.02,83.76,7.36,107.67,46.74,0.71,22.68,24.55,0.14,1.24,1.25,11.66,Inflamación Leve
0.0,4.64,4.02,14.86,48.88,284.5,76.3,24.57,8.16,1.88,90.64,0.81,432.25,64.95,3937.04,1078.99,103.49,0.85,657.94,140.72,4.97,104.52,28.84,13.13,31.71,7.31,0.57,0.53,21.66,6.73,184.0,7.4,90.98,37.19,1.35,24.16,24.82,1.71,1.28,1.28,8.78,Inflamación Leve
0.0,9.87,4.27,14.05,44.56,243.17,51.11,23.72,3.92,5.3,103.82,1.17,348.78,32.71,3990.77,7.1,74.51,1.07,462.39,140.52,3.84,99.73,28.91,14.57,23.08,0.4,1.08,1.91,22.59,7.96,49.9,7.39,83.44,35.17,0.99,24.88,22.74,1.73,1.27,1.24,11.49,Inflamación Leve
0.0,4.62,5.21,15.29,41.45,238.14,69.01,22.71,5.87,2.39,106.78,0.99,207.18,16.78,4764.17,39.99,97.98,0.86,75.41,145.0,3.61,108.86,15.38,30.02,5.73,0.77,0.89,0.12,22.32,6.84,33.02,7.45,86.51,41.59,0.94,24.15,23.0,0.16,1.21,1.24,15.78,Inflamación Leve


In [0]:
#exportamos los datos que utilizaremos en otros scripts

data.to_csv('/gdrive/My Drive/Pathologies Study/COVID-Study/data/random_data.csv', sep = ";", encoding = 'latin-1', index = False)