**Estudio sobre las Patologías asociadas al COVID-19 y Ébola.**

Dentro de este Script se definirán las tablas con las que se trabajará posteriormente en un modelado de datos. Dentro del modelado de datos se trabajará con datos totalmente ficticios para comprobar cómo se comportan los modelos frente al modelo de la Base de Datos propuesto.

In [0]:
# librerias utilizadas

import copy
import numpy as np
import pandas as pd
from google.colab import drive
from random import uniform

In [168]:
#nos conectamos a las plantillas generadas en csv

drive.mount('/gdrive')
analitica_template = pd.read_excel('/gdrive/My Drive/Pathologies Study/templates/analitica_plantilla.xlsx')
analitica_range = pd.read_excel('/gdrive/My Drive/Pathologies Study/templates/analitica_rangos.xlsx')
anamnesis_template = pd.read_excel('/gdrive/My Drive/Pathologies Study/templates/anamnesis_plantilla.xlsx')
anamnesis_range = pd.read_excel('/gdrive/My Drive/Pathologies Study/templates/anamnesis_rangos.xlsx')

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


In [0]:
#definimos los rangos de sobreinfección y transtorno de la coagulación

sobreinfec = pd.DataFrame([["procalcitonina", 1, 2]],columns= ["Parámetro", "minimo", "maximo"])
trans_coa = pd.DataFrame([["d_dímero", 1000, 1500], ["nt-probnp", 125, 200]],columns= ["Parámetro", "minimo", "maximo"])

In [0]:
# definimos los diferentes escenarios para generar los valores aleatorios

clusters = ["Inflamación Intensa", "Inflamación Moderada", "Inflamación Leve"]
patron_adiccional = ["Ninguno", "SobreInfección", "Transtorno Coagulación"]

In [0]:
# definimos una función para generar valores aleatorios (establecemos un 20% de probabilidades de estar dentro de sobreinfección y un 20% de probabilidades de tener transtorno de coagulación)

def generate_random_values(cluster):
    template = copy.deepcopy(analitica_range)

    #definimos los aleatorios a realizar en función del cluster introducido
    if cluster == "Inflamación Intensa":
        template['Value'] = template.apply(lambda x: round(uniform(int(x.Mínimo_1), int(x.Máximo_1)), 2), axis = 1)
    elif cluster == "Inflamación Moderada":
        template['Value'] = template.apply(lambda x: round(uniform(int(x.Mínimo_2), int(x.Máximo_2)), 2), axis = 1)
    elif cluster == "Inflamación Leve":
        template['Value'] = template.apply(lambda x: round(uniform(int(x.Mínimo_3), int(x.Máximo_3)), 2), axis = 1)
    else:
        raise Exception('El cluster introducido no es válido, debe de estar dentro del rango: Inflamación Intensa, Inflamación Moderada, Inflamación Leve')


    #eliminamos las columnas que ya no son necesarias
    template.drop(['Mínimo_1', 'Máximo_1', 'Mínimo_2', 'Máximo_2', 'Mínimo_3', 'Máximo_3'], axis = 1, inplace = True)

    #establecemos un 20% de probabilidades de tener sobreinfección
    proba_sobr = uniform(0, 1)
    if proba_sobr >= 0.8:
        template['Value'] = template.apply(lambda x: uniform(sobreinfec['minimo'][0], sobreinfec['maximo'][0]) if x.Parámetro == sobreinfec['Parámetro'][0] else x.Value, axis = 1)

    #establecemos un 20% de probabilidades de tener transtorno de coagulación
    proba_coag = uniform(0, 1)
    if proba_coag >= 0.8:
        for i in range(trans_coa.shape[0]):
          template['Value'] = template.apply(lambda x: uniform(trans_coa['minimo'][i], trans_coa['maximo'][i]) if x.Parámetro == trans_coa['Parámetro'][i] else x.Value, axis = 1)

    #modificamso los datos obtenidos para poder introducirlos en el sistema
    template = template.set_index(['Parámetro']).transpose().reset_index().drop(['index'], axis = 1)
    template.index.names = [""]
    template['Cluster'] = cluster
    return template

In [0]:
#ejecutamos por primera vez el generador de datos para obtener el DataFrame base

data = generate_random_values(clusters[0])

In [0]:
#generamos 200 muestras de datos para cada uno de los cluster

for j in range(len(clusters)):
  for i in range(200):
      data = data.append(generate_random_values(clusters[j]))

In [184]:
data

Parámetro,leucocitos,hematies,hemoglobina,hematocrito,plaquetas,neutrofilos,linfocitos,monocitos,eosinófilos,actividad_de_protrombina,inr,fibrinogeno_derivado,tiempo_de_cefalina,ferritina,d_dímero,glucosa_en_suero,creatinina_en_suero,filtrado_glomerular(ckd-epi),sodio_en_suero,potasio_en_suero,cloro_en_suero,asat/got,alat/gpt,ggt,proteína_c_reactiva,procalcitonina,interleuquina-6,interleuqiona-1,proteinas_totales_en_suero,nt-probnp,ph_sangre_arterial,po2 _sangre_arterial,pco2_sangre_arterial,lactato,bicarbonato_sangre_arterial,bicarbonato_std_sangre_arterial,exceso_de_bases_standard,calcio_ionizado,calcio_ionizado_corregido_ph_7.40,anion_gap,Cluster
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
0,4.84,4.50,14.54,37.30,332.68,54.32,34.83,7.63,3.52,118.73,0.42,350.26,9.90,3717.74,424.93000,103.93,0.53,964.23,143.68,3.39,99.60,37.55,24.10,51.40,22.70,0.590000,4.52,23.15,7.57,98.150000,7.0,95.58,47.76,0.43,23.87,22.22,1.55,1.0,1.0,10.77,Inflamación Intensa
0,6.93,4.06,14.60,46.18,263.68,60.78,34.40,5.34,1.98,114.97,0.38,182.66,78.16,5045.25,431.33000,83.51,0.30,561.98,144.55,3.15,102.28,15.24,23.60,34.69,29.33,0.470000,4.75,19.26,6.62,63.140000,7.0,87.45,42.42,0.32,23.58,24.13,1.63,1.0,1.0,8.46,Inflamación Intensa
0,9.39,4.97,12.93,47.63,189.68,56.82,38.44,8.70,3.76,72.29,0.70,362.20,96.59,2791.39,1320.20146,75.36,0.71,495.99,138.77,4.51,99.32,37.52,3.76,36.04,12.90,0.630000,4.17,20.05,6.26,135.001813,7.0,84.86,38.68,0.70,22.16,25.76,0.16,1.0,1.0,10.86,Inflamación Moderada
0,9.90,4.26,16.18,38.75,346.01,50.67,38.37,8.32,4.64,85.69,0.46,217.74,99.35,4596.17,346.58000,78.70,0.07,583.87,143.26,3.34,103.55,2.60,18.86,7.97,0.02,0.080000,1.91,21.76,7.70,21.460000,7.0,84.69,38.43,0.75,23.74,22.42,1.36,1.0,1.0,13.78,Inflamación Leve
0,8.73,4.65,14.38,41.27,232.01,51.23,29.21,8.17,0.80,116.73,0.79,423.48,42.08,5505.41,457.18000,98.40,0.00,112.11,144.57,3.19,103.40,21.56,12.79,30.15,12.33,0.760000,3.54,19.83,7.81,15.310000,7.0,95.92,43.69,0.67,24.28,25.53,0.29,1.0,1.0,9.62,Inflamación Intensa
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,6.40,4.93,15.05,47.23,335.52,43.33,40.02,8.13,2.68,81.40,0.06,386.22,94.13,5315.61,8.30000,78.70,0.02,501.14,141.87,3.64,99.10,38.40,16.43,14.12,8.20,0.860000,0.64,19.66,7.70,47.920000,7.0,86.44,44.89,0.34,22.11,22.64,0.71,1.0,1.0,12.51,Inflamación Leve
0,3.00,4.38,14.83,46.20,190.22,56.08,23.19,6.31,1.55,114.54,0.09,167.11,89.53,4507.85,278.74000,85.97,0.58,916.48,139.62,3.46,103.01,39.54,22.04,33.56,8.47,0.410000,0.67,20.88,7.51,54.100000,7.0,87.35,38.02,0.55,23.52,22.03,0.93,1.0,1.0,11.80,Inflamación Leve
0,3.63,4.82,16.23,38.47,258.63,51.60,38.63,2.74,2.77,87.14,0.54,154.12,37.49,4193.10,158.72000,92.28,0.78,810.52,137.24,3.81,99.26,33.02,16.55,34.99,9.46,1.266784,1.97,18.57,7.25,108.950000,7.0,97.28,42.01,0.61,25.35,23.20,1.11,1.0,1.0,8.14,Inflamación Leve


**En los siguientes DataFrames podemos visualizar diferentes muestras de los datos aleatorios generados**

Muestra de pacientes con Inflamación Intensa

In [185]:
data[data['Cluster'] == "Inflamación Intensa"].head()

Parámetro,leucocitos,hematies,hemoglobina,hematocrito,plaquetas,neutrofilos,linfocitos,monocitos,eosinófilos,actividad_de_protrombina,inr,fibrinogeno_derivado,tiempo_de_cefalina,ferritina,d_dímero,glucosa_en_suero,creatinina_en_suero,filtrado_glomerular(ckd-epi),sodio_en_suero,potasio_en_suero,cloro_en_suero,asat/got,alat/gpt,ggt,proteína_c_reactiva,procalcitonina,interleuquina-6,interleuqiona-1,proteinas_totales_en_suero,nt-probnp,ph_sangre_arterial,po2 _sangre_arterial,pco2_sangre_arterial,lactato,bicarbonato_sangre_arterial,bicarbonato_std_sangre_arterial,exceso_de_bases_standard,calcio_ionizado,calcio_ionizado_corregido_ph_7.40,anion_gap,Cluster
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
0.0,4.84,4.5,14.54,37.3,332.68,54.32,34.83,7.63,3.52,118.73,0.42,350.26,9.9,3717.74,424.93,103.93,0.53,964.23,143.68,3.39,99.6,37.55,24.1,51.4,22.7,0.59,4.52,23.15,7.57,98.15,7.0,95.58,47.76,0.43,23.87,22.22,1.55,1.0,1.0,10.77,Inflamación Intensa
0.0,6.93,4.06,14.6,46.18,263.68,60.78,34.4,5.34,1.98,114.97,0.38,182.66,78.16,5045.25,431.33,83.51,0.3,561.98,144.55,3.15,102.28,15.24,23.6,34.69,29.33,0.47,4.75,19.26,6.62,63.14,7.0,87.45,42.42,0.32,23.58,24.13,1.63,1.0,1.0,8.46,Inflamación Intensa
0.0,8.73,4.65,14.38,41.27,232.01,51.23,29.21,8.17,0.8,116.73,0.79,423.48,42.08,5505.41,457.18,98.4,0.0,112.11,144.57,3.19,103.4,21.56,12.79,30.15,12.33,0.76,3.54,19.83,7.81,15.31,7.0,95.92,43.69,0.67,24.28,25.53,0.29,1.0,1.0,9.62,Inflamación Intensa
0.0,6.26,4.77,15.54,37.02,363.89,68.19,31.67,2.79,1.05,98.97,0.03,211.81,38.6,4081.74,186.11,78.83,0.73,574.85,136.79,4.3,99.65,33.72,2.37,15.62,13.8,0.08,4.68,22.89,7.34,36.3,7.0,94.0,35.48,0.25,24.46,25.55,0.33,1.0,1.0,8.81,Inflamación Intensa
0.0,6.17,4.93,15.01,41.99,274.72,44.05,21.49,2.13,0.99,96.03,0.86,389.12,68.85,3569.2,1418.099479,97.72,0.43,307.3,139.04,4.06,105.66,39.8,2.93,69.82,28.42,0.98,3.8,18.19,7.92,174.339589,7.0,92.04,37.02,0.86,22.32,24.72,1.1,1.0,1.0,8.82,Inflamación Intensa


Muestra de pacientes con Inflamación Moderada

In [186]:
data[data['Cluster'] == "Inflamación Moderada"].head()

Parámetro,leucocitos,hematies,hemoglobina,hematocrito,plaquetas,neutrofilos,linfocitos,monocitos,eosinófilos,actividad_de_protrombina,inr,fibrinogeno_derivado,tiempo_de_cefalina,ferritina,d_dímero,glucosa_en_suero,creatinina_en_suero,filtrado_glomerular(ckd-epi),sodio_en_suero,potasio_en_suero,cloro_en_suero,asat/got,alat/gpt,ggt,proteína_c_reactiva,procalcitonina,interleuquina-6,interleuqiona-1,proteinas_totales_en_suero,nt-probnp,ph_sangre_arterial,po2 _sangre_arterial,pco2_sangre_arterial,lactato,bicarbonato_sangre_arterial,bicarbonato_std_sangre_arterial,exceso_de_bases_standard,calcio_ionizado,calcio_ionizado_corregido_ph_7.40,anion_gap,Cluster
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
0.0,9.39,4.97,12.93,47.63,189.68,56.82,38.44,8.7,3.76,72.29,0.7,362.2,96.59,2791.39,1320.20146,75.36,0.71,495.99,138.77,4.51,99.32,37.52,3.76,36.04,12.9,0.63,4.17,20.05,6.26,135.001813,7.0,84.86,38.68,0.7,22.16,25.76,0.16,1.0,1.0,10.86,Inflamación Moderada
0.0,9.79,4.69,14.75,47.17,155.46,49.65,37.72,6.16,4.69,70.13,0.38,341.8,10.16,2309.23,23.92,83.59,0.3,956.22,137.69,3.51,108.48,8.0,10.65,28.51,13.26,1.155921,3.13,18.62,7.11,42.26,7.0,99.47,36.03,0.43,24.55,24.65,1.02,1.0,1.0,10.32,Inflamación Moderada
0.0,6.8,4.15,15.85,44.54,188.14,49.26,40.76,3.55,2.99,92.09,0.9,328.74,52.28,2948.52,170.53,90.44,0.46,691.33,139.24,4.23,106.47,19.13,26.16,59.82,20.72,1.753157,4.67,19.53,7.92,92.42,7.0,86.95,38.86,0.61,25.46,24.29,1.36,1.0,1.0,15.37,Inflamación Moderada
0.0,3.87,4.6,15.89,44.01,357.11,73.56,27.54,4.87,3.11,114.45,0.28,206.5,37.24,1786.73,297.28,100.29,0.16,683.39,139.56,4.96,100.6,31.91,21.75,67.48,16.9,0.95,3.31,18.54,6.92,91.66,7.0,93.61,40.92,0.21,23.61,24.71,1.02,1.0,1.0,14.98,Inflamación Moderada
0.0,8.01,4.4,16.3,41.32,202.0,72.45,21.63,4.73,1.3,115.02,0.01,164.11,43.34,2987.07,135.17,83.97,0.81,717.29,139.61,4.68,105.9,4.32,8.04,12.49,27.21,1.036525,4.94,20.26,6.28,99.97,7.0,97.58,35.91,0.21,24.15,24.11,1.13,1.0,1.0,15.07,Inflamación Moderada


Muestra de pacientes con Inflamación Leve

In [187]:
data[data['Cluster'] == "Inflamación Leve"].head()

Parámetro,leucocitos,hematies,hemoglobina,hematocrito,plaquetas,neutrofilos,linfocitos,monocitos,eosinófilos,actividad_de_protrombina,inr,fibrinogeno_derivado,tiempo_de_cefalina,ferritina,d_dímero,glucosa_en_suero,creatinina_en_suero,filtrado_glomerular(ckd-epi),sodio_en_suero,potasio_en_suero,cloro_en_suero,asat/got,alat/gpt,ggt,proteína_c_reactiva,procalcitonina,interleuquina-6,interleuqiona-1,proteinas_totales_en_suero,nt-probnp,ph_sangre_arterial,po2 _sangre_arterial,pco2_sangre_arterial,lactato,bicarbonato_sangre_arterial,bicarbonato_std_sangre_arterial,exceso_de_bases_standard,calcio_ionizado,calcio_ionizado_corregido_ph_7.40,anion_gap,Cluster
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
0.0,9.9,4.26,16.18,38.75,346.01,50.67,38.37,8.32,4.64,85.69,0.46,217.74,99.35,4596.17,346.58,78.7,0.07,583.87,143.26,3.34,103.55,2.6,18.86,7.97,0.02,0.08,1.91,21.76,7.7,21.46,7.0,84.69,38.43,0.75,23.74,22.42,1.36,1.0,1.0,13.78,Inflamación Leve
0.0,7.9,4.24,13.62,38.35,252.34,56.71,32.87,2.24,4.81,72.42,0.04,156.28,37.26,5072.8,40.78,91.05,0.86,284.5,136.4,4.87,106.53,15.65,21.22,46.1,3.36,0.19,2.85,24.59,7.76,36.13,7.0,84.41,35.02,0.89,24.4,22.92,1.87,1.0,1.0,9.0,Inflamación Leve
0.0,9.28,4.21,15.66,37.99,175.07,60.43,39.0,2.74,4.21,110.68,0.72,196.7,18.39,4995.1,210.43,103.05,0.44,853.24,144.66,3.44,107.57,13.73,15.2,37.33,2.54,0.32,2.63,20.29,6.07,102.07,7.0,90.55,47.75,0.72,23.18,22.35,1.36,1.0,1.0,10.06,Inflamación Leve
0.0,6.35,4.28,15.05,47.37,268.0,45.96,22.78,6.69,3.66,85.9,0.94,373.39,69.59,3129.71,289.89,92.91,0.37,140.94,136.02,4.64,104.63,37.83,9.5,30.19,2.19,1.854087,2.97,22.0,7.2,72.34,7.0,88.57,38.78,0.15,22.26,24.85,0.99,1.0,1.0,15.39,Inflamación Leve
0.0,9.43,4.01,14.49,38.18,167.51,75.34,34.79,2.03,2.76,105.13,0.92,422.69,36.24,3448.98,1446.645727,105.76,0.53,234.32,136.02,4.91,101.26,28.08,3.88,68.54,6.03,0.03,1.78,22.02,7.07,132.715465,7.0,100.01,35.62,0.73,22.61,23.93,1.07,1.0,1.0,12.96,Inflamación Leve
