**Estudio sobre las Patologías asociadas al COVID-19 y Ébola.**

Dentro de este Script se definirán las tablas con las que se trabajará posteriormente en un modelado de datos. Dentro del modelado de datos se trabajará con datos totalmente ficticios para comprobar cómo se comportan los modelos frente al modelo de la Base de Datos propuesto.

In [0]:
# librerias utilizadas

import copy
import numpy as np
import pandas as pd
from google.colab import drive
from random import uniform

In [3]:
#nos conectamos a las plantillas generadas en csv

drive.mount('/gdrive')
analitica_template = pd.read_excel('/gdrive/My Drive/Pathologies Study/templates/analitica_plantilla.xlsx')
analitica_range = pd.read_excel('/gdrive/My Drive/Pathologies Study/templates/analitica_rangos.xlsx')
anamnesis_template = pd.read_excel('/gdrive/My Drive/Pathologies Study/templates/anamnesis_plantilla.xlsx')
anamnesis_range = pd.read_excel('/gdrive/My Drive/Pathologies Study/templates/anamnesis_rangos.xlsx')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /gdrive


In [0]:
#definimos los rangos de sobreinfección y transtorno de la coagulación

sobreinfec = pd.DataFrame([["procalcitonina", 1, 2]],columns= ["Parámetro", "minimo", "maximo"])
trans_coa = pd.DataFrame([["d_dímero", 1000, 1500], ["nt-probnp", 125, 200]],columns= ["Parámetro", "minimo", "maximo"])

In [0]:
# definimos los diferentes escenarios para generar los valores aleatorios

clusters = ["Inflamación Intensa", "Inflamación Moderada", "Inflamación Leve"]
patron_adiccional = ["Ninguno", "SobreInfección", "Transtorno Coagulación"]

In [0]:
# definimos una función para generar valores aleatorios (establecemos un 20% de probabilidades de estar dentro de sobreinfección y un 20% de probabilidades de tener transtorno de coagulación)

def generate_random_values(cluster):
    template = copy.deepcopy(analitica_range)

    #definimos los aleatorios a realizar en función del cluster introducido
    if cluster == "Inflamación Intensa":
        template['Value'] = template.apply(lambda x: round(uniform(int(x.Mínimo_1), int(x.Máximo_1)), 2), axis = 1)
    elif cluster == "Inflamación Moderada":
        template['Value'] = template.apply(lambda x: round(uniform(int(x.Mínimo_2), int(x.Máximo_2)), 2), axis = 1)
    elif cluster == "Inflamación Leve":
        template['Value'] = template.apply(lambda x: round(uniform(int(x.Mínimo_3), int(x.Máximo_3)), 2), axis = 1)
    else:
        raise Exception('El cluster introducido no es válido, debe de estar dentro del rango: Inflamación Intensa, Inflamación Moderada, Inflamación Leve')


    #eliminamos las columnas que ya no son necesarias
    template.drop(['Mínimo_1', 'Máximo_1', 'Mínimo_2', 'Máximo_2', 'Mínimo_3', 'Máximo_3'], axis = 1, inplace = True)

    #establecemos un 20% de probabilidades de tener sobreinfección
    proba_sobr = uniform(0, 1)
    if proba_sobr >= 0.8:
        template['Value'] = template.apply(lambda x: uniform(sobreinfec['minimo'][0], sobreinfec['maximo'][0]) if x.Parámetro == sobreinfec['Parámetro'][0] else x.Value, axis = 1)

    #establecemos un 20% de probabilidades de tener transtorno de coagulación
    proba_coag = uniform(0, 1)
    if proba_coag >= 0.8:
        for i in range(trans_coa.shape[0]):
          template['Value'] = template.apply(lambda x: uniform(trans_coa['minimo'][i], trans_coa['maximo'][i]) if x.Parámetro == trans_coa['Parámetro'][i] else x.Value, axis = 1)

    #modificamso los datos obtenidos para poder introducirlos en el sistema
    template = template.set_index(['Parámetro']).transpose().reset_index().drop(['index'], axis = 1)
    template.index.names = [""]
    template['Cluster'] = cluster
    return template

In [0]:
#ejecutamos por primera vez el generador de datos para obtener el DataFrame base

data = generate_random_values(clusters[0])

In [164]:
#generamos 200 muestras de datos para cada uno de los cluster

for j in range(len(clusters)):
  print (j)
  for i in range(200):
      data = data.append(generate_random_values(clusters[0]))



0
1
2


In [165]:
data

Parámetro,leucocitos,hematies,hemoglobina,hematocrito,plaquetas,neutrofilos,linfocitos,monocitos,eosinófilos,actividad_de_protrombina,inr,fibrinogeno_derivado,tiempo_de_cefalina,ferritina,d_dímero,glucosa_en_suero,creatinina_en_suero,filtrado_glomerular(ckd-epi),sodio_en_suero,potasio_en_suero,cloro_en_suero,asat/got,alat/gpt,ggt,proteína_c_reactiva,procalcitonina,interleuquina-6,interleuqiona-1,proteinas_totales_en_suero,nt-probnp,ph_sangre_arterial,po2 _sangre_arterial,pco2_sangre_arterial,lactato,bicarbonato_sangre_arterial,bicarbonato_std_sangre_arterial,exceso_de_bases_standard,calcio_ionizado,calcio_ionizado_corregido_ph_7.40,anion_gap
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
0,7.53,4.26,14.24,45.63,208.89,52.67,22.25,7.91,2.96,73.03,0.36,201.88,79.73,3591.63,241.15,93.38,0.71,203.73,141.64,3.52,108.93,4.58,7.83,5.97,14.97,0.70,3.22,24.72,6.75,53.69,7.0,91.28,47.93,0.99,24.24,22.51,1.59,1.0,1.0,8.69
0,6.63,4.76,14.12,44.17,177.89,70.81,20.70,5.63,2.34,90.89,0.10,257.04,95.18,3589.43,424.08,88.21,0.22,526.13,143.17,4.15,103.74,2.93,28.11,25.21,14.79,0.83,4.88,18.54,6.14,23.67,7.0,90.53,42.21,0.82,24.58,25.15,1.34,1.0,1.0,9.96
0,8.70,4.72,13.14,39.47,177.79,44.36,27.38,6.82,1.38,73.23,0.79,181.66,70.68,4313.36,189.83,103.55,0.27,877.85,137.01,4.37,106.69,13.48,2.15,15.96,28.38,0.30,4.32,20.65,7.51,17.73,7.0,83.77,45.18,0.96,25.99,25.95,1.06,1.0,1.0,12.99
0,8.59,4.59,15.37,43.43,256.03,49.67,40.57,8.15,2.29,98.37,0.18,386.92,74.85,4101.57,370.87,75.15,0.15,311.53,136.77,4.71,106.86,5.43,23.35,69.14,27.11,0.76,4.80,18.38,7.20,97.39,7.0,92.87,41.93,0.57,24.76,25.53,1.27,1.0,1.0,10.34
0,6.47,4.23,15.03,41.24,179.46,44.35,28.16,6.72,3.17,115.84,0.55,301.72,10.38,5897.89,253.59,100.95,0.87,482.59,143.45,4.38,106.96,36.18,16.36,42.94,22.45,0.05,3.96,23.73,6.62,0.26,7.0,83.88,36.45,0.10,23.16,23.65,1.67,1.0,1.0,9.46
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,8.14,4.43,14.84,37.88,246.86,67.46,43.43,5.45,0.72,90.36,0.87,169.48,4.98,5282.41,101.78,89.41,0.36,908.66,136.92,3.04,104.80,23.07,4.15,19.60,22.54,0.52,3.05,22.24,7.79,93.21,7.0,91.48,40.12,0.31,23.56,25.62,0.04,1.0,1.0,14.71
0,9.46,4.51,12.96,42.66,205.04,45.18,29.82,2.03,1.04,111.75,0.39,412.56,42.96,5931.62,2.99,98.92,0.60,389.73,142.97,4.84,105.13,36.57,29.16,32.16,12.24,0.61,3.69,19.13,6.68,55.61,7.0,86.70,38.44,0.44,23.60,23.74,1.19,1.0,1.0,15.19
0,7.58,4.27,15.76,38.63,204.29,68.26,24.51,8.41,3.18,112.59,0.70,251.91,34.21,5955.02,223.73,89.61,0.95,387.85,138.35,4.07,105.45,15.69,23.85,47.98,28.66,0.76,4.03,20.53,6.22,0.34,7.0,102.32,35.83,0.56,23.83,22.09,1.01,1.0,1.0,13.48
