# 🧠 Análisis de la Readmisión a la UCI en Pacientes con Hemorragia Intracerebral (MIMIC-IV)

## 1. 🔧 Configuración Inicial

In [ ]:
from google.cloud import bigquery
from google.colab import auth

# Google Cloud project that the BigQuery client is created for.
# Replace the placeholder with your own project id before running.
project_id = "TU_PROYECTO"

# Authenticate the Colab user first, then create the client used by
# the query cells below.
auth.authenticate_user()
client = bigquery.Client(
    project=project_id,
)

## 2. 📦 Extracción de Datos

In [ ]:
# Cohort extraction: one row per ICU stay of a hospitalisation that carries an
# intracerebral-hemorrhage diagnosis, with first-day GCS / vital-sign summaries
# and the id of the patient's next ICU stay (NULL when there is none).
#
# Changes with respect to the previous query:
#   * gender / admittime / dischtime are not columns of `icustays`; they are
#     read from the derived `icustay_detail` table instead.
#   * GCS and vital signs come from the per-stay first-day summary tables, so
#     the join no longer multiplies every GCS row by every vital-sign row of
#     the same stay. The output column names of the old query are preserved
#     through aliases.
#   * ICD codes are matched on (icd_code, icd_version); the same code string
#     can exist in both ICD-9 and ICD-10.
#   * The title filter is case-insensitive (LIKE is case-sensitive in BigQuery).
#   * `readmitted_stay` is the chronologically next ICU stay of the patient,
#     computed over ALL of the patient's stays before the diagnosis filter.
#     stay_id values are not ordered in time, so MIN(stay_id) is not usable.
#   * Only the highest-priority matching diagnosis (lowest seq_num) is kept,
#     so each stay appears once.
#
# NOTE(review): table and column names follow the mimic-code derived-concept
# definitions; confirm them against the dataset version your project can read.
query = """
WITH stays AS (
  SELECT
    subject_id,
    hadm_id,
    stay_id,
    gender,
    admittime,
    dischtime,
    LEAD(stay_id) OVER (
      PARTITION BY subject_id
      ORDER BY icu_intime
    ) AS readmitted_stay
  FROM
    `physionet-data.mimic_derived.icustay_detail`
)
SELECT
  a.subject_id,
  a.hadm_id,
  a.stay_id,
  a.gender,
  a.admittime,
  a.dischtime,
  i.icd_code,
  d.icd_version,
  d.long_title,
  s.gcs_verbal AS gcsverbal,
  s.gcs_motor AS gcsmotor,
  s.gcs_eyes AS gcseyes,
  vitals.heart_rate_mean AS heart_rate,
  vitals.mbp_mean AS meanbp,
  vitals.resp_rate_mean AS resprate,
  vitals.temperature_mean AS tempc,
  vitals.spo2_mean AS spo2,
  a.readmitted_stay
FROM
  stays a
JOIN
  `physionet-data.mimic_hosp.diagnoses_icd` i
  ON a.hadm_id = i.hadm_id
JOIN
  `physionet-data.mimic_hosp.d_icd_diagnoses` d
  ON i.icd_code = d.icd_code
  AND i.icd_version = d.icd_version
LEFT JOIN
  `physionet-data.mimic_derived.first_day_gcs` s
  ON a.stay_id = s.stay_id
LEFT JOIN
  `physionet-data.mimic_derived.first_day_vitalsign` vitals
  ON a.stay_id = vitals.stay_id
WHERE
  LOWER(d.long_title) LIKE '%intracerebral hemorrhage%'
QUALIFY
  ROW_NUMBER() OVER (PARTITION BY a.stay_id ORDER BY i.seq_num) = 1
"""
df = client.query(query).to_dataframe()

## 3. 🧽 Preprocesamiento

In [ ]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Target: 1 when the stay has a recorded readmission stay, 0 otherwise.
# This must be derived BEFORE any imputation: `readmitted_stay` is numeric, so
# a blanket median fill would replace its missing values and turn every row
# into a positive.
df['readmitted'] = df['readmitted_stay'].notnull().astype(int)

# Identifiers carry no clinical signal, and `readmitted_stay` / `readmitted`
# are the label itself; none of them may be used as predictors.
non_feature_cols = {'subject_id', 'hadm_id', 'stay_id', 'readmitted_stay', 'readmitted'}
feature_cols = [
    col
    for col in df.select_dtypes(include='number').columns
    if col not in non_feature_cols
]

# Fill missing predictor values with the column median (predictors only).
df[feature_cols] = df[feature_cols].fillna(df[feature_cols].median())

# Standardise the numeric predictors. Row order matches `df`, so
# `features_scaled[k]` corresponds to `df.iloc[k]`.
scaler = StandardScaler()
features_scaled = scaler.fit_transform(df[feature_cols])

## 4. 📊 Modelado Predictivo

In [ ]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score
from sklearn.model_selection import StratifiedGroupKFold

# Fixed seed so the split and the forest are reproducible between runs.
RANDOM_STATE = 42

X = features_scaled
y = df['readmitted']

# With a single class the forest exposes only one probability column and
# `predict_proba(...)[:, 1]` fails with an opaque IndexError; fail early with
# a message that points at the real problem.
if y.nunique() < 2:
    raise ValueError(
        "The target 'readmitted' contains a single class; "
        "check how the readmission label is built."
    )

# Hold out about 25% of the rows (one of four folds, matching the default
# test size of train_test_split) while keeping all rows of a patient on the
# same side of the split and preserving the class ratio as far as possible.
# A row-level split would let the same patient appear in train and test and
# inflate the reported metrics.
splitter = StratifiedGroupKFold(n_splits=4, shuffle=True, random_state=RANDOM_STATE)
train_idx, test_idx = next(splitter.split(X, y, groups=df['subject_id']))
X_train, X_test = X[train_idx], X[test_idx]
y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

model = RandomForestClassifier(n_estimators=100, random_state=RANDOM_STATE)
model.fit(X_train, y_train)

# Hard class predictions for the report, positive-class probability for AUC.
y_pred = model.predict(X_test)
y_proba = model.predict_proba(X_test)[:, 1]

print(classification_report(y_test, y_pred))
print("AUC:", roc_auc_score(y_test, y_proba))

## 5. 📈 Visualización de Resultados

In [ ]:
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve

# ROC curve of the held-out predictions: false-positive rate on the x axis,
# true-positive rate on the y axis.
fpr, tpr, _ = roc_curve(y_test, y_proba)

fig, ax = plt.subplots()
ax.plot(fpr, tpr)
ax.set_xlabel("FPR")
ax.set_ylabel("TPR")
ax.set_title("Curva ROC")
plt.show()

## 6. 🚀 Despliegue (Futuro)
Se puede utilizar Hugging Face Spaces o Streamlit Cloud para desplegar un frontend que permita:
- Introducir las variables clínicas de un paciente
- Obtener una puntuación de riesgo de readmisión a la UCI
- Visualizar la curva ROC o una matriz de confusión