In [None]:
#STEP 1: Setup Colab + Connect to BigQuery
!pip install -q pandas google-cloud-bigquery

from google.colab import auth
auth.authenticate_user()

from google.cloud import bigquery
client = bigquery.Client(project='sampleproject-56290')


In [None]:
# Sanity check: pull a handful of rows from the demo admissions table.
query = """
SELECT *
FROM `physionet-data.mimiciii_demo.admissions`
LIMIT 5
"""

# Submit the query, then materialise the result as a DataFrame.
admissions_job = client.query(query)
df = admissions_job.to_dataframe()
df.head()

Unnamed: 0,ROW_ID,SUBJECT_ID,HADM_ID,ADMITTIME,DISCHTIME,DEATHTIME,ADMISSION_TYPE,ADMISSION_LOCATION,DISCHARGE_LOCATION,INSURANCE,LANGUAGE,RELIGION,MARITAL_STATUS,ETHNICITY,EDREGTIME,EDOUTTIME,DIAGNOSIS,HOSPITAL_EXPIRE_FLAG,HAS_CHARTEVENTS_DATA
0,12269,10017,199207,2149-05-26 17:19:00,2149-06-03 18:42:00,NaT,EMERGENCY,EMERGENCY ROOM ADMIT,SNF,Medicare,,CATHOLIC,DIVORCED,WHITE,2149-05-26 12:08:00,2149-05-26 19:45:00,HUMERAL FRACTURE,0,1
1,12280,10029,132349,2139-09-22 10:58:00,2139-10-02 14:29:00,NaT,EMERGENCY,EMERGENCY ROOM ADMIT,SNF,Medicare,,PROTESTANT QUAKER,DIVORCED,WHITE,2139-09-22 06:03:00,2139-09-22 11:50:00,SYNCOPE;TELEMETRY,0,1
2,12283,10033,157235,2132-12-05 02:46:00,2132-12-08 15:15:00,NaT,EMERGENCY,EMERGENCY ROOM ADMIT,SNF,Medicare,POLI,CATHOLIC,MARRIED,WHITE,2132-12-04 20:11:00,2132-12-05 04:05:00,RENAL FAILIURE-SYNCOPE-HYPERKALEMIA,0,1
3,12288,10038,111115,2144-02-09 17:53:00,2144-02-21 13:30:00,NaT,EMERGENCY,EMERGENCY ROOM ADMIT,SNF,Medicare,ENGL,CATHOLIC,WIDOWED,WHITE,2144-02-09 13:35:00,2144-02-09 20:12:00,FAILURE TO THRIVE,0,1
4,12293,10043,168674,2185-04-14 00:23:00,2185-04-26 18:20:00,NaT,EMERGENCY,EMERGENCY ROOM ADMIT,SNF,Medicare,,CATHOLIC,,WHITE,2185-04-13 23:01:00,2185-04-14 02:56:00,RESPIRATORY DISTRESS,0,1


In [None]:
#STEP 2: Query MIMIC-III demo ICU stays (joined to patients) from BigQuery
# One row per ICU stay, carrying the patient's gender plus the stay's
# in/out timestamps and first/last care unit.
query = """
SELECT
  p.subject_id, p.gender, i.hadm_id,  i.intime, i.outtime, i.first_careunit, i.last_careunit
FROM
  `physionet-data.mimiciii_demo.icustays` i
JOIN
  `physionet-data.mimiciii_demo.patients` p
ON
  i.subject_id = p.subject_id
LIMIT 10000
"""

# Submit the query, then materialise the result as a DataFrame.
icu_job = client.query(query)
df = icu_job.to_dataframe()

In [None]:
#STEP 3: Preprocess Your Data
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# ICU length of stay in hours, derived from the stay's in/out timestamps.
stay_start = pd.to_datetime(df['intime'])
stay_end = pd.to_datetime(df['outtime'])
df['length_of_stay_hours'] = (stay_end - stay_start).dt.total_seconds() / 3600

# Replace each categorical column with integer codes. The single encoder is
# refit for every column, so afterwards it only holds the last column's classes.
encoder = LabelEncoder()
for column in ('gender', 'first_careunit'):
    df[column] = encoder.fit_transform(df[column])

# Simple features for demo
feature_columns = ['gender', 'first_careunit']
features = df[feature_columns].fillna(0)
print("Input feature shape:", features.shape[1])

# Regression target; missing durations are replaced by 0.
target = df['length_of_stay_hours'].fillna(0)


Input feature shape: 2


In [None]:
#STEP 4: Build a Simple MLP Base Model
import torch
import torch.nn as nn

class BaseMLP(nn.Module):
    """Two-layer perceptron: Linear -> ReLU -> Linear with a single output unit.

    Args:
        in_features: width of the input vectors.
        hidden: width of the hidden layer (default 64).
    """

    def __init__(self, in_features, hidden=64):
        super().__init__()
        # Layers are created in forward order and wrapped in one Sequential,
        # stored under the attribute name ``model``.
        layers = [
            nn.Linear(in_features, hidden),
            nn.ReLU(),
            nn.Linear(hidden, 1),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the stacked layers to ``x`` and return the result."""
        out = self.model(x)
        return out


In [None]:
#STEP 5: Add LoRA to MLP Linear Layers
class LoRALinear(nn.Module):
    """Bias-free linear layer whose effective weight is ``weight + B @ A``.

    ``weight`` is a dense ``(out_features, in_features)`` matrix and
    ``B @ A`` is a rank-``rank`` update built from ``A`` of shape
    ``(rank, in_features)`` and ``B`` of shape ``(out_features, rank)``.

    Initialisation follows the usual LoRA convention: ``B`` starts at zero so
    the low-rank update contributes nothing until training moves it, while
    ``weight`` and ``A`` get a fan-in scaled uniform init instead of
    unit-variance noise.

    All three matrices are trainable parameters. To adapt a fixed base
    weight, call ``layer.weight.requires_grad_(False)`` after construction.

    Args:
        in_features: size of each input vector.
        out_features: size of each output vector.
        rank: inner dimension of the low-rank update (default 4).
    """

    def __init__(self, in_features, out_features, rank=4):
        super().__init__()
        self.rank = rank
        self.weight = nn.Parameter(torch.empty(out_features, in_features))
        self.A = nn.Parameter(torch.empty(rank, in_features))
        # Zero B => B @ A == 0 at initialisation.
        self.B = nn.Parameter(torch.zeros(out_features, rank))
        # Fan-in scaled init (the scheme nn.Linear applies to its weight).
        nn.init.kaiming_uniform_(self.weight, a=5 ** 0.5)
        nn.init.kaiming_uniform_(self.A, a=5 ** 0.5)

    def forward(self, x):
        """Return ``x @ (weight + B @ A).T`` for ``x`` of shape ``(..., in_features)``."""
        # Apply the low-rank factors one after another so the full
        # (out_features, in_features) update matrix is never materialised.
        base_out = x @ self.weight.T
        lora_out = (x @ self.A.T) @ self.B.T
        return base_out + lora_out

class LoRAMLP(nn.Module):
    """Two-layer MLP (LoRALinear -> ReLU -> LoRALinear) with one output unit.

    Args:
        in_features: width of the input vectors. When omitted, it is taken
            from the notebook-level ``features`` frame (``features.shape[1]``)
            at construction time, so the class itself can be defined before
            ``features`` exists and never captures a stale width.
        hidden: width of the hidden layer (default 64).
        rank: rank passed to both LoRALinear layers (default 4).

    Raises:
        ValueError: if ``in_features`` is omitted and no global ``features``
            is defined.
    """

    def __init__(self, in_features=None, hidden=64, rank=4):
        super().__init__()
        if in_features is None:
            # Resolve the default lazily rather than in the signature, where
            # it would be evaluated once when the class statement runs.
            try:
                in_features = features.shape[1]
            except NameError as exc:
                raise ValueError(
                    "in_features was not given and no global `features` is defined"
                ) from exc
        self.fc1 = LoRALinear(in_features, hidden, rank)
        self.relu = nn.ReLU()
        self.fc2 = LoRALinear(hidden, 1, rank)

    def forward(self, x):
        """Map ``x`` of shape ``(..., in_features)`` to ``(..., 1)``."""
        x = self.relu(self.fc1(x))
        return self.fc2(x)


In [None]:
#STEP 6: Train the LoRA-augmented Model
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split

# Fixed seed so the split, initial weights and batch order repeat on re-run.
SEED = 42
torch.manual_seed(SEED)

X_train, X_test, y_train, y_test = train_test_split(
    features.values, target.values, test_size=0.2, random_state=SEED
)

# Targets get a trailing dimension so they match the model's (batch, 1) output.
train_ds = TensorDataset(
    torch.tensor(X_train, dtype=torch.float32),
    torch.tensor(y_train, dtype=torch.float32).unsqueeze(1),
)
train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)

# Size the input layer from the data instead of hard-coding it: a literal that
# disagrees with the number of feature columns fails in the first matmul.
model = LoRAMLP(in_features=X_train.shape[1], rank=4)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = nn.MSELoss()

for epoch in range(5):
    model.train()
    total_loss = 0  # sum of the per-batch MSE values over the epoch
    for xb, yb in train_loader:
        optimizer.zero_grad()
        pred = model(xb)
        loss = loss_fn(pred, yb)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}")


RuntimeError: mat1 and mat2 shapes cannot be multiplied (64x2 and 3x64)

In [None]:
'''
Query via Big data
'''
SELECT *
FROM `physionet-data.mimiciii_demo.admissions`
LIMIT 5;

SELECT *
FROM `physionet-data.mimic_cxr_jpg.metadata`
LIMIT 5;

SELECT *
FROM `physionet-data.mimic_cxr_jpg.chexpert`
WHERE Cardiomegaly is not null
LIMIT 5;

SELECT *
FROM `physionet-data.mimiciii_demo.patients`
LIMIT 5;

SyntaxError: invalid syntax (ipython-input-1-136341538.py, line 1)