# 수술 중 저혈압(Hypotension) 조기 예측

VitalDB 데이터로 **MAP < 65 mmHg** 저혈압이 **5분 후**에 발생할지 여부를 예측합니다.  
CUDA 사용, 진행률 표시, 과금 방지(최대 시간/스텝 도달 시 자동 저장 후 중단).

In [None]:
# Cell 1: install the packages listed in requirements.txt (only needed once)
%pip install -q -r requirements.txt

In [None]:
# 셀 2: 경로 설정 및 import
import os
import sys
from pathlib import Path

# Project root = the directory named 'hypo_vitaldb'.
# Resolution order:
#   1. the current working directory, if it is the project directory;
#   2. the nearest ancestor of the working directory with that name (covers a
#      notebook launched from a sub-folder of the project);
#   3. the original hard-coded location, kept as a last resort so the author's
#      existing setup keeps working.
_PROJECT_DIR_NAME = 'hypo_vitaldb'
_cwd = Path(os.getcwd())
NB_DIR = next(
    (p for p in (_cwd, *_cwd.parents) if p.name == _PROJECT_DIR_NAME),
    Path(r'C:\Users\sck32\hypo_vitaldb'),
)
# Run from the project root and make its modules (config, data_loader) importable.
os.chdir(NB_DIR)
sys.path.insert(0, str(NB_DIR))

import pandas as pd
import numpy as np
import time
from tqdm.auto import tqdm

from config import (
    VITAL_DIR, CLINICAL_CSV, DATASET_PATH, CHECKPOINT_DIR, MODEL_PATH,
    LOOKBACK_MIN, TRACK_MAP, TRACK_HR, MAP_THRESHOLD_MMHG, HYPOTENSION_DURATION_SEC,
    MAX_RUNTIME_MINUTES, MAX_TRAIN_STEPS, check_data_paths,
    TEST_SIZE, RANDOM_STATE, DEVICE,
)
from data_loader import load_vital_case, build_labels_for_case

# Lookback window length converted from minutes to seconds.
LOOKBACK_SEC = LOOKBACK_MIN * 60
MAX_CASES = 100  # For quick test runs; set to None to process every case.

# Report the project directory and the data-path check result, then stop
# with FileNotFoundError if check_data_paths() reported a failure.
ok, msg = check_data_paths()
print(f"[진행상황] 프로젝트: {NB_DIR}")
print(f"[진행상황] {msg}")
if not ok:
    raise FileNotFoundError(msg)

## 1. 데이터셋 구축

Vital 파일에서 MAP/HR 특성 추출 + 저혈압 라벨 생성 후 CSV 저장.

In [None]:
def extract_features(df, start_idx):
    """Summarise the MAP and HR tracks over one lookback window.

    The window covers up to ``LOOKBACK_SEC`` rows of ``df`` starting at
    ``start_idx`` (this presumably assumes one row per second — TODO confirm
    against ``load_vital_case``).

    Args:
        df: DataFrame for one case; may contain the ``TRACK_MAP`` and
            ``TRACK_HR`` columns.
        start_idx: Positional row index where the window begins.

    Returns:
        A dict with ``<name>_mean``, ``<name>_std`` and ``<name>_min`` for each
        usable track, where ``<name>`` is the part of the track name after the
        last ``/``. Returns ``None`` when the window holds fewer than half of
        ``LOOKBACK_SEC`` rows or when no track has at least 10 numeric samples.
    """
    stop = min(start_idx + LOOKBACK_SEC, len(df))
    if (stop - start_idx) < LOOKBACK_SEC // 2:
        # Window is truncated by the end of the recording; too short to use.
        return None

    window = df.iloc[start_idx:stop]
    feats = {}
    for track in (TRACK_MAP, TRACK_HR):
        if track in window.columns:
            # Non-numeric entries become NaN and are dropped with the gaps.
            values = pd.to_numeric(window[track], errors='coerce').dropna()
            if len(values) >= 10:
                prefix = track.split('/')[-1]
                feats.update({
                    f'{prefix}_mean': values.mean(),
                    f'{prefix}_std': values.std(),
                    f'{prefix}_min': values.min(),
                })
    return feats or None


# Case IDs come from the clinical table; MAX_CASES (when set) keeps only the
# first few for quick test runs.
clinical = pd.read_csv(CLINICAL_CSV)
caseids = clinical['caseid'].dropna().astype(int).unique()
caseids = caseids[:MAX_CASES] if MAX_CASES else caseids

rows = []
start_time = time.perf_counter()
# Runtime budget in seconds; None disables the budget check.
limit_sec = None
if MAX_RUNTIME_MINUTES:
    limit_sec = MAX_RUNTIME_MINUTES * 60

print(f"[진행상황] 데이터셋 구축 시작 (총 {len(caseids)}건 케이스)")
for caseid in tqdm(caseids, desc='[1/2] 케이스 처리 중', unit='건'):
    # Stop once the budget is used up; rows gathered so far are still saved below.
    if limit_sec and limit_sec <= (time.perf_counter() - start_time):
        print(f"\n[과금 방지] {MAX_RUNTIME_MINUTES}분 도달 - 저장 후 중단")
        break

    # Skip cases with no .vital file, no loadable data, or no labels.
    path = VITAL_DIR / f'{caseid:04d}.vital'
    if not path.exists():
        continue
    df = load_vital_case(caseid)
    if df is None or df.empty:
        continue
    labels = build_labels_for_case(df)
    if len(labels) == 0:
        continue

    # The i-th label is paired with the feature window starting at row i * 60.
    for i, label in enumerate(labels):
        feats = extract_features(df, i * 60)
        if feats is not None:
            rows.append({**feats, 'caseid': caseid, 'label': int(label)})

out = pd.DataFrame(rows)
out.to_csv(DATASET_PATH, index=False)
print(f"[진행상황] 데이터셋 구축 완료 — {len(out)}행 저장: {DATASET_PATH}")
out.head()

## 2. 모델 학습 (PyTorch CUDA)

저혈압 발생 여부 이진 분류. CUDA를 사용할 수 있으면 GPU로, 그렇지 않으면 CPU로 학습합니다.

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_auc_score, confusion_matrix


class HypoNet(nn.Module):
    """Small fully connected network that emits one logit per input row.

    Layout: ``in_dim -> 64 -> 32 -> 1``, with ReLU and Dropout(0.2) after each
    of the two hidden linear layers. The output is a raw logit (no sigmoid).
    """

    def __init__(self, in_dim):
        """Build the layer stack for inputs with ``in_dim`` features."""
        super().__init__()
        hidden_a, hidden_b, drop_p = 64, 32, 0.2
        layers = [
            nn.Linear(in_dim, hidden_a),
            nn.ReLU(),
            nn.Dropout(drop_p),
            nn.Linear(hidden_a, hidden_b),
            nn.ReLU(),
            nn.Dropout(drop_p),
            nn.Linear(hidden_b, 1),
        ]
        # Layer positions inside the Sequential fix the state_dict key names.
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return the logits for ``x`` with the trailing size-1 axis removed."""
        out = self.net(x)
        return out.squeeze(-1)


# Train the classifier on the CSV written by the dataset-building cell.
if not DATASET_PATH.exists():
    raise FileNotFoundError('위에서 데이터셋 구축 셀을 먼저 실행하세요.')

df = pd.read_csv(DATASET_PATH)
target = 'label'
# Every column except the case identifier and the label is a model input.
feature_cols = [c for c in df.columns if c not in ('caseid', target)]
# Features missing for a row are filled with 0 before conversion to float32.
X = df[feature_cols].fillna(0).values.astype(np.float32)
y = df[target].values.astype(np.int64)

# Each case contributes many rows, so a purely row-level split puts rows from
# the same case in both train and test and inflates the reported metrics.
# Split by case instead. Note that TEST_SIZE then applies to cases rather than
# rows, and a group split cannot stratify by label, so class balance in the
# two splits may differ slightly. When fewer than two cases are available the
# original stratified row-level split is used.
if 'caseid' in df.columns and df['caseid'].nunique() >= 2:
    from sklearn.model_selection import GroupShuffleSplit

    splitter = GroupShuffleSplit(n_splits=1, test_size=TEST_SIZE, random_state=RANDOM_STATE)
    train_idx, test_idx = next(splitter.split(X, y, groups=df['caseid'].values))
    X_train, X_test = X[train_idx], X[test_idx]
    y_train, y_test = y[train_idx], y[test_idx]
else:
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE, stratify=y
    )

# Use the configured device only when CUDA is actually available.
device = torch.device(DEVICE if torch.cuda.is_available() else 'cpu')
print(f'[진행상황] 학습 장치: {device} | train {len(X_train)}건, test {len(X_test)}건')

train_ds = TensorDataset(torch.from_numpy(X_train), torch.from_numpy(y_train))
train_loader = DataLoader(train_ds, batch_size=256, shuffle=True, num_workers=0)

model = HypoNet(len(feature_cols)).to(device)
opt = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = nn.BCEWithLogitsLoss()

# A single pass over the training data, cut short after MAX_TRAIN_STEPS batches.
model.train()
step = 0
for batch_x, batch_y in tqdm(train_loader, desc='[2/2] 모델 학습 중', unit='배치'):
    if MAX_TRAIN_STEPS is not None and step >= MAX_TRAIN_STEPS:
        print(f"\n[과금 방지] 최대 스텝 {MAX_TRAIN_STEPS} 도달 - 저장 후 중단")
        break
    batch_x = batch_x.to(device)
    # BCEWithLogitsLoss needs float targets with the same shape as the logits.
    batch_y = batch_y.float().unsqueeze(1).to(device)
    opt.zero_grad()
    logits = model(batch_x).unsqueeze(1)
    loss = loss_fn(logits, batch_y)
    loss.backward()
    opt.step()
    step += 1

# Save the checkpoint whether the loop finished or stopped at the step limit.
CHECKPOINT_DIR.mkdir(parents=True, exist_ok=True)
torch.save({'model_state': model.state_dict(), 'optimizer_state': opt.state_dict(), 'step': step}, MODEL_PATH)
print(f'\n[저장] 모델 -> {MODEL_PATH}')

In [None]:
# Evaluation on the held-out test split.
model.eval()
with torch.no_grad():
    X_t = torch.from_numpy(X_test).to(device)
    logits_t = model(X_t)
    # torch.sigmoid replaces the hand-rolled 1 / (1 + exp(-x)), which overflows
    # in float32 (with a RuntimeWarning) for strongly negative logits.
    y_prob = torch.sigmoid(logits_t).cpu().numpy()
    # Kept as a NumPy array under the same name for any later cell that reads it.
    logits = logits_t.cpu().numpy()
y_pred = (y_prob >= 0.5).astype(int)

print('[결과] 분류 성능 (한글)')
# labels=[0, 1] keeps the two target_names valid even when one class is absent
# from both y_test and y_pred.
print(classification_report(y_test, y_pred, labels=[0, 1], target_names=['저혈압 없음', '저혈압']))
# ROC AUC is undefined when the test labels contain a single class.
if np.unique(y_test).size < 2:
    print('AUC-ROC: 계산 불가 (테스트 세트에 한 클래스만 존재)')
else:
    print('AUC-ROC:', roc_auc_score(y_test, y_prob))
print('혼동 행렬:\n', confusion_matrix(y_test, y_pred, labels=[0, 1]))

---
**완료.** 데이터셋: `hypotension_dataset.csv`, 모델: `checkpoints/hypo_model.pt`