- 입력: ../data/sequence/X.npy, ../data/sequence/y.npy  
- 출력: best_lstm.pt  
- 목표: 파이프라인 검증 + 기본 성능 확인 (최종 성능 아님)  

## 1) Imports & 설정

In [44]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import random

In [45]:
# Reproducibility
def seed_all(seed=42):
    """Seed every random number generator this notebook uses.

    Args:
        seed: Integer seed passed to Python's ``random``, NumPy, and
            PyTorch (the CPU generator and all CUDA devices).
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)


# Seed once, up front, before the data split and model construction below.
seed_all(42)

# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

## 2) 데이터 로드

In [46]:
# Load the pre-built sequence features (X) and their labels (y) from disk.
X = np.load(file="../data/sequence/X.npy")
y = np.load(file="../data/sequence/y.npy")

# Sanity check: the recorded run printed (4200, 40, 85) (4200,).
print(X.shape, y.shape)

(4200, 40, 85) (4200,)


## 3) Train / Val 분리 (stratified)

In [47]:
# Hold out 20% of the samples for validation. Stratifying on y keeps the
# class proportions the same in both splits; the fixed random_state makes
# the split repeatable.
X_tr, X_va, y_tr, y_va = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

print("train:", X_tr.shape, "val:", X_va.shape)

train: (3360, 40, 85) val: (840, 40, 85)


## 4) Dataset / DataLoader

In [48]:
class SeqDataset(Dataset):
    """In-memory dataset pairing each feature sequence with its class label.

    Both inputs are copied into tensors once, at construction time, so
    ``__getitem__`` is a plain index lookup.

    Args:
        X: Array-like of samples, indexed by sample along the first axis;
            converted to ``torch.float32``.
        y: Array-like of integer class labels, one per sample; converted
            to ``torch.long``.

    Raises:
        ValueError: If ``X`` and ``y`` hold a different number of samples.
    """

    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)
        # __len__ is driven by y alone, so a length mismatch would otherwise
        # silently ignore trailing samples or fail later with an IndexError.
        if len(self.X) != len(self.y):
            raise ValueError(
                f"X and y must have the same number of samples, "
                f"got {len(self.X)} and {len(self.y)}"
            )

    def __len__(self):
        """Return the number of (sequence, label) pairs."""
        return len(self.y)

    def __getitem__(self, idx):
        """Return the ``(sequence, label)`` tensors stored at ``idx``."""
        return self.X[idx], self.y[idx]

In [49]:
BATCH_SIZE = 64

# Training batches are reshuffled every epoch; drop_last=True discards the
# final partial batch so every training step sees exactly BATCH_SIZE samples.
train_loader = DataLoader(
    SeqDataset(X_tr, y_tr), batch_size=BATCH_SIZE, shuffle=True, drop_last=True
)

# Validation keeps a fixed order and every sample (no drop_last).
val_loader = DataLoader(
    SeqDataset(X_va, y_va), batch_size=BATCH_SIZE, shuffle=False
)

## 5) LSTM 모델 정의

In [50]:
class FallLSTM(nn.Module):
    """Stacked LSTM classifier that predicts from the last timestep.

    Args:
        input_dim: Number of features per timestep.
        hidden_dim: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        num_classes: Number of output logits (defaults to 2, as before).
        dropout: Dropout probability applied between stacked LSTM layers
            (defaults to 0.3, as before). Ignored when ``num_layers`` is 1.
    """

    def __init__(self, input_dim=85, hidden_dim=128, num_layers=2,
                 num_classes=2, dropout=0.3):
        super().__init__()
        # nn.LSTM only applies dropout *between* layers; with a single layer
        # a non-zero value has no effect and makes PyTorch emit a warning,
        # so it is switched off in that case.
        inter_layer_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(
            input_dim, hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=inter_layer_dropout,
        )
        self.fc = nn.Linear(hidden_dim, num_classes)

    def forward(self, x):
        """Return class logits for a batch of sequences.

        Args:
            x: Tensor laid out as ``(batch, seq_len, input_dim)``
                (the LSTM is built with ``batch_first=True``).

        Returns:
            Tensor of shape ``(batch, num_classes)`` holding raw logits.
        """
        out, _ = self.lstm(x)
        out = out[:, -1, :]      # top-layer output at the last timestep
        return self.fc(out)

In [51]:
# Build the network with its default hyperparameters, then move it to DEVICE.
model = FallLSTM()
model = model.to(DEVICE)

## 6) Class Imbalance 처리 (클래스가 균형이므로 가중치 없이 CrossEntropyLoss 사용)

In [52]:
# Plain cross-entropy with no class weights, averaged over each batch
# ("mean" is the default reduction, spelled out here because run_epoch
# re-weights the batch loss by batch size).
criterion = nn.CrossEntropyLoss(reduction="mean")

## 7) Optimizer

In [53]:
# Adam over every model parameter with a learning rate of 1e-3.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

## 8) Train / Validate Loop (파일럿)

In [54]:
def run_epoch(loader, train=True):
    """Run one full pass over ``loader`` and collect loss and predictions.

    Uses the module-level ``model``, ``criterion``, ``optimizer`` and
    ``DEVICE``.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` tensor batches.
        train: When True, put the model in training mode and take an
            optimizer step per batch. When False, put it in eval mode and
            only evaluate.

    Returns:
        Tuple ``(avg_loss, ys, ps)``: the sample-weighted mean loss, the
        true labels and the argmax predictions, the latter two as 1-D
        NumPy arrays in the order the loader produced them.

    Raises:
        ValueError: If ``loader`` yields no batches.
    """
    # train(True) is training mode; train(False) is equivalent to eval().
    model.train(train)
    total_loss = 0.0
    ys, ps = [], []

    # Gradients are only needed when we backpropagate; disabling autograd
    # during validation avoids building the graph and saves memory/time.
    with torch.set_grad_enabled(train):
        for Xb, yb in loader:
            Xb, yb = Xb.to(DEVICE), yb.to(DEVICE)

            if train:
                optimizer.zero_grad()

            logits = model(Xb)
            loss = criterion(logits, yb)

            if train:
                loss.backward()
                optimizer.step()

            # The criterion returns a per-batch value; weight it by batch
            # size so the final division yields a per-sample average.
            total_loss += loss.item() * len(yb)
            ys.append(yb.detach().cpu().numpy())
            ps.append(logits.argmax(dim=1).detach().cpu().numpy())

    if not ys:
        # np.concatenate([]) would raise an opaque ValueError; say why.
        raise ValueError("run_epoch received a loader that yielded no batches")

    ys = np.concatenate(ys)
    ps = np.concatenate(ps)
    avg_loss = total_loss / len(ys)

    return avg_loss, ys, ps

In [55]:
EPOCHS = 15   # pilot run only

# Lowest validation loss seen so far; decides when to checkpoint.
best_val_loss = float("inf")

for epoch in range(1, EPOCHS + 1):
    # Only the loss is needed from the training pass.
    tr_loss = run_epoch(train_loader, train=True)[0]
    va_loss, y_true, y_pred = run_epoch(val_loader, train=False)

    print(f"[{epoch:02d}] train loss: {tr_loss:.4f} | val loss: {va_loss:.4f}")

    # Overwrite the checkpoint whenever validation loss improves.
    if va_loss < best_val_loss:
        best_val_loss = va_loss
        torch.save(model.state_dict(), "best_lstm.pt")

[01] train loss: 0.4703 | val loss: 0.3665
[02] train loss: 0.3472 | val loss: 0.3416
[03] train loss: 0.3431 | val loss: 0.3402
[04] train loss: 0.2904 | val loss: 0.2945
[05] train loss: 0.2611 | val loss: 0.2642
[06] train loss: 0.2512 | val loss: 0.2848
[07] train loss: 0.2547 | val loss: 0.2685
[08] train loss: 0.2237 | val loss: 0.2342
[09] train loss: 0.2366 | val loss: 0.3167
[10] train loss: 0.2621 | val loss: 0.2364
[11] train loss: 0.1883 | val loss: 0.2021
[12] train loss: 0.1925 | val loss: 0.2322
[13] train loss: 0.1724 | val loss: 0.2459
[14] train loss: 0.1689 | val loss: 0.2120
[15] train loss: 0.1617 | val loss: 0.1869


In [56]:
# Report metrics for the checkpoint that was actually saved: reload the best
# weights and re-run validation instead of reusing the predictions left over
# from the final training epoch, which is not necessarily the best one.
model.load_state_dict(torch.load("best_lstm.pt", map_location=DEVICE))
y_true, y_pred = run_epoch(val_loader, train=False)[1:]

print(classification_report(y_true, y_pred, digits=4))
print(confusion_matrix(y_true, y_pred))

              precision    recall  f1-score   support

           0     0.8982    0.9667    0.9312       420
           1     0.9639    0.8905    0.9257       420

    accuracy                         0.9286       840
   macro avg     0.9311    0.9286    0.9285       840
weighted avg     0.9311    0.9286    0.9285       840

[[406  14]
 [ 46 374]]
