<a href="https://colab.research.google.com/github/Denev6/CapStone/blob/main/tdcn_demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

- 실행환경: Colab Pro
- Python >= 3.8

In [None]:
!pip install torchsummary

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import os
import gc
import time
import pickle
import random
import warnings
from google.colab import drive

import numpy as np
import pandas as pd
from tqdm.auto import tqdm, trange
import torch
from torch import nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchsummary import summary
from sklearn.metrics import (
    f1_score,
    accuracy_score,
    recall_score,
    precision_score,
    confusion_matrix,
)

In [None]:
# Connect Google Drive (mounted at /content/drive)
drive.mount("/content/drive")
# Ignore unnecessary warnings
warnings.simplefilter("ignore")


def clear():
    """Free memory that is no longer needed.

    Runs Python's garbage collector first and then asks PyTorch to empty its
    CUDA cache.
    """
    for release in (gc.collect, torch.cuda.empty_cache):
        release()

# Top-level working folder
ROOT = "/content/drive/MyDrive/Capstone"

def join_path(*args):
    """Build a path located below `ROOT`.

    Args:
        *args: (str) path components appended to `ROOT` in order

    Returns:
        str: the joined path

    Example:
    >>> join_path('data', 'test.pk')
    '/content/drive/MyDrive/Capstone/data/test.pk'
    """
    segments = (ROOT, *args)
    return os.path.join(*segments)


# Paths of the data files to use
train_csv_path = join_path("data", "train.pk")
dev_csv_path = join_path("data", "dev.pk")
test_csv_path = join_path("data", "test.pk")


def _load_pickle(path):
    """Load one pickled object from `path` and close the file afterwards.

    Args:
        path: (str) location of the pickle file

    Returns:
        The unpickled object.
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(path, "rb") as file:
        return pickle.load(file)


# Load the data (pickle) files
train_data = _load_pickle(train_csv_path)
dev_data = _load_pickle(dev_csv_path)
test_data = _load_pickle(test_csv_path)

# Use CUDA when it is available, otherwise the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
RAND_SEED = 42

# Model-related parameters
BATCH_SIZE = 8
EPOCHS = 10
LEARNING_RATE = 1e-5
MODEL_PATH = join_path("TDCN_SIMPLE.pth")  # path where the trained model is saved

# Step count to apply for gradient accumulation
GRAD_STEP = 2
# Reference: https://denev6.tistory.com/entry/Gradient-Accumulation
# (not needed if there is enough GPU RAM)

Mounted at /content/drive


In [None]:
def seed_everything(seed):
    """Fix the random seed of every module used in this notebook.

    Seeds `random`, NumPy and PyTorch (CPU and all CUDA devices), sets
    `PYTHONHASHSEED`, and configures cuDNN for deterministic behaviour.

    Args:
        seed: (int) seed value shared by all random number generators
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # benchmark mode lets cuDNN pick algorithms by timing them, which can
    # differ between runs; it must be off for reproducible results.
    torch.backends.cudnn.benchmark = False

# Remove randomness so that training is consistent
seed_everything(RAND_SEED)

In [None]:
class EarlyStopping:
    """Early stopping driven by the evaluation loss.

    Keeps the lowest loss seen so far, saves the model's `state_dict`
    whenever the loss improves, and signals a stop after `patience` checks
    whose loss exceeded the best loss by more than `eps`.

    Code walkthrough: https://denev6.tistory.com/entry/Pytorch-EarlyStopping

    Args:
        patience: (int) number of worse checks tolerated before stopping
        save_path: (str) file the best `state_dict` is written to
        eps: (float) margin above the best loss that still counts as "not worse"
    """

    def __init__(self, patience=2, save_path="model.pth", eps=1e-6):
        self._min_loss = float("inf")
        self._patience = patience
        self._path = save_path
        self._eps = eps
        self._bad_checks = 0

    def should_stop(self, model, loss):
        """Record `loss` and report whether training should stop.

        Args:
            model: (Module) model whose weights are saved on improvement
            loss: (float) latest evaluation loss

        Returns:
            bool: True once the worse-check counter reaches `patience`
        """
        improved = loss < self._min_loss
        if improved:
            # New best: remember it, reset the counter and checkpoint.
            self._min_loss = loss
            self._bad_checks = 0
            torch.save(model.state_dict(), self._path)
            return False

        clearly_worse = loss > self._min_loss + self._eps
        if not clearly_worse:
            # Within the tolerance band: counter is left untouched.
            return False

        self._bad_checks += 1
        return self._bad_checks >= self._patience

    def load(self, model):
        """Load the saved best weights into `model` and return it."""
        best_state = torch.load(self._path)
        model.load_state_dict(best_state)
        return model

    @property
    def counter(self):
        """Number of worse checks counted since the last improvement."""
        return self._bad_checks

# Dataset

In [None]:
class CustomDataset(Dataset):
    """PyTorch dataset that serves random fixed-length windows.

    Every item is a window of `data_size` consecutive frames cut from a
    randomly chosen entry of `data`; the requested index is not used.

    Args:
        data: (dict) preprocessed data; each value provides 'pose',
            'features' (objects with `.astype(...).values`, e.g. DataFrames)
            and a label 'y'
        up_count: (int) factor applied to the reported dataset length
        data_size: (int) number of frames per window (the paper uses 5000)

    Returns:
        tuple: (Features, Pose), Label
    """
    def __init__(self, data, up_count=500, data_size=5000):
        self.data = {}
        for k, v in data.items():
            self.data[k] = {
                'pose': torch.FloatTensor(v['pose'].astype('float').values),
                'features': torch.FloatTensor(v['features'].astype('float').values),
                'y': torch.IntTensor([v['y']]),
            }
        self.up_count = up_count
        self.data_size = data_size
        # Cached once so __getitem__ does not rebuild the list on every call.
        self._samples = list(self.data.values())

    def __len__(self):
        return len(self.data) * self.up_count

    def __getitem__(self, index):
        # `index` is intentionally ignored: each call draws a random entry
        # and a random window from it.
        target_data = random.choice(self._samples)
        x1 = target_data['features']
        x2 = target_data['pose']

        # Last valid start position (inclusive), so the final window is
        # reachable and a sequence of exactly `data_size` frames works.
        last_start = len(x1) - self.data_size
        if last_start < 0:
            raise ValueError(
                f"sequence has {len(x1)} frames, fewer than data_size={self.data_size}"
            )
        cut = random.randint(0, last_start)

        # NOTE(review): assumes 'features' and 'pose' have the same number of
        # frames so the same cut is valid for both - confirm against the data.
        x1 = x1[cut: cut + self.data_size]
        x2 = x2[cut: cut + self.data_size]
        y = target_data['y']
        return (x1, x2), y

In [None]:
# 데이터셋 준비
# Prepare the datasets
# (up_count multiplies the length each dataset reports)
training_dataset = CustomDataset(train_data)
dev_dataset = CustomDataset(dev_data, up_count=1)
test_dataset = CustomDataset(test_data, up_count=10)

# Wrap each dataset in a DataLoader that yields batches of BATCH_SIZE
train_dataloader = DataLoader(training_dataset, batch_size=BATCH_SIZE)
dev_dataloader = DataLoader(dev_dataset, batch_size=BATCH_SIZE)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

# Dilated Conv Block

In [None]:
class DilatedConvBlock(nn.Module):
    """DCN block used inside the model.

    - Smallest unit that extracts features from the data.
    - Repeated 5 times inside TDCN.

    The block sums a 1x1 convolution of the input with an ELU-activated
    3x3 convolution (dilation 2, "same" padding) of the same input, and
    optionally batch-normalises the sum.

    Args:
        in_channels: (int) CNN input channels
        out_channels: (int) CNN output channels
        has_BN: (bool) whether batch normalisation is applied
        batch_size: (int) batch size (accepted but not used by this block)

    Reference for the `channels` parameters: https://denev6.tistory.com/entry/pytorch-cnn
    """

    def __init__(self, in_channels, out_channels, has_BN=False, batch_size=8):
        super().__init__()

        # Dilated 3x3 convolution branch
        self.dilated_conv2 = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=(3, 3),
            padding="same",
            dilation=2,
        )
        # Kernel-size-1 convolution branch
        self.conv1d = nn.Conv2d(in_channels, out_channels, kernel_size=1)
        # Batch normalisation layer (only applied when has_BN is set)
        self.batch_norm = nn.BatchNorm2d(out_channels)
        self._has_BN = has_BN

    def forward(self, x):
        pointwise = self.conv1d(x)
        dilated = F.elu(self.dilated_conv2(x))
        merged = pointwise + dilated
        return self.batch_norm(merged) if self._has_BN else merged

In [None]:
class TDCN(nn.Module):
    """TDCN module.

    - Block that extracts features from the data.
    - Applied separately to the Features and the Pose data.

    Stacks five DilatedConvBlocks; each of the first four uses batch
    normalisation and is followed by a (2, 1) max-pooling, the fifth has
    neither.

    Args:
        channels: (list[int] || tuple[int]) consecutive channel sizes of the
            DilatedConvBlocks (entries 0..5 are read)
        batch_size: (int) batch size
    """
    def __init__(self, channels, batch_size=8):
        super().__init__()

        stages = []
        # Four repetitions of Dilated-Conv-Block (with BN) + Max-Pooling
        for idx in range(4):
            stages.append(
                DilatedConvBlock(
                    channels[idx],
                    channels[idx + 1],
                    has_BN=True,
                    batch_size=batch_size,
                )
            )
            stages.append(nn.MaxPool2d(kernel_size=(2, 1)))

        # Final block: no batch normalisation and no pooling afterwards
        stages.append(
            DilatedConvBlock(
                channels[4], channels[5], batch_size=batch_size, has_BN=False
            )
        )
        self.tdcn = nn.Sequential(*stages)

    def forward(self, x):
        return self.tdcn(x)

In [None]:
class PredictionModel(nn.Module):
    """Complete model.

    Extracts features from the Features and Pose inputs with two separate
    TDCNs, concatenates them along the last axis and classifies the result.

    Args:
        batch_size: (int) batch size
        seq_len: (int) number of frames per input sample
        feat_dim: (int) width (last axis) of the Features input
        pose_dim: (int) width (last axis) of the Pose input

    Returns:
        FloatTensor: [probability of not depressed, probability of depressed]
    """
    def __init__(self, batch_size=8, seq_len=5000, feat_dim=136, pose_dim=6):
        super(PredictionModel, self).__init__()

        # TDCNs that extract the features of Features and Pose.
        # Channels follow the paper (can be changed); both must end with the
        # same channel count because the outputs are concatenated on the
        # last axis.
        feat_channels = [1, 64, 64, 32, 8, 8]
        pose_channels = [1, 32, 16, 32, 16, 8]
        self.feat_tdcn = TDCN(channels=feat_channels, batch_size=batch_size)
        self.pose_tdcn = TDCN(channels=pose_channels, batch_size=batch_size)

        # Each TDCN halves the frame axis four times (floor division), so
        # the defaults give 142 * 312 * 8 flattened features.
        pooled_len = seq_len // 2 ** 4
        flat_features = (feat_dim + pose_dim) * pooled_len * feat_channels[-1]

        # Layers that learn from / classify the extracted features
        #   - Linear sizes can be changed (chosen arbitrarily)
        #   - Activation function can be changed (chosen arbitrarily)
        #   - The paper uses (Linear + Activation) three times
        # No ReLU between the last Linear and Softmax: it would clamp the
        # logits at zero and block gradients for negative scores.
        self.classifier = nn.Sequential(
            nn.Flatten(1, -1),
            nn.Linear(flat_features, 32),
            nn.ReLU(inplace=True),
            nn.Linear(32, 2),
            nn.Softmax(-1)
        )

    def forward(self, feat_x, pose_x):
        feat_x = self.feat_tdcn(feat_x)
        # Pose data goes through its own TDCN.
        pose_x = self.pose_tdcn(pose_x)
        out = torch.cat((feat_x, pose_x), dim=-1)
        out = self.classifier(out)
        return out

# Train

In [None]:
# Loss Function
class ProbabilityCrossEntropyLoss(nn.Module):
    """Cross-entropy for a model that already outputs class probabilities.

    `nn.CrossEntropyLoss` applies log-softmax to its input, so feeding it
    softmax outputs normalises twice. This loss takes the log of the given
    probabilities and applies the negative log-likelihood instead.

    Args:
        eps: (float) lower bound applied to probabilities before the log,
            so an exact 0 does not produce an infinite loss
    """

    def __init__(self, eps=1e-8):
        super().__init__()
        self.eps = eps

    def forward(self, probs, target):
        log_probs = torch.log(probs.clamp_min(self.eps))
        return F.nll_loss(log_probs, target)


loss_fn = ProbabilityCrossEntropyLoss()

# TDCN model
model = PredictionModel(BATCH_SIZE)

# Optimizer: Adam
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
# The TDCN paper uses SGD + momentum as shown below
# optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=0.9)

# Lambda scheduler: lowers the learning rate as the epoch count grows
# (factor 0.9**epoch applied to the initial learning rate)
scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda epoch: 0.9**epoch)

In [None]:
model.to(device)

# Summarise the model
# [(1, 5000, 136), (1, 5000, 6)] are the sizes of the Features and Pose data respectively
summary(model, [(1, 5000, 136), (1, 5000, 6)], batch_size=BATCH_SIZE)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [8, 64, 5000, 136]             128
            Conv2d-2         [8, 64, 5000, 136]             640
       BatchNorm2d-3         [8, 64, 5000, 136]             128
  DilatedConvBlock-4         [8, 64, 5000, 136]               0
         MaxPool2d-5         [8, 64, 2500, 136]               0
            Conv2d-6         [8, 64, 2500, 136]           4,160
            Conv2d-7         [8, 64, 2500, 136]          36,928
       BatchNorm2d-8         [8, 64, 2500, 136]             128
  DilatedConvBlock-9         [8, 64, 2500, 136]               0
        MaxPool2d-10         [8, 64, 1250, 136]               0
           Conv2d-11         [8, 32, 1250, 136]           2,080
           Conv2d-12         [8, 32, 1250, 136]          18,464
      BatchNorm2d-13         [8, 32, 1250, 136]              64
 DilatedConvBlock-14         [8, 32, 12

In [None]:
def train(train_loader, dev_loader, model, loss_fn, optimizer, scheduler):
    """Train the model.

    Runs up to `EPOCHS` epochs with gradient accumulation over `GRAD_STEP`
    batches, evaluates on the dev data after every epoch, and stops early
    when the dev loss stops improving.

    Args:
        train_loader: (DataLoader) training data
        dev_loader: (DataLoader) validation data
        model: (Module) model to train
        loss_fn: (Module) loss function called as loss_fn(pred, label)
        optimizer: (optim)
        scheduler: (lr_scheduler)

    Returns:
        Module: the model with the weights of the lowest dev loss loaded
    """

    num_batches = len(train_loader)

    # Use the GPU (if selected by `device`)
    model.to(device)
    loss_fn.to(device)

    # Apply early stopping
    early_stopper = EarlyStopping(patience=2, save_path=MODEL_PATH)

    # Progress display
    epoch_progress = trange(1, EPOCHS + 1)
    tqdm.write("\nEpoch | Train Loss | Test Loss")
    tqdm.write("-" * 30)

    for epoch in epoch_progress:

        model.train()  # switch the model to training mode
        train_loss = 0
        start = time.time()

        # Start every epoch without leftover gradients
        optimizer.zero_grad()

        for step, ((x1, x2), label) in enumerate(train_loader, start=1):
            # Data used for training
            x1 = x1.unsqueeze(1).to(device)
            x2 = x2.unsqueeze(1).to(device)
            label = label.to(device).long().reshape(-1)

            # Model output
            pred = model(x1, x2)

            # Compute the loss (the unscaled value is what gets reported)
            loss = loss_fn(pred, label)
            train_loss += loss.item()

            # Scale so the accumulated gradient is an average over the
            # GRAD_STEP batches rather than their sum.
            (loss / GRAD_STEP).backward()

            # Update every GRAD_STEP batches; also flush a trailing partial
            # group on the last batch of the epoch.
            if step % GRAD_STEP == 0 or step == num_batches:
                optimizer.step()
                optimizer.zero_grad()

        train_loss /= num_batches

        # Compute the loss on the validation data
        dev_loss = evaluate(dev_loader, model, loss_fn)

        end = time.time()
        duration = (end - start) / 60  # elapsed time in minutes

        # Print ' epoch | train loss | dev loss | duration '
        tqdm.write(f"{epoch:5} | {train_loss:10.5f} | {dev_loss:9.5f} | {duration:.1f}")

        if early_stopper.should_stop(model, dev_loss):
            # Early-stopping check; the best model was already saved by
            # the stopper when its loss was recorded.
            tqdm.write(f"--EarlyStopping: [Epoch: {epoch - early_stopper.counter}]")
            break

        # Adjust the learning rate with the scheduler
        scheduler.step()

    # Load the model with the lowest loss
    model = early_stopper.load(model)
    return model


def evaluate(test_loader, model, loss_fn, return_metrics=False):
    """Evaluate the model.

    Args:
        test_loader: (DataLoader) evaluation data
        model: (Module) model to evaluate
        loss_fn: (Module) loss function called as loss_fn(pred, label)
        return_metrics: (bool) whether to return performance metrics

    Returns:
        IF return_metrics
        == False (default):
            float: loss averaged over the batches
        == True:
            dict: accuracy, f1, f1-macro, recall, precision, the loss
                averaged over the batches, and the flattened confusion matrix
    """
    model.eval()  # switch the model to eval mode
    num_batches = len(test_loader)
    test_loss = 0
    true_labels = list()
    pred_values = list()

    with torch.no_grad():
        # gradient computation disabled
        for (x1, x2), label in test_loader:
            x1 = x1.unsqueeze(1).to(device)
            x2 = x2.unsqueeze(1).to(device)
            label = label.to(device).long().reshape(-1)

            pred = model(x1, x2)
            loss = loss_fn(pred, label)
            test_loss += loss.item()

            if return_metrics:
                # Keep the values needed for the metrics
                true_labels += label.detach().cpu().numpy().tolist()
                pred_values += pred.argmax(-1).detach().cpu().numpy().tolist()

    # Average per batch on both return paths so the reported loss means the
    # same thing with and without metrics.
    test_loss /= num_batches

    if not return_metrics:
        # Used inside train() to check the loss
        return test_loss

    # Used after training to inspect the performance metrics
    accuracy = accuracy_score(true_labels, pred_values)
    f1 = f1_score(true_labels, pred_values)
    f1_macro = f1_score(true_labels, pred_values, average="macro")
    recall = recall_score(true_labels, pred_values)
    precision = precision_score(true_labels, pred_values)
    matrix = confusion_matrix(true_labels, pred_values).ravel()

    return {
        "accuracy": accuracy,
        "f1": f1,
        "f1-macro": f1_macro,
        "recall": recall,
        "precision": precision,
        "loss": test_loss,
        "matrix": matrix,
    }

In [None]:
# Train the model
model = train(train_dataloader, dev_dataloader, model, loss_fn, optimizer, scheduler)

  0%|          | 0/10 [00:00<?, ?it/s]


Epoch | Train Loss | Test Loss
------------------------------
    1 |    0.69317 |   0.69315 | 86.3
    2 |    0.57784 |   0.79660 | 91.1
    3 |    0.58298 |   0.65493 | 90.6
    4 |    0.54954 |   0.72992 | 91.5
    5 |    0.53017 |   0.56599 | 90.8
    6 |    0.52962 |   0.57993 | 91.3
    7 |    0.54408 |   0.67993 | 90.9
--EarlyStopping: [Epoch: 5]


# Evaluation

In [None]:
# Free memory
clear()

# Compute and print the performance metrics
metrics = evaluate(test_dataloader, model, loss_fn, return_metrics=True)

print(f"Accuracy:  {metrics['accuracy']:.3f}")
print(f"F1-score:  {metrics['f1']:.3f}")
print(f"F1-macro:  {metrics['f1-macro']:.3f}")
print(f"Recall:    {metrics['recall']:.3f}")
print(f"Precision: {metrics['precision']:.3f}")

print("-" * 30)
# "matrix" is the flattened confusion matrix; this unpacking presumes the
# binary case with four entries - TODO confirm for other label sets
tn, fp, fn, tp = metrics["matrix"]
print(f"TN: {tn}")
print(f"FP: {fp}")
print(f"FN: {fn}")
print(f"TP: {tp}")

Accuracy:  0.702
F1-score:  0.000
F1-macro:  0.412
Recall:    0.000
Precision: 0.000
------------------------------
TN: 330
FP: 0
FN: 140
TP: 0


In [None]:
def show_probs(test_data, model, max=6):
    """Print true labels next to the probabilities the model outputs.

    Samples are drawn in shuffled order from `test_data`; at most
    `max // 2` samples with label 1 and `max - max // 2` samples with
    label 0 are printed.

    Args:
        test_data: (Dataset) data to inspect
        model: (Module) model to inspect
        max: (int) number of samples to print

    Example:
        >>> show_probs(test_dataset, model, 4)
        1: [1.000  0.000]
        0: [1.000  0.000]
        0: [1.000  0.000]
        1: [1.000  0.000]
    """
    # Iterate over the dataset that was passed in, one sample at a time.
    dataloader = DataLoader(test_data, batch_size=1, shuffle=True)

    model.eval()
    # Per-label print quota and running counts (label 0 / label 1).
    quota = {0: max - max // 2, 1: max // 2}
    shown = {0: 0, 1: 0}

    with torch.no_grad():
        for (x1, x2), label in dataloader:
            if shown[0] + shown[1] >= max:
                break

            label = label.item()
            # Skip labels whose quota is used up (or that are not 0/1).
            if shown.get(label, 0) >= quota.get(label, 0):
                continue
            shown[label] += 1

            x1 = x1.unsqueeze(1).to(device)
            x2 = x2.unsqueeze(1).to(device)

            pred = model(x1, x2)
            normal, abnormal = pred.squeeze(0)
            print(f"{label}: [{normal:.3f}  {abnormal:.3f}]")


# Check the model's output values
show_probs(test_dataset, model, 30)

1: [1.000  0.000]
0: [1.000  0.000]
0: [1.000  0.000]
0: [1.000  0.000]
0: [1.000  0.000]
0: [1.000  0.000]
1: [1.000  0.000]
0: [1.000  0.000]
1: [1.000  0.000]
0: [1.000  0.000]
0: [1.000  0.000]
0: [1.000  0.000]
0: [1.000  0.000]
0: [1.000  0.000]
1: [0.972  0.028]
0: [1.000  0.000]
0: [1.000  0.000]
1: [1.000  0.000]
0: [1.000  0.000]
0: [1.000  0.000]
1: [0.500  0.500]
1: [0.995  0.005]
1: [1.000  0.000]
1: [1.000  0.000]
1: [1.000  0.000]
1: [1.000  0.000]
1: [0.837  0.163]
1: [1.000  0.000]
1: [1.000  0.000]
1: [1.000  0.000]
