In [1]:
cd /workspace/sunggu/0.Challenge/Med_tutorial_ChatGPT

/workspace/sunggu/0.Challenge/Med_tutorial_ChatGPT


In [2]:
# List the GPUs on this machine (IPython shell escape) before choosing one.
!nvidia-smi
import os
# Ask CUDA to enumerate devices in PCI bus order.
os.environ["CUDA_DEVICE_ORDER"]     =  'PCI_BUS_ID'
# Expose only device 0 to this process.
# NOTE(review): these variables presumably have to be set before CUDA is first
# initialised in this kernel to take effect - confirm.
os.environ["CUDA_VISIBLE_DEVICES"]  =  '0'
# The printed label is Korean for "number of CPUs".
print("CPU 갯수 = ", os.cpu_count())

Fri Jun 16 13:36:25 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 510.108.03   Driver Version: 510.108.03   CUDA Version: 11.6     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-PCIE...  Off  | 00000000:3B:00.0 Off |                    0 |
| N/A   27C    P0    23W / 250W |      0MiB / 32768MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  Tesla V100-PCIE...  Off  | 00000000:86:00.0 Off |                    0 |
| N/A   44C    P0    78W / 250W |  23107MiB / 32768MiB |    100%      Default |
|       

In [None]:
# !pip install torch==1.13.1+cu117 torchvision==0.14.1+cu117 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu117
# !pip freeze > /workspace/sunggu/0.Challenge/requirements.txt
!pip install -r requirements.txt

# 0. Fix Seed

In [None]:
'''
[Fix Seed]
I want you to act as an AI developer and write PyTorch and Python code for me.
Fix the randomness of numpy, pytorch, cuda, and random function for reproducibility.
'''

In [3]:
import random
import numpy as np
import torch

# One seed shared by every random number generator used in this notebook.
seed = 42

# Seed Python's built-in `random` module and NumPy's global generator.
random.seed(seed)
np.random.seed(seed)

# Seed PyTorch: the CPU generator, the current CUDA device, and all CUDA devices.
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

# Disable the cuDNN auto-tuner and request deterministic cuDNN algorithms.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# 1. Dataset

In [None]:
'''
[Define Dataset]
I want you to act as an AI developer and write PyTorch and Python code for me.
Please help me create a dataset class that performs image preprocessing and augmentation for a binary-classification deep learning framework.

===INFO===
data_dir = '/workspace/sunggu/0.Challenge/Med_tutorial_ChatGPT/dataset'
csv_file = 'rsna_data.csv'
target_class = 'cancer'

Code for creating the RSNA breast cancer dataset class called "RSNA_Dataset". 
This code is a custom dataset class that inherits "Dataset" from PyTorch. 
The dataset can be initialized in either "train" or "valid" mode and reads the file path and label information from the corresponding CSV file.

Each entry in the dataset is defined as follows:
- Read the DICOM file using the image path.
- Apply the Modality Lookup Table (LUT) and Value of Interest (VOI) LUT for the pixel array in the DICOM file.
- Returns the read image and the corresponding label.

In addition to the dataset class, we define a composed transform that performs data preprocessing and augmentation, built by the "get_transforms" function using the Albumentations library.

Please conduct step by step using the following procedure.
	1. In the training mode, perform image resizing (224x224), minmax normalization, change_to_uint8, Contrast Limited Adaptive Histogram Equalization (CLAHE), change_to_float32, horizontal flip, brightness and contrast adjustment, Shift-Scale-Rotate conversion, image inversion, minmax normalization, and tensor conversion.
    2. In validation mode, perform image resizing (224x224), minmax normalization, change_to_uint8, fixed CLAHE, change_to_float32, minmax normalization, and tensor conversion. 
	3. Finally, create "train_dataset" and "valid_dataset" and create a DataLoader that loads them into "train_loader" and "valid_loader". "train_loader" and "valid_loader" can be used for training and validation.

Here is an example template:
<
    import torch
    from torch.utils.data import Dataset

    class MyDataset(Dataset):
    def __init__(self, data_dir):
        # Load the data from the specified directory
        # ...
        
        # Preprocess the data
        # ...

    def __getitem__(self, index):
        # Return the data and labels for the specified index
        # ...

    def __len__(self):
        # Return the number of data samples
        # ...

    # 1. Create Dataset
        # ...
    # 2. Create DataLoader
        # ...
>
'''

In [4]:
import os
import re
import cv2
import skimage
import pydicom
import pandas as pd
import albumentations as A
from pydicom.pixel_data_handlers.util import apply_modality_lut, apply_voi_lut
from albumentations.pytorch import ToTensorV2
from torch.utils.data import Dataset

def list_sort_nicely(l):
    """Return a new list with the strings of ``l`` in natural (human) order.

    Every string is split into alternating text and digit runs; digit runs
    are compared as integers, so ``"img2"`` sorts before ``"img10"``.
    """
    digit_run = re.compile('([0-9]+)')

    def natural_key(name):
        # Digit runs become ints, everything else stays a string.
        return [int(piece) if piece.isdigit() else piece
                for piece in digit_run.split(name)]

    return sorted(l, key=natural_key)

def fixed_clahe(image, **kwargs):
    """Apply OpenCV CLAHE with a clip limit of 2.0 on an 8x8 tile grid.

    Extra keyword arguments are accepted and ignored, which lets the function
    be used as the ``image`` callback of ``A.Lambda`` in ``get_transforms``.
    """
    equalizer = cv2.createCLAHE(tileGridSize=(8, 8), clipLimit=2.0)
    equalized = equalizer.apply(image)
    return equalized

def change_to_uint8(image, **kwargs):
    """Convert ``image`` with ``skimage.util.img_as_ubyte``.

    Extra keyword arguments are accepted and ignored so the function can be
    used as an ``A.Lambda`` image callback.
    """
    as_ubyte = skimage.util.img_as_ubyte(image)
    return as_ubyte

def change_to_float32(image, **kwargs):
    """Convert ``image`` with ``skimage.util.img_as_float32``.

    Extra keyword arguments are accepted and ignored so the function can be
    used as an ``A.Lambda`` image callback.
    """
    as_float = skimage.util.img_as_float32(image)
    return as_float

def min_max_normalization(image, **kwargs):
    """Linearly rescale ``image`` to the range ``[0, 1]`` as ``float32``.

    Args:
        image: NumPy array of any numeric dtype.
        **kwargs: Accepted and ignored so the function can be used as an
            ``A.Lambda`` image callback.

    Returns:
        A new ``float32`` array of the same shape; the input is never
        modified. A constant image has no range to stretch and is returned
        as all zeros, and an empty array is returned as an empty ``float32``
        array. Returning the same dtype and value range in every case keeps
        the following uint8 conversion step from receiving unscaled values.
    """
    scaled = image.astype('float32')  # astype copies, so the caller's array stays intact
    if scaled.size == 0:
        return scaled

    low, high = scaled.min(), scaled.max()
    if high == low:
        # Constant image: avoid dividing by zero and keep the [0, 1] contract.
        return np.zeros_like(scaled)

    scaled -= low
    scaled /= (high - low)
    return scaled

def get_transforms(mode="train"):
    """Build the albumentations pipeline for one dataset split.

    Both pipelines resize to 224x224, min-max normalise, convert to uint8,
    apply CLAHE, convert to float32, min-max normalise again, apply
    ``A.Normalize`` (mean 0.5, std 0.5, max pixel value 1.0) and convert to a
    tensor. ``mode == "train"`` uses ``A.CLAHE`` and inserts the random
    augmentations (horizontal flip, shift/scale/rotate, inversion); any other
    ``mode`` uses the ``fixed_clahe`` helper and no augmentation.
    """
    training = mode == "train"

    # The CLAHE implementation is the only preprocessing step that differs per split.
    if training:
        clahe = A.CLAHE(clip_limit=2.0, tile_grid_size=(8, 8), always_apply=True)
    else:
        clahe = A.Lambda(image=fixed_clahe, always_apply=True)

    # Preprocessing
    pipeline = [
        A.Resize(224, 224),  # 7*2**5 = 224
        A.Lambda(image=min_max_normalization, always_apply=True),
        A.Lambda(image=change_to_uint8, always_apply=True),
        clahe,
        A.Lambda(image=change_to_float32, always_apply=True),
    ]

    # Augmentation (training only)
    if training:
        pipeline.extend([
            A.HorizontalFlip(p=0.5),
            # A.RandomBrightnessContrast(p=0.5),
            A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.2, rotate_limit=30, p=0.5),
            A.InvertImg(p=0.5),
        ])

    # Normalize
    pipeline.extend([
        A.Lambda(image=min_max_normalization, always_apply=True),
        A.Normalize(max_pixel_value=1.0, mean=0.5, std=0.5),
        ToTensorV2(),
    ])

    return A.Compose(pipeline)


class RSNA_Dataset(Dataset):
    """Mammography dataset that reads DICOM files listed in a CSV index.

    The CSV must provide a ``mode`` column (split name), a ``path`` column
    (DICOM file path) and the label column named by ``target_class``.

    Args:
        mode: Split to select; rows whose ``mode`` column equals this value
            are kept. It is also forwarded to ``get_transforms``.
        csv_path: Path of the CSV index. Defaults to ``DEFAULT_CSV`` so that
            ``RSNA_Dataset(mode=...)`` behaves as before.
        target_class: Name of the label column. Defaults to ``'cancer'``.

    Raises:
        ValueError: If no CSV row matches ``mode`` (for example a misspelled
            split name), instead of silently building an empty dataset.
    """

    DEFAULT_CSV = '/workspace/sunggu/0.Challenge/Med_tutorial_ChatGPT/dataset/rsna_data.csv'

    def __init__(self, mode="train", csv_path=None, target_class="cancer"):
        self.root         = self.DEFAULT_CSV if csv_path is None else csv_path
        self.target_class = target_class
        temp_df           = pd.read_csv(self.root)
        self.data_df      = temp_df[temp_df['mode'] == mode]
        if len(self.data_df) == 0:
            raise ValueError(f"No rows with mode == {mode!r} found in {self.root}")
        self.transforms   = get_transforms(mode=mode)
        print(f"len of data: {len(self.data_df)}")

    def __len__(self):
        """Return the number of rows selected for this split."""
        return len(self.data_df)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``.

        The DICOM pixel array is passed through the modality LUT and then the
        VOI LUT, given a trailing channel axis and run through the split's
        transform pipeline. The label is a float tensor of shape ``(1,)``.
        """
        img_path = self.data_df['path'].iloc[idx]
        dcm_data = pydicom.dcmread(img_path)
        temp_img = apply_modality_lut(dcm_data.pixel_array, dcm_data)
        image    = apply_voi_lut(temp_img, dcm_data)

        label    = self.data_df[self.target_class].iloc[idx]
        label    = torch.tensor(label).float().unsqueeze(0)

        # add channel axis
        # NOTE(review): assumes a single-frame 2-D pixel array - confirm for this dataset.
        image    = np.expand_dims(image, axis=-1)
        image    = self.transforms(image=image)['image']

        return image, label
    

In [5]:
from torch.utils.data import DataLoader

# 1. Create Dataset
# One dataset per split; each constructor prints how many rows it selected.
train_dataset = RSNA_Dataset(mode="train")
valid_dataset = RSNA_Dataset(mode="valid")

# 2. Create DataLoader
# Training batches are reshuffled; validation keeps the CSV order and
# yields one image per step.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=200,
    shuffle=True,
    num_workers=40,
)
valid_loader = DataLoader(
    dataset=valid_dataset,
    batch_size=1,
    shuffle=False,
    num_workers=4,
)

len of data: 1852
len of data: 232


# 2. Network

In [6]:
import torch.nn as nn
from torchvision import models
from transformers import ViTModel, ViTConfig

class Resnet50(nn.Module):
    """torchvision ResNet-50 adapted to 1-channel input and a single output logit.

    The first convolution is replaced by a freshly initialised 1-channel
    convolution and the classifier by a freshly initialised linear layer with
    one output, so those two layers do not carry pretrained weights even when
    ``pretrained=True``.
    """

    def __init__(self, pretrained=True):
        super().__init__()
        backbone = models.resnet50(pretrained=pretrained)
        # Stem for single-channel images.
        backbone.conv1 = nn.Conv2d(
            in_channels=1, out_channels=64, kernel_size=7, stride=2, padding=3, bias=False
        )
        # Binary head: one logit per image (fc.in_features is 2048 for ResNet-50).
        backbone.fc = nn.Linear(in_features=backbone.fc.in_features, out_features=1)
        self.model = backbone

    def forward(self, x):
        """Return the backbone output for the batch ``x``."""
        return self.model(x)
    
class ViT(nn.Module):
    """Vision Transformer encoder with a single-logit head for 1-channel images.

    Args:
        pretrained: When true, load the encoder weights from
            ``pretrained_name``; otherwise build a randomly initialised
            encoder from a default ``ViTConfig``.
        pretrained_name: Hugging Face checkpoint used when ``pretrained`` is
            true.

    The encoder is configured for one input channel. Because published
    checkpoints use three channels, the patch-embedding projection cannot be
    loaded and is re-initialised (``ignore_mismatched_sizes=True``).
    ``forward`` returns a tensor of shape ``(batch, 1)`` so the model is
    interchangeable with ``Resnet50`` in this notebook.
    """

    def __init__(self, pretrained=True, pretrained_name="google/vit-base-patch16-224-in21k"):
        super().__init__()
        if pretrained:
            self.model = ViTModel.from_pretrained(
                pretrained_name,
                num_channels=1,
                ignore_mismatched_sizes=True,
            )
        else:
            self.model = ViTModel(ViTConfig(num_channels=1))
        # Classification head on top of the [CLS] token representation.
        self.fc = nn.Linear(self.model.config.hidden_size, 1)

    def forward(self, x):
        """Return one logit per image in the batch ``x``."""
        outputs = self.model(pixel_values=x)
        cls_token = outputs.last_hidden_state[:, 0]
        return self.fc(cls_token)


In [7]:
# Pick the backbone; swap the commented line in to train the ViT variant instead.
model = Resnet50(pretrained=True) # Resnet50
# model = ViT(pretrained=True) # ViT

# Total element count over the parameters that require gradients.
n_parameters = sum(
    map(torch.Tensor.numel, filter(lambda p: p.requires_grad, model.parameters()))
)
print('Number of Learnable Params:', n_parameters)



Number of Learnable Params: 23503809


# 3. Loss

In [None]:
'''
[Define Loss function]
I want you to act as a deep learning expert and code for me. 
I already have a “RSNA_Dataset” of Mammography Breast Cancer. 
Please design a popular loss def function in binary classification task and define the loss as "criterion".
The target and predict(logit) are given in this function and they should be same shape.
The predict is the state before the non-linear activation layer.
Give me a simple example of how to use it.
'''

In [8]:
import torch
import torch.nn as nn

# Binary cross-entropy computed directly on raw logits: the model output is
# passed in without a sigmoid, matching the single-logit heads defined above.
criterion = nn.BCEWithLogitsLoss()

# Example usage (the target must be a float tensor with the same shape as the logit):
# target = torch.tensor([0., 1., 0., 1.])
# logit = torch.tensor([-1.2, 2.5, -0.8, 1.7])
# loss = criterion(logit, target)


# 4. Optimizer

In [None]:
'''
[Define Optimizer]
I want you to act as a deep learning expert and code for me. 
I already have a “RSNA_Dataset” of Mammography Breast Cancer. 
Please design some popular or SOTA optimizers in binary classification network.
Learning rate is 1e-4.
weight_decay is 0.
Other optimizer's params are set default.
Please discuss optimizer hyperparams' key features and use cases.
'''

In [None]:
import torch.optim as optim

# Adam over all model parameters with the learning rate and weight decay
# requested in the prompt above; the alternatives are kept commented out.
# NOTE: a later cell assigns `optimizer` again (AdamW); when the notebook is
# run top to bottom, that later assignment is the one used for training.
optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=0)
# optimizer = optim.SGD(model.parameters(), lr=1e-4, weight_decay=0)
# optimizer = optim.AdamW(params=model.parameters(), lr=1e-4, weight_decay=0) # 230616 model

In [11]:
# Earlier experiments, kept for reference (the first one relies on an `lr` variable
# that is not defined in this notebook):
# optimizer = torch.optim.Adam(params=model.parameters(), lr=lr, betas=(0.9, 0.999), eps=1e-08, weight_decay=5e-4, amsgrad=False)  
# optimizer = torch.optim.AdamW(params=model.parameters(), lr=1e-4, betas=(0.9, 0.999), eps=1e-08, weight_decay=5e-2, amsgrad=False) # 230615 model
# Active configuration: AdamW with lr 1e-4 and no weight decay. This replaces
# any `optimizer` created by an earlier cell.
optimizer = torch.optim.AdamW(params=model.parameters(), lr=1e-4, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False) # 230616 model

# 5. LR scheduler

In [None]:
'''
[Define learning rate(LR) scheduler]
I want you to act as a deep learning expert and code for me. 
Please design some popular or SOTA LR schedulers in binary classification network.
LR scheduler's params are set default.
Please discuss LR scheduler hyperparams' key features and use cases.
'''

In [None]:
import torch.optim.lr_scheduler as lr_scheduler

# Alternatives are kept commented out; as written they would not run because
# StepLR requires `step_size` and CosineAnnealingLR requires `T_max`.
# scheduler = lr_scheduler.StepLR(optimizer)
# ReduceLROnPlateau with default settings; the training loop below calls
# scheduler.step(valid_loss), i.e. the validation loss is the monitored value.
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer)
# scheduler = lr_scheduler.CosineAnnealingLR(optimizer)

# 6. Check the resume point

In [None]:
'''
[Resume training]
I want you to act as a deep learning expert and code for me. 
Please write code that loads the model weights from the checkpoint if it exists.

==INFO===
start_epoch     = 0
total_epoch     = 1000
checkpoint_dir  = '/workspace/sunggu/0.Challenge/Med_tutorial_ChatGPT/checkpoints/230616_ResNet50'
save_dir        = '/workspace/sunggu/0.Challenge/Med_tutorial_ChatGPT/predictions/230616_ResNet50'
checkpoint_path = os.path.join(checkpoint_dir, 'checkpoint.pth')

Please conduct step by step using the following procedure.
    1. make checkpoints and prediction folders if not exist (using exist_ok=True)
    2. load model, optimizer, scheduler, start_epoch
'''

In [13]:
import os

# Epoch range used by the training loop: range(start_epoch, total_epoch).
start_epoch     = 0
total_epoch     = 1000

# Output locations: one folder for checkpoints, one for predictions/logs,
# plus the checkpoint file that the resume cell looks for.
checkpoint_dir  = '/workspace/sunggu/0.Challenge/Med_tutorial_ChatGPT/checkpoints/230616_ResNet50'
save_dir        = '/workspace/sunggu/0.Challenge/Med_tutorial_ChatGPT/predictions/230616_ResNet50'
checkpoint_path = os.path.join(checkpoint_dir, 'checkpoint.pth')

# Create both folders; already-existing folders are left untouched.
os.makedirs(name=checkpoint_dir, exist_ok=True)
os.makedirs(name=save_dir, exist_ok=True)

In [None]:
# Resume from `checkpoint_path` when it exists; otherwise keep the freshly
# initialised model/optimizer/scheduler and the configured `start_epoch`.
if os.path.exists(checkpoint_path):
    # Map to CPU so loading does not depend on the GPU layout of the run that
    # wrote the file.
    # NOTE: torch.load unpickles the file - only load checkpoints you created yourself.
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
    start_epoch = checkpoint['epoch'] + 1

    # The model is moved to the GPU in a later cell (CUDA is used whenever it
    # is available), but the optimizer state restored here follows the
    # parameters' current device. Move the per-parameter buffers to CUDA now so
    # they match the parameters at the first optimizer step. The `step`
    # counter is left where the optimizer put it.
    if torch.cuda.is_available():
        for state in optimizer.state.values():
            for k, v in state.items():
                if k != 'step' and torch.is_tensor(v):
                    state[k] = v.cuda()

    # Korean: "Model weights were loaded from the checkpoint successfully."
    print("모델 가중치를 체크포인트에서 성공적으로 불러왔습니다.")
else:
    # Korean: "No checkpoint found; training starts from scratch."
    print("체크포인트가 없어 처음부터 학습을 시작합니다.")

In [None]:
# Optimizer Error fix...!
# for state in optimizer.state.values():
#     for k, v in state.items():
#         if torch.is_tensor(v):
#             state[k] = v.cuda()

# 7. Using the DataParallel for multi-gpu training

In [None]:
'''
[Multi-Gpu Training]
I want you to act as a deep learning expert and code for me. 
Please code for using multi-gpu training (DataParallel) of the model.
First, check the cuda if it is available.
'''

In [14]:
import torch
import torch.nn as nn
from torch.nn.parallel import DataParallel

# Train on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# `model` is expected to exist already (it is created in the Network section).
# Move its parameters and buffers to the selected device.
model = model.to(device)

# Wrap the model in DataParallel only when more than one GPU is visible.
if torch.cuda.device_count() > 1:
    model = nn.DataParallel(model)
    

# 8. Metric

In [None]:
'''
[Define Metrics]
I want you to act as a deep-learning expert and code for me. 
Please def function code for metrics in binary classification for cancer detection.
Here is what I want metrics: AUC, accuracy, f1, sensitivity, specificity...
Give me a simple example of how to use it.
'''

In [3]:
import numpy as np
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score, recall_score, confusion_matrix, precision_score

def calculate_metrics(predictions, targets, threshold=0.5):
    """Compute binary-classification metrics from predicted probabilities.

    Args:
        predictions: Predicted probabilities of the positive class
            (array-like, any shape; flattened internally).
        targets: Ground-truth labels, 0 or 1 (array-like, same number of
            elements as ``predictions``).
        threshold: A sample is predicted positive when its probability is
            strictly greater than this value. With the default of 0.5 this
            matches ``np.round`` on probabilities in ``[0, 1]``, which rounds
            exactly 0.5 down to 0.

    Returns:
        Tuple ``(auc, accuracy, f1, sensitivity, specificity)``.
        ``auc`` is ``nan`` when ``targets`` contains a single class (ROC AUC
        is undefined there), and ``specificity`` is ``nan`` when there are no
        negative samples.
    """
    scores = np.asarray(predictions, dtype=float).reshape(-1)
    labels = np.asarray(targets).reshape(-1).astype(int)
    # Threshold once and reuse the hard predictions for every metric.
    hard = (scores > threshold).astype(int)

    # AUC is computed on the raw scores, not on the thresholded predictions.
    if np.unique(labels).size < 2:
        auc = float('nan')
    else:
        auc = roc_auc_score(labels, scores)

    accuracy = accuracy_score(labels, hard)
    f1 = f1_score(labels, hard)
    sensitivity = recall_score(labels, hard)

    # labels=[0, 1] forces a 2x2 matrix even if a class is missing, so the
    # four-way unpacking below cannot fail.
    tn, fp, fn, tp = confusion_matrix(labels, hard, labels=[0, 1]).ravel()
    specificity = tn / (tn + fp) if (tn + fp) > 0 else float('nan')

    return auc, accuracy, f1, sensitivity, specificity

# # 예시 사용법
# predictions = [0.8, 0.3, 0.6, 0.9]
# targets = [1, 0, 0, 1]
# auc, accuracy, f1, sensitivity, specificity = calculate_metrics(predictions, targets)


# 9. Training & Validation Loop

In [None]:
'''
[Define Training & Validation Loop function]
I want you to act as a deep-learning expert and code for me. 
Please create the loop def function code for training and validation. 
In this code, you will use the AverageMeter class to calculate the average of the metrics, and the train_loop_fn function and the valid_loop_fn function to perform the training and validation process. 
In addition, the train_loop_fn function and the valid_loop_fn function will utilize a loss function (criterion) to calculate the loss, which will be used to calculate the metrics using 'calculate_metrics' function (accuracy, F1, AUC, sensitivity, and specificity).

Please conduct step by step using the following procedure.
	1. understand the AverageMeter class and how it calculates the average of the metrics.
	2. understand the structure and role of the train_loop_fn and valid_loop_fn functions.
	3. see how to utilize the loss function (criterion) to calculate loss.
	4. understand how to calculate the metrics (accuracy, F1, AUC, sensitivity, specificity) using calculate_metrics function used in the train_loop_fn and valid_loop_fn functions.
	5. train_loader, model, criterion, optimizer, device, epoch are given in train_loop_fn.
	6. valid_loader, model, criterion, device, epoch are given in valid_loop_fn.
    7. analyze the methods (update, average) of AverageMeter that are called by the train_loop_fn and valid_loop_fn functions.
	8. Implement a training and validation loop based on the given code, and check the resulting metrics.
'''

In [None]:
import math
from collections import defaultdict

import numpy as np
from tqdm import tqdm

class AverageMeter:
    """Keep a weighted running sum and count per metric name.

    ``data`` maps each key to ``{'sum': ..., 'count': ...}``; unseen keys
    start at zero for both fields.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Forget every metric recorded so far."""
        self.data = defaultdict(lambda: {'sum': 0, 'count': 0})

    def update(self, key, value, n):
        """Add ``value`` with weight ``n`` to the metric named ``key``."""
        entry = self.data[key]
        entry['sum'] += value * n
        entry['count'] += n

    def average(self):
        """Return ``{key: sum / count}`` for every recorded metric."""
        means = {}
        for name, entry in self.data.items():
            means[name] = entry['sum'] / entry['count']
        return means

def train_loop_fn(train_loader, model, criterion, optimizer, device, epoch):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        train_loader: Iterable of ``(image, target)`` batches with a length.
        model: Network to train; put into training mode here.
        criterion: Loss called as ``criterion(logit, target)``.
        optimizer: Optimizer stepped once per batch.
        device: Device the batches are moved to.
        epoch: Epoch number, used only for the progress-bar text.

    Returns:
        Dict with ``train_loss`` (average weighted by batch size) and ``lr``
        (average of the first parameter group's learning rate over the steps).

    Raises:
        RuntimeError: If a batch produces a non-finite loss. Training is
            stopped before the backward pass so the weights are not updated
            from NaN/inf gradients.
    """
    model.train()
    metric_logger = AverageMeter()
    # epoch_iterator = tqdm(train_loader, desc=f"Training (Epoch {epoch})")
    epoch_iterator = tqdm(train_loader, desc="Training (X / X Steps) (loss=X.X)", dynamic_ncols=True, total=len(train_loader))

    for step, batch_data in enumerate(epoch_iterator):
        image, target = batch_data
        image, target = image.to(device), target.to(device)

        logit = model(image)
        loss = criterion(logit, target)
        loss_value = loss.item()

        if not math.isfinite(loss_value):
            print("Loss is {}, stopping training".format(loss_value))
            raise RuntimeError("Non-finite training loss ({}) at epoch {}, step {}".format(loss_value, epoch, step))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        metric_logger.update(key='train_loss', value=loss_value, n=image.shape[0])
        metric_logger.update(key='lr', value=optimizer.param_groups[0]["lr"], n=1)
        epoch_iterator.set_description("Training: Epochs %d (%d / %d Steps), (train_loss=%2.5f)" % (epoch, step, len(train_loader), loss_value))
        # epoch_iterator.set_postfix(loss=loss_value)

    # return {k: round(v, 7) for k, v in metric_logger.average().items()}
    return metric_logger.average()
    

@torch.no_grad()
def valid_loop_fn(valid_loader, model, criterion, device, epoch=0, save_dir=None):
    """Evaluate ``model`` on ``valid_loader`` and compute validation metrics.

    Args:
        valid_loader: Iterable of ``(image, target)`` batches with a length.
        model: Network to evaluate; put into eval mode here.
        criterion: Loss called as ``criterion(logit, target)``.
        device: Device the batches are moved to.
        epoch: Epoch number, used only for the progress-bar text.
        save_dir: Accepted because the training loop passes it; currently
            unused by this function.

    Returns:
        Dict with ``valid_loss`` (average weighted by batch size) and
        ``valid_auc``, ``valid_accuracy``, ``valid_f1``,
        ``valid_sensitivity``, ``valid_specificity`` computed once over all
        validation samples by ``calculate_metrics``.

    Raises:
        ValueError: If ``valid_loader`` yields no batches.
    """
    model.eval()
    metric_logger = AverageMeter()
    # epoch_iterator = tqdm(valid_loader, desc="Validating")
    epoch_iterator = tqdm(valid_loader, desc="Validating (X / X Steps) (loss=X.X)", dynamic_ncols=True, total=len(valid_loader))

    preds = []
    gts = []
    for step, batch_data in enumerate(epoch_iterator):
        image, target = batch_data
        image, target = image.to(device), target.to(device)

        logit = model(image)
        loss = criterion(logit, target)
        loss_value = loss.item()

        if not math.isfinite(loss_value):
            # No weights are updated here, so validation continues; the
            # averaged valid_loss will reflect the non-finite value.
            print("Validation loss is {} (non-finite)".format(loss_value))

        metric_logger.update(key='valid_loss', value=loss_value, n=image.shape[0])
        # epoch_iterator.set_postfix(loss=loss_value)
        epoch_iterator.set_description("Validating: Epochs %d (%d / %d Steps), (valid_loss=%2.5f)" % (epoch, step, len(valid_loader), loss_value))

        # reshape(-1) keeps a 1-D array even for a batch of one sample;
        # squeeze() would return a 0-d array that np.concatenate rejects.
        preds.append(logit.sigmoid().reshape(-1).detach().cpu().numpy())
        gts.append(target.reshape(-1).detach().cpu().numpy())

    if not preds:
        raise ValueError("valid_loader produced no batches; cannot compute validation metrics")

    preds = np.concatenate(preds)
    gts = np.concatenate(gts)

    # Calculate metrics
    auc, accuracy, f1, sensitivity, specificity = calculate_metrics(preds, gts)

    # Each metric is recorded once, so its average is the value itself.
    # valid_loss is not updated again here: every batch was already counted
    # inside the loop.
    metric_logger.update(key='valid_auc', value=auc, n=1)
    metric_logger.update(key='valid_accuracy', value=accuracy, n=1)
    metric_logger.update(key='valid_f1', value=f1, n=1)
    metric_logger.update(key='valid_sensitivity', value=sensitivity, n=1)
    metric_logger.update(key='valid_specificity', value=specificity, n=1)

    return metric_logger.average()
    # return {k: round(v, 7) for k, v in metric_logger.average().items()}


In [None]:
'''
[Implementation Training & Validation]
I want you to act as a deep-learning expert and code for me. 
You are given a code that trains a model using a training loop and performs validation using a validation loop. 
The code also includes functionality for saving checkpoints, updating the learning rate scheduler, and logging the training and validation statistics. 

Please conduct step by step using the following procedure:
	Step 1: Import the necessary libraries and modules for the code, including warnings, time, and datetime.
	Step 2: Set up a warning filter to ignore any warnings that occur during execution.
	Step 3: Print a message indicating the start of the training process.
	Step 4: Start a loop that iterates over the specified number of epochs, ranging from the start_epoch to the total_epoch.
	Step 5: Within each epoch, call the train_loop_fn function to perform the training process. Pass the train_loader, model, criterion, optimizer, device, and current epoch as arguments. Store the returned training statistics in a variable.
	Step 6: Print the averaged training statistics.
	Step 7: Call the valid_loop_fn function to perform the validation process. Pass the valid_loader, model, criterion, device, and current epoch as arguments. Also, provide the save_dir where the checkpoints will be saved. Store the returned validation statistics in a variable.
	Step 8: Print the averaged validation statistics.
	Step 9: Adjust the learning rate scheduler using the valid_loss from the validation statistics.
	Step 10: Save a checkpoint of the current model, including the epoch, model_state_dict, optimizer_state_dict, and scheduler_state_dict. The checkpoint should be saved in a designated checkpoint_path with a filename that includes the epoch number.
	Step 11: Log the training and validation statistics, including the epoch, learning rate, and both train and valid statistics. The statistics should be stored in a log file located in the checkpoint_path.
	Step 12: Repeat steps 4-11 until all epochs have been completed.
	Step 13: Calculate the total training time by subtracting the start_time from the current time. Format the total time as a human-readable string.
	Step 14: Print the total training time.
'''

In [None]:
import datetime
import json
import os
import shutil
import time
import warnings

# Silence library warnings for the duration of the run.
warnings.filterwarnings(action='ignore')

# Korean: "Training started".
print("학습 시작")
start_time = time.time()

for epoch in range(start_epoch, total_epoch):
    train_stats = train_loop_fn(train_loader, model, criterion, optimizer, device, epoch)
    # Korean: "Average training statistics".
    print("==> 평균 학습 통계: " + str(train_stats))

    valid_stats = valid_loop_fn(valid_loader, model, criterion, device, epoch, save_dir)
    # Korean: "Average validation statistics".
    print("==> 평균 검증 통계: " + str(valid_stats))

    # Update the learning-rate scheduler with the monitored validation loss.
    scheduler.step(valid_stats['valid_loss'])

    # Save a per-epoch checkpoint in the checkpoint folder. DataParallel keeps
    # the real network in `.module`, so its weights are saved unwrapped.
    epoch_checkpoint_path = os.path.join(checkpoint_dir, f"epoch_{epoch}_checkpoint.pth")
    torch.save({
        'epoch': epoch,
        'model_state_dict': model.module.state_dict() if hasattr(model, 'module') else model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scheduler_state_dict': scheduler.state_dict(),
    }, epoch_checkpoint_path)
    # Refresh the file the resume cell looks for, so an interrupted run can
    # continue from the latest finished epoch. `checkpoint_path` itself is
    # deliberately not reassigned.
    shutil.copyfile(epoch_checkpoint_path, checkpoint_path)

    # Append one JSON line per epoch. Keys that already carry the split prefix
    # are left alone, giving e.g. 'train_loss' rather than 'train_train_loss'.
    log_stats = {**{(k if k.startswith('train_') else f'train_{k}'): v for k, v in train_stats.items()},
                 **{(k if k.startswith('valid_') else f'valid_{k}'): v for k, v in valid_stats.items()},
                 'epoch': epoch,
                 'lr': optimizer.param_groups[0]['lr']}

    with open(f"{save_dir}/log.txt", "a") as f:
        # default=float converts NumPy scalar types that json cannot encode itself.
        f.write(json.dumps(log_stats, default=float) + "\n")

# Training finished: report the elapsed wall-clock time (Korean: "Training time").
total_time = time.time() - start_time
total_time_str = str(datetime.timedelta(seconds=int(total_time)))
print('학습 시간: {}'.format(total_time_str))
