In [1]:
import os
import gc
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import pytorch_lightning as L

from glob import glob
from tqdm.auto import tqdm
from sklearn.metrics import f1_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from torchvision.io import read_image
from torchvision.transforms import v2 as  transforms
from torch.utils.data import Dataset, DataLoader
from transformers import Swinv2Config, Swinv2Model, AutoImageProcessor, AutoModelForImageClassification
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from torch.optim.lr_scheduler import ReduceLROnPlateau
from PIL import Image
import albumentations as A
from albumentations.pytorch import ToTensorV2

2024-04-20 12:12:51.826616: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-20 12:12:51.826707: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-20 12:12:51.924096: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
# Indices of every CUDA device visible to this process (used later for DataParallel).
device_ids = list(range(torch.cuda.device_count()))
print(f"Using {len(device_ids)} GPUs")

Using 2 GPUs


In [3]:
import torch # 파이토치 
import random
import numpy as np
import os

# Fix every random seed the pipeline relies on
def seed_everything(seed):
    """Seed Python, NumPy and PyTorch RNGs and request deterministic cuDNN kernels.

    Args:
        seed: Integer seed applied to ``random``, NumPy, and the PyTorch CPU
            and CUDA generators.

    Side effects:
        Sets the ``CUDA_DEVICE_ORDER``, ``CUDA_VISIBLE_DEVICES`` and
        ``PYTHONHASHSEED`` environment variables and the cuDNN backend flags.
    """
    random.seed(seed)  # Python's built-in random module
    # NOTE: the CUDA_* variables only take effect if they are set before CUDA
    # is initialised in this process.
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
    # PYTHONHASHSEED is read at interpreter start-up, so this only influences
    # child processes launched afterwards.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)                  # NumPy global RNG
    torch.manual_seed(seed)               # PyTorch CPU generator
    torch.cuda.manual_seed_all(seed)      # every visible GPU, not only the current one
    torch.backends.cudnn.deterministic = True  # restrict cuDNN to deterministic algorithms
    # Benchmark mode auto-tunes the algorithm per input shape and may pick a
    # different kernel from run to run, which defeats the flag above.
    torch.backends.cudnn.benchmark = False

In [4]:
# Apply the global seed (42) before any data or model code runs.
seed_everything(42)

In [5]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Show the selected device as the cell output.
device

device(type='cuda')

In [6]:
# Root folder of the competition data; the CSV stores paths relative to it.
_DATA_ROOT = '/kaggle/input/brid-image'


def _to_absolute(relative_path):
    """Prefix a CSV-relative image path with the dataset root."""
    return os.path.join(_DATA_ROOT, relative_path)


train_df = pd.read_csv('/kaggle/input/brid-image/train.csv')
for _path_col in ('img_path', 'upscale_img_path'):
    train_df[_path_col] = train_df[_path_col].apply(_to_absolute)

# Encode the text labels as integer class ids; `le` is kept to map ids back to names.
le = LabelEncoder()
train_df['class'] = le.fit_transform(train_df['label'])

In [7]:
# Sanity check: train.csv must have exactly one row per entry in the train folder.
_n_rows = len(train_df)
_n_files = len(os.listdir('/kaggle/input/brid-image/train'))
if _n_rows != _n_files:
    raise ValueError(
        f"train.csv has {_n_rows} rows but the train directory contains {_n_files} entries"
    )

In [8]:
# Preview the training table: image paths, text label and encoded class id.
train_df

Unnamed: 0,img_path,upscale_img_path,label,class
0,/kaggle/input/brid-image/./train/TRAIN_00000.jpg,/kaggle/input/brid-image/./upscale_train/TRAIN...,Ruddy Shelduck,19
1,/kaggle/input/brid-image/./train/TRAIN_00001.jpg,/kaggle/input/brid-image/./upscale_train/TRAIN...,Gray Wagtail,9
2,/kaggle/input/brid-image/./train/TRAIN_00002.jpg,/kaggle/input/brid-image/./upscale_train/TRAIN...,Indian Peacock,13
3,/kaggle/input/brid-image/./train/TRAIN_00003.jpg,/kaggle/input/brid-image/./upscale_train/TRAIN...,Common Kingfisher,3
4,/kaggle/input/brid-image/./train/TRAIN_00004.jpg,/kaggle/input/brid-image/./upscale_train/TRAIN...,Common Kingfisher,3
...,...,...,...,...
15829,/kaggle/input/brid-image/./train/TRAIN_15829.jpg,/kaggle/input/brid-image/./upscale_train/TRAIN...,Common Myna,4
15830,/kaggle/input/brid-image/./train/TRAIN_15830.jpg,/kaggle/input/brid-image/./upscale_train/TRAIN...,Common Kingfisher,3
15831,/kaggle/input/brid-image/./train/TRAIN_15831.jpg,/kaggle/input/brid-image/./upscale_train/TRAIN...,Cattle Egret,2
15832,/kaggle/input/brid-image/./train/TRAIN_15832.jpg,/kaggle/input/brid-image/./upscale_train/TRAIN...,Ruddy Shelduck,19


In [9]:
SEED = 42        # random_state of the StratifiedKFold split
N_SPLIT = 5      # number of cross-validation folds
BATCH_SIZE = 24  # batch size of both the training and validation DataLoaders

In [10]:
# Shuffled, stratified K-fold splitter; the training cell calls split() with
# train_df['class'] as the stratification target.
skf = StratifiedKFold(n_splits=N_SPLIT, random_state=SEED, shuffle=True)

## 데이터셋 정의

In [11]:
import cv2
from torch.utils.data import Dataset # 데이터 생성을 위한 클래스
import numpy as np

class ImageDataset(Dataset):
    """Dataset that loads the images listed in a DataFrame with OpenCV.

    Args:
        df: DataFrame with one row per image. Must contain ``path_col`` and,
            for the labelled modes, a ``'class'`` column.
        path_col: Name of the column that holds the image file path.
        mode: ``'train'`` or ``'val'`` yield ``(image, label)``; ``'test'``
            yields the image only.
        transform: Optional callable invoked as ``transform(image=image)``
            whose result is indexed with ``['image']``.

    Raises:
        ValueError: If ``mode`` is not one of the supported modes.
    """

    _LABELLED_MODES = ('train', 'val')
    _MODES = _LABELLED_MODES + ('test',)

    def __init__(self, df, path_col, mode='train', transform=None):
        super().__init__()
        if mode not in self._MODES:
            raise ValueError(f"mode must be one of {self._MODES}, got {mode!r}")
        self.df = df
        self.path_col = path_col
        self.mode = mode
        self.transform = transform

    def __len__(self):
        """Return the number of rows (images) in the DataFrame."""
        return len(self.df)

    def _load_image(self, img_path):
        """Read ``img_path`` with OpenCV and return it converted from BGR to RGB."""
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {img_path}")
        return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    def __getitem__(self, idx):
        """Return the sample at positional index ``idx``.

        Labelled modes return ``(image, label)`` whether or not a transform is
        set; ``'test'`` mode returns only the image.
        """
        row = self.df.iloc[idx]
        image = self._load_image(row[self.path_col])

        if self.transform is not None:
            image = self.transform(image=image)['image']

        if self.mode in self._LABELLED_MODES:
            return image, row['class']
        return image

In [12]:
# Preprocessing constants shared by both pipelines (the standard ImageNet channel statistics).
_IMG_SIZE = 256
_NORM_MEAN = (0.485, 0.456, 0.406)
_NORM_STD = (0.229, 0.224, 0.225)


def _resize_step():
    """Bicubic resize to the fixed square input size."""
    return A.Resize(_IMG_SIZE, _IMG_SIZE, interpolation=cv2.INTER_CUBIC)


def _finalize_steps():
    """Normalisation followed by conversion to a torch tensor."""
    return [
        A.Normalize(mean=_NORM_MEAN, std=_NORM_STD, max_pixel_value=255.0, always_apply=False, p=1.0),
        ToTensorV2(),
    ]


# Random augmentations used for training only, in application order.
_train_augmentations = [
    # brightness / contrast jitter
    A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.3),
    # vertical, then horizontal flip
    A.VerticalFlip(p=0.2),
    A.HorizontalFlip(p=0.5),
    # shift, scale and rotate
    A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.2, rotate_limit=30, p=0.3),
    # exactly one of emboss / sharpen / blur
    A.OneOf([A.Emboss(p=1), A.Sharpen(p=1), A.Blur(p=1)], p=0.3),
    # local piecewise-affine distortion
    A.PiecewiseAffine(p=0.3),
]

train_transform = A.Compose([_resize_step(), *_train_augmentations, *_finalize_steps()])

# The validation pipeline has no augmentation: resize, normalise, convert to tensor.
val_transform = A.Compose([_resize_step(), *_finalize_steps()])

In [13]:
# Name of the train_df column whose paths are loaded (the table also has 'upscale_img_path').
img_dir = 'img_path'

# Datasets over the full training table; the cross-validation cell rebuilds both per fold.
dataset_train = ImageDataset(train_df, path_col=img_dir, transform=train_transform)
dataset_valid = ImageDataset(train_df, path_col=img_dir, transform=val_transform)

In [14]:
def seed_worker(worker_id):
    """Seed NumPy and ``random`` from the calling process's torch seed.

    Passed to the DataLoaders as ``worker_init_fn``; ``worker_id`` is accepted
    for that signature but not used.
    """
    derived_seed = torch.initial_seed() % (2 ** 32)  # fold the torch seed into 32 bits
    for apply_seed in (np.random.seed, random.seed):
        apply_seed(derived_seed)


# Dedicated generator with a fixed seed, handed to the DataLoaders.
g = torch.Generator()
g.manual_seed(0)

<torch._C.Generator at 0x78d13f916110>

In [15]:
class CustomModel(nn.Module):
    """Backbone plus a small classification head.

    The head applies ``Tanh`` to the backbone's ``pooler_output`` and projects
    it to ``num_classes`` logits. ``nn.LazyLinear`` infers its input width on
    the first forward pass, so one forward pass must run before the head's
    parameters are materialised.

    Args:
        model: Backbone module whose forward output exposes ``pooler_output``.
        num_classes: Number of output logits. Defaults to 25.
    """

    def __init__(self, model, num_classes=25):
        super().__init__()
        self.model = model
        self.clf = nn.Sequential(
            nn.Tanh(),
            nn.LazyLinear(num_classes),
        )

    # @torch.compile  (left disabled)
    def forward(self, x):
        """Return class logits for the input batch ``x``."""
        pooled = self.model(x).pooler_output
        return self.clf(pooled)

In [16]:
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint

# Early-stopping settings: track validation macro-F1 (higher is better), with
# a minimum delta of 0.01 and a patience of 3.
_early_stopping_kwargs = {
    'monitor': 'val_macro_f1',
    'mode': 'max',
    'min_delta': 0.01,
    'patience': 3,
    'verbose': True,
}
early_stopping = EarlyStopping(**_early_stopping_kwargs)

In [17]:
learning_rate = 8e-6  # learning rate passed to AdamW in the training cell
num_epochs = 3        # epochs per fold; also determines the scheduler's total step count

In [18]:
import torch.nn as nn # 신경망 모듈

# Cross-entropy loss for classification (the training cell creates its own instance per fold).
criterion = nn.CrossEntropyLoss()

In [19]:
from transformers import get_cosine_schedule_with_warmup

# The scheduler itself is created inside the per-fold training loop, once the optimizer exists.

In [20]:
import torch


class SAM(torch.optim.Optimizer):
    """Sharpness-Aware Minimization wrapper around a base optimizer.

    One update takes two forward/backward passes: ``first_step`` moves the
    weights along the (scaled) gradient, the loss is re-evaluated there, and
    ``second_step`` restores the original weights and lets the base optimizer
    update them with the gradients computed at the perturbed point.
    """

    def __init__(self, params, base_optimizer, rho=0.05, adaptive=False, **kwargs):
        """Create the wrapper and the wrapped optimizer.

        Args:
            params: Parameters (or parameter groups) to optimise.
            base_optimizer: Optimizer class (not an instance); it is called as
                ``base_optimizer(self.param_groups, **kwargs)``.
            rho: Scale of the weight perturbation; must be non-negative.
            adaptive: If true, the perturbation is scaled element-wise by the
                squared parameter values.
            **kwargs: Forwarded to ``base_optimizer`` and stored in the defaults.
        """
        assert rho >= 0.0, f"Invalid rho, should be non-negative: {rho}"

        defaults = dict(rho=rho, adaptive=adaptive, **kwargs)
        super(SAM, self).__init__(params, defaults)

        # Both optimizers share the same param_groups object, so a change made
        # through one of them is visible to the other.
        self.base_optimizer = base_optimizer(self.param_groups, **kwargs)
        self.param_groups = self.base_optimizer.param_groups
        self.defaults.update(self.base_optimizer.defaults)

    @torch.no_grad()
    def first_step(self, zero_grad=False):
        """Perturb every parameter that has a gradient by ``e(w)``.

        The original values are stashed in ``self.state[p]["old_p"]`` so that
        ``second_step`` can restore them. Gradients are cleared afterwards when
        ``zero_grad`` is true.
        """
        grad_norm = self._grad_norm()
        for group in self.param_groups:
            # 1e-12 guards against division by zero when all gradients vanish
            scale = group["rho"] / (grad_norm + 1e-12)

            for p in group["params"]:
                if p.grad is None: continue
                self.state[p]["old_p"] = p.data.clone()
                e_w = (torch.pow(p, 2) if group["adaptive"] else 1.0) * p.grad * scale.to(p)
                p.add_(e_w)  # climb to the local maximum "w + e(w)"

        if zero_grad: self.zero_grad()

    @torch.no_grad()
    def second_step(self, zero_grad=False):
        """Restore the stashed weights, then run the base optimizer's step.

        Gradients are cleared afterwards when ``zero_grad`` is true.
        """
        for group in self.param_groups:
            for p in group["params"]:
                if p.grad is None: continue
                p.data = self.state[p]["old_p"]  # get back to "w" from "w + e(w)"

        self.base_optimizer.step()  # do the actual "sharpness-aware" update

        if zero_grad: self.zero_grad()

    @torch.no_grad()
    def step(self, closure=None):
        """Run a full SAM update; ``closure`` is required.

        Gradients for the first step must already be populated by the caller.
        The closure is invoked once, after the perturbation, and is expected
        to perform a full forward-backward pass. Returns ``None``.
        """
        assert closure is not None, "Sharpness Aware Minimization requires closure, but it was not provided"
        closure = torch.enable_grad()(closure)  # the closure should do a full forward-backward pass

        self.first_step(zero_grad=True)
        closure()
        self.second_step()

    def _grad_norm(self):
        """Return the L2 norm of the per-parameter gradient norms.

        When ``adaptive`` is set for a group, each gradient is weighted by
        ``|p|`` before its norm is taken. Parameters without a gradient are
        skipped.
        """
        shared_device = self.param_groups[0]["params"][0].device  # put everything on the same device, in case of model parallelism
        norm = torch.norm(
                    torch.stack([
                        ((torch.abs(p) if group["adaptive"] else 1.0) * p.grad).norm(p=2).to(shared_device)
                        for group in self.param_groups for p in group["params"]
                        if p.grad is not None
                    ]),
                    p=2
               )
        return norm

    def load_state_dict(self, state_dict):
        """Load the optimizer state and re-link the base optimizer's param groups."""
        super().load_state_dict(state_dict)
        self.base_optimizer.param_groups = self.param_groups

In [None]:
# Side length the transforms resize to; the LazyLinear warm-up pass uses the same size.
INPUT_SIZE = 256
# Directory that receives the best weights of every fold.
OUTPUT_DIR = '/kaggle/working'
os.makedirs(OUTPUT_DIR, exist_ok=True)

for fold_idx, (train_index, val_index) in enumerate(skf.split(train_df, train_df['class'])):

    # StratifiedKFold.split yields positional indices, so rows are selected with
    # iloc (loc only works while the index happens to be 0..n-1).
    train_fold_df = train_df.iloc[train_index]
    val_fold_df = train_df.iloc[val_index]

    dataset_train = ImageDataset(train_fold_df, path_col=img_dir, mode='train', transform=train_transform)
    dataset_valid = ImageDataset(val_fold_df, path_col=img_dir, mode='val', transform=val_transform)
    print(f'Fold {fold_idx}: {len(dataset_train)} train / {len(dataset_valid)} validation samples')

    loader_train = DataLoader(dataset_train, batch_size=BATCH_SIZE,
                              shuffle=True, worker_init_fn=seed_worker,
                              generator=g, num_workers=2)
    loader_valid = DataLoader(dataset_valid, batch_size=BATCH_SIZE,
                              shuffle=False, worker_init_fn=seed_worker,
                              generator=g, num_workers=2)

    model = CustomModel(
        Swinv2Model.from_pretrained("microsoft/swinv2-large-patch4-window12to16-192to256-22kto1k-ft")
    ).to(device)

    # The LazyLinear head has no parameters until it has seen one batch, so run
    # a single forward pass before wrapping the model and building the optimizer.
    # no_grad avoids building an autograd graph for this throw-away pass.
    with torch.no_grad():
        model(torch.randn(1, 3, INPUT_SIZE, INPUT_SIZE).to(device))

    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        # DataParallel splits each batch along dim 0 across the listed GPUs
        model = nn.DataParallel(model, device_ids=device_ids).to(device)

    print(device)

    optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
    criterion = torch.nn.CrossEntropyLoss()

    # The scheduler is stepped once per batch: 10% of one epoch is warm-up,
    # followed by cosine decay over all training steps of the fold.
    scheduler = get_cosine_schedule_with_warmup(optimizer,
                                                num_warmup_steps=len(loader_train) // 10,
                                                num_training_steps=len(loader_train) * num_epochs)

    # Lightning callbacks expect (trainer, pl_module) and cannot be driven from
    # a hand-written loop, so best-model tracking and early stopping are done
    # manually here. Patience and min_delta are still read from the
    # `early_stopping` configuration object.
    patience = early_stopping.patience
    min_delta = abs(early_stopping.min_delta)
    best_macro_f1 = 0
    best_model_path = None
    stop_reference = float('-inf')  # best score seen by the early-stopping rule
    stale_epochs = 0                # epochs since the last sufficient improvement

    for epoch in range(num_epochs):

        # == [ training ] ==========================================
        model.train()
        train_loss = 0

        train_loop = tqdm(loader_train, desc=f"Epoch {epoch+1}/{num_epochs} - Training", leave=False)
        for images, labels in train_loop:
            images = images.float().to(device)
            labels = labels.long().to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            scheduler.step()

            train_loss += loss.item()
            train_loop.set_postfix(loss=loss.item())

        train_loss /= len(loader_train)
        print(f'에폭 [{epoch+1}/{num_epochs}] - 훈련 데이터 손실값 : {train_loss:.4f}')

        # == [ validation ] ========================================
        model.eval()
        val_loss = 0
        y_true = []
        y_pred = []

        with torch.no_grad():
            val_loop = tqdm(loader_valid, desc=f"Epoch {epoch+1}/{num_epochs} - Validation", leave=False)
            for images, labels in val_loop:
                images = images.float().to(device)
                labels = labels.long().to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)

                val_loss += loss.item()
                val_loop.set_postfix(loss=loss.item())

                preds = outputs.argmax(dim=1)
                y_true.extend(labels.cpu().numpy())
                y_pred.extend(preds.cpu().numpy())

        val_loss /= len(loader_valid)

        macro_f1 = f1_score(y_true, y_pred, average='macro')

        print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, val loss: {val_loss:.4f}, val_macro_f1: {macro_f1:.4f}')

        # Keep only the best weights of this fold on disk.
        if macro_f1 > best_macro_f1:
            new_best_path = os.path.join(
                OUTPUT_DIR,
                f'swinv2-large-resize-fold_idx={fold_idx}-{epoch:02d}-{train_loss:.4f}-{macro_f1:.4f}',
            )
            # NOTE: when the model is wrapped in DataParallel the saved keys
            # carry a 'module.' prefix; load them into an identically wrapped
            # model or strip the prefix first.
            torch.save(model.state_dict(), new_best_path)
            if best_model_path is not None and os.path.exists(best_model_path):
                os.remove(best_model_path)
            best_model_path = new_best_path
            best_macro_f1 = macro_f1

        # Manual early stopping on validation macro-F1 (higher is better).
        if macro_f1 > stop_reference + min_delta:
            stop_reference = macro_f1
            stale_epochs = 0
        else:
            stale_epochs += 1
            if stale_epochs >= patience:
                print(f'Early stopping fold {fold_idx} after epoch {epoch+1}: '
                      f'val_macro_f1 did not improve by {min_delta} for {patience} epochs')
                break

    # Drop every object that still references this fold's parameters or
    # activations before the next fold loads a fresh model; the optimizer and
    # scheduler would otherwise keep the old weights alive on the GPU.
    model.cpu()
    images = labels = outputs = loss = None
    del model, optimizer, scheduler
    gc.collect()
    torch.cuda.empty_cache()

<__main__.ImageDataset object at 0x78d13e3d3280> <__main__.ImageDataset object at 0x78d13e3d36a0>


config.json:   0%|          | 0.00/69.9k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/787M [00:00<?, ?B/s]

  return self.fget.__get__(instance, owner)()


Let's use 2 GPUs!
cuda


Epoch 1/3 - Training:   0%|          | 0/528 [00:00<?, ?it/s]

In [None]:
# Final clean-up. The training loop deletes `model` at the end of every fold,
# so only touch it if it still exists (e.g. after an interrupted run);
# otherwise this cell would raise NameError.
if 'model' in globals():
    model.cpu()
    del model
gc.collect()
torch.cuda.empty_cache()