### Class 3,7 학습

In [45]:
import os
import time
import random

import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from torch.optim import Adam
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader, Subset
from PIL import Image
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split

from torch.utils.data import random_split
from sklearn.model_selection import KFold
from torch import nn, optim

In [46]:
# 시드를 고정합니다.
SEED = 42
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.benchmark = True

In [47]:
# 데이터셋 클래스를 정의합니다.
class ImageDataset(Dataset):
    def __init__(self, csv, path, transform=None):
        self.df = pd.read_csv(csv).values
        self.path = path
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        name, target = self.df[idx]
        img = np.array(Image.open(os.path.join(self.path, name)))
        if self.transform:
            img = self.transform(image=img)['image']
        return img, target

In [48]:
class MixUp:
    def __init__(self, alpha=1.0):
        self.alpha = alpha

    def mixup_data(self, x, y):
        if self.alpha > 0:
            lam = np.random.beta(self.alpha, self.alpha)
        else:
            lam = 1

        batch_size = x.size()[0]
        index = torch.randperm(batch_size)
        mixed_x = lam * x + (1 - lam) * x[index, :]
        y_a, y_b = y, y[index]
        return mixed_x, y_a, y_b, lam

    def mixup_loss(self, loss_fn, pred, labels_a, labels_b, lam):
        return lam * loss_fn(pred, labels_a) + (1 - lam) * loss_fn(pred, labels_b)

In [49]:
import numpy as np
import torch
import random

def cutmix_data(x, y, alpha=1.0):
    if alpha > 0:
        lam = np.random.beta(alpha, alpha)
    else:
        lam = 1

    batch_size = x.size()[0]
    index = torch.randperm(batch_size)

    # Randomly select the bounding box for CutMix
    width, height = x.size()[2], x.size()[3]
    cut_ratio = np.sqrt(1. - lam)
    cut_width = int(width * cut_ratio)
    cut_height = int(height * cut_ratio)

    # Randomly select the position of the bounding box
    cx = np.random.randint(width)
    cy = np.random.randint(height)

    # Ensure bounding box coordinates are within image dimensions
    bb_x1 = np.clip(cx - cut_width // 2, 0, width)
    bb_x2 = np.clip(cx + cut_width // 2, 0, width)
    bb_y1 = np.clip(cy - cut_height // 2, 0, height)
    bb_y2 = np.clip(cy + cut_height // 2, 0, height)

    # Create the CutMix images
    mixed_x = x.clone()
    mixed_x[:, :, bb_y1:bb_y2, bb_x1:bb_x2] = x[index, :, bb_y1:bb_y2, bb_x1:bb_x2]

    # Return mixed images and corresponding labels
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam

def cutmix_loss(loss_fn, pred, labels_a, labels_b, lam):
    return lam * loss_fn(pred, labels_a) + (1 - lam) * loss_fn(pred, labels_b)

In [50]:
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score

def train_one_epoch(loader, model, optimizer, loss_fn, device, epoch, mixup=None, cutmix=False):
    model.train()
    train_loss = 0
    preds_list = []
    targets_list = []

    pbar = tqdm(loader)
    for image, targets in pbar:
        image = image.to(device)
        targets = targets.to(device)

        # Reset gradients
        optimizer.zero_grad(set_to_none=True)

        # Determine if MixUp or CutMix should be applied
        if epoch == 2 and mixup is not None:  # Apply MixUp in the third epoch (index 2)
            mixed_image, targets_a, targets_b, lam = mixup.mixup_data(image, targets)
            preds = model(mixed_image)
            loss = mixup.mixup_loss(loss_fn, preds, targets_a, targets_b, lam)
        
        elif cutmix:  # Apply CutMix if it's enabled
            mixed_image, targets_a, targets_b, lam = cutmix_data(image, targets)
            preds = model(mixed_image)
            loss = cutmix_loss(loss_fn, preds, targets_a, targets_b, lam)
        
        else:  # Regular training for other epochs
            preds = model(image)
            loss = loss_fn(preds, targets)

        # Backpropagation and optimization step
        loss.backward()
        optimizer.step()

        # Update metrics
        train_loss += loss.item()
        preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())
        targets_list.extend(targets.detach().cpu().numpy())

        pbar.set_description(f"Loss: {loss.item():.4f}")

    # Calculate average loss and metrics
    train_loss /= len(loader)
    train_acc = accuracy_score(targets_list, preds_list)
    train_f1 = f1_score(targets_list, preds_list, average='macro')

    ret = {
        "train_loss": train_loss,
        "train_acc": train_acc,
        "train_f1": train_f1,
    }

    return ret


In [51]:
df = pd.read_csv('/root/CV_PJT/CV_PJT/code/trin4.csv')
sample_submission_df = pd.read_csv("/root/CV_PJT/CV_PJT/data/data/sample_submission.csv")

## 3. Hyper-parameters
* 학습 및 추론에 필요한 하이퍼파라미터들을 정의합니다.

In [None]:
# device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# model config
model_name = 'efficientnet_b3'

# training config
img_size = 260
LR = 1e-4
EPOCHS = 10
BATCH_SIZE = 32
num_workers = 0
log_interval = 100

## 4. Load Data
* 학습, 테스트 데이터셋과 로더를 정의합니다.

In [53]:
import albumentations as A
from albumentations.core.transforms_interface import ImageOnlyTransform
from albumentations.pytorch import ToTensorV2
from augraphy import *
from augraphy.base.augmentationsequence import AugmentationSequence
from augraphy.augmentations import *

import torch
from torch.optim.lr_scheduler import StepLR

In [54]:
from augraphy import AugmentationSequence, InkBleed, ColorPaper, Markup, Scribbles, BleedThrough

In [55]:

# Define AugraphyTransform as before
class AugraphyTransform(ImageOnlyTransform):
    def __init__(self, p=0.7, always_apply=False):
        super(AugraphyTransform, self).__init__(p=p, always_apply=always_apply)
        # Define the Augraphy pipeline with desired augmentations
        self.pipeline = AugmentationSequence([
            # Ink bleed effect
            InkBleed(intensity_range=(0.1, 0.2), kernel_size=(3, 5), severity=(0.4, 0.6), p=0.5),
            # Paper color changes
            ColorPaper(hue_range=(-10, 10), saturation_range=(-30, 30), p=0.5),
            # Markup
            Markup(num_lines_range=(2, 5), p=0.5),
            # Scribbles
            Scribbles(scribbles_type="random", scribbles_ink="random", scribbles_location="random",
                      scribbles_size_range=(250, 600), scribbles_count_range=(1, 6), p=0.5),
            # BleedThrough effect
            BleedThrough(intensity_range=(0.1, 0.2), offsets=(10, 20), p=0.5),
        ])

    def apply(self, img, **params):
        if not isinstance(img, np.ndarray):
            img = np.array(img)
        augmented_image = self.pipeline(img)[0]
        return augmented_image

In [56]:
# Define transformations with and without AugraphyTransform
def get_transform(with_augraphy=True):
    transforms = [
        A.Resize(height=img_size, width=img_size),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ]
    if with_augraphy:
        transforms.insert(4, AugraphyTransform(p=0.7))  # Insert AugraphyTransform before normalization
    return A.Compose(transforms)

In [57]:
# Initial transformations with AugraphyTransform
trn_transform = get_transform(with_augraphy=True)
tst_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

In [58]:
# Dataset definition
full_dataset = ImageDataset(
    "/root/CV_PJT/CV_PJT/code/trin4.csv",
    "/root/CV_PJT/CV_PJT/data/data/train",
    transform=trn_transform
)

# Calculate the total number of samples in the dataset
dataset_size = len(full_dataset)

# Define the ratios for training and validation
train_ratio = 0.8  # Use 80% of the data for training
val_ratio = 1 - train_ratio  # Remaining 20% for validation

# Calculate the number of samples for training and validation
train_size = int(train_ratio * dataset_size)
val_size = dataset_size - train_size  # Ensure all samples are accounted for

# Split the dataset into training and validation sets
trn_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])

# Define the test dataset
tst_dataset = ImageDataset(
    "/root/CV_PJT/CV_PJT/data/data/sample_submission.csv",
    "/root/CV_PJT/CV_PJT/data/data/test",
    transform=tst_transform
)

# Print the sizes of the datasets
print("Training dataset size:", len(trn_dataset))
print("Validation dataset size:", len(val_dataset))
print("Test dataset size:", len(tst_dataset))

Training dataset size: 295232
Validation dataset size: 73808
Test dataset size: 3140


In [59]:
# DataLoader 정의
trn_loader = DataLoader(
    trn_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=num_workers,
    pin_memory=True,
    drop_last=False
)
tst_loader = DataLoader(
    tst_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=0,
    pin_memory=True
)

## 5. Train Model
* 모델을 로드하고, 학습을 진행합니다.

In [60]:
# load model
model = timm.create_model(
    model_name,
    pretrained=True,
    num_classes=17
).to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=LR)
# scheduler = StepLR(optimizer, step_size=5, gamma=0.1)  # e.g., reduce LR by a factor every 5 epochs
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)

scheduler.step(0)  # Apply the initial learning rate adjustment if needed

In [61]:
# for epoch in range(EPOCHS):
#     # Change the training transform to exclude Augraphy after the first epoch
#     if epoch == 1:
#         trn_dataset.transform = get_transform(with_augraphy=False)

#     # Determine if MixUp or CutMix should be applied
#     if epoch == 1:
#         ret = train_one_epoch(trn_loader, model, optimizer, loss_fn, device, epoch, mixup=MixUp)
#     elif epoch == 2:
#         ret = train_one_epoch(trn_loader, model, optimizer, loss_fn, device, epoch, cutmix=True)
#     else:
#         ret = train_one_epoch(trn_loader, model, optimizer, loss_fn, device, epoch)
    
    
#     # Log the results
#     log = f"Epoch: {epoch}\n"
#     for k, v in ret.items():
#         log += f"{k}: {v:.4f}\n"
#     print(log)

#     # Step the scheduler
#     scheduler.step()

In [66]:
# Define the total number of epochs you want to run
EPOCHS = 100  # Adjust this as necessary for your training

# Continue your training loop from start_epoch to EPOCHS
for epoch in range(EPOCHS):
    if epoch == 0:
        trn_dataset.transform = get_transform(with_augraphy=True)
        
    # 데이터셋 epoch 업데이트
        trn_dataset.epoch = epoch
        
        # 세 번째 에포크부터는 데이터의 30%만 사용
    if epoch >= 3:
        indices = list(range(len(trn_dataset)))
        random.shuffle(indices)
        subset_size = int(0.5 * len(trn_dataset))
        subset_indices = indices[:subset_size]
        current_dataset = Subset(trn_dataset, subset_indices)
    else:
        current_dataset = trn_dataset
            
    trn_loader = DataLoader(
        current_dataset, 
        batch_size=BATCH_SIZE, 
        shuffle=True,
        num_workers=0,
        pin_memory=True
    )

    # Determine if MixUp or CutMix should be applied
    if epoch == 1:
        ret = train_one_epoch(trn_loader, model, optimizer, loss_fn, device, epoch, mixup=MixUp)
    elif epoch == 2:
        ret = train_one_epoch(trn_loader, model, optimizer, loss_fn, device, epoch, cutmix=True)
    else:
        ret = train_one_epoch(trn_loader, model, optimizer, loss_fn, device, epoch)

        # 결과 로깅
        log = f"Epoch: {epoch}\n"
        for k, v in ret.items():
            log += f"{k}: {v:.4f}\n"
        print(log)

        scheduler.step()

  0%|          | 0/9226 [00:00<?, ?it/s]




error: OpenCV(4.10.0) /io/opencv/modules/imgproc/src/color.simd_helpers.hpp:92: error: (-2:Unspecified error) in function 'cv::impl::{anonymous}::CvtHelper<VScn, VDcn, VDepth, sizePolicy>::CvtHelper(cv::InputArray, cv::OutputArray, int) [with VScn = cv::impl::{anonymous}::Set<3, 4>; VDcn = cv::impl::{anonymous}::Set<3>; VDepth = cv::impl::{anonymous}::Set<0, 5>; cv::impl::{anonymous}::SizePolicy sizePolicy = cv::impl::<unnamed>::NONE; cv::InputArray = const cv::_InputArray&; cv::OutputArray = const cv::_OutputArray&]'
> Invalid number of channels in input image:
>     'VScn::contains(scn)'
> where
>     'scn' is 260


In [None]:
import torch

folder_path = "/root/CV_PJT/CV_PJT/model" # 저장 폴더 경로 수정
os.makedirs(folder_path, exist_ok=True) 
model_save_path  = os.path.join(folder_path, "entire_model_resnet101_all.pth") # 파일명 수정

# At the end of your training loop
torch.save({
    'epoch': epoch,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    # Add any other information you want to save, like loss or metrics
}, model_save_path)

# 6. Inference & Save File
* 테스트 이미지에 대한 추론을 진행하고, 결과 파일을 저장합니다.

In [None]:
preds_list = []

model.eval()
for image, _ in tqdm(tst_loader):
    image = image.to(device)

    with torch.no_grad():
        preds = model(image)
    preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())

100%|██████████| 99/99 [00:16<00:00,  6.10it/s]


In [None]:
pred_df = pd.DataFrame(tst_dataset.df, columns=['ID', 'target'])
pred_df['target'] = preds_list

In [None]:
sample_submission_df = pd.read_csv("/root/CV_PJT/CV_PJT/data/data/sample_submission.csv")
assert (sample_submission_df['ID'] == pred_df['ID']).all()

In [None]:
pred_df.to_csv("pred_21.csv", index=False)

In [None]:
len(pred_df)

3140