In [14]:
import os
import time
import random

import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from torch.optim import Adam
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split

from torch.utils.data import random_split

In [15]:
# Fix every random seed so that repeated runs produce the same results.
SEED = 42
# NOTE: assigning PYTHONHASHSEED at runtime does not re-seed the hashing of the
# already running interpreter; it is only inherited by processes started later.
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
# cuDNN autotuning (benchmark=True) may pick different kernels from run to run,
# which defeats the seeding above. Turn it off and request deterministic
# kernels so the seeds actually make training repeatable.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [16]:
# Dataset class that pairs image files with the labels listed in a CSV file.
class ImageDataset(Dataset):
    """Image dataset driven by a CSV whose rows are ``(file name, target)``.

    Args:
        csv: Path of the CSV file; it is read once with ``pd.read_csv`` and
            kept as a NumPy array in ``self.df``. Each row must unpack into
            exactly two values (file name, target).
        path: Directory that contains the image files named in the CSV.
        transform: Optional callable invoked as ``transform(image=img)`` whose
            result is indexed with ``['image']`` (the albumentations calling
            convention). When falsy, the raw NumPy array is returned.
    """

    def __init__(self, csv, path, transform=None):
        self.df = pd.read_csv(csv).values
        self.path = path
        self.transform = transform

    def __len__(self):
        """Return the number of rows read from the CSV."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the ``idx``-th image and return ``(image, target)``."""
        name, target = self.df[idx]
        # The context manager releases the file handle as soon as the pixels
        # are copied into the array. ``convert('RGB')`` guarantees a
        # three-channel array even for grayscale, palette or RGBA files, so a
        # 3-channel normalisation downstream always sees the expected shape.
        with Image.open(os.path.join(self.path, name)) as pil_img:
            img = np.array(pil_img.convert('RGB'))
        if self.transform:
            img = self.transform(image=img)['image']
        return img, target

In [17]:
# Runs one full pass over the training data.
def train_one_epoch(loader, model, optimizer, loss_fn, device):
    """Train ``model`` for a single epoch and report aggregate metrics.

    Args:
        loader: Iterable of ``(image, targets)`` batches that also supports
            ``len()``; it is wrapped in a tqdm progress bar.
        model: Network to optimise. It is switched to training mode and called
            on each image batch; predictions are taken with ``argmax(dim=1)``.
        optimizer: Optimizer whose ``step()`` is called once per batch.
        loss_fn: Callable ``loss_fn(preds, targets)`` returning the batch loss.
        device: Device both tensors of every batch are moved to.

    Returns:
        dict with ``train_loss`` (sum of per-batch losses divided by
        ``len(loader)``), ``train_acc`` and ``train_f1`` (macro-averaged F1),
        the latter two computed over every sample seen in the epoch.
    """
    model.train()
    batch_losses = []
    epoch_preds = []
    epoch_targets = []

    progress = tqdm(loader)
    for batch_images, batch_targets in progress:
        batch_images = batch_images.to(device)
        batch_targets = batch_targets.to(device)

        # Drop the gradients left over from the previous batch.
        model.zero_grad(set_to_none=True)

        logits = model(batch_images)
        batch_loss = loss_fn(logits, batch_targets)
        batch_loss.backward()
        optimizer.step()

        loss_value = batch_loss.item()
        batch_losses.append(loss_value)
        epoch_preds.extend(logits.argmax(dim=1).detach().cpu().numpy())
        epoch_targets.extend(batch_targets.detach().cpu().numpy())

        # Show the most recent batch loss on the progress bar.
        progress.set_description(f"Loss: {loss_value:.4f}")

    return {
        "train_loss": sum(batch_losses) / len(loader),
        "train_acc": accuracy_score(epoch_targets, epoch_preds),
        "train_f1": f1_score(epoch_targets, epoch_preds, average='macro'),
    }

In [18]:
def mixup_data(x, y, alpha=1.0):
    """Blend every sample of a batch with a randomly chosen partner (mixup).

    Args:
        x: Batch tensor; samples are indexed along the first dimension.
        y: Labels tensor aligned with ``x`` along the first dimension.
        alpha: Parameter of the ``Beta(alpha, alpha)`` distribution the mixing
            weight is drawn from. A value ``<= 0`` disables mixing (``lam`` is
            1, so ``mixed_x`` equals ``x``).

    Returns:
        ``(mixed_x, y_a, y_b, lam)`` where ``mixed_x`` is
        ``lam * x + (1 - lam) * x[index]``, ``y_a`` is ``y`` itself, ``y_b`` is
        ``y`` permuted with the same ``index``, and ``lam`` is the mixing
        weight.
    """
    lam = np.random.beta(alpha, alpha) if alpha > 0 else 1

    batch_size = x.size(0)
    # Build the permutation directly on x's device; a CPU index applied to a
    # GPU batch would need an implicit host-to-device copy on every call.
    index = torch.randperm(batch_size, device=x.device)
    # Indexing only the first dimension also supports 1-D inputs.
    mixed_x = lam * x + (1 - lam) * x[index]
    # Labels normally live on the same device as x; the .to() is then a no-op.
    y_a, y_b = y, y[index.to(y.device)]
    return mixed_x, y_a, y_b, lam

def mixup_loss(loss_fn, pred, labels_a, labels_b, lam):
    """Combine the losses against both label sets of a mixed batch.

    Evaluates ``loss_fn(pred, labels_a)`` and ``loss_fn(pred, labels_b)`` and
    returns their weighted sum, using ``lam`` for the first term and
    ``1 - lam`` for the second.
    """
    loss_a = loss_fn(pred, labels_a)
    loss_b = loss_fn(pred, labels_b)
    return lam * loss_a + (1 - lam) * loss_b

In [19]:
# def train_per_epoch(epoch, model, train_loader, loss_fn, optimizer, train_step):
#     preds_list = []
#     labels_list = []

#     for batch_idx, (images, labels) in enumerate(train_loader):
#         images, labels = images.to(device), labels.to(device)

#         current_loss = 0.0

#         if (batch_idx + 1) % 5 == 0:
#             imgs, labels_a, labels_b, lambda_ = mixup_data(images, labels)
#             output = model(imgs)
#             loss = mixup_loss(loss_fn=loss_fn, pred=output, labels_a=labels_a, labels_b=labels_b, lam=lambda_)
#         else: 
#             outputs = model(images)
#             _, preds = torch.max(outputs, 1)
#             loss = loss_fn(outputs, labels)

#             preds_list.extend(preds.detach().cpu().numpy())
#             labels_list.extend(labels.detach().cpu().numpy())

#         # Back propagation
#         optimizer.zero_grad()
#         loss.backward()
#         optimizer.step()

#         current_loss += loss.item()

#         if (batch_idx + 1) % log_interval == 0:
#             train_loss = current_loss / log_interval
#             train_acc = accuracy_score(labels_list, preds_list)
#             train_f1 = f1_score(labels_list, preds_list, average='macro')

#             print("Train Epoch: {} [{}/{} ({:.0f}%)]\tTrain Loss: {:.6f}, Train Acc: {:.4f}, Train F1 Score: {:.4f}".format(
#                 epoch, 
#                 batch_idx * len(images), 
#                 len(train_loader) * len(images), 
#                 100 * batch_idx / len(train_loader), 
#                 train_loss, 
#                 train_acc,
#                 train_f1))

#         train_step += 1

#     return train_step, current_loss / len(train_loader), accuracy_score(labels_list, preds_list), f1_score(labels_list, preds_list, average='macro')
    
# def validation(model, val_loader, loss_fn, train_step):
#     val_preds_list = []
#     val_labels_list = []
#     with torch.no_grad():
#         val_loss = 0.0
#         model.eval()

#         for val_idx, (val_images, val_labels) in enumerate(val_loader):
#             val_images, val_labels = val_images.to(device), val_labels.to(device)

#             val_outputs = model(val_images)
#             _, val_preds = torch.max(val_outputs, 1)

#             val_loss += loss_fn(val_outputs, val_labels) / val_outputs.shape[0]
            
#             val_preds_list.extend(val_preds.detach().cpu().numpy())
#             val_labels_list.extend(val_labels.detach().cpu().numpy())

#         val_epoch_loss = val_loss / len(val_loader)
#         val_epoch_acc = accuracy_score(val_labels_list, val_preds_list)
#         val_epoch_f1 = f1_score(val_labels_list, val_preds_list, average='macro')
        
#         print("Validation dataset: Val Loss: {:.6f}, Val Acc: {:.4f}, Val F1 Score: {:.4f}".format(val_epoch_loss, val_epoch_acc, val_epoch_f1))
#     return val_epoch_loss, val_epoch_acc, val_epoch_f1

In [20]:
# Read the labelled training table and the submission template, in that order.
df, sample_submission_df = (
    pd.read_csv(csv_file)
    for csv_file in (
        '/root/CV_PJT/CV_PJT/code/train3.csv',
        "/root/CV_PJT/CV_PJT/data/data/sample_submission.csv",
    )
)

In [21]:
# Hold out 20% of the rows for validation. The split reuses the notebook-wide
# SEED instead of a second hard-coded 42, so changing the seed in one place
# keeps every source of randomness in sync.
# NOTE(review): df_train / df_val are not referenced by the later cells visible
# here (those split with random_split instead) — confirm whether this split is
# still needed.
df_train, df_val = train_test_split(df, test_size=0.2, shuffle=True, random_state=SEED)
print(len(df_train), len(df_val))

118387 29597


## 3. Hyper-parameters
* 학습 및 추론에 필요한 하이퍼파라미터들을 정의합니다.

In [22]:
# Device: run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Data configuration.
data_path = 'datasets_fin/'      # not referenced by the other cells visible here

# Model configuration.
model_name = 'efficientnet_b4'   # architecture name handed to timm.create_model

# Training configuration.
img_size = 260        # images are resized to img_size x img_size
LR = 1e-4             # learning rate given to the Adam optimizer
EPOCHS = 100          # number of passes of the training loop
BATCH_SIZE = 32       # batch size of the train and test loaders
num_workers = 0       # DataLoader workers for the training loader
log_interval = 100    # only used by the commented-out training routine

## 4. Load Data
* 학습, 테스트 데이터셋과 로더를 정의합니다.

In [23]:
# Both pipelines currently apply the same deterministic preprocessing; no
# random augmentation is part of either one.
def _make_transform():
    """Build a resize -> normalize -> tensor conversion pipeline."""
    steps = [
        # Resize every image to img_size x img_size.
        A.Resize(height=img_size, width=img_size),
        # Normalise with fixed per-channel mean / std values.
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        # Convert the NumPy image into a PyTorch tensor.
        ToTensorV2(),
    ]
    return A.Compose(steps)


# Transform used for the training images.
trn_transform = _make_transform()

# Transform used for the test images.
tst_transform = _make_transform()

In [24]:
# Dataset definition
# NOTE(review): this reads train2.csv while the earlier pd.read_csv cell loads
# train3.csv — confirm which file is the intended training table.
full_dataset = ImageDataset(
    "/root/CV_PJT/CV_PJT/train2.csv",
    "/root/CV_PJT/CV_PJT/data/data/train",
    transform=trn_transform
)

# Calculate the total number of samples in the dataset
dataset_size = len(full_dataset)

# Define the ratios for training and validation
train_ratio = 0.8  # Use 80% of the data for training
val_ratio = 1 - train_ratio  # Remaining 20% for validation

# Calculate the number of samples for training and validation
train_size = int(train_ratio * dataset_size)
val_size = dataset_size - train_size  # Ensure all samples are accounted for

# Split the dataset into training and validation sets.
# A dedicated, explicitly seeded generator makes the split independent of how
# much of the global torch RNG stream earlier cells consumed, so re-running
# this cell always yields the same train/validation membership.
split_generator = torch.Generator().manual_seed(SEED)
trn_dataset, val_dataset = random_split(
    full_dataset,
    [train_size, val_size],
    generator=split_generator,
)

# Define the test dataset
tst_dataset = ImageDataset(
    "/root/CV_PJT/CV_PJT/data/data/sample_submission.csv",
    "/root/CV_PJT/CV_PJT/data/data/test",
    transform=tst_transform
)

# Print the sizes of the datasets
print("Training dataset size:", len(trn_dataset))
print("Validation dataset size:", len(val_dataset))
print("Test dataset size:", len(tst_dataset))

Training dataset size: 118387
Validation dataset size: 29597
Test dataset size: 3140


In [25]:
# DataLoader definitions
# Settings that the training and the test loader have in common.
_common_loader_kwargs = dict(batch_size=BATCH_SIZE, pin_memory=True)

# Training batches are reshuffled every epoch; the last partial batch is kept.
trn_loader = DataLoader(
    trn_dataset,
    shuffle=True,
    num_workers=num_workers,
    drop_last=False,
    **_common_loader_kwargs,
)
# Test batches keep the dataset order.
# NOTE(review): no loader is created for val_dataset in this cell.
tst_loader = DataLoader(
    tst_dataset,
    shuffle=False,
    num_workers=0,
    **_common_loader_kwargs,
)

## 5. Train Model
* 모델을 로드하고, 학습을 진행합니다.

In [26]:
# Build the pretrained network with a 17-class head and move it to the device.
model = timm.create_model(model_name, pretrained=True, num_classes=17)
model = model.to(device)

# Loss function and optimizer used for training.
loss_fn = nn.CrossEntropyLoss()
optimizer = Adam(params=model.parameters(), lr=LR)

model.safetensors:   0%|          | 0.00/77.9M [00:00<?, ?B/s]

In [27]:
# Train for EPOCHS epochs and print the metrics of every epoch.
for epoch in range(EPOCHS):
    ret = train_one_epoch(trn_loader, model, optimizer, loss_fn, device=device)
    ret['epoch'] = epoch

    log = ""
    for k, v in ret.items():
        if k == 'epoch':
            # The epoch index is an integer; print it as one instead of
            # pushing it through the float format (which gave "0.0000").
            log += f"{k}: {v}\n"
        else:
            log += f"{k}: {v:.4f}\n"
    print(log)

Loss: 0.1514: 100%|██████████| 3700/3700 [17:08<00:00,  3.60it/s]


train_loss: 0.2999
train_acc: 0.8969
train_f1: 0.8963
epoch: 0.0000



Loss: 0.0014: 100%|██████████| 3700/3700 [17:08<00:00,  3.60it/s]


train_loss: 0.0340
train_acc: 0.9884
train_f1: 0.9884
epoch: 1.0000



Loss: 0.0001: 100%|██████████| 3700/3700 [17:05<00:00,  3.61it/s]


train_loss: 0.0114
train_acc: 0.9962
train_f1: 0.9962
epoch: 2.0000



Loss: 0.0001: 100%|██████████| 3700/3700 [17:06<00:00,  3.60it/s]


train_loss: 0.0073
train_acc: 0.9974
train_f1: 0.9974
epoch: 3.0000



Loss: 0.0000:  15%|█▍        | 554/3700 [02:34<14:35,  3.59it/s]


KeyboardInterrupt: 

## 6. Inference & Save File
* 테스트 이미지에 대한 추론을 진행하고, 결과 파일을 저장합니다.

In [28]:
# Collect the predicted class index of every test image, in loader order.
preds_list = []

model.eval()
# No gradients are needed for inference, so the whole loop runs under no_grad.
with torch.no_grad():
    for image, _ in tqdm(tst_loader):
        image = image.to(device)
        preds = model(image)
        preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())

100%|██████████| 99/99 [00:17<00:00,  5.78it/s]


In [29]:
# Start from the test CSV rows and replace the target column with the predictions.
pred_df = pd.DataFrame(tst_dataset.df, columns=['ID', 'target']).assign(target=preds_list)

In [30]:
# Re-read the submission template and check that the predictions list exactly
# the same IDs in the same order.
sample_submission_df = pd.read_csv("/root/CV_PJT/CV_PJT/data/data/sample_submission.csv")
ids_match = sample_submission_df['ID'] == pred_df['ID']
assert ids_match.all()

In [31]:
# Write the predictions to disk without the DataFrame index column.
pred_df.to_csv(path_or_buf="pred_8.csv", index=False)

In [19]:
# Preview the first five prediction rows.
pred_df.head(n=5)

Unnamed: 0,ID,target
0,0008fdb22ddce0ce.jpg,2
1,00091bffdffd83de.jpg,12
2,00396fbc1f6cc21d.jpg,5
3,00471f8038d9c4b6.jpg,7
4,00901f504008d884.jpg,2
