In [1]:
import os
import time

import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn

from albumentations.pytorch import ToTensorV2
from torch.optim import Adam
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from torchvision.datasets import ImageFolder
from torch.optim.lr_scheduler import CosineAnnealingLR

from PIL import Image
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score

import matplotlib.pyplot as plt
import seaborn as sns

import wandb

INFO:albumentations.check_version:A new version of Albumentations is available: 1.4.13 (you have 1.4.12). Upgrade using: pip install -U albumentations. To disable automatic update checks, set the environment variable NO_ALBUMENTATIONS_UPDATE to 1.


In [4]:
# device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# PATH
TRAIN_AUG_CSV_PATH = '/upstage-cv-classification-cv2/data/train_aug_gaussian.csv'
SEMI_SUPERVISED_CSV_PATH = '/upstage-cv-classification-cv2/data/SemiSupervised_train.csv'
TRAIN_AUG_IMAGE_PATH = '/upstage-cv-classification-cv2/data/train_aug_gaussian'

VALID_CSV_PATH = '/upstage-cv-classification-cv2/data/valid.csv'
VALID_IMAGE_PATH = '/upstage-cv-classification-cv2/data/valid'

TEST_CSV_PATH = '/upstage-cv-classification-cv2/data/sample_submission.csv'
TEST_IMAGE_PATH = '/upstage-cv-classification-cv2/data/test'

RESULT_CSV_PATH = '/upstage-cv-classification-cv2'

WANDB_PROJECT_NAME = 'cv_competition_effi04'

# HyperParameter

In [3]:
# training config
img_size = 380
LR = 1e-3
EPOCHS = 100
BATCH_SIZE = 32
num_workers = 0

patience = 5
min_delta = 0.001 # 성능 개선의 최소 변화량

# 1. DATA LOAD

In [10]:
semi_train_df = pd.read_csv(SEMI_SUPERVISED_CSV_PATH)
train_agu_df = pd.read_csv(TRAIN_AUG_CSV_PATH)
train_df = pd.read_csv('/upstage-cv-classification-cv2/data/train.csv')

only_test = semi_train_df[~semi_train_df['ID'].isin(train_df["ID"])]

print(len(only_test[only_test['ID'].isin(train_df['ID'])]))


0


In [5]:
# test image 변환
data_transform = A.Compose([
    A.Resize(height = img_size, width = img_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225]),
    ToTensorV2()
])

class ImageDataset(Dataset):
    def __init__(self, csv, path, transform=None):
        self.df = pd.read_csv(csv).values
        self.path = path
        self.transform = transform

    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        name, target = self.df[idx]
        img = np.array(Image.open(os.path.join(self.path, name)))
        if self.transform:
            img = self.transform(image = img)['image']
    
        return img, target

    def get_labels(self):
        return self.df[:, 1] 

trn_dataset = ImageDataset(
    TRAIN_AUG_CSV_PATH,
    TRAIN_AUG_IMAGE_PATH,
    transform = data_transform
)

val_dataset = ImageDataset(
    VALID_CSV_PATH,
    VALID_IMAGE_PATH,
    transform = data_transform
)

tst_dataset = ImageDataset(
    TEST_CSV_PATH,
    TEST_IMAGE_PATH,
    transform = data_transform
)

labels = trn_dataset.get_labels()
labels = labels.astype(int)

# DataLoader
trn_loader = DataLoader(
    trn_dataset,
    batch_size = BATCH_SIZE,
    shuffle = True,
    num_workers = num_workers,
    pin_memory = True,
    drop_last = False
)

val_loader = DataLoader(
    val_dataset,
    batch_size = BATCH_SIZE,
    num_workers = 0,
    pin_memory = True,
    drop_last = False
)

tst_loader = DataLoader(
    tst_dataset,
    batch_size = BATCH_SIZE,
    shuffle = False,
    num_workers = 0,
    pin_memory = True
)

print(len(trn_dataset), len(tst_dataset))

37680 3140


# 2. Model Train

In [6]:
torch.cuda.empty_cache()

In [7]:
# model
model = timm.create_model('efficientnet_b4',
                        pretrained=True,
                        num_classes = 17).to(device)

loss_fn = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr = LR)


INFO:timm.models._builder:Loading pretrained weights from Hugging Face hub (timm/efficientnet_b4.ra2_in1k)
INFO:timm.models._hub:[timm/efficientnet_b4.ra2_in1k] Safe alternative available for 'pytorch_model.bin' (as 'model.safetensors'). Loading weights using safetensors.
INFO:timm.models._builder:Missing keys (classifier.weight, classifier.bias) discovered while loading pretrained weights. This is expected if model is being adapted.


In [8]:
def valid_one_epoch(loader, model, loss_fn, device, epoch):
    model.eval()
    valid_loss = 0

    preds_list =[]
    targets_list = []

    with torch.no_grad():
        pbar = tqdm(loader)
        for step, (image, targets) in enumerate(pbar):
            image = image.to(device)
            targets = targets.to(device)

            preds = model(image)
            loss = loss_fn(preds, targets)
       
            valid_loss += loss.item()
        
            preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())
            targets_list.extend(targets.detach().cpu().numpy())

            pbar.set_description(f"Loss: {loss.item():.4f}")

            wandb.log({
                "valid_step" : epoch * len(loader) + step,
                "valid_loss_step" : loss.item()
            })

    valid_loss /= len(loader)
    valid_acc = accuracy_score(targets_list, preds_list)
    valid_f1 = f1_score(targets_list, preds_list, average = 'macro')

    ret = {
        "epoch" : epoch,
        "valid_loss" : valid_loss,
        "valid_acc" : valid_acc,
        "valid_f1" : valid_f1
    }

    wandb.log({
        "valid_epoch" : epoch,
        "val_loss_epoch" : valid_loss,
        "val_acc" : valid_acc,
        "val_f1" : valid_f1
    })

    return ret

In [9]:
# one epoch 학습
def train_one_epoch(train_loader, model, optimizer, loss_fn, device, epoch):
    model.train()
    train_loss = 0
    preds_list =[]
    targets_list = []

    pbar = tqdm(train_loader)
    for step, (image, targets) in enumerate(pbar):
        image = image.to(device)
        targets = targets.to(device)

        model.zero_grad(set_to_none = True)

        preds = model(image)
        loss = loss_fn(preds, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        
        preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())
        targets_list.extend(targets.detach().cpu().numpy())

        pbar.set_description(f"Loss: {loss.item():.4f}")

        wandb.log({
            "train_step" : epoch * len(train_loader) + step,
            "train_loss_step" : loss.item()
        })
        
    train_loss /= len(train_loader)
    train_acc = accuracy_score(targets_list, preds_list)
    train_f1 = f1_score(targets_list, preds_list, average = 'macro')

    ret = {
        "model" : model,
        "train_epoch" : epoch,
        "train_loss" : train_loss,
        "tarin_acc" : train_acc,
        "train_f1" : train_f1
    }

    wandb.log({
        "train_epoch" : epoch,
        "train_loss_epoch" : train_loss,
        "train_acc" : train_acc,
        "train_f1" : train_f1
    })

    return ret

In [10]:
os.environ['WANDB_SILENT'] = 'true'

f1_scores = []
valid_losses = []
trained_models = []
patience_counter = 0
best_loss = 1


wandb.init(project=WANDB_PROJECT_NAME, name="effi_b0")

for epoch in range(EPOCHS):
    print(f"{epoch} epoch")
    trn_ret = train_one_epoch(trn_loader, model, optimizer, loss_fn, device, epoch)
    val_ret =  valid_one_epoch(val_loader, model, loss_fn, device, epoch)

    f1_scores.append(val_ret['valid_f1'])
    valid_losses.append(val_ret['valid_loss'])
    trained_models.append(trn_ret['model'])

    print(f"valid loss : {val_ret['valid_loss']}")
    print(f"valid f1 : {val_ret['valid_f1']}")

    # 성능 개선 됨
    if val_ret['valid_loss'] < best_loss - min_delta:
        print(f"성능 개선 됨 : {val_ret['valid_loss']} > {best_loss - min_delta}")
        best_loss = val_ret['valid_loss']
        patience_counter = 0  
        
    # 성능 개선 되지 않음
    else:
        patience_counter += 1  
        print(f"성능 개선 안됨 : {val_ret['valid_loss']} > {best_loss - min_delta}")
        print(f"patience counter : {patience_counter}")

    # 성능 개선이 patience 만큼 안되면 학습 중단
    if patience_counter >= patience:
        print(f"Early stopping at epoch {epoch}")
        break

best_model_idx = np.argmax(np.array(f1_scores))
best_model = trained_models[best_model_idx]

wandb.finish()

0 epoch


Loss: 0.1218: 100%|██████████| 1178/1178 [07:48<00:00,  2.51it/s]
Loss: 0.2612: 100%|██████████| 10/10 [00:01<00:00,  6.21it/s]


valid loss : 0.4637957789003849
valid f1 : 0.8728892570729305
성능 개선 됨 : 0.4637957789003849 > 0.999
1 epoch


Loss: 0.0172: 100%|██████████| 1178/1178 [07:56<00:00,  2.47it/s]
Loss: 0.2418: 100%|██████████| 10/10 [00:01<00:00,  6.16it/s]


valid loss : 0.5758982695639133
valid f1 : 0.88214331353974
성능 개선 안됨 : 0.5758982695639133 > 0.4627957789003849
patience counter : 1
2 epoch


Loss: 0.0087: 100%|██████████| 1178/1178 [07:39<00:00,  2.57it/s]
Loss: 0.3094: 100%|██████████| 10/10 [00:01<00:00,  6.66it/s]


valid loss : 0.4918346792459488
valid f1 : 0.8880051081326837
성능 개선 안됨 : 0.4918346792459488 > 0.4627957789003849
patience counter : 2
3 epoch


Loss: 0.2238: 100%|██████████| 1178/1178 [07:38<00:00,  2.57it/s]
Loss: 0.9478: 100%|██████████| 10/10 [00:01<00:00,  6.61it/s]


valid loss : 0.7749637499451637
valid f1 : 0.8433233425083331
성능 개선 안됨 : 0.7749637499451637 > 0.4627957789003849
patience counter : 3
4 epoch


Loss: 0.0001: 100%|██████████| 1178/1178 [07:38<00:00,  2.57it/s]
Loss: 1.2129: 100%|██████████| 10/10 [00:01<00:00,  6.65it/s]


valid loss : 0.5139913640916347
valid f1 : 0.8925830457294944
성능 개선 안됨 : 0.5139913640916347 > 0.4627957789003849
patience counter : 4
5 epoch


Loss: 0.0069: 100%|██████████| 1178/1178 [07:38<00:00,  2.57it/s]
Loss: 0.2967: 100%|██████████| 10/10 [00:01<00:00,  6.63it/s]


valid loss : 0.6273767679929734
valid f1 : 0.8858702837200321
성능 개선 안됨 : 0.6273767679929734 > 0.4627957789003849
patience counter : 5
Early stopping at epoch 5


# TEST

In [11]:
preds_list = []

best_model.eval()

for image, _ in tqdm(tst_loader):
    image = image.to(device)

    with torch.no_grad():
        preds = best_model(image)
        
    preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())

pred_df = pd.DataFrame(tst_dataset.df, columns=['ID', 'target'])
pred_df['target'] = preds_list
pred_df.to_csv(f"{RESULT_CSV_PATH}/effi_b4.csv", index=False)

100%|██████████| 99/99 [00:15<00:00,  6.45it/s]


In [12]:
torch.save(best_model.state_dict(), '/upstage-cv-classification-cv2/results/effi_b4.pth')