In [7]:
import os
import gc

import torch
import torch.nn as nn
import torch.optim as ptim
from torch.utils.data import Dataset, DataLoader

from torch import optim
from torch.optim import optimizer

from torchvision import datasets, transforms
import timm

import random
import pandas as pd
import numpy as np

import albumentations as A
from albumentations.pytorch import ToTensorV2

from sklearn.metrics import f1_score, accuracy_score

from PIL import Image

from tqdm import tqdm
import wandb

def set_random_seed(seed):
    """Seed every random number generator this notebook relies on.

    The same seed is handed to Python's ``random`` module, NumPy's global
    generator and PyTorch (default generator, current CUDA device, all CUDA
    devices). cuDNN is then switched to deterministic mode with benchmarking
    turned off.

    Args:
        seed: Seed value passed unchanged to each seeding function.
    """
    seeders = (
        random.seed,                 # Python built-in RNG
        np.random.seed,              # NumPy RNG
        torch.manual_seed,           # PyTorch RNG
        torch.cuda.manual_seed,      # current GPU
        torch.cuda.manual_seed_all,  # every GPU
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Prevent non-deterministic cuDNN behaviour when a GPU is used.
    cudnn_backend = torch.backends.cudnn
    cudnn_backend.deterministic = True
    cudnn_backend.benchmark = False

# Fix the random seed for the whole notebook.
set_random_seed(seed=42)

# Use the GPU when CUDA is available; otherwise run on the CPU.
device = torch.device('cpu' if not torch.cuda.is_available() else 'cuda')

# PATH 설정

In [8]:
# Root data directory; the trailing slash matters because the paths below are
# formed by plain string concatenation.
PRE_PATH = '/upstage-cv-classification-cv2/data/'

# Training pairs: CSV consumed by ImageDataset and the image folder it reads from.
TRAIN_CSV_PATH = f'{PRE_PATH}contra_train.csv'
TRAIN_IMAGE_PATH = f'{PRE_PATH}train_aug'
# Base training CSV, used to pick the class representative images.
BASE_TRAIN_CSV_PATH = f'{PRE_PATH}train.csv'

# Validation CSV and image folder consumed by ValidImageDataset.
VALID_CSV_PATH = f'{PRE_PATH}valid37.csv'
VALID_IMAGE_PATH = f'{PRE_PATH}valid'

# Project name handed to wandb.init.
WANDB_PROJECT_NAME = 'contra_train'

# 하이퍼 파라미터

In [9]:
# Model identifier passed to timm.create_model.
MODEL_NAME = 'efficientnet_b4'

# Side length (pixels) every image is resized to before normalisation.
IMG_SIZE = 224

# Number of passes over the training loader and the training batch size.
EPOCHS = 5
BATCH_SIZE = 16



# 데이터

In [10]:
# Shared preprocessing pipeline: resize to a square of IMG_SIZE, normalise with
# the given per-channel mean/std, then convert the array to a torch tensor.
data_transform = A.Compose(
    [
        A.Resize(height=IMG_SIZE, width=IMG_SIZE),
        A.Normalize(
            mean=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
        ),
        ToTensorV2(),
    ]
)

class ImageDataset(Dataset):
    """Dataset of image pairs read from a CSV file.

    Every CSV row is unpacked as ``(img1_id, img2_id, target)``. The two ids
    are file names resolved against ``path``.

    Args:
        csv: Path of the CSV file describing the pairs.
        path: Directory that contains the image files.
        transform: Optional callable invoked as ``transform(image=array)`` and
            expected to return a mapping with an ``'image'`` entry.
    """

    def __init__(self, csv, path, transform = None):
        # Keep the rows as a plain array so a row unpacks directly into three values.
        self.df = pd.read_csv(csv).values
        self.path = path
        self.transform = transform

    def __len__(self):
        """Return the number of pairs (CSV rows)."""
        return len(self.df)

    def _load(self, image_id):
        """Read one image as an RGB array and apply the optional transform."""
        # The context manager closes the file handle as soon as pixels are read.
        with Image.open(os.path.join(self.path, image_id)) as img_file:
            # Force three channels so grayscale / RGBA / palette files do not
            # break a transform that assumes an (H, W, 3) input.
            img = np.array(img_file.convert('RGB'))
        if self.transform:
            img = self.transform(image = img)['image']
        return img

    def __getitem__(self, idx) :
        """Return ``(img1, img2, target)`` for the pair stored at row ``idx``."""
        img1_id, img2_id, target = self.df[idx]
        return self._load(img1_id), self._load(img2_id), target
    
class ValidImageDataset(Dataset):
    """Dataset of single labelled images read from a CSV file.

    The CSV must provide an ``ID`` column (file name resolved against ``path``)
    and a ``target`` column.

    Args:
        csv: Path of the CSV file.
        path: Directory that contains the image files.
        transform: Optional callable invoked as ``transform(image=array)`` and
            expected to return a mapping with an ``'image'`` entry.
    """

    def __init__(self, csv, path, transform = None):
        self.df = pd.read_csv(csv)
        self.path = path
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the CSV."""
        return len(self.df)

    def __getitem__(self, idx) :
        """Return ``(img, target)`` for the row at position ``idx``."""
        row = self.df.iloc[idx]
        # `image_id` rather than `id` so the builtin is not shadowed.
        image_id, target = row['ID'], row['target']
        # The context manager closes the file handle as soon as pixels are read.
        with Image.open(os.path.join(self.path, image_id)) as img_file:
            # Force three channels so grayscale / RGBA / palette files do not
            # break a transform that assumes an (H, W, 3) input.
            img = np.array(img_file.convert('RGB'))
        if self.transform:
            img = self.transform(image = img)['image']

        return img, target


In [11]:
# Training data: image pairs, shuffled and served in batches of BATCH_SIZE.
train_dataset = ImageDataset(TRAIN_CSV_PATH, TRAIN_IMAGE_PATH, transform=data_transform)
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE, shuffle=True,
    num_workers=0, pin_memory=True, drop_last=False,
)

# Validation data: single labelled images, served one at a time in file order.
valid_dataset = ValidImageDataset(VALID_CSV_PATH, VALID_IMAGE_PATH, transform=data_transform)
valid_loader = DataLoader(
    valid_dataset,
    batch_size=1, shuffle=False,
    num_workers=0, pin_memory=True, drop_last=False,
)



In [12]:
base_train_df = pd.read_csv(BASE_TRAIN_CSV_PATH)

# Preprocessed reference images per class label (classes 3 and 7). They are the
# comparison set that predict_class measures each query image against.
class_representatives = {3 : [], 7 : []}

for class_label, rep_images in class_representatives.items():
    class_rows = base_train_df[base_train_df['target'] == class_label]
    # Draw up to 30 rows reproducibly; capping at the class size avoids the
    # ValueError that sample() raises when asked for more rows than exist.
    rep_ids = class_rows.sample(n=min(30, len(class_rows)), random_state=42)['ID']

    for rep_id in rep_ids:
        # Close the file promptly and force three channels so non-RGB files
        # cannot break the 3-channel normalisation in data_transform.
        with Image.open(os.path.join(PRE_PATH, 'train', rep_id)) as rep_file:
            rep_array = np.array(rep_file.convert('RGB'))
        rep_images.append(data_transform(image = rep_array)['image'])

# 모델

In [13]:
class EfficientNetEmbedding(nn.Module):
    """Backbone wrapper that swaps the classifier for a linear embedding head.

    Args:
        model: Backbone module. It must expose ``get_classifier()`` returning a
            layer with an ``in_features`` attribute, and its forward pass must
            use the ``classifier`` attribute that is replaced here.
        embedding_dim: Size of the produced embedding. Defaults to 128, the
            value that used to be hard-coded.
    """

    def __init__(self, model, embedding_dim = 128):
        super().__init__()
        self.model = model

        # Reuse the input width of the existing head for the new projection.
        in_features = self.model.get_classifier().in_features
        self.model.classifier = nn.Linear(in_features, embedding_dim)

    def forward(self, x):
        """Return the backbone output, i.e. the embedding of ``x``."""
        return self.model(x)
    
class ContrastiveLoss(nn.Module):
    """Margin-based contrastive loss over pairs of embeddings.

    A pair labelled 0 contributes its squared distance. A pair labelled 1
    contributes the squared amount by which its distance falls short of
    ``margin`` (zero once the distance reaches the margin). The result is the
    mean over all pairs.

    Args:
        margin: Distance beyond which label-1 pairs stop contributing.
    """

    def __init__(self, margin = 1.0):
        super().__init__()
        self.margin = margin

    def forward(self, output1, output2, label):
        """Return the mean contrastive loss for the given embeddings and labels."""
        distance = nn.functional.pairwise_distance(output1, output2)

        # Label 0: penalise any distance between the two embeddings.
        pull_term = (1 - label) * distance.pow(2)

        # Label 1: penalise only while the pair is closer than the margin.
        shortfall = (self.margin - distance).clamp(min = 0.0)
        push_term = label * shortfall.pow(2)

        return (pull_term + push_term).mean()
    

In [14]:
# Collect unreachable Python objects first so that any tensors they hold are
# released, then return the now-unused cached blocks from the CUDA allocator.
# In the opposite order, memory freed by the collector would stay in the cache.
gc.collect()
torch.cuda.empty_cache()

0

In [15]:

# Pretrained backbone selected by MODEL_NAME.
base_model = timm.create_model(MODEL_NAME, pretrained=True)

# Initialise the model, the loss function and the optimizer.
model = EfficientNetEmbedding(base_model)
model = model.to(device)

criterion = ContrastiveLoss()
criterion = criterion.to(device)

optimizer = optim.Adam(model.parameters(), lr=1e-3)


INFO:timm.models._builder:Loading pretrained weights from Hugging Face hub (timm/efficientnet_b4.ra2_in1k)
INFO:timm.models._hub:[timm/efficientnet_b4.ra2_in1k] Safe alternative available for 'pytorch_model.bin' (as 'model.safetensors'). Loading weights using safetensors.


In [16]:

# Training loop

wandb.init(project = WANDB_PROJECT_NAME)

for epoch in range(EPOCHS):
    model.train()
    total_loss = 0.0

    pbar = tqdm(train_loader)
    for img1, img2, label in pbar:
        img1, img2, label = img1.to(device), img2.to(device), label.to(device)

        optimizer.zero_grad()

        # Both images of a pair are embedded by the same network.
        output1 = model(img1)
        output2 = model(img2)

        loss = criterion(output1, output2, label)
        loss.backward()
        optimizer.step()

        # Read the scalar once: each .item() call copies the value back from
        # the device, so reuse it for the progress bar, wandb and the total.
        step_loss = loss.item()

        pbar.set_description(f"Loss : {step_loss:.4f}")

        wandb.log({
            'train_loss_step' : step_loss
        })

        total_loss += step_loss

    # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
    avg_loss = total_loss / max(len(train_loader), 1)
    print(f'Epoch [{epoch+1}], Loss: {avg_loss:.4f}')

ERROR:wandb.jupyter:Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mtjsgh2770[0m ([33mprefer_leee[0m). Use [1m`wandb login --relogin`[0m to force relogin


Loss : 0.2316: 100%|██████████| 250/250 [00:43<00:00,  5.81it/s]


Epoch [1], Loss: 0.2736


Loss : 0.3078: 100%|██████████| 250/250 [00:42<00:00,  5.91it/s]


Epoch [2], Loss: 0.2695


Loss : 0.3530: 100%|██████████| 250/250 [00:42<00:00,  5.88it/s]


Epoch [3], Loss: 0.2667


Loss : 0.2485: 100%|██████████| 250/250 [00:42<00:00,  5.85it/s]


Epoch [4], Loss: 0.2597


Loss : 0.2643: 100%|██████████| 250/250 [00:42<00:00,  5.88it/s]

Epoch [5], Loss: 0.2477





# 모델 평가

In [17]:
def predict_class(model, image, class_representatives, device):
    """Predict a class by mean embedding distance to per-class reference images.

    The query is embedded once. For every class, all of its representative
    images are embedded in a single batched forward pass (instead of one pass
    per representative), and the class with the smallest mean Euclidean
    distance to the query wins. Ties keep the class that comes first in
    ``class_representatives``.

    Batching can change distances by floating-point rounding compared with
    embedding the representatives one at a time.

    Args:
        model: Embedding network; it is switched to eval mode.
        image: Query tensor that already includes the batch dimension, or a
            single 3-D image tensor (a batch dimension is then added).
        class_representatives: Mapping from class label to a sequence of
            representative image tensors without batch dimension, all of the
            same shape within a class.
        device: Device on which the model runs.

    Returns:
        The label with the smallest mean distance, or ``None`` when no class
        has any representative.
    """
    model.eval()
    image = image.to(device)
    if image.dim() == 3:
        # Accept a bare image by adding the missing batch dimension.
        image = image.unsqueeze(0)

    min_distance = float('inf')
    predicted_class = None

    with torch.no_grad():
        image_embedding = model(image)

        for class_label, rep_images in class_representatives.items():
            if len(rep_images) == 0:
                # Nothing to compare against; also avoids dividing by zero.
                continue

            # One forward pass for the whole class.
            rep_batch = torch.stack(list(rep_images)).to(device)
            rep_embeddings = model(rep_batch)

            # Pair every representative with the full query embedding, then
            # take the L2 norm of each difference (one value per representative).
            diff = image_embedding.unsqueeze(0) - rep_embeddings.unsqueeze(1)
            distances = diff.flatten(start_dim=1).norm(dim=1)
            mean_dist = distances.mean().item()

            if mean_dist < min_distance:
                min_distance = mean_dist
                predicted_class = class_label

    return predicted_class

In [18]:
# Predict every validation image (the loader yields one image per step) and
# keep the prediction next to its ground-truth label.
valid_pbar = tqdm(valid_loader)
result_list = []

for img, target in valid_pbar:
    pred = predict_class(model, img, class_representatives, device)
    result_list.append(dict(pred=pred, target=target.item()))

# One row per validation image with columns "pred" and "target".
result_df = pd.DataFrame(result_list)



100%|██████████| 32/32 [00:21<00:00,  1.46it/s]


In [19]:
# Pull predictions and labels out of the result frame as plain lists.
target_list = result_df['target'].tolist()
pred_list = result_df['pred'].tolist()

# F1 treats class 3 as the positive label; accuracy counts exact matches.
f1_value = f1_score(target_list, pred_list, pos_label=3)
acc_value = accuracy_score(target_list, pred_list)

print(f"f1 score : {f1_value}")
print(f"Acc : {acc_value}")

f1 score : 0.631578947368421
Acc : 0.5625
