In [1]:
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torch.autograd import Variable
from torch import optim
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.modules.dropout import Dropout

import time


from PIL import Image
from tqdm.auto import tqdm

import random
import numpy as np
import os
import cv2

from sklearn.model_selection import train_test_split

from matplotlib import pyplot as plt

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

import timm

import pandas as pd
import csv

import wandb

In [2]:
CFG = {
    'IMG_WIDTH':540,
    'IMG_HEIGTH':960,
    'EPOCHS':50,
    'LEARNING_RATE':3e-4,
    'BATCH_SIZE':16,
    'SEED':3
}

In [3]:
wandb.init(
    project="Final Project", 
    entity="aitech4_cv3",
    name='effb0_sam',
    config = {
        "lr" : CFG['LEARNING_RATE'],
        "epoch" : CFG['EPOCHS'],
        "batch_size" : CFG['BATCH_SIZE'],
    }
    )

config = wandb.config

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mkdoyoon[0m ([33maitech4_cv3[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [4]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

In [5]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [6]:
data_file = pd.read_csv('/opt/ml/data/data.csv')
label = data_file['info']
data =  data_file['filepath']
X_train, y_train, X_test, y_test = train_test_split(data, label, test_size=0.2, random_state=CFG['SEED'], stratify= label)

In [7]:
train_transform = A.Compose([A.Resize(CFG['IMG_HEIGTH'],CFG['IMG_WIDTH']),
                             A.ToGray(p=0.5),
                             A.HorizontalFlip(p=0.3),
                            A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0, always_apply=False, p=1.0),
                            ToTensorV2()
                            ])


val_transform = A.Compose([
                            A.Resize(CFG['IMG_HEIGTH'],CFG['IMG_WIDTH']),
                            A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0, always_apply=False, p=1.0),
                            ToTensorV2()
                            ])



test_transform = A.Compose([
                            A.Resize(CFG['IMG_HEIGTH'],CFG['IMG_WIDTH']),
                            A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0, always_apply=False, p=1.0),
                            ToTensorV2()
                            ])

In [8]:
X_train = X_train.reset_index()
X_test = X_test.reset_index()
y_train = y_train.reset_index()
y_test = y_test.reset_index()

In [10]:
class CustomDataset(Dataset):
    def __init__(self, img_paths, labels, transforms=None):
        self.img_paths = img_paths['filepath']
        self.labels = labels['info']
        self.transforms = transforms

    def __getitem__(self, index):
        img_path = self.img_paths[index]

        image = cv2.imread(img_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        
        if self.transforms is not None:
            image = self.transforms(image=image)['image']
            
        
        if self.labels is not None:
            label = self.labels[index]
            return image, label
        else:
            return image
    
    def __len__(self):
        return len(self.img_paths)

In [11]:
train_dataset = CustomDataset(X_train, X_test, train_transform)
train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=True, num_workers=0)

val_dataset = CustomDataset(y_train, y_test, val_transform)
val_loader = DataLoader(val_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

In [13]:
class BaseModel(nn.Module):
    def __init__(self, num_classes=1):
        super(BaseModel, self).__init__()
        self.backbone = timm.create_model(model_name='efficientnet_b0', pretrained=True)
        self.backbone.classifier = nn.Linear(in_features= self.backbone.classifier.in_features, out_features=num_classes)
        
    def forward(self, x):
        x = self.backbone(x)
        x = nn.Sigmoid()(x)
        return x

In [14]:
def calculate_accuracy(y_pred, y):
    top_pred = torch.round(y_pred)
    correct = top_pred.eq(y.view_as(top_pred)).sum()
    acc = correct.float() / y.shape[0]
    return acc

In [17]:
def validation(model, criterion, test_loader, device):
    model.eval()
    
    model_preds = []
    true_labels = []
    
    val_loss = []
    val_acc = []
    
    with torch.no_grad():
        for img, label in iter(test_loader):
            img, label = img.float().to(device), label.float().to(device).reshape(-1,1)
            
            model_pred = model(img)

            loss = criterion(model_pred, label)
            
            val_loss.append(loss.item())

            val_acc.append(calculate_accuracy(model_pred, label).item())
            
            true_labels += label.detach().cpu().numpy().tolist()

    return np.mean(val_loss), np.mean(val_acc)

In [18]:
def time_of_epoch(start, end):
    time_per_epoch = end - start
    time_per_epoch_min = int(time_per_epoch / 60)
    time_per_epoch_sec = int(time_per_epoch -(time_per_epoch_min*60))
    return time_per_epoch_min, time_per_epoch_sec

In [19]:
class SAM(torch.optim.Optimizer):
    def __init__(self, params, base_optimizer, rho=0.05, **kwargs):
        assert rho >= 0.0, f"Invalid rho, should be non-negative: {rho}"

        defaults = dict(rho=rho, **kwargs)
        super(SAM, self).__init__(params, defaults)

        self.base_optimizer = base_optimizer(self.param_groups, **kwargs)
        self.param_groups = self.base_optimizer.param_groups

    @torch.no_grad()
    def first_step(self, zero_grad=False):
        grad_norm = self._grad_norm()
        for group in self.param_groups:
            scale = group["rho"] / (grad_norm + 1e-12)

            for p in group["params"]:
                if p.grad is None: continue
                e_w = p.grad * scale.to(p)
                p.add_(e_w)  # climb to the local maximum "w + e(w)"
                self.state[p]["e_w"] = e_w

        if zero_grad: self.zero_grad()

    @torch.no_grad()
    def second_step(self, zero_grad=False):
        for group in self.param_groups:
            for p in group["params"]:
                if p.grad is None: continue
                p.sub_(self.state[p]["e_w"])  # get back to "w" from "w + e(w)"

        self.base_optimizer.step()  # do the actual "sharpness-aware" update

        if zero_grad: self.zero_grad()

    @torch.no_grad()
    def step(self, closure=None):
        assert closure is not None, "Sharpness Aware Minimization requires closure, but it was not provided"
        closure = torch.enable_grad()(closure)  # the closure should do a full forward-backward pass

        self.first_step(zero_grad=True)
        closure()
        self.second_step()

    def _grad_norm(self):
        shared_device = self.param_groups[0]["params"][0].device  # put everything on the same device, in case of model parallelism
        norm = torch.norm(
                    torch.stack([
                        p.grad.norm(p=2).to(shared_device)
                        for group in self.param_groups for p in group["params"]
                        if p.grad is not None
                    ]),
                    p=2
               )
        return norm

In [20]:
def train(model, optimizer, train_loader, test_loader, scheduler, device):
    model.to(device)

    criterion = nn.BCELoss().to(device)

    best_score = 0
    best_model = None

    for epoch in range(1, CFG["EPOCHS"]+1):
        model.train()
        start_time = time.monotonic()
        train_loss = []
        epoch_acc = 0
        for step,(img, label) in enumerate(train_loader): #tqdm(iter(train_loader)

            img, label = img.float().to(device), label.float().to(device)
            optimizer.zero_grad()
            label = label.view(-1,1)

            model_pred = model(img)

            acc = calculate_accuracy(model_pred, label)

            loss = criterion(model_pred, label)
            loss.backward()
            
            optimizer.first_step(zero_grad=True)
            
            criterion(model(img), label).backward()
            optimizer.second_step(zero_grad=True)
            
            train_loss.append(loss.item())

            if (step + 1) % 5 == 0:
                print(f'Epoch [{epoch}], Step [{step+1}], Train Loss : [{round(loss.item(),4):.5f}] Train acc : [{acc:.5f}]')
            wandb.log({'train_acc':acc})
        end_time = time.monotonic()
        epoch_min, epoch_sec = time_of_epoch(start_time, end_time)
        train_loss_m = np.mean(train_loss)
        epoch_acc += acc.item()
        val_loss, val_acc = validation(model, criterion, test_loader, device)

        print(f'Epoch [{epoch}], Train Loss : [{train_loss_m:.5f}] Val Loss : [{val_loss:.5f}] Val acc : [{val_acc:.5f}],Time : {epoch_min}m {epoch_sec}s')
        
        if scheduler is not None:
            scheduler.step()
         
        
        torch.save(model.state_dict(),"/opt/ml/work_dir/latest.pth")
            
        if best_score < val_acc:
            best_model = model
            best_score = val_acc
            print(f"save_best_pth EPOCH {epoch}")
            torch.save(model.state_dict(),"/opt/ml/work_dir/best.pth")
        
        wandb.log({'val_acc':val_acc})
    wandb.save("/opt/ml/work_dir/*.pth",base_path="/opt/ml/work_dir/")

    return best_model


In [21]:
torch.cuda.empty_cache()
model = BaseModel()
# optimizer = torch.optim.Adam(params = model.parameters(), lr = CFG["LEARNING_RATE"])
optimizer = SAM(model.parameters(), torch.optim.SGD , lr=CFG["LEARNING_RATE"], momentum=0.9)
scheduler = None
infer_model = train(model, optimizer, train_loader, val_loader, scheduler, device)

Epoch [1], Step [5], Train Loss : [0.67190] Train acc : [0.62500]
Epoch [1], Step [10], Train Loss : [0.73890] Train acc : [0.37500]
Epoch [1], Step [15], Train Loss : [0.73860] Train acc : [0.43750]
Epoch [1], Step [20], Train Loss : [0.66490] Train acc : [0.62500]
Epoch [1], Step [25], Train Loss : [0.67020] Train acc : [0.68750]
Epoch [1], Step [30], Train Loss : [0.64730] Train acc : [0.81250]
Epoch [1], Step [35], Train Loss : [0.70190] Train acc : [0.50000]
Epoch [1], Step [40], Train Loss : [0.67670] Train acc : [0.56250]
Epoch [1], Step [45], Train Loss : [0.68820] Train acc : [0.68750]
Epoch [1], Step [50], Train Loss : [0.68740] Train acc : [0.43750]
Epoch [1], Step [55], Train Loss : [0.69100] Train acc : [0.50000]
Epoch [1], Step [60], Train Loss : [0.67740] Train acc : [0.75000]
Epoch [1], Step [65], Train Loss : [0.67390] Train acc : [0.75000]
Epoch [1], Step [70], Train Loss : [0.65040] Train acc : [0.81250]
Epoch [1], Step [75], Train Loss : [0.67320] Train acc : [0.625



Epoch [50], Train Loss : [0.17214] Val Loss : [0.24873] Val acc : [0.88295],Time : 1m 35s


In [20]:
!nvidia-smi

Tue Jan 17 14:51:13 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.80.02    Driver Version: 450.80.02    CUDA Version: 11.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-PCIE...  Off  | 00000000:00:05.0 Off |                  Off |
| N/A   42C    P0    37W / 250W |  32287MiB / 32510MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [369]:
test_img_paths = "테스트 이미지 경로"

In [370]:
class CustomTestDataset(Dataset):
    def __init__(self, img_paths, labels, transforms=None):
        self.img_paths = img_paths
        self.transforms = transforms

    def __getitem__(self, index):
        img_path = self.img_paths

        image = cv2.imread(img_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        
        if self.transforms is not None:
            image = self.transforms(image=image)['image']
        return image
    
    def __len__(self):
        return len(self.img_paths)

In [380]:
test_dataset = CustomTestDataset(test_img_paths, None, test_transform)
test_loader = DataLoader(test_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

In [392]:
def inference(model, img_path, device):
    model = BaseModel()
    model.load_state_dict(torch.load("/opt/ml/model_save_dir/best.pt", map_location=device))
    model = model.to(device)
    
    model_preds = []
    img_paths = []
    img_paths.append(img_path)
    
    model.eval()
    with torch.no_grad():
        for img in iter(test_loader):
            img = img.float().to(device)
            model_pred = model(img)
            model_preds.append(torch.round(model_pred).detach().cpu().numpy())
    
    print('Done.')
    return model_preds[0][0]

In [393]:
preds = inference(infer_model, test_img_paths, device)
print(preds)

Done.
[1.]
