In [1]:
import json
import math
import os
import random
import re
import time
import warnings
from glob import glob

import cv2
import numpy as np
import pandas as pd
import timm
import torch
from PIL import Image
from matplotlib import image as mpimg, pyplot as plt
from sklearn.metrics import f1_score, mean_absolute_error
from sklearn.model_selection import KFold, StratifiedKFold
from torch import nn
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from tqdm.auto import tqdm

warnings.filterwarnings('ignore')

def fix_everything(random_seed):
    """Seed every RNG used in this notebook and put cuDNN in reproducible mode.

    Args:
        random_seed (int): Seed passed to Python's ``random``, NumPy and
            PyTorch (CPU generator and all CUDA devices).
    """
    random.seed(random_seed)
    np.random.seed(random_seed)
    torch.manual_seed(random_seed)
    torch.cuda.manual_seed(random_seed)
    torch.cuda.manual_seed_all(random_seed) # if use multi-GPU
    torch.backends.cudnn.deterministic = True
    # Benchmark mode lets cuDNN auto-tune (and therefore vary) the convolution
    # algorithm from run to run, which defeats the purpose of seeding. It has
    # to be off for reproducible results, at some cost in throughput.
    torch.backends.cudnn.benchmark = False

fix_everything(42)

In [2]:
TARGET_IDX = 0  # NOTE(review): not referenced in the visible cells - confirm before removing.

class LsdDataset(Dataset):
    """Image dataset that keeps every image in memory and parses labels from file names.

    Each sample is ``(image, label)``. The image is the transformed RGB image
    and the label is a ``torch.HalfTensor`` holding the three numbers found in
    the file's base name. The first number is divided by ``WIDTH_SCALE``.

    Args:
        imgs: Sequence of image file paths (``/``-separated).
        mode (str): One of ``'train'``, ``'valid'`` or ``'test'``; selects the
            transform pipeline. Only ``'train'`` applies random flips.

    Raises:
        ValueError: If ``mode`` is not a known pipeline, or (from
            ``__getitem__``) if a file name does not contain exactly
            ``NUM_TARGETS`` numbers.
    """

    # Divisor applied to the first parsed value. Presumably the maximum line
    # width, so that the target lands in [0, 1] - may change.
    WIDTH_SCALE = 6
    # How many numbers a valid file name must contain.
    NUM_TARGETS = 3

    def __init__(self, imgs, mode='train'):
        self.imgs = imgs
        self.mode = mode

        # Try adding more augmentation:
        # WORSE:
        # - RandomAdjustSharpness(sharpness_factor=2),
        # - ColorJitter(brightness=0.4, contrast=0.2, saturation=0.3, hue=0.3),
        self.transforms = {
            "train": transforms.Compose([
                transforms.RandomHorizontalFlip(),
                transforms.RandomVerticalFlip(),
                transforms.ToTensor(),
            ]),
            "valid": transforms.Compose([
                transforms.ToTensor(),
            ]),
            "test": transforms.Compose([
                transforms.ToTensor(),
            ]),
        }
        # Fail here rather than with a KeyError on the first __getitem__,
        # which would only surface after every image has been loaded.
        if mode not in self.transforms:
            raise ValueError(
                f"Unknown mode {mode!r}; expected one of {sorted(self.transforms)}"
            )

        self.img_contents = []
        print("Opening Images...")
        for img in tqdm(self.imgs):
            # convert() returns an independent, fully loaded copy, so the
            # underlying file handle can be released immediately.
            with Image.open(img) as handle:
                self.img_contents.append(handle.convert("RGB"))

    def __len__(self):
        return len(self.imgs)

    def __getitem__(self, idx):
        """Return ``(transformed_image, label)`` for sample ``idx``."""
        # Get Image.
        img_title = self.imgs[idx]
        transform = self.transforms[self.mode]
        img = transform(self.img_contents[idx])

        # Get label: every number in the base name of the file.
        matches = re.findall(r'[0-9]+\.?[0-9]*', img_title.split("/")[-1])

        # Validate before touching label[0]. The previous `assert("...")`
        # asserted a non-empty string and therefore could never fail.
        if len(matches) != self.NUM_TARGETS:
            raise ValueError(f"Wrong Image Title. Please check again.\n[Detected]: {img_title}")

        label = [float(x) for x in matches]
        label[0] /= self.WIDTH_SCALE
        label = [round(x, 4) for x in label]

        return img, torch.HalfTensor(label)

In [3]:
import torch.nn.functional as F

class Network(nn.Module):
    """Thin wrapper around a pretrained timm backbone with a 3-output head.

    Args:
        model_name (str): Name passed to ``timm.create_model``.
        p (float): Dropout rate forwarded to timm as ``drop_rate``. This
            argument used to be ignored in favour of a hard-coded 0.25; the
            default keeps that value.
    """

    def __init__(self, model_name, p=0.25):
        super().__init__()
        self.model = timm.create_model(
            model_name, pretrained=True, num_classes=3, drop_rate=p
        )

    def forward(self, img):
        """Return the backbone's output for the image batch ``img``."""
        return self.model(img)

In [4]:
### MAIN SETTINGS ###
# This cell needs the *module* (glob.glob). Note that this rebinds the name
# `glob`, which the first cell imported as the function.
import glob

# Root folder that holds the LineStyleData_<n> sub-folders.
DATA_ROOT = "/workspace/LSD/new_data"

train_idxs = list(range(20))  # folders 0..19
val_idxs = [20, 21]


def collect_pngs(folder_idxs, root=DATA_ROOT):
    """Return the PNG paths of the given ``LineStyleData_<idx>`` folders.

    Folders are visited in the order given and files inside each folder are
    sorted. ``glob.glob`` returns matches in arbitrary (filesystem) order,
    which would make the seeded shuffling differ between machines.

    Args:
        folder_idxs: Iterable of folder numbers.
        root (str): Directory containing the ``LineStyleData_<idx>`` folders.

    Returns:
        list[str]: Matching file paths (empty if nothing matches).
    """
    paths = []
    for idx in folder_idxs:
        paths.extend(sorted(glob.glob(f"{root}/LineStyleData_{idx}/*.png")))
    return paths


imgs = collect_pngs(train_idxs)
val_imgs = collect_pngs(val_idxs)

# folds = [(train_idx, valid_idx) for train_idx, valid_idx in KFold(n_splits=5, shuffle=True, random_state=42).split(imgs)]
# Train: 1064, Valid: 267  (stale: the logged run below loads 21296 train / 2662 validation images)
# Original Image size: 1920 x 1080 => (256, 256)

model_name = "regnetv_064"
device = torch.device("cuda:0")
training_epochs = 25
batch_size = 16

In [5]:
# R-squared = 1 - [sum of (observed - predicted)^2] / [sum of (observed - in-sample mean)^2]
import torch
import numpy as np
import pdb

# Per-target maximum and quantisation step, used by process_vals to map
# normalised values back to their original scale.
FIRST_ELEM_MAX = 6
FIRST_ELEM_STEP_SIZE = 0.5

SECOND_ELEM_MAX = 1
SECOND_ELEM_STEP_SIZE = 0.1

THIRD_ELEM_MAX = 1
THIRD_ELEM_STEP_SIZE = 0.1

def process_vals(pred):
    """Clamp normalised values to [0, 1], rescale them and snap them to a grid.

    Column 0 is multiplied by ``FIRST_ELEM_MAX``, rounded to a multiple of
    ``FIRST_ELEM_STEP_SIZE`` and floored at 1. Columns 1 and 2 are rescaled
    and rounded with their own ``*_MAX`` / ``*_STEP_SIZE`` constants.

    The input is no longer modified in place. Previously the caller's array
    was overwritten, so passing the same array to a second metric silently
    processed already de-normalised values again.

    Args:
        pred: 2-D array-like with at least three columns of normalised values.

    Returns:
        numpy.ndarray: New array of the same shape and dtype with the
        processed values.
    """
    out = np.array(pred, copy=True)
    out[out > 1] = 1
    out[out < 0] = 0

    # Since first element minimum is 1:
    out[:, 0] = np.round(out[:, 0] * FIRST_ELEM_MAX / FIRST_ELEM_STEP_SIZE) * FIRST_ELEM_STEP_SIZE
    out[:, 0][out[:, 0] < 1] = 1

    out[:, 1] = np.round(out[:, 1] * SECOND_ELEM_MAX / SECOND_ELEM_STEP_SIZE) * SECOND_ELEM_STEP_SIZE
    out[:, 2] = np.round(out[:, 2] * THIRD_ELEM_MAX / THIRD_ELEM_STEP_SIZE) * THIRD_ELEM_STEP_SIZE
    return out

def get_rsquared_total(pred, gt):
    """Compute R-squared for each of the three targets.

    Both inputs are first passed through ``process_vals`` (un-normalise and
    round), then each column is scored with ``get_rsquared``.

    Args:
        pred: 2-D array of normalised predictions, one column per target.
        gt: 2-D array of normalised ground-truth values, same layout.

    Returns:
        dict: ``{"width": ..., "dynamics": ..., "jitter": ...}`` holding the
        scores of columns 0, 1 and 2 respectively.
    """
    # Unnormalize & Round
    processed_pred = process_vals(pred)
    processed_gt = process_vals(gt)

    target_names = ("width", "dynamics", "jitter")
    return {
        name: get_rsquared(processed_pred, processed_gt, column)
        for column, name in enumerate(target_names)
    }

def nrmse(pred, gt):
    """Compute the normalised RMSE for each of the three targets.

    Both inputs are first passed through ``process_vals``, then each column
    is scored with ``get_nrmse``.

    Args:
        pred: 2-D array of normalised predictions, one column per target.
        gt: 2-D array of normalised ground-truth values, same layout.

    Returns:
        dict: ``{"width": ..., "dynamics": ..., "jitter": ...}`` holding the
        scores of columns 0, 1 and 2 respectively.
    """
    processed_pred = process_vals(pred)
    processed_gt = process_vals(gt)

    scores = {}
    for column, name in enumerate(("width", "dynamics", "jitter")):
        scores[name] = get_nrmse(processed_pred, processed_gt, column)
    return scores

def get_nrmse(pred, gt, idx):
    """Return the RMSE of column ``idx`` divided by the ground-truth range.

    Args:
        pred: 2-D array of predictions.
        gt: 2-D array of ground-truth values with the same layout.
        idx (int): Column to evaluate.

    Returns:
        RMSE between the two columns, normalised by ``max(gt) - min(gt)`` of
        that column.
    """
    predicted = pred[:, idx]
    observed = gt[:, idx]
    rmse = np.sqrt(np.mean((observed - predicted) ** 2))
    value_range = np.max(observed) - np.min(observed)
    return rmse / value_range

def get_rsquared(pred, gt, idx):
    """Return the coefficient of determination (R-squared) of column ``idx``.

    Computed as ``1 - SS_res / SS_tot``: ``SS_res`` is the sum of squared
    differences between ground truth and prediction, and ``SS_tot`` is the
    sum of squared deviations of the ground truth from its own mean.

    Args:
        pred: 2-D array of predictions.
        gt: 2-D array of ground-truth values with the same layout.
        idx (int): Column to evaluate.
    """
    predicted = pred[:, idx]
    observed = gt[:, idx]
    residual_ss = sum(np.square(observed - predicted))
    total_ss = sum(np.square(observed - np.mean(observed)))
    return 1 - residual_ss / total_ss

def weighted_mse_loss(input, target):
    """Mean squared error with per-target weights ``[1, 2, 1.5]``.

    The weights are created on the device (and in the floating dtype) of the
    inputs. The function therefore no longer depends on a global ``device``
    and does not transfer a tensor from host memory on every call.

    Args:
        input (torch.Tensor): Predictions whose last dimension has size 3.
        target (torch.Tensor): Targets broadcastable against ``input``.

    Returns:
        torch.Tensor: Scalar tensor, the mean of the weighted squared errors.
    """
    diff = input - target
    # 1, 2 and 1.5 are exactly representable in every float dtype, so the
    # result matches the former HalfTensor weights after type promotion.
    weight_dtype = diff.dtype if diff.is_floating_point() else torch.float32
    weights = torch.tensor([1, 2, 1.5], dtype=weight_dtype, device=diff.device)
    return (weights * diff ** 2).mean()

In [6]:
### TRAINING
from tqdm.auto import tqdm
import pdb

# Where the best checkpoint (lowest summed validation MAE) is written.
CHECKPOINT_PATH = '/workspace/LSD/result/regnet0207.pth'
# Create the output folder up front so a missing directory cannot abort the
# run at the first torch.save, after a full epoch has already been spent.
os.makedirs(os.path.dirname(CHECKPOINT_PATH), exist_ok=True)

# Single pass kept in "fold" form so the commented-out K-fold split can be re-enabled.
for fold, idxs in enumerate(range(1)):

    model = nn.DataParallel(Network(model_name), device_ids=[0,1]) # If multi-gpu
    # model = Network(model_name) # else use this.

    model = model.to(device)
    # NOTE(review): the scaler is created but never used below (there is no
    # autocast and no scaler.step), so training runs in full precision.
    scaler = torch.cuda.amp.GradScaler()
    optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
    criterion = weighted_mse_loss

    # Best (lowest) summed validation MAE seen so far.
    best = 1e5

    print(f"Running fold No.{fold+1}")

    # train_idx, valid_idx = idxs
    # train_dataset = LsdDataset(np.array(imgs)[train_idx], mode='train')
    train_dataset = LsdDataset(np.array(imgs), mode='train')
    train_loader = DataLoader(train_dataset,
                      batch_size=batch_size, pin_memory=True, num_workers=4, shuffle=True)

    # valid_dataset = LsdDataset(np.array(imgs)[valid_idx], mode='valid')
    valid_dataset = LsdDataset(np.array(val_imgs), mode='valid')
    # Validation metrics are order-independent, so shuffling only consumed
    # random numbers; keep the order fixed.
    valid_loader = DataLoader(valid_dataset, batch_size=batch_size, pin_memory=True, num_workers=4, shuffle=False)

    for epoch in range(training_epochs):

        start = time.time()
        train_loss = 0

        model.train()
        optimizer.zero_grad()

        train_errors = []
        eval_errors = []

        train_vals = []
        eval_vals = []

        with tqdm(train_loader, unit="batch") as t_epoch:
            for batch in t_epoch:
                # The batch already holds tensors: move/cast them with .to()
                # instead of copy-constructing through torch.tensor().
                x = batch[0].to(device=device, dtype=torch.float32, non_blocking=True) # img
                y = batch[1].to(device=device, dtype=torch.float32, non_blocking=True) # label
                pred = model(x)

                # squeeze(1) is a no-op for the (batch, 3) output of Network.
                loss = criterion(pred.squeeze(1), y)
                loss.backward()

                optimizer.step()
                optimizer.zero_grad()

                train_loss += loss.item()/len(train_loader)
                t_epoch.set_postfix(loss=loss.item())

                train_errors.append(abs(pred - y).detach().cpu())
                train_vals.append((pred.detach().cpu(), y.detach().cpu()))

        model.eval()
        valid_loss = 0

        with torch.no_grad():
            with tqdm(valid_loader, unit="batch") as v_epoch:
                for batch in v_epoch:
                    x = batch[0].to(device=device, dtype=torch.float32, non_blocking=True)
                    y = batch[1].to(device=device, dtype=torch.float32, non_blocking=True)

                    pred = model(x)
                    loss = criterion(pred.squeeze(1), y)
                    valid_loss += loss.item()/len(valid_loader)
                    v_epoch.set_postfix(loss=loss.item())
                    eval_errors.append(abs(pred - y).detach().cpu())
                    eval_vals.append((pred.detach().cpu(), y.detach().cpu()))

        TIME = time.time() - start
        # Per-target mean absolute error over the epoch (on normalised targets).
        train_score = abs(torch.cat(train_errors, dim=0)).mean(axis=0)
        valid_score = abs(torch.cat(eval_errors, dim=0)).mean(axis=0)

        if sum(valid_score) <= best:
            best = sum(valid_score)
            # The file name says "regnet" and model_name selects the backbone.
            # NOTE(review): this saves the DataParallel wrapper's state dict,
            # so keys carry a "module." prefix - load it into a wrapped model.
            torch.save(model.state_dict(), CHECKPOINT_PATH)

        # The figures labelled "Loss" below are the summed per-target MAE,
        # not the weighted MSE criterion (train_loss / valid_loss).
        print(f'epoch : {epoch+1}/{training_epochs}    time : {TIME:.0f}s/{TIME*(training_epochs-epoch-1):.0f}s')
        print(f'Train Loss: {sum(train_score):.5f}')
        print(f'Valid Loss: {sum(valid_score):.5f} best : {best:.5f}')

        # Concatenate once. Each metric gets its own copy because both run
        # the arrays through process_vals, which may modify them in place.
        val_pred = torch.cat([p for p, _ in eval_vals]).numpy()
        val_gt = torch.cat([g for _, g in eval_vals]).numpy()
        print(f"NRMSE Valid: {nrmse(val_pred.copy(), val_gt.copy())}")
        print(f"R-Squared Valid: {get_rsquared_total(val_pred.copy(), val_gt.copy())}")

Running fold No.1
Opening Images...


  0%|          | 0/21296 [00:00<?, ?it/s]

Opening Images...


  0%|          | 0/2662 [00:00<?, ?it/s]

  0%|          | 0/1331 [00:03<?, ?batch/s]

  0%|          | 0/167 [00:01<?, ?batch/s]

epoch : 1/25    time : 508s/12189s
Train Loss: 0.30981
Valid Loss: 0.21417 best : 0.21417
NRMSE Valid: {'width': 0.0678089, 'dynamics': 0.13280481, 'jitter': 0.08232169}
R-Squared Valid: {'width': 0.9540195341848234, 'dynamics': 0.8236288520360001, 'jitter': 0.9322314074363734}


  0%|          | 0/1331 [00:01<?, ?batch/s]

  0%|          | 0/167 [00:00<?, ?batch/s]

epoch : 2/25    time : 363s/8352s
Train Loss: 0.17321
Valid Loss: 0.20634 best : 0.20634
NRMSE Valid: {'width': 0.12151992, 'dynamics': 0.09321457, 'jitter': 0.067919604}
R-Squared Valid: {'width': 0.8523290758827948, 'dynamics': 0.9131104443073054, 'jitter': 0.9538692750735287}


  0%|          | 0/1331 [00:00<?, ?batch/s]

  0%|          | 0/167 [00:00<?, ?batch/s]

epoch : 3/25    time : 360s/7915s
Train Loss: 0.14163
Valid Loss: 0.24213 best : 0.20634
NRMSE Valid: {'width': 0.05746328, 'dynamics': 0.1506809, 'jitter': 0.11306468}
R-Squared Valid: {'width': 0.9669797145003757, 'dynamics': 0.7729526661084907, 'jitter': 0.8721637888470511}


  0%|          | 0/1331 [00:00<?, ?batch/s]

  0%|          | 0/167 [00:01<?, ?batch/s]

epoch : 4/25    time : 462s/9696s
Train Loss: 0.13678
Valid Loss: 0.23725 best : 0.20634
NRMSE Valid: {'width': 0.07711457, 'dynamics': 0.12165895, 'jitter': 0.08945112}
R-Squared Valid: {'width': 0.9405334335086402, 'dynamics': 0.8519909874573054, 'jitter': 0.9199849774150234}


  0%|          | 0/1331 [00:01<?, ?batch/s]

  0%|          | 0/167 [00:01<?, ?batch/s]

epoch : 5/25    time : 574s/11477s
Train Loss: 0.12596
Valid Loss: 0.27039 best : 0.20634
NRMSE Valid: {'width': 0.07372762, 'dynamics': 0.151811, 'jitter': 0.09007885}
R-Squared Valid: {'width': 0.9456423741547708, 'dynamics': 0.7695341840171769, 'jitter': 0.9188580046802962}


  0%|          | 0/1331 [00:01<?, ?batch/s]

  0%|          | 0/167 [00:01<?, ?batch/s]

epoch : 6/25    time : 382s/7265s
Train Loss: 0.11582
Valid Loss: 0.24826 best : 0.20634
NRMSE Valid: {'width': 0.11296495, 'dynamics': 0.1335101, 'jitter': 0.060674895}
R-Squared Valid: {'width': 0.872389181066867, 'dynamics': 0.8217505638893918, 'jitter': 0.9631855757732739}


  0%|          | 0/1331 [00:00<?, ?batch/s]

  0%|          | 0/167 [00:00<?, ?batch/s]

epoch : 7/25    time : 365s/6579s
Train Loss: 0.11075
Valid Loss: 0.12336 best : 0.12336
NRMSE Valid: {'width': 0.027885675, 'dynamics': 0.08648317, 'jitter': 0.05444205}
R-Squared Valid: {'width': 0.9922238918106687, 'dynamics': 0.9252066127835562, 'jitter': 0.9703606335035695}


  0%|          | 0/1331 [00:00<?, ?batch/s]

  0%|          | 0/167 [00:00<?, ?batch/s]

epoch : 8/25    time : 362s/6150s
Train Loss: 0.10749
Valid Loss: 0.20767 best : 0.12336
NRMSE Valid: {'width': 0.078753434, 'dynamics': 0.12697724, 'jitter': 0.05970753}
R-Squared Valid: {'width': 0.9379789631855747, 'dynamics': 0.8387678436888639, 'jitter': 0.9643501119997937}


  0%|          | 0/1331 [00:01<?, ?batch/s]

  0%|          | 0/167 [00:00<?, ?batch/s]

epoch : 9/25    time : 361s/5783s
Train Loss: 0.09746
Valid Loss: 0.28430 best : 0.12336
NRMSE Valid: {'width': 0.12550424, 'dynamics': 0.14572547, 'jitter': 0.066154435}
R-Squared Valid: {'width': 0.8424868519909843, 'dynamics': 0.7876408706665534, 'jitter': 0.9562359127116745}


  0%|          | 0/1331 [00:00<?, ?batch/s]

  0%|          | 0/167 [00:00<?, ?batch/s]

epoch : 10/25    time : 361s/5420s
Train Loss: 0.09721
Valid Loss: 0.19657 best : 0.12336
NRMSE Valid: {'width': 0.09185459, 'dynamics': 0.10652986, 'jitter': 0.07092286}
R-Squared Valid: {'width': 0.9156273478587528, 'dynamics': 0.8865139008195559, 'jitter': 0.9496994766397109}


  0%|          | 0/1331 [00:01<?, ?batch/s]

  0%|          | 0/167 [00:00<?, ?batch/s]

epoch : 11/25    time : 361s/5047s
Train Loss: 0.09240
Valid Loss: 0.24822 best : 0.12336
NRMSE Valid: {'width': 0.07908663, 'dynamics': 0.1329179, 'jitter': 0.09812287}
R-Squared Valid: {'width': 0.9374530428249437, 'dynamics': 0.8233283276653205, 'jitter': 0.9037190116874755}


  0%|          | 0/1331 [00:00<?, ?batch/s]

  0%|          | 0/167 [00:01<?, ?batch/s]

epoch : 12/25    time : 476s/6192s
Train Loss: 0.08810
Valid Loss: 0.21823 best : 0.12336
NRMSE Valid: {'width': 0.07100227, 'dynamics': 0.1316685, 'jitter': 0.084727876}
R-Squared Valid: {'width': 0.9495867768595041, 'dynamics': 0.8266341108776623, 'jitter': 0.9282118727552603}


  0%|          | 0/1331 [00:01<?, ?batch/s]

  0%|          | 0/167 [00:01<?, ?batch/s]

epoch : 13/25    time : 570s/6834s
Train Loss: 0.08452
Valid Loss: 0.25845 best : 0.12336
NRMSE Valid: {'width': 0.08635276, 'dynamics': 0.1616965, 'jitter': 0.084905036}
R-Squared Valid: {'width': 0.9254320060105184, 'dynamics': 0.7385424534409761, 'jitter': 0.9279113474574564}


  0%|          | 0/1331 [00:02<?, ?batch/s]

  0%|          | 0/167 [00:01<?, ?batch/s]

epoch : 14/25    time : 595s/6543s
Train Loss: 0.08076
Valid Loss: 0.22887 best : 0.12336
NRMSE Valid: {'width': 0.0814731, 'dynamics': 0.124844, 'jitter': 0.09532656}
R-Squared Valid: {'width': 0.9336213373403456, 'dynamics': 0.844139745759406, 'jitter': 0.9091284760008164}


  0%|          | 0/1331 [00:01<?, ?batch/s]

  0%|          | 0/167 [00:01<?, ?batch/s]

epoch : 15/25    time : 528s/5279s
Train Loss: 0.08014
Valid Loss: 0.29327 best : 0.12336
NRMSE Valid: {'width': 0.116999805, 'dynamics': 0.16442646, 'jitter': 0.1127486}
R-Squared Valid: {'width': 0.8631104432757325, 'dynamics': 0.72963937143223, 'jitter': 0.8728775404452702}


  0%|          | 0/1331 [00:01<?, ?batch/s]

  0%|          | 0/167 [00:01<?, ?batch/s]

epoch : 16/25    time : 598s/5381s
Train Loss: 0.07873
Valid Loss: 0.25219 best : 0.12336
NRMSE Valid: {'width': 0.08307122, 'dynamics': 0.1384686, 'jitter': 0.087733805}
R-Squared Valid: {'width': 0.9309917355371901, 'dynamics': 0.8082644654520466, 'jitter': 0.9230278014208003}


  0%|          | 0/1331 [00:01<?, ?batch/s]

KeyboardInterrupt: 