### TODO :
 - patch data
 - The output mask size differs from the original image size; how can they be made to match?


In [None]:
import torch
import torch.nn as nn
import torch.functional as F
from torch.utils.tensorboard import SummaryWriter
from torchvision.utils import make_grid
import pathlib

import pandas as pd
from tqdm import tqdm
import numpy as np

# import import_ipynb
from steel_dataset import Steel_dataset
from model import U_net
from util import csv_file_load
import util
from pre_processing import Pre_process_img

from collections import OrderedDict
import math

In [None]:
# Everything the notebook reads or writes lives under ROOT_PATH.
# Alternative root: pathlib.Path('.')
ROOT_PATH = pathlib.Path('steel_images')
IMG_FILE_PATH = ROOT_PATH.joinpath('train_images')
TRAIN_FILE = ROOT_PATH.joinpath('train.csv')
CK_PATH = ROOT_PATH.joinpath('checkpoints')
LOG_PATH = ROOT_PATH.joinpath('logs')

# Fraction of the CSV rows used for training, batch size, and class count.
SPILIT_RATIO, n_batch, n_classes = 0.8, 4, 1

In [None]:
# Load the annotation table and split it sequentially (no shuffling):
# the first SPILIT_RATIO of the rows train the model, the rest validate it.
train_pd = csv_file_load(TRAIN_FILE)
# train_pd.ClassId = 1
train_idx = int(len(train_pd) * SPILIT_RATIO)

# Slice the validation rows first, because train_pd is rebound right after.
val_pd = train_pd.iloc[train_idx:, :].reset_index(drop=True)
train_pd = train_pd.iloc[:train_idx, :]

# Fall back to the CPU so this cell also runs on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Build both splits with the same configuration; out_size is (256, 1600).
train_dataset, val_dataset = (
  Steel_dataset(IMG_FILE_PATH, frame, out_size=(256, 1600))
  for frame in (train_pd, val_pd)
)

In [None]:
# One shuffled loader per split, both batching n_batch samples.
train_dataloader, val_dataloader = (
  torch.utils.data.DataLoader(dataset, batch_size=n_batch, shuffle=True)
  for dataset in (train_dataset, val_dataset)
)

In [6]:
# The number of input channels is a property of the images, not of the label
# set: the batches printed further down in this notebook have one channel.
# Tying in_channel to n_classes only worked because n_classes happens to be 1.
u_net = U_net(n_classes=n_classes, in_channel=1)
# Initialize the weights of the U-net model from a normal distribution with
# std = sqrt(2/N), where N is the number of incoming nodes of one neuron.
# e.g. a 3x3 conv with 64 channels in the previous layer -> N = 9 * 64 = 576
# for p in u_net.parameters():
#   nn.init.normal_(p, std= math.sqrt(2/p[0].numel()))


# Xavier initialization
# for p in u_net.parameters():
#   nn.init.xavier_normal_(p)

In [18]:
# Drop the current model object. Cells below that reference `u_net`
# (optimizer setup, training loop, forward smoke test) need the model
# construction cell to be run again after this one.
del u_net

In [9]:
def save_model(model, optim, save_path, epoch, loss):
  '''
     Write a training checkpoint to save_path with torch.save.

     The checkpoint is a dict with the keys 'model' (the model object itself),
     'model_state_dict', 'epoch', 'loss' and 'optim_state_dict'.

     model     : module exposing state_dict()
     optim     : optimizer exposing state_dict()
     save_path : str / path-like destination (file-like objects are passed
                 through to torch.save untouched)
     epoch     : stored as-is under 'epoch'
     loss      : stored as-is under 'loss'
  '''
  import os

  # torch.save does not create missing directories, so make sure the parent
  # exists before writing; only meaningful for real filesystem paths.
  if isinstance(save_path, (str, os.PathLike)):
    parent_dir = os.path.dirname(os.fspath(save_path))
    if parent_dir:
      os.makedirs(parent_dir, exist_ok=True)

  checkpoint = {
    'model': model,
    'model_state_dict': model.state_dict(),
    'epoch': epoch,
    'loss': loss,
    'optim_state_dict': optim.state_dict(),
  }
  torch.save(checkpoint, save_path)
  print(f'model saved \n {save_path}')

In [7]:
def get_class_weights_by_pixel_frequencies(classId, EncodedPixels, img_size):
  '''
     Count pixels per class from run-length encodings and turn the counts
     into class weights via util.get_weights_ratio_over_frequnecies.

     classId       : iterable of integer class ids, one per annotation row
     EncodedPixels : iterable of run-length strings, aligned with classId;
                     the odd-positioned tokens (2nd, 4th, ...) are read as
                     run lengths
     img_size      : must be 1 dimension. (H*W)

     Index 0 of the count vector is the background, index c is class c.
     Every row adds (img_size - its defect pixels) to the background, so an
     image that appears in several rows contributes background several times.
     The counts are divided by img_size (a single image's pixel count) before
     being handed to the util helper.
  '''
  class_ids = np.asarray(classId).astype(int)

  # The vector is indexed by the class id itself, so it has to reach the
  # largest id; sizing it by the number of distinct ids breaks as soon as
  # the ids present are not exactly 1..K.
  n_slots = int(class_ids.max()) + 1 if class_ids.size else 1
  p_counts = np.zeros(n_slots)

  # counts total pixels of training image dataset
  for c, e in zip(class_ids, EncodedPixels):
    run_lengths = np.asarray(e.split(' '))[1::2].astype(int)
    cls_pixels = int(run_lengths.sum())
    p_counts[c] += cls_pixels
    p_counts[0] += img_size - cls_pixels
  p_counts /= img_size

  return util.get_weights_ratio_over_frequnecies(p_counts)


## Class Weight
 : the class weight needs to incorporate the context (spatial variance) of the images

$ w(c) = w_0 + w_{context} $

$ Loss = -\sum w(c) \cdot p(x) \cdot \log(p(x)) $

In [8]:

# Derive the per-class loss weights from the training split only;
# img_size is the flattened pixel count of one 1600x256 image.
class_weight = get_class_weights_by_pixel_frequencies(
  classId=train_pd.ClassId,
  EncodedPixels=train_pd.EncodedPixels,
  img_size=1600 * 256,
)

print(class_weight)

[0.00018669895161934006, 0.1309006771936889, 0.6234398782343988, 0.0039001194268747843, 0.01847140301219158]


In [13]:
# CrossEntropyLoss stores `weight` as a buffer, which must be a Tensor (a
# plain Python list is rejected), and the weight has to sit on the same
# device as the logits it is applied to.
# NOTE(review): the weights printed above have 5 entries while n_classes is
# 1; the weight length must equal the number of output channels of the
# model -- confirm n_classes before training.
criterion = nn.CrossEntropyLoss(
  weight=torch.as_tensor(class_weight, dtype=torch.float32, device=device)
)
optim = torch.optim.SGD(u_net.parameters(), momentum=0.99, lr=0.001)
writer = SummaryWriter(LOG_PATH)

In [11]:
# Sample counts (not batch counts) of the two dataset splits.
train_len, val_len = (len(split) for split in (train_dataset, val_dataset))

In [None]:
n_epochs = 100

# The batches are moved to `device` below, so the model has to live there too.
u_net.to(device)

# Progress and averages are per batch, so use the loader lengths rather than
# the dataset lengths (which count samples).
n_train_batches = len(train_dataloader)
n_val_batches = len(val_dataloader)

for epoch in range(n_epochs):
  total_acc_train = 0.0
  total_acc_val = 0.0
  total_loss_val = 0.0
  total_loss_train = 0.0

  u_net.train()
  for i, (x_, mask) in enumerate(train_dataloader):
    # .float() mirrors the cast used in the forward smoke test of this notebook.
    x_ = x_.to(device).float()
    mask = mask.to(device).long()

    optim.zero_grad()

    out = u_net(x_)
    loss_train = criterion(out, mask)
    loss_train.backward()
    optim.step()

    # Pixel accuracy: compare per-pixel predictions with the mask and average
    # over the mask's pixels (out.numel() would also count the channel axis).
    total_acc_train += (torch.argmax(out, dim=1) == mask).float().mean().item()
    # .item() detaches the value; summing the loss tensor itself would keep
    # every batch's autograd graph alive.
    total_loss_train += loss_train.item()

    if i % 100 == 0:
      seen = i + 1  # batches processed so far; avoids dividing by zero at i == 0
      writer.add_scalars(
        'train',
        {'accuracy': total_acc_train / seen, 'loss': total_loss_train / seen},
        global_step=epoch * n_train_batches + i,
      )
      print(f'epoch:{epoch} train batch : {seen / n_train_batches * 100}% ---- \n'
            f' train_loss:{total_loss_train / seen} train_accuracy:{total_acc_train / seen}')

  u_net.eval()
  with torch.no_grad():

    for j, (val_x, val_mask) in enumerate(val_dataloader):
      val_x = val_x.to(device).float()
      val_mask = val_mask.to(device).long()
      out_val = u_net(val_x)
      loss_val = criterion(out_val, val_mask)

      total_acc_val += (torch.argmax(out_val, dim=1) == val_mask).float().mean().item()
      total_loss_val += loss_val.item()

      if j % 100 == 0:
        seen = j + 1
        print(f'epoch:{epoch} val_batch : {seen / n_val_batches * 100}% ---- \n'
              f' val_loss:{total_loss_val / seen} val_accuracy:{total_acc_val / seen}')

    # Log images from the last validation batch of the epoch.
    if n_val_batches > 0:
      # display output images of each classes
      for k in range(out_val.shape[1]):
        # Keep the channel axis (k:k+1) so make_grid sees a (B, 1, H, W) batch;
        # normalize=True rescales the raw scores into [0, 1] for display.
        g = make_grid(out_val[:, k:k + 1, :, :], normalize=True)
        writer.add_image(f'val_output class{k+1}', g, global_step=epoch)
      # display class names of each images (non-zero label values per mask)
      val_cls = [[c for c in m.unique().tolist() if c != 0] for m in val_mask]
      m_g = make_grid(val_mask.unsqueeze(dim=1).float(), normalize=True)
      writer.add_image(f'val_mask class {val_cls})', m_g, global_step=epoch)

  # Guard against an empty validation loader.
  mean_acc_val = total_acc_val / max(n_val_batches, 1)
  mean_loss_val = total_loss_val / max(n_val_batches, 1)
  writer.add_scalars('val', {'accuracy': mean_acc_val, 'loss': mean_loss_val}, global_step=epoch)
  print(f'epoch:{epoch} ended. \n -- total_val_loss:{total_loss_val} mean_val_accuracy:{mean_acc_val}')

  save_model(u_net, optim, f'u_net_{epoch}e_{int(total_loss_val)}l.pt', epoch, total_loss_val)

In [9]:
# Remaining recursion budgets for c() and d(); each call chain consumes its
# counter down to zero.
i = 3
j = 3


def c(x):
  '''
     Apply x -> (x - 4) / 2 repeatedly, printing every step.

     Recurses once per remaining unit of the global counter i and returns
     the last computed value. Presumably models the size change of one
     contracting U-Net level (TODO confirm against the model).
  '''
  global i
  halved = (x - 4) / 2
  print(f'x : {x} y:{halved}')
  if i == 0:
    return halved
  i -= 1
  return c(halved)


def d(x):
  '''
     Apply x -> (x - 4) * 2 repeatedly, printing every step.

     Recurses once per remaining unit of the global counter j and returns
     the last computed value. Presumably models the size change of one
     expanding U-Net level (TODO confirm against the model).
  '''
  global j
  doubled = (x - 4) * 2
  print(f'x : {x} y:{doubled}')
  if j == 0:
    return doubled
  j -= 1
  return d(doubled)


d(c(1784))-4

x : 1784 y:890.0
x : 890.0 y:443.0
x : 443.0 y:219.5
x : 219.5 y:107.75
x : 107.75 y:207.5
x : 207.5 y:407.0
x : 407.0 y:806.0
x : 806.0 y:1604.0


1600.0

In [7]:
# Pull a single batch with the built-in next(); iterators are not guaranteed
# to expose a .next() method.
a = next(iter(train_dataloader))
print(a[0].size())
# This is only a forward-pass check, so skip autograd bookkeeping: it avoids
# keeping every intermediate activation in memory.
with torch.no_grad():
  b = u_net(a[0].float())

torch.Size([4, 1, 624, 1968])
torch.Size([4, 1, 624, 1968])


RuntimeError: [enforce fail at ..\c10\core\CPUAllocator.cpp:72] data. DefaultCPUAllocator: not enough memory: you tried to allocate 11222138880 bytes. Buy new RAM!
