# Model

### package load

In [1]:
import datetime
import numpy as np
from PIL import Image

import os
import sys
import time
from tqdm import tqdm
import numpy as np
import warnings
import random
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms.functional as TF
import torch.nn.functional as F
from torch.autograd import Variable

from model import convert_bn_to_instancenorm, convert_bn_to_evonorm, convert_bn_to_groupnorm, DeepLabHead 
from helpers import AverageMeter, ProgressMeter, iouCalc, visim, vislbl
from labels import labels

### device

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
#device = 'cpu'

### data load

In [4]:
npzfile = np.load('data_norm_v2.npz')

train_x = npzfile['train_x_jitter']
train_y = npzfile['train_y']
val_x = npzfile['val_x']
val_y = npzfile['val_y']
test_x = npzfile['test_x']
test_y = npzfile['test_y']

npzfile.close()

### DeepLab v3 ResNet50 

In [6]:
model = torchvision.models.segmentation.deeplabv3_resnet50(pretrained=False).to(device)
         # dlv3 backbone resnoet 50 :: output chnnel: 2048 --> DL input channel :: 2048
model.classifier = DeepLabHead(2048, 12).to(device) # 12 = class num

optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=2) 

In [7]:
# Initialize metrics
best_miou = 0.0
metrics = {'train_loss' : [],
           'train_acc' : [],
           'test_acc' : [],
           'test_loss' : [],
           'miou' : []}
start_epoch = 0

### Label 

In [8]:
# Create list of class names
  # classLabels: label명
  # validClasses: label에 해당하는 id값

classLabels = []
for label in labels:
    if label.name not in classLabels:
        classLabels.append(label.name)
classLabels.append('void')

validClasses = list(
    np.unique(
        [label.id for label in labels 
         if label.id >= 0
        ] + [11]
    )
)

### train

In [18]:
## parameter

batch_size = 2
num_epoch = 50

result_dir = os.getcwd() + '/result/' + 'jitter/'

In [10]:
from torch.utils.data import TensorDataset, DataLoader

   # X: 0~255, RGB값 가지는 이미지 데이터 (560, 720, 960, 3)
   # Y: 0~11 label값 가지는 픽셀당 클래스 데이터 (560, 720, 960)
X = torch.tensor(train_x, dtype=torch.float32)
Y = torch.tensor(train_y, dtype=torch.long)

data = TensorDataset(X.permute(dims=(0, 3, 1, 2)), Y)
train_data = DataLoader(data, batch_size=batch_size, shuffle=True)

In [11]:
# Create class weight
label_num = {str(_id): 0 for _id in validClasses}
label_num

for y in train_y.flatten():
    label_num[str(y)] += 1
label_num

{'0': 3350644,
 '1': 3682379,
 '2': 1888603,
 '3': 2478108,
 '4': 81471634,
 '5': 4996800,
 '6': 23813698,
 '7': 98899475,
 '8': 16468486,
 '9': 53441175,
 '10': 37943059,
 '11': 10253939}

In [10]:
## (사용 dataset 동일할 때 kernel restart시 실행)
label_num = {'0': 3350644,
 '1': 3682379,
 '2': 1888603,
 '3': 2478108,
 '4': 81471634,
 '5': 4996800,
 '6': 23813698,
 '7': 98899475,
 '8': 16468486,
 '9': 53441175,
 '10': 37943059,
 '11': 10253939}

In [12]:
# delete void -- max value
max_num = max([v for k, v in label_num.items() if k != '11'])
weights = [max_num/num for key, num in label_num.items()]

class_weights = torch.FloatTensor(weights).to(device)
criterion = nn.CrossEntropyLoss(weight = class_weights, ignore_index=12) # weight 파라미터에 class_weight 추가

In [13]:
class_weights

tensor([29.5166, 26.8575, 52.3665, 39.9093,  1.2139, 19.7926,  4.1530,  1.0000,
         6.0054,  1.8506,  2.6065,  9.6450])

In [None]:
model.train()
res = X.shape[1] * X.shape[2] # 720 * 960

for epoch in range(num_epoch):
    
    loss_running = AverageMeter('Loss', ':.4e')
    acc_running = AverageMeter('Accuracy', ':.3f')
    
    iou = iouCalc(classLabels, validClasses, voidClass = 11)
    progress = ProgressMeter(
        len(train_data),
        [loss_running, acc_running],
        prefix="Train, epoch: [{}]".format(epoch))
    
    batch_loss = 0.0
    for batch, (x, y) in enumerate(tqdm(train_data, total=len(train_data))):
        x = x.to(device)
        y = y.to(device)
        
        # zero the parameter gradients
        optimizer.zero_grad()
        
        # forward pass
        outputs = model(x)
        outputs = outputs['out']
        preds = torch.argmax(outputs, 1)
        
        # cross-entropy loss
        loss = criterion(outputs, y)

        # backward pass
        loss.backward()
        optimizer.step()
        
        # Statistics
        bs = x.size(0)
        loss = loss.item()
        loss_running.update(loss, bs)
        corrects = torch.sum((preds == y) & (y != 12))
        
        nvoid = int((y==12).sum()) 
        acc = corrects.double()/(bs*res-nvoid)
        acc_running.update(acc, bs)
        
        # Calculate IoU scores of current batch
        iou.evaluateBatch(preds, y)
        
        progress.display(epoch)
        
        
    scheduler.step(loss_running.avg)
    miou = iou.outputScores()
    
    # save checkpoint
    now = datetime.datetime.now()
    now_time = now.strftime('%y%m%d%H')
    
    # save path
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    
    save_path = result_dir
    
    with open(save_path + 'train_logs.csv', 'a') as epochs:
            epochs.write('{}, {:.4f}, {:.4f}, {:.4f}\n'.format(
                    epoch+1, loss_running.avg, acc_running.avg, miou))
            
    # Save model to file
    torch.save({
        'epoch' : epoch + 1,
        'model_state_dict' : model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'best_miou': best_miou,
        'metrics': metrics,
        }, save_path + now_time + '_E' + str(epoch+1) + '_checkpoint.pth.tar')
    
    # Save best model to file
    if miou > best_miou:
        print('mIoU improved from {:.4f} to {:.4f}.'.format(best_miou, miou))
        best_miou = miou
        
    print('epoch ', epoch)
    print('loss : {:.4f}   acc : {:.4f}   miou : {:.4f}'.format(loss_running.avg, acc_running.avg, miou))

  0%|▎                                                                               | 1/245 [00:15<1:04:46, 15.93s/it]

Train, epoch: [0][  0/245]	Loss 2.4294e+00 (2.4294e+00)	Accuracy 0.355 (0.355)


  1%|▋                                                                               | 2/245 [00:32<1:05:07, 16.08s/it]

Train, epoch: [0][  0/245]	Loss 2.2819e+00 (2.3557e+00)	Accuracy 0.422 (0.388)


  1%|▉                                                                               | 3/245 [00:48<1:05:05, 16.14s/it]

Train, epoch: [0][  0/245]	Loss 2.2505e+00 (2.3206e+00)	Accuracy 0.535 (0.437)


  2%|█▎                                                                              | 4/245 [01:04<1:05:33, 16.32s/it]

Train, epoch: [0][  0/245]	Loss 2.1018e+00 (2.2659e+00)	Accuracy 0.514 (0.456)


  2%|█▋                                                                              | 5/245 [01:21<1:05:30, 16.38s/it]

Train, epoch: [0][  0/245]	Loss 1.9945e+00 (2.2116e+00)	Accuracy 0.509 (0.467)


  2%|█▉                                                                              | 6/245 [01:37<1:05:14, 16.38s/it]

Train, epoch: [0][  0/245]	Loss 1.9325e+00 (2.1651e+00)	Accuracy 0.534 (0.478)


  3%|██▎                                                                             | 7/245 [01:54<1:04:57, 16.38s/it]

Train, epoch: [0][  0/245]	Loss 2.0186e+00 (2.1442e+00)	Accuracy 0.565 (0.491)


  3%|██▌                                                                             | 8/245 [02:10<1:04:35, 16.35s/it]

Train, epoch: [0][  0/245]	Loss 2.1281e+00 (2.1422e+00)	Accuracy 0.480 (0.489)


  4%|██▉                                                                             | 9/245 [02:26<1:04:28, 16.39s/it]

Train, epoch: [0][  0/245]	Loss 1.9587e+00 (2.1218e+00)	Accuracy 0.517 (0.492)


  4%|███▏                                                                           | 10/245 [02:43<1:03:56, 16.33s/it]

Train, epoch: [0][  0/245]	Loss 1.4433e+00 (2.0539e+00)	Accuracy 0.570 (0.500)


  4%|███▌                                                                           | 11/245 [02:59<1:03:58, 16.40s/it]

Train, epoch: [0][  0/245]	Loss 1.6017e+00 (2.0128e+00)	Accuracy 0.556 (0.505)


  5%|███▊                                                                           | 12/245 [03:16<1:03:59, 16.48s/it]

Train, epoch: [0][  0/245]	Loss 1.6219e+00 (1.9802e+00)	Accuracy 0.533 (0.507)


  5%|████▏                                                                          | 13/245 [03:33<1:04:07, 16.58s/it]

Train, epoch: [0][  0/245]	Loss 1.4292e+00 (1.9379e+00)	Accuracy 0.644 (0.518)


  6%|████▌                                                                          | 14/245 [03:49<1:03:18, 16.45s/it]

Train, epoch: [0][  0/245]	Loss 1.4964e+00 (1.9063e+00)	Accuracy 0.599 (0.524)


  6%|████▊                                                                          | 15/245 [04:05<1:03:01, 16.44s/it]

Train, epoch: [0][  0/245]	Loss 1.5261e+00 (1.8810e+00)	Accuracy 0.643 (0.532)


  7%|█████▏                                                                         | 16/245 [04:22<1:02:42, 16.43s/it]

Train, epoch: [0][  0/245]	Loss 1.5745e+00 (1.8618e+00)	Accuracy 0.570 (0.534)


  7%|█████▍                                                                         | 17/245 [04:38<1:02:46, 16.52s/it]

Train, epoch: [0][  0/245]	Loss 1.5628e+00 (1.8442e+00)	Accuracy 0.546 (0.535)


  7%|█████▊                                                                         | 18/245 [04:55<1:02:28, 16.51s/it]

Train, epoch: [0][  0/245]	Loss 1.5431e+00 (1.8275e+00)	Accuracy 0.650 (0.541)


  8%|██████▏                                                                        | 19/245 [05:12<1:02:25, 16.57s/it]

Train, epoch: [0][  0/245]	Loss 1.2469e+00 (1.7969e+00)	Accuracy 0.723 (0.551)


  8%|██████▍                                                                        | 20/245 [05:28<1:02:32, 16.68s/it]

Train, epoch: [0][  0/245]	Loss 1.7027e+00 (1.7922e+00)	Accuracy 0.684 (0.557)


  9%|██████▊                                                                        | 21/245 [05:45<1:02:08, 16.65s/it]

Train, epoch: [0][  0/245]	Loss 1.3917e+00 (1.7732e+00)	Accuracy 0.653 (0.562)


  9%|███████                                                                        | 22/245 [06:02<1:01:48, 16.63s/it]

Train, epoch: [0][  0/245]	Loss 1.3123e+00 (1.7522e+00)	Accuracy 0.728 (0.570)


  9%|███████▍                                                                       | 23/245 [06:18<1:01:19, 16.57s/it]

Train, epoch: [0][  0/245]	Loss 1.1367e+00 (1.7255e+00)	Accuracy 0.760 (0.578)


 10%|███████▋                                                                       | 24/245 [06:35<1:01:13, 16.62s/it]

Train, epoch: [0][  0/245]	Loss 9.3293e-01 (1.6924e+00)	Accuracy 0.771 (0.586)


 10%|████████                                                                       | 25/245 [06:51<1:00:36, 16.53s/it]

Train, epoch: [0][  0/245]	Loss 1.3039e+00 (1.6769e+00)	Accuracy 0.742 (0.592)


 11%|████████▍                                                                      | 26/245 [07:08<1:00:09, 16.48s/it]

Train, epoch: [0][  0/245]	Loss 1.2101e+00 (1.6589e+00)	Accuracy 0.722 (0.597)


 11%|████████▋                                                                      | 27/245 [07:24<1:00:06, 16.54s/it]

Train, epoch: [0][  0/245]	Loss 1.0066e+00 (1.6348e+00)	Accuracy 0.718 (0.602)


 11%|█████████                                                                      | 28/245 [07:41<1:00:00, 16.59s/it]

Train, epoch: [0][  0/245]	Loss 8.3340e-01 (1.6062e+00)	Accuracy 0.791 (0.608)


 12%|█████████▌                                                                       | 29/245 [07:58<59:58, 16.66s/it]

Train, epoch: [0][  0/245]	Loss 9.9247e-01 (1.5850e+00)	Accuracy 0.765 (0.614)


 12%|█████████▋                                                                     | 30/245 [08:15<1:00:01, 16.75s/it]

Train, epoch: [0][  0/245]	Loss 9.7428e-01 (1.5646e+00)	Accuracy 0.766 (0.619)


 13%|█████████▉                                                                     | 31/245 [08:33<1:01:16, 17.18s/it]

Train, epoch: [0][  0/245]	Loss 9.4128e-01 (1.5445e+00)	Accuracy 0.754 (0.623)


 13%|██████████▎                                                                    | 32/245 [08:50<1:00:33, 17.06s/it]

Train, epoch: [0][  0/245]	Loss 8.5382e-01 (1.5229e+00)	Accuracy 0.757 (0.627)


 13%|██████████▋                                                                    | 33/245 [09:07<1:00:07, 17.02s/it]

Train, epoch: [0][  0/245]	Loss 9.5998e-01 (1.5059e+00)	Accuracy 0.737 (0.631)


 14%|███████████▏                                                                     | 34/245 [09:24<59:58, 17.06s/it]

Train, epoch: [0][  0/245]	Loss 1.0601e+00 (1.4928e+00)	Accuracy 0.720 (0.633)


 14%|███████████▌                                                                     | 35/245 [09:40<59:20, 16.96s/it]

Train, epoch: [0][  0/245]	Loss 8.1564e-01 (1.4734e+00)	Accuracy 0.775 (0.637)


 15%|███████████▉                                                                     | 36/245 [09:57<58:58, 16.93s/it]

Train, epoch: [0][  0/245]	Loss 7.7408e-01 (1.4540e+00)	Accuracy 0.776 (0.641)


 15%|████████████▏                                                                    | 37/245 [10:14<58:33, 16.89s/it]

Train, epoch: [0][  0/245]	Loss 1.1350e+00 (1.4454e+00)	Accuracy 0.704 (0.643)


 16%|████████████▌                                                                    | 38/245 [10:31<58:10, 16.86s/it]

Train, epoch: [0][  0/245]	Loss 7.1167e-01 (1.4261e+00)	Accuracy 0.785 (0.647)


### val

In [None]:
X = torch.tensor(val_x, dtype=torch.float32)
Y = torch.tensor(val_x, dtype=torch.long)

data = torch.utils.data.TensorDataset(X.permute(dims=(0, 3, 1, 2)), Y)

val_data = torch.utils.data.DataLoader(data, batch_size=batch_size, shuffle=True)

save_path = result_dir
result = sorted(os.listdir(save_path), reverse=True)

In [None]:
result

In [None]:
for epoch, file in enumerate(result):
    checkpoint = torch.load(save_path + file)
    model.load_state_dict(checkpoint['model_state_dict'], strict=True)
    
    model.eval()
    
    loss_running = AverageMeter('Loss', ':.4e')
    acc_running = AverageMeter('Accuracy', ':.3f')
    
    iou = iouCalc(classLabels, validClasses, voidClass = 11)
    
    batch_loss = 0.0
    
    for batch, (x, y) in enumerate(tqdm(val_data, total=len(val_data))):

        x = x.to(device)
        y = y.to(device)

        # forward
        outputs = model(x)
        outputs = outputs['out']

        preds = torch.argmax(outputs, 1)

        # cross-entropy loss
        loss = criterion(outputs, y)

        # Statistics
        bs = x.size(0)
        loss = loss.item()
        loss_running.update(loss, bs)
        corrects = torch.sum((preds == y) & (y != 12))

        nvoid = int((y==12).sum())
        acc = corrects.double()/(bs*res-nvoid)
        acc_running.update(acc, bs)
    
        # Calculate IoU scores of current batch
        iou.evaluateBatch(preds, y)

    miou = iou.outputScores()
    scheduler.step(loss_running.avg)
    
    with open(save_path + 'val_logs.csv', 'a') as epochs:
            epochs.write('{}, {:.4f}, {:.4f}, {:.4f}\n'.format(
                    epoch+1, loss_running.avg, acc_running.avg, miou))
    
    print('val- epoch: {}'.format(epoch+1))
    print('loss : {:.4f} acc : {:.4f} miou : {:.4f}'.format(loss_running.avg, acc_running.avg, miou)) 

### plot

In [None]:
train_logs = pd.read_csv(save_path + 'train_logs.csv', names = ['epoch', 'loss', 'accuracy', 'miou'])
val_logs = pd.read_csv(save_path + 'val_logs.csv', names = ['epoch', 'loss', 'accuracy', 'miou'])

plt.figure(figsize=(15, 5))
plt.plot(train_logs['epoch'], train_logs['miou'], label = 'train_miou')
plt.plot(train_logs['epoch'], train_logs['accuracy'], label = 'train_accuracy')

plt.plot(val_logs['epoch'], val_logs['miou'], label = 'val_miou')
plt.plot(val_logs['epoch'], val_logs['accuracy'], label = 'val_accuracy')

plt.xticks([i for i in range(1, train_logs.shape[0]+1)])
plt.title("Performances of train, val dataset")
plt.xlabel("epoch")
plt.ylabel("")
plt.legend()
plt.show()

### test

In [16]:
X = torch.tensor(test_x, dtype=torch.float32)
Y = torch.tensor(test_y, dtype=torch.long)

data = torch.utils.data.TensorDataset(X.permute(dims=(0, 3, 1, 2)), Y)

test_data = torch.utils.data.DataLoader(data, batch_size=batch_size, shuffle=True)

#save_path = result_dir
#result = sorted(os.listdir(save_path), reverse=True)

In [46]:
result

['0605_best_weights.pth.tar']

In [19]:
checkpoint = torch.load(save_path + result[0]) # train, val 결과보고 결정 (overfitting 시작되기 전)
model.load_state_dict(checkpoint['model_state_dict'], strict=True)

<All keys matched successfully>

In [None]:
batch_time = AverageMeter('Time', ':6.3f')
data_time = AverageMeter('Data', ':6.3f')
progress = ProgressMeter(
    len(test_data),
    [batch_time, data_time],
    prefix='Predict: ')
loss_running = AverageMeter('Loss', ':.4e')
acc_running = AverageMeter('Accuracy', ':.3f')
    
iou = iouCalc(classLabels, validClasses, voidClass = 11)

model.eval()

batch_loss = 0.0
for batch, (x, y) in enumerate(tqdm(test_data, total=len(test_data))):

    x = x.to(device)
    y = y.to(device)

    # forward
    outputs = model(x)
    outputs = outputs['out']

    preds = torch.argmax(outputs, 1)

    # cross-entropy loss
    loss = criterion(outputs, y)

    # Statistics
    bs = x.size(0)
    loss = loss.item()
    loss_running.update(loss, bs)
    corrects = torch.sum((preds == y) & (y != 12))

    nvoid = int((y==12).sum())
    acc = corrects.double()/(bs*res-nvoid)
    acc_running.update(acc, bs)

    # Calculate IoU scores of current batch
    iou.evaluateBatch(preds, y)

miou = iou.outputScores()
scheduler.step(loss_running.avg)

print('loss : {:.4f} acc : {:.4f} miou : {:.4f}'.format(loss_running.avg, acc_running.avg, miou))