In [1]:
# add path where .py files can be imported
import sys
sys.path.insert(0, './IndustrialSmokePlumeDetection/segmentation')

In [2]:
# u-net training process
import numpy as np
import random
import torch
from torch import nn, optim
from tqdm.autonotebook import tqdm
from sklearn.metrics import accuracy_score
from torch.utils.data import DataLoader, random_split, SubsetRandomSampler
from torch.utils.tensorboard import SummaryWriter
import argparse
from sklearn.metrics import jaccard_score

from network_ori import *
from data import create_dataset

import warnings
warnings.filterwarnings('ignore')

# Select the compute device (first CUDA GPU when available, CPU otherwise),
# build the R2AttU_Net with 12 input channels and 1 output channel, and move
# it onto that device.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
model = R2AttU_Net(img_ch=12, output_ch=1)
model.to(device)

print('running on...', device)


# Run configuration. These values are also embedded in the TensorBoard run
# directory name below.
#   md: model tag, ep: number of epochs, bs: batch size,
#   lr: learning rate, mo: SGD momentum
md, ep, bs, lr, mo = 'r2attunet', 300, 60, 0.5, 0.7


# TensorBoard writer: one log directory per hyper-parameter combination
run_tag = "{}_ep{:0d}_lr{:.0e}_bs{:03d}_mo{:.1f}/".format(md, ep, lr, bs, mo)
writer = SummaryWriter('../runs/' + run_tag)

# Binary cross-entropy that takes raw logits (the sigmoid is applied inside)
loss = nn.BCEWithLogitsLoss()

# Plain SGD with momentum over all model parameters
opt = optim.SGD(model.parameters(), momentum=mo, lr=lr)

# Multiply the learning rate by 0.5 (never going below 1e-6) once the
# monitored quantity has stopped decreasing
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    opt, mode='min', factor=0.5, threshold=1e-4, min_lr=1e-6)


# Full dataset: image data is read from '../data', segmentation labels for
# training from '../labels'
data_all = create_dataset(datadir='../data', seglabeldir='../labels', mult=1)

# Shuffle all sample indices with a fixed seed, then cut them into
# [0, split_1) -> test (15%), [split_1, split_2) -> validation (15%),
# [split_2, end) -> training (the remainder).
len_all = len(data_all)
split_1 = (15*len_all) // 100
split_2 = 2*split_1
indices = list(range(len_all))
random.seed(9001)
random.shuffle(indices)

test_sampler = SubsetRandomSampler(indices[:split_1])
val_sampler = SubsetRandomSampler(indices[split_1:split_2])
train_sampler = SubsetRandomSampler(indices[split_2:])

# Training and validation loaders share every option except the sampler
loader_opts = dict(batch_size=bs, num_workers=6, pin_memory=True)
train_dl = DataLoader(data_all, sampler=train_sampler, **loader_opts)
val_dl = DataLoader(data_all, sampler=val_sampler, **loader_opts)


# run training
def _batch_metrics(logits, target):
    """Derive segmentation metrics for one batch from raw network output.

    Shared by the training and validation passes so both report identical
    quantities.

    Args:
        logits: network output for the batch. It is indexed as
            (sample, channel, ...) and summed over axes (1, 2, 3); channel 0
            is taken as the predicted mask.
        target: ground-truth masks for the batch, summed over axes (1, 2),
            i.e. without a channel axis.

    Returns:
        A tuple ``(ious, accuracy, arearatios)``:
        ious: list of Jaccard scores, one per sample for which both the
            predicted and the true mask are non-empty (other samples are
            skipped).
        accuracy: image-wise accuracy of "mask is non-empty" for the batch.
        arearatios: list with one predicted/true area ratio per sample;
            when the true area is 0 the entry is 1 if the prediction is
            empty as well and 0 otherwise.
    """
    # move both tensors to the host once instead of once per use
    target_np = target.detach().cpu().numpy()

    # binary segmentation map: logit >= 0 is the same as sigmoid >= 0.5
    pred_np = np.zeros(logits.shape)
    pred_np[logits.detach().cpu().numpy() >= 0] = 1

    # smoke areas (pixel counts) per sample
    area_pred = np.sum(pred_np, axis=(1, 2, 3))
    area_true = np.sum(target_np, axis=(1, 2))

    # IoU, evaluated only where it is kept: both masks must be non-empty
    ious = [
        jaccard_score(target_np[n].flatten(), pred_np[n][0].flatten())
        for n in range(target_np.shape[0])
        if np.sum(pred_np[n][0]) != 0 and np.sum(target_np[n]) != 0
    ]

    # scalar binary labels on a per-image basis -> image-wise accuracy
    accuracy = accuracy_score((area_true != 0).astype(int),
                              (area_pred != 0).astype(int))

    # smoke area ratios
    arearatios = []
    for pred_area, true_area in zip(area_pred, area_true):
        if true_area == 0:
            arearatios.append(1 if pred_area == 0 else 0)
        else:
            arearatios.append(pred_area/true_area)

    return ious, accuracy, arearatios


# run training
for epoch in range(ep):

    model.train()

    train_loss_total = 0
    train_ious = []
    train_acc_total = 0
    train_arearatios = []
    progress = tqdm(enumerate(train_dl), desc="Train Loss: ",
                    total=len(train_dl))
    for i, batch in progress:
        x = batch['img'].float().to(device)
        y = batch['fpt'].float().to(device)

        output = model(x)

        # derive loss (labels get a channel axis to match the output)
        loss_epoch = loss(output, y.unsqueeze(dim=1))
        train_loss_total += loss_epoch.item()
        progress.set_description("Train Loss: {:.4f}".format(
            train_loss_total/(i+1)))

        # derive IoU, image-wise accuracy and area ratios for this batch
        batch_ious, batch_acc, batch_arearatios = _batch_metrics(output, y)
        train_ious.extend(batch_ious)
        train_acc_total += batch_acc
        train_arearatios.extend(batch_arearatios)

        # learning
        opt.zero_grad()
        loss_epoch.backward()
        opt.step()

    # logging (per-batch quantities are averaged over the i+1 batches seen)
    writer.add_scalar("training loss", train_loss_total/(i+1), epoch)
    writer.add_scalar("training iou", np.average(train_ious), epoch)
    writer.add_scalar("training acc", train_acc_total/(i+1), epoch)
    writer.add_scalar('training arearatio mean',
                        np.average(train_arearatios), epoch)
    writer.add_scalar('training arearatio std',
                        np.std(train_arearatios), epoch)
    writer.add_scalar('learning_rate', opt.param_groups[0]['lr'], epoch)

    torch.cuda.empty_cache()

    # evaluation
    model.eval()
    val_loss_total = 0
    val_ious = []
    val_acc_total = 0
    val_arearatios = []
    progress = tqdm(enumerate(val_dl), desc="val Loss: ",
                    total=len(val_dl))
    # Nothing is back-propagated during validation, so autograd is switched
    # off: no graph is recorded and its memory is not held on the device.
    with torch.no_grad():
        for j, batch in progress:
            x = batch['img'].float().to(device)
            y = batch['fpt'].float().to(device)

            output = model(x)

            # derive loss
            loss_epoch = loss(output, y.unsqueeze(dim=1))
            val_loss_total += loss_epoch.item()

            # derive IoU, image-wise accuracy and area ratios for this batch
            batch_ious, batch_acc, batch_arearatios = _batch_metrics(output, y)
            val_ious.extend(batch_ious)
            val_acc_total += batch_acc
            val_arearatios.extend(batch_arearatios)

            progress.set_description("val Loss: {:.4f}".format(
                val_loss_total/(j+1)))

    # logging
    writer.add_scalar("val loss", val_loss_total/(j+1), epoch)
    writer.add_scalar("val iou", np.average(val_ious), epoch)
    writer.add_scalar("val acc", val_acc_total/(j+1), epoch)
    writer.add_scalar('val arearatio mean',
                        np.average(val_arearatios), epoch)
    writer.add_scalar('val arearatio std',
                        np.std(val_arearatios), epoch)

    print(("Epoch {:d}: train loss={:.3f}, val loss={:.3f}, "
            "train iou={:.3f}, val iou={:.3f}, "
            "train acc={:.3f}, val acc={:.3f}").format(
                epoch+1, train_loss_total/(i+1), val_loss_total/(j+1),
                np.average(train_ious), np.average(val_ious),
                train_acc_total/(i+1), val_acc_total/(j+1)))

    # save model checkpoint (modulus 1 -> every epoch; raise it to save
    # less often)
    if epoch % 1 == 0:
        torch.save(model.state_dict(),
        '../models/{}_ep{:0d}_lr{:.0e}_bs{:02d}_mo{:.1f}_{:03d}.model'.format(
            md, ep, lr, bs, mo, epoch))

    writer.flush()
    # the plateau scheduler monitors the mean validation loss of this epoch
    scheduler.step(val_loss_total/(j+1))
    torch.cuda.empty_cache()


writer.close()

running on... cuda:0


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 1: train loss=0.119, val loss=0.079, train iou=0.201, val iou=0.279, train acc=0.643, val acc=0.685


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 2: train loss=0.067, val loss=0.076, train iou=0.307, val iou=0.238, train acc=0.726, val acc=0.726


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 3: train loss=0.064, val loss=0.074, train iou=0.339, val iou=0.348, train acc=0.708, val acc=0.686


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 4: train loss=0.059, val loss=0.067, train iou=0.373, val iou=0.350, train acc=0.749, val acc=0.725


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 5: train loss=0.057, val loss=0.064, train iou=0.372, val iou=0.379, train acc=0.755, val acc=0.769


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 6: train loss=0.057, val loss=0.065, train iou=0.379, val iou=0.370, train acc=0.747, val acc=0.760


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 7: train loss=0.055, val loss=0.081, train iou=0.404, val iou=0.358, train acc=0.774, val acc=0.719


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 8: train loss=0.051, val loss=0.124, train iou=0.426, val iou=0.284, train acc=0.797, val acc=0.681


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 9: train loss=0.051, val loss=0.068, train iou=0.442, val iou=0.408, train acc=0.779, val acc=0.781


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 10: train loss=0.049, val loss=0.066, train iou=0.440, val iou=0.417, train acc=0.788, val acc=0.799


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 11: train loss=0.050, val loss=0.060, train iou=0.437, val iou=0.380, train acc=0.789, val acc=0.777


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 12: train loss=0.048, val loss=0.072, train iou=0.449, val iou=0.418, train acc=0.795, val acc=0.816


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 13: train loss=0.046, val loss=0.063, train iou=0.476, val iou=0.362, train acc=0.779, val acc=0.767


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 14: train loss=0.049, val loss=0.070, train iou=0.455, val iou=0.455, train acc=0.795, val acc=0.759


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 15: train loss=0.045, val loss=0.088, train iou=0.476, val iou=0.415, train acc=0.810, val acc=0.832


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 16: train loss=0.044, val loss=0.065, train iou=0.487, val iou=0.428, train acc=0.815, val acc=0.833


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 17: train loss=0.045, val loss=0.055, train iou=0.492, val iou=0.485, train acc=0.815, val acc=0.817


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 18: train loss=0.041, val loss=0.054, train iou=0.508, val iou=0.502, train acc=0.818, val acc=0.832


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 19: train loss=0.043, val loss=0.106, train iou=0.503, val iou=0.428, train acc=0.815, val acc=0.732


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 20: train loss=0.045, val loss=0.077, train iou=0.468, val iou=0.501, train acc=0.805, val acc=0.824


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 21: train loss=0.042, val loss=0.093, train iou=0.493, val iou=0.451, train acc=0.817, val acc=0.744


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 22: train loss=0.047, val loss=0.047, train iou=0.467, val iou=0.460, train acc=0.790, val acc=0.791


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 23: train loss=0.042, val loss=0.055, train iou=0.500, val iou=0.424, train acc=0.806, val acc=0.768


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 24: train loss=0.043, val loss=0.060, train iou=0.511, val iou=0.502, train acc=0.821, val acc=0.820


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 25: train loss=0.042, val loss=0.056, train iou=0.502, val iou=0.474, train acc=0.818, val acc=0.795


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 26: train loss=0.037, val loss=0.046, train iou=0.527, val iou=0.546, train acc=0.834, val acc=0.827


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 27: train loss=0.041, val loss=0.051, train iou=0.519, val iou=0.538, train acc=0.838, val acc=0.843


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 28: train loss=0.040, val loss=0.048, train iou=0.520, val iou=0.517, train acc=0.829, val acc=0.766


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 29: train loss=0.039, val loss=0.076, train iou=0.544, val iou=0.357, train acc=0.848, val acc=0.691


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 30: train loss=0.039, val loss=0.054, train iou=0.521, val iou=0.479, train acc=0.836, val acc=0.783


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 31: train loss=0.036, val loss=0.050, train iou=0.540, val iou=0.489, train acc=0.853, val acc=0.808


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 32: train loss=0.035, val loss=0.057, train iou=0.543, val iou=0.508, train acc=0.849, val acc=0.792


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 33: train loss=0.036, val loss=0.085, train iou=0.541, val iou=0.543, train acc=0.833, val acc=0.840


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 34: train loss=0.037, val loss=0.078, train iou=0.549, val iou=0.473, train acc=0.842, val acc=0.815


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 35: train loss=0.045, val loss=0.166, train iou=0.520, val iou=0.099, train acc=0.836, val acc=0.583


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 36: train loss=0.048, val loss=0.073, train iou=0.494, val iou=0.398, train acc=0.797, val acc=0.756


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 37: train loss=0.040, val loss=0.067, train iou=0.541, val iou=0.347, train acc=0.838, val acc=0.759


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 38: train loss=0.037, val loss=0.060, train iou=0.536, val iou=0.457, train acc=0.848, val acc=0.823


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 39: train loss=0.034, val loss=0.051, train iou=0.555, val iou=0.500, train acc=0.870, val acc=0.821


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 40: train loss=0.034, val loss=0.050, train iou=0.574, val iou=0.495, train acc=0.871, val acc=0.853


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 41: train loss=0.033, val loss=0.057, train iou=0.564, val iou=0.448, train acc=0.876, val acc=0.833


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 42: train loss=0.032, val loss=0.055, train iou=0.570, val iou=0.449, train acc=0.887, val acc=0.843


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 43: train loss=0.034, val loss=0.043, train iou=0.560, val iou=0.493, train acc=0.883, val acc=0.874


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 44: train loss=0.031, val loss=0.056, train iou=0.572, val iou=0.481, train acc=0.891, val acc=0.866


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 45: train loss=0.030, val loss=0.051, train iou=0.579, val iou=0.461, train acc=0.885, val acc=0.842


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 46: train loss=0.029, val loss=0.059, train iou=0.582, val iou=0.501, train acc=0.891, val acc=0.835


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 47: train loss=0.030, val loss=0.060, train iou=0.582, val iou=0.445, train acc=0.894, val acc=0.801


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 48: train loss=0.030, val loss=0.059, train iou=0.571, val iou=0.498, train acc=0.901, val acc=0.883


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 49: train loss=0.028, val loss=0.059, train iou=0.592, val iou=0.434, train acc=0.895, val acc=0.848


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 50: train loss=0.029, val loss=0.049, train iou=0.587, val iou=0.494, train acc=0.899, val acc=0.872


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 51: train loss=0.031, val loss=0.082, train iou=0.584, val iou=0.245, train acc=0.889, val acc=0.754


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 52: train loss=0.030, val loss=0.050, train iou=0.572, val iou=0.455, train acc=0.900, val acc=0.807


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 53: train loss=0.029, val loss=0.058, train iou=0.581, val iou=0.440, train acc=0.904, val acc=0.796


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 54: train loss=0.031, val loss=0.090, train iou=0.585, val iou=0.399, train acc=0.902, val acc=0.901


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 55: train loss=0.031, val loss=0.071, train iou=0.593, val iou=0.498, train acc=0.882, val acc=0.833


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 56: train loss=0.028, val loss=0.053, train iou=0.609, val iou=0.502, train acc=0.904, val acc=0.853


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 57: train loss=0.026, val loss=0.058, train iou=0.598, val iou=0.470, train acc=0.909, val acc=0.855


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 58: train loss=0.026, val loss=0.052, train iou=0.605, val iou=0.453, train acc=0.909, val acc=0.871


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 59: train loss=0.027, val loss=0.047, train iou=0.604, val iou=0.478, train acc=0.913, val acc=0.892


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 60: train loss=0.026, val loss=0.058, train iou=0.611, val iou=0.428, train acc=0.912, val acc=0.845


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 61: train loss=0.028, val loss=0.054, train iou=0.606, val iou=0.485, train acc=0.917, val acc=0.844


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 62: train loss=0.025, val loss=0.056, train iou=0.609, val iou=0.486, train acc=0.913, val acc=0.850


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 63: train loss=0.026, val loss=0.083, train iou=0.606, val iou=0.404, train acc=0.916, val acc=0.799


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 64: train loss=0.027, val loss=0.065, train iou=0.602, val iou=0.422, train acc=0.924, val acc=0.827


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 65: train loss=0.027, val loss=0.063, train iou=0.597, val iou=0.411, train acc=0.921, val acc=0.864


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 66: train loss=0.025, val loss=0.064, train iou=0.619, val iou=0.470, train acc=0.926, val acc=0.874


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 67: train loss=0.025, val loss=0.074, train iou=0.616, val iou=0.418, train acc=0.922, val acc=0.862


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 68: train loss=0.025, val loss=0.052, train iou=0.615, val iou=0.482, train acc=0.923, val acc=0.887


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 69: train loss=0.025, val loss=0.060, train iou=0.611, val iou=0.462, train acc=0.925, val acc=0.834


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 70: train loss=0.025, val loss=0.063, train iou=0.617, val iou=0.423, train acc=0.928, val acc=0.876


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 71: train loss=0.024, val loss=0.106, train iou=0.625, val iou=0.475, train acc=0.921, val acc=0.851


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 72: train loss=0.025, val loss=0.046, train iou=0.612, val iou=0.473, train acc=0.929, val acc=0.910


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 73: train loss=0.024, val loss=0.054, train iou=0.618, val iou=0.479, train acc=0.917, val acc=0.880


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 74: train loss=0.024, val loss=0.094, train iou=0.610, val iou=0.444, train acc=0.932, val acc=0.869


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 75: train loss=0.023, val loss=0.118, train iou=0.621, val iou=0.447, train acc=0.927, val acc=0.873


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 76: train loss=0.023, val loss=0.053, train iou=0.626, val iou=0.459, train acc=0.931, val acc=0.892


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 77: train loss=0.023, val loss=0.074, train iou=0.619, val iou=0.441, train acc=0.929, val acc=0.864


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 78: train loss=0.023, val loss=0.072, train iou=0.624, val iou=0.444, train acc=0.937, val acc=0.873


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 79: train loss=0.023, val loss=0.144, train iou=0.628, val iou=0.443, train acc=0.931, val acc=0.851


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 80: train loss=0.023, val loss=0.072, train iou=0.626, val iou=0.443, train acc=0.930, val acc=0.877


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 81: train loss=0.023, val loss=0.066, train iou=0.633, val iou=0.442, train acc=0.934, val acc=0.864


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 82: train loss=0.023, val loss=0.074, train iou=0.627, val iou=0.459, train acc=0.928, val acc=0.850


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 83: train loss=0.022, val loss=0.061, train iou=0.632, val iou=0.450, train acc=0.933, val acc=0.870


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 84: train loss=0.023, val loss=0.111, train iou=0.627, val iou=0.419, train acc=0.932, val acc=0.867


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 85: train loss=0.023, val loss=0.057, train iou=0.636, val iou=0.448, train acc=0.932, val acc=0.859


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 86: train loss=0.023, val loss=0.064, train iou=0.633, val iou=0.444, train acc=0.932, val acc=0.841


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 87: train loss=0.023, val loss=0.077, train iou=0.630, val iou=0.452, train acc=0.933, val acc=0.831


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 88: train loss=0.023, val loss=0.054, train iou=0.628, val iou=0.457, train acc=0.930, val acc=0.845


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 89: train loss=0.023, val loss=0.148, train iou=0.628, val iou=0.461, train acc=0.935, val acc=0.816


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 90: train loss=0.023, val loss=0.057, train iou=0.629, val iou=0.440, train acc=0.935, val acc=0.890


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 91: train loss=0.023, val loss=0.057, train iou=0.632, val iou=0.451, train acc=0.934, val acc=0.851


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 92: train loss=0.023, val loss=0.065, train iou=0.629, val iou=0.439, train acc=0.933, val acc=0.870


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 93: train loss=0.023, val loss=0.066, train iou=0.629, val iou=0.442, train acc=0.926, val acc=0.841


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 94: train loss=0.023, val loss=0.069, train iou=0.628, val iou=0.454, train acc=0.938, val acc=0.867


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 95: train loss=0.023, val loss=0.061, train iou=0.632, val iou=0.421, train acc=0.926, val acc=0.853


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 96: train loss=0.022, val loss=0.071, train iou=0.631, val iou=0.444, train acc=0.932, val acc=0.853


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 97: train loss=0.022, val loss=0.099, train iou=0.631, val iou=0.451, train acc=0.935, val acc=0.857


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 98: train loss=0.023, val loss=0.064, train iou=0.630, val iou=0.441, train acc=0.937, val acc=0.859


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 99: train loss=0.022, val loss=0.061, train iou=0.636, val iou=0.457, train acc=0.931, val acc=0.852


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 100: train loss=0.022, val loss=0.078, train iou=0.636, val iou=0.450, train acc=0.938, val acc=0.854


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 101: train loss=0.022, val loss=0.058, train iou=0.628, val iou=0.450, train acc=0.947, val acc=0.869


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 102: train loss=0.022, val loss=0.107, train iou=0.631, val iou=0.432, train acc=0.933, val acc=0.885


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 103: train loss=0.022, val loss=0.059, train iou=0.635, val iou=0.440, train acc=0.935, val acc=0.869


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 104: train loss=0.023, val loss=0.074, train iou=0.633, val iou=0.436, train acc=0.939, val acc=0.854


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 105: train loss=0.022, val loss=0.056, train iou=0.635, val iou=0.456, train acc=0.928, val acc=0.872


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 107: train loss=0.023, val loss=0.075, train iou=0.628, val iou=0.459, train acc=0.941, val acc=0.849


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 108: train loss=0.022, val loss=0.085, train iou=0.638, val iou=0.419, train acc=0.934, val acc=0.847


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 109: train loss=0.023, val loss=0.082, train iou=0.632, val iou=0.422, train acc=0.938, val acc=0.831


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

val Loss:   0%|          | 0/8 [00:00<?, ?it/s]

Epoch 110: train loss=0.023, val loss=0.071, train iou=0.630, val iou=0.442, train acc=0.944, val acc=0.869


Train Loss:   0%|          | 0/34 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
#deeplabv3+ resnet101
import sys
sys.path.insert(0,'./IndustrialSmokePlumeDetection/segmentation')
sys.path.insert(0,'./pytorch-deeplab-xception')

import numpy as np
import random
import torch
from torch import nn, optim
from tqdm.autonotebook import tqdm
from sklearn.metrics import accuracy_score
from torch.utils.data import DataLoader, random_split, SubsetRandomSampler
from torch.utils.tensorboard import SummaryWriter
import argparse
from sklearn.metrics import jaccard_score

from modeling.deeplab import *
from data import create_dataset

import warnings
warnings.filterwarnings('ignore')



#initialize model and device
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model = DeepLab(num_classes=1, backbone='resnet')
model.to(device)

print('running on...', device)


# default hyper-parameters (also encoded in the tensorboard run name)
md = 'dlabv3res'   # model tag
ep = 300           # number of epochs
bs = 100           # batch size
lr = 0.1           # SGD learning rate
mo = 0.7           # SGD momentum


# tensorboard writer; the log directory is named after the run settings
run_name = "{}_ep{:0d}_lr{:.0e}_bs{:03d}_mo{:.1f}/".format(
    md, ep, lr, bs, mo)
writer = SummaryWriter('../runs/' + run_name)

# loss function: binary cross-entropy applied to raw logits
loss = nn.BCEWithLogitsLoss()

# optimizer: SGD with momentum over all model parameters
opt = optim.SGD(model.parameters(), lr=lr, momentum=mo)

# scheduler: halve the learning rate when the monitored value (passed to
# scheduler.step) stops decreasing, never going below 1e-6
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    opt,
    mode='min',
    factor=0.5,
    threshold=1e-4,
    min_lr=1e-6,
)



# load the complete dataset once; the samplers below partition it
data_all = create_dataset(
    datadir='../data',  # /path/to/image/data/
    seglabeldir='../labels',  # /path/to/segmentation/labels/for/training/
    mult=1,
)

# shuffle all sample indices with a fixed seed so the split is reproducible
len_all = len(data_all)
indices = list(range(len_all))
random.seed(9001)
random.shuffle(indices)

# split points: first 15% -> test, next 15% -> validation, rest -> training
split_1 = (15 * len_all) // 100
split_2 = 2 * split_1

test_sampler = SubsetRandomSampler(indices[:split_1])
val_sampler = SubsetRandomSampler(indices[split_1:split_2])
train_sampler = SubsetRandomSampler(indices[split_2:])

# both loaders read from the same dataset and differ only in their sampler
loader_kwargs = dict(batch_size=bs, num_workers=6, pin_memory=True)
train_dl = DataLoader(data_all, sampler=train_sampler, **loader_kwargs)
val_dl = DataLoader(data_all, sampler=val_sampler, **loader_kwargs)


# run training

def _batch_metrics(logits, target):
    """Derive segmentation metrics for one batch from raw network output.

    A pixel is counted as predicted smoke when its logit is >= 0, which is
    the 0.5 probability threshold of a sigmoid output.

    Args:
        logits: network output for the batch; indexed as [image][channel]
            and reduced over axes (1, 2, 3), i.e. a 4-D tensor.
        target: ground-truth masks for the batch; reduced over axes (1, 2),
            i.e. a 3-D tensor.

    Returns:
        A tuple ``(ious, acc, arearatios)`` where ``ious`` is a list with
        one IoU per image for which both prediction and label are
        non-empty, ``acc`` is the image-wise accuracy of "contains smoke"
        for the batch, and ``arearatios`` is a list with one
        predicted/true area ratio per image.
    """
    # move everything to host memory once instead of once per metric
    target_np = target.detach().cpu().numpy()
    pred_np = np.zeros(logits.shape)
    pred_np[logits.detach().cpu().numpy() >= 0] = 1

    # IoU values: only images where both prediction and label contain
    # smoke contribute, so the score is computed for those images only
    ious = []
    for n in range(target_np.shape[0]):
        if np.sum(pred_np[n][0]) != 0 and np.sum(target_np[n]) != 0:
            ious.append(jaccard_score(target_np[n].flatten(),
                                      pred_np[n][0].flatten()))

    # smoke areas per image
    area_pred = np.sum(pred_np, axis=(1, 2, 3))
    area_true = np.sum(target_np, axis=(1, 2))

    # scalar binary labels on a per-image basis -> image-wise accuracy
    y_bin = np.array(area_true != 0).astype(int)
    pred_bin = np.array(area_pred != 0).astype(int)
    acc = accuracy_score(y_bin, pred_bin)

    # smoke area ratios; images without true smoke count as a perfect
    # ratio (1) when nothing was predicted and as 0 otherwise
    arearatios = []
    for a_pred, a_true in zip(area_pred, area_true):
        if a_pred == 0 and a_true == 0:
            arearatios.append(1)
        elif a_true == 0:
            arearatios.append(0)
        else:
            arearatios.append(a_pred / a_true)

    return ious, acc, arearatios


def _mean_or_nan(values):
    """Return the mean of *values*, or NaN when *values* is empty."""
    return np.average(values) if len(values) > 0 else float('nan')


# a checkpoint is written every `ckpt_every` epochs (1 = every epoch)
ckpt_every = 1

try:
    for epoch in range(ep):

        model.train()

        train_loss_total = 0
        train_ious = []
        train_acc_total = 0
        train_arearatios = []
        progress = tqdm(enumerate(train_dl), desc="Train Loss: ",
                        total=len(train_dl))
        for i, batch in progress:
            x = batch['img'].float().to(device)
            y = batch['fpt'].float().to(device)

            output = model(x)

            # derive IoU, image-wise accuracy and area ratios
            ious, acc, arearatios = _batch_metrics(output, y)
            train_ious.extend(ious)
            train_acc_total += acc
            train_arearatios.extend(arearatios)

            # derive loss (labels get a channel axis to match the output)
            loss_epoch = loss(output, y.unsqueeze(dim=1))
            train_loss_total += loss_epoch.item()
            progress.set_description("Train Loss: {:.4f}".format(
                train_loss_total/(i+1)))

            # learning
            opt.zero_grad()
            loss_epoch.backward()
            opt.step()

        # the IoU list is filtered and may be empty, hence the guarded mean
        train_iou = _mean_or_nan(train_ious)

        # logging
        writer.add_scalar("training loss", train_loss_total/(i+1), epoch)
        writer.add_scalar("training iou", train_iou, epoch)
        writer.add_scalar("training acc", train_acc_total/(i+1), epoch)
        writer.add_scalar('training arearatio mean',
                          np.average(train_arearatios), epoch)
        writer.add_scalar('training arearatio std',
                          np.std(train_arearatios), epoch)
        writer.add_scalar('learning_rate', opt.param_groups[0]['lr'], epoch)

        torch.cuda.empty_cache()

        # evaluation
        model.eval()
        val_loss_total = 0
        val_ious = []
        val_acc_total = 0
        val_arearatios = []
        progress = tqdm(enumerate(val_dl), desc="val Loss: ",
                        total=len(val_dl))
        # no gradients are needed for validation; skipping autograd
        # bookkeeping saves memory and time
        with torch.no_grad():
            for j, batch in progress:
                x = batch['img'].float().to(device)
                y = batch['fpt'].float().to(device)

                output = model(x)

                # derive loss
                loss_epoch = loss(output, y.unsqueeze(dim=1))
                val_loss_total += loss_epoch.item()

                # derive IoU, image-wise accuracy and area ratios
                ious, acc, arearatios = _batch_metrics(output, y)
                val_ious.extend(ious)
                val_acc_total += acc
                val_arearatios.extend(arearatios)

                progress.set_description("val Loss: {:.4f}".format(
                    val_loss_total/(j+1)))

        val_iou = _mean_or_nan(val_ious)

        # logging
        writer.add_scalar("val loss", val_loss_total/(j+1), epoch)
        writer.add_scalar("val iou", val_iou, epoch)
        writer.add_scalar("val acc", val_acc_total/(j+1), epoch)
        writer.add_scalar('val arearatio mean',
                          np.average(val_arearatios), epoch)
        writer.add_scalar('val arearatio std',
                          np.std(val_arearatios), epoch)

        print(("Epoch {:d}: train loss={:.3f}, val loss={:.3f}, "
               "train iou={:.3f}, val iou={:.3f}, "
               "train acc={:.3f}, val acc={:.3f}").format(
                   epoch+1, train_loss_total/(i+1), val_loss_total/(j+1),
                   train_iou, val_iou,
                   train_acc_total/(i+1), val_acc_total/(j+1)))

        # save model checkpoint
        if epoch % ckpt_every == 0:
            torch.save(
                model.state_dict(),
                '../models/{}_ep{:0d}_lr{:.0e}_bs{:02d}_mo{:.1f}_{:03d}.model'.format(
                    md, ep, lr, bs, mo, epoch))

        writer.flush()
        # the plateau scheduler monitors the mean validation loss
        scheduler.step(val_loss_total/(j+1))
        torch.cuda.empty_cache()
finally:
    # close the tensorboard writer even if training is interrupted
    writer.close()

running on... cuda:0


Train Loss:   0%|          | 0/20 [00:00<?, ?it/s]