<a href="https://colab.research.google.com/github/alexjercan/unsupervised-segmentation/blob/master/tutorial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install matplotlib==3.3.3 albumentations==0.5.2

Collecting matplotlib==3.3.3
[?25l  Downloading https://files.pythonhosted.org/packages/30/f2/10c822cb0ca5ebec58bd1892187bc3e3db64a867ac26531c6204663fc218/matplotlib-3.3.3-cp37-cp37m-manylinux1_x86_64.whl (11.6MB)
[K     |████████████████████████████████| 11.6MB 242kB/s 
[?25hCollecting albumentations==0.5.2
[?25l  Downloading https://files.pythonhosted.org/packages/03/58/63fb1d742dc42d9ba2800ea741de1f2bc6bb05548d8724aa84794042eaf2/albumentations-0.5.2-py3-none-any.whl (72kB)
[K     |████████████████████████████████| 81kB 13.4MB/s 
Collecting opencv-python-headless>=4.1.1
[?25l  Downloading https://files.pythonhosted.org/packages/c3/35/bfc76533f2274cd3da4e2cf255cd13ab9d7f6fc8990c06911e7f8fcc2130/opencv_python_headless-4.5.2.54-cp37-cp37m-manylinux2014_x86_64.whl (38.2MB)
[K     |████████████████████████████████| 38.2MB 78kB/s 
[?25hCollecting imgaug>=0.4.0
[?25l  Downloading https://files.pythonhosted.org/packages/66/b1/af3142c4a85cba6da9f4ebb5ff4e21e2616309552caca5e8acefe9840

In [1]:

!git clone https://github.com/alexjercan/unsupervised-segmentation.git
%cd unsupervised-segmentation

import torch
from IPython.display import clear_output

clear_output()
print('Setup complete. Using torch %s %s' % (torch.__version__, torch.cuda.get_device_properties(0) if torch.cuda.is_available() else 'CPU'))

Setup complete. Using torch 1.9.0+cu102 _CudaDeviceProperties(name='Tesla T4', major=7, minor=5, total_memory=15109MB, multi_processor_count=40)


In [None]:
# Download model
# The release checkpoint is saved as normal.pth in the current directory.
model_url = 'https://github.com/alexjercan/unsupervised-segmentation/releases/download/v1.0/normal.pth'
torch.hub.download_url_to_file(model_url, 'normal.pth')

HBox(children=(FloatProgress(value=0.0, max=302327126.0), HTML(value='')))




In [2]:
# Download dataset
torch.hub.download_url_to_file('https://github.com/alexjercan/unsupervised-segmentation/releases/download/v1.0/bdataset_scene.zip', 'tmp.zip')
!unzip -q tmp.zip -d ../ && rm tmp.zip

HBox(children=(FloatProgress(value=0.0, max=21220838.0), HTML(value='')))




In [3]:
# Run the repository's get_bdataset.sh script with bash.
# NOTE(review): the recorded run transferred about 7.5 GB and took 577 seconds.
!bash ./get_bdataset.sh

Downloading ...
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   408    0   408    0     0   1441      0 --:--:-- --:--:-- --:--:--  1441
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100 7494M    0 7494M    0     0  42.7M      0 --:--:--  0:02:55 --:--:-- 26.4M
Completed in 577 seconds


In [9]:
!git pull
!python dataset.py
start = torch.cuda.Event(enable_timing=True)
end = torch.cuda.Event(enable_timing=True)
start.record()
!python model.py
end.record()
torch.cuda.synchronize()
print(start.elapsed_time(end))

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)
model ok
6129.1357421875


In [1]:
# Change into the repository checkout; presumably required so the
# project-local imports in the next cell resolve (confirm when restructuring).
%cd unsupervised-segmentation/

/content/unsupervised-segmentation


In [2]:
import os
import re
import cv2
import torch
import torch.optim
import albumentations as A
import my_albumentations as M

from datetime import datetime as dt
from albumentations.pytorch.transforms import ToTensorV2
from config import DEVICE
from model import Model, LossFunction
from general import init_weights, load_checkpoint, save_checkpoint
from dataset import create_dataloader, create_dataloader_nyuv2, LoadImages
from metrics import MetricFunction, print_single_error, MetricFunctionNYUv2
from detect import generatePredictions
from train import train_one_epoch
from test import run_test, run_test_nyuv2
from util import save_predictions, plot_predictions

# Side length (pixels) used by the crop / resize / pad transforms below.
IMAGE_SIZE = 256
# Dataset root and the JSON files handed to create_dataloader for each split.
DATASET_ROOT = "../bdataset"
TRAIN_JSON_PATH = "train.json"
TEST_JSON_PATH = "test.json"
# Entries passed to LoadImages in the detection cell: an input image, its depth
# file and an output path per sample.
IMAGES = [
          {"image": "data/0000.png", "depth": "data/0000.exr", "output": "data/out/0000.png"},
          {"image": "data/0001.png", "depth": "data/0001.exr", "output": "data/out/0001.png"},
          {"image": "data/0002.png", "depth": "data/0002.exr", "output": "data/out/0002.png"}
]
# Dataloader settings (batch_size / workers arguments); BATCH_SIZE is also
# passed to the metric function constructors.
BATCH_SIZE = 8
WORKERS = 8

# Optimizer hyperparameters (lr / betas / eps / weight_decay arguments).
LEARNING_RATE = 1e-5
BETAS = [0.9, 0.999]
EPS = 1e-8
WEIGHT_DECAY = 1e-4

# MultiStepLR schedule: milestones and gamma arguments.
MILESTONES = [10]
GAMMA = 0.1

# Upper bound of the epoch range in the training loops.
NUM_EPOCHS = 20
# Base directory under which timestamped checkpoint folders are created.
OUT_PATH ="./runs"
# When True, the train / test cells call load_checkpoint with the files below.
LOAD_TRAIN_MODEL = False
LOAD_TEST_MODEL = False
CHECKPOINT_TRAIN_FILE = "normal.pth"
CHECKPOINT_TEST_FILE = "normal.pth"

# Let cuDNN benchmark and pick convolution algorithms for the input sizes seen.
torch.backends.cudnn.benchmark = True

In [3]:
# Additional targets routed through the train/test pipelines with the image.
shared_targets = {
    'normal': 'normal',
    'depth': 'depth',
}

# Randomly chosen augmentation groups; each group is applied with its own p.
blur_group = A.OneOf(
    [
        A.MotionBlur(p=0.2),
        A.MedianBlur(blur_limit=3, p=0.1),
        A.Blur(blur_limit=3, p=0.1),
    ],
    p=0.2,
)
distortion_group = A.OneOf(
    [
        M.MyOpticalDistortion(p=0.3),
        M.MyGridDistortion(p=0.1),
    ],
    p=0.2,
)
appearance_group = A.OneOf(
    [
        A.IAASharpen(),
        A.IAAEmboss(),
        A.RandomBrightnessContrast(),
    ],
    p=0.3,
)

# Training pipeline: crop and flips, the random groups, then normalise and convert to tensors.
train_steps = [
    M.MyRandomResizedCrop(width=IMAGE_SIZE, height=IMAGE_SIZE),
    M.MyHorizontalFlip(p=0.5),
    M.MyVerticalFlip(p=0.1),
    blur_group,
    distortion_group,
    appearance_group,
    A.Normalize(),
    M.MyToTensorV2(),
]
train_transform = A.Compose(train_steps, additional_targets=dict(shared_targets))

# Evaluation pipeline: normalisation and tensor conversion only.
test_steps = [A.Normalize(), M.MyToTensorV2()]
test_transform = A.Compose(test_steps, additional_targets=dict(shared_targets))

# Detection pipeline: resize the longest side to IMAGE_SIZE and zero-pad to a square.
detect_steps = [
    M.MyLongestMaxSize(max_size=IMAGE_SIZE),
    M.MyPadIfNeeded(min_height=IMAGE_SIZE, min_width=IMAGE_SIZE, border_mode=cv2.BORDER_CONSTANT, value=0),
    A.Normalize(),
    M.MyToTensorV2(),
]
detect_transform = A.Compose(detect_steps, additional_targets={'depth': 'depth'})

# Options shared by both loaders; only the split, transform and shuffling differ.
loader_common = dict(batch_size=BATCH_SIZE, workers=WORKERS, pin_memory=True)

_, train_dataloader = create_dataloader(
    DATASET_ROOT, TRAIN_JSON_PATH, transform=train_transform, shuffle=True, **loader_common
)

_, test_dataloader = create_dataloader(
    DATASET_ROOT, TEST_JSON_PATH, transform=test_transform, shuffle=False, **loader_common
)

In [None]:
# Build and initialise the network, then move it to the target device BEFORE
# creating the optimizer: torch.optim recommends that optimized parameters
# already live on their final device when the optimizer is constructed.
model = Model(num_classes=10, num_layers=3)
model.apply(init_weights)
model = model.to(DEVICE)

# Only parameters that require gradients are handed to Adam.
solver = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                          lr=LEARNING_RATE, betas=BETAS,
                          eps=EPS, weight_decay=WEIGHT_DECAY)
# Multiply the learning rate by GAMMA at each epoch listed in MILESTONES.
lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(solver, milestones=MILESTONES, gamma=GAMMA)
loss_fn = LossFunction()

In [None]:
def get_n_params(model):
    """Return the total number of scalar elements across all parameters of ``model``.

    Args:
        model: an object exposing ``parameters()`` that yields tensors
            (e.g. a ``torch.nn.Module``).

    Returns:
        int: the sum of ``numel()`` over every parameter; 0 when there are none.
    """
    return sum(p.numel() for p in model.parameters())

# Report the total parameter count of the current model.
print(get_n_params(model))

75530090


In [None]:
# Start at epoch 0, or at the epoch returned with the checkpoint when resuming.
first_epoch = 0
if LOAD_TRAIN_MODEL:
    first_epoch, model = load_checkpoint(model, CHECKPOINT_TRAIN_FILE, DEVICE)
epoch_idx = first_epoch

model.train()
for epoch_idx in range(first_epoch, NUM_EPOCHS):
    # A new metric accumulator is created each epoch; loss_fn is reused.
    metric_fn = MetricFunction(BATCH_SIZE)
    train_one_epoch(model, train_dataloader, loss_fn, metric_fn, solver, epoch_idx)
    epoch_loss = loss_fn.show()
    epoch_metrics = metric_fn.show()
    print_single_error(epoch_idx, epoch_loss, epoch_metrics)
    lr_scheduler.step()

In [None]:
# Optionally replace the in-memory model with the test checkpoint.
if LOAD_TEST_MODEL:
    loaded = load_checkpoint(model, CHECKPOINT_TEST_FILE, DEVICE)
    epoch_idx, model = loaded

# Evaluate on the test split and print the accumulated loss and metrics.
model.eval()
metric_fn = MetricFunction(BATCH_SIZE)
run_test(model, test_dataloader, loss_fn, metric_fn)
test_loss = loss_fn.show()
test_metrics = metric_fn.show()
print_single_error(epoch_idx, test_loss, test_metrics)

In [None]:
# Optional: uncomment to run detection on the saved test checkpoint instead of
# the model currently in memory.
# if LOAD_TEST_MODEL:
#     epoch_idx, model = load_checkpoint(model, CHECKPOINT_TEST_FILE, DEVICE)

# At this point `model` is the `Model` instance built above, so use the
# `generatePredictions` helper imported at the top of the notebook.
# `generatePredictions_fcn` is only imported in a later cell and would raise
# NameError on a fresh Restart & Run All.
model.eval()
images = LoadImages(IMAGES, transform=detect_transform)
for img, predictions, depths, path in generatePredictions(model, images):
    # Show and save each sample; the plotting/saving helpers take lists.
    plot_predictions([img], predictions, depths, [path])
    save_predictions([img], predictions, depths, [path])

In [None]:
# Timestamped run folder: every run of non-alphanumeric characters in the ISO
# timestamp is collapsed to a single "-".
run_stamp = re.sub("[^0-9a-zA-Z]+", "-", dt.now().isoformat())
output_dir = os.path.join(OUT_PATH, run_stamp)
save_checkpoint(epoch_idx, model, output_dir)

NYU v2 Dataset

In [None]:
# NYU v2 evaluation loader: same test transform and loader settings, no shuffling.
nyuv2_loader_options = dict(
    batch_size=BATCH_SIZE,
    transform=test_transform,
    workers=WORKERS,
    pin_memory=True,
    shuffle=False,
)
_, test_dataloader_nyuv2 = create_dataloader_nyuv2(**nyuv2_loader_options)

In [None]:
# Optionally replace the in-memory model with the test checkpoint.
if LOAD_TEST_MODEL:
    loaded = load_checkpoint(model, CHECKPOINT_TEST_FILE, DEVICE)
    epoch_idx, model = loaded

# Evaluate on NYU v2 with its dedicated metric function.
model.eval()
metric_fn = MetricFunctionNYUv2(BATCH_SIZE)
run_test_nyuv2(model, test_dataloader_nyuv2, loss_fn, metric_fn)
nyuv2_loss = loss_fn.show()
nyuv2_metrics = metric_fn.show()
print_single_error(epoch_idx, nyuv2_loss, nyuv2_metrics)

  img = torch.from_numpy(np.array(pic, np.int32, copy=False))
  img = torch.from_numpy(np.array(pic, np.int32, copy=False))
100%|██████████| 100/100 [00:42<00:00,  2.38it/s]

eval_avg_error
Epoch: 20, loss=(total:3.4694 c:0.8977 s:2.5717)
IOU=0.0397	P=0.1985	R=0.0016	F1=0.0031







Original (OG) model from Kanezaki's GitHub repository

In [None]:
from original import OgModel, OgLossFunction, og_train_one_epoch, og_run_test, og_run_test_nyuv2

In [None]:
start = torch.cuda.Event(enable_timing=True)
end = torch.cuda.Event(enable_timing=True)
start.record()
!python original.py
end.record()
torch.cuda.synchronize()
print(start.elapsed_time(end))

model ok
5622.5693359375


In [None]:
# Build and initialise the original model, then move it to the target device
# BEFORE creating the optimizer: torch.optim recommends that optimized
# parameters already live on their final device when the optimizer is built.
model = OgModel(num_classes=10)
model.apply(init_weights)
model = model.to(DEVICE)

# Only parameters that require gradients are handed to SGD.
solver = torch.optim.SGD(filter(lambda p: p.requires_grad, model.parameters()),
                         lr=LEARNING_RATE, momentum=0.9,
                         dampening=0.1, weight_decay=WEIGHT_DECAY)
# Multiply the learning rate by GAMMA at each epoch listed in MILESTONES.
lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(solver, milestones=MILESTONES, gamma=GAMMA)
loss_fn = OgLossFunction()

In [None]:
# Start at epoch 0, or at the epoch returned with the checkpoint when resuming.
first_epoch = 0
if LOAD_TRAIN_MODEL:
    first_epoch, model = load_checkpoint(model, CHECKPOINT_TRAIN_FILE, DEVICE)
epoch_idx = first_epoch

model.train()
for epoch_idx in range(first_epoch, NUM_EPOCHS):
    # A new metric accumulator is created each epoch; loss_fn is reused.
    metric_fn = MetricFunction(BATCH_SIZE)
    og_train_one_epoch(model, train_dataloader, loss_fn, metric_fn, solver, epoch_idx)
    epoch_loss = loss_fn.show()
    epoch_metrics = metric_fn.show()
    print_single_error(epoch_idx, epoch_loss, epoch_metrics)
    lr_scheduler.step()

In [None]:
# Optionally replace the in-memory model with the test checkpoint.
if LOAD_TEST_MODEL:
    loaded = load_checkpoint(model, CHECKPOINT_TEST_FILE, DEVICE)
    epoch_idx, model = loaded

# Evaluate the original model on the test split.
model.eval()
metric_fn = MetricFunction(BATCH_SIZE)
og_run_test(model, test_dataloader, loss_fn, metric_fn)
test_loss = loss_fn.show()
test_metrics = metric_fn.show()
print_single_error(epoch_idx, test_loss, test_metrics)

In [None]:
# Optionally replace the in-memory model with the test checkpoint.
if LOAD_TEST_MODEL:
    loaded = load_checkpoint(model, CHECKPOINT_TEST_FILE, DEVICE)
    epoch_idx, model = loaded

# Evaluate the original model on NYU v2 with its dedicated metric function.
model.eval()
metric_fn = MetricFunctionNYUv2(BATCH_SIZE)
og_run_test_nyuv2(model, test_dataloader_nyuv2, loss_fn, metric_fn)
nyuv2_loss = loss_fn.show()
nyuv2_metrics = metric_fn.show()
print_single_error(epoch_idx, nyuv2_loss, nyuv2_metrics)

  img = torch.from_numpy(np.array(pic, np.int32, copy=False))
  img = torch.from_numpy(np.array(pic, np.int32, copy=False))
100%|██████████| 100/100 [00:35<00:00,  2.81it/s]

eval_avg_error
Epoch: 20, loss=(total:2.9240 s:1.0791 c:1.8449)
IOU=0.0396	P=0.1761	R=0.0014	F1=0.0027







In [None]:
from general import tensors_to_device, generate_layers
from tqdm import tqdm
from torchvision.models.segmentation import fcn_resnet50
from torchvision.models.segmentation.fcn import FCNHead
from detect import generatePredictions_fcn
# Layer count passed to generate_layers inside train_fn's forward helper
# (read there as a module-level name).
num_layers = 3

In [None]:
def train_fn(loader, model, optimizer, loss_fn, scaler):
    """Run one training pass over ``loader`` under CUDA autocast with gradient scaling.

    Args:
        loader: iterable of batches; each batch is unpacked by
            ``tensors_to_device`` into ``(imgs, normals, depths)``.
        model: network whose forward output is a mapping with an ``'out'`` entry.
        optimizer: optimizer stepped through ``scaler``.
        loss_fn: callable invoked as ``loss_fn(predictions, (normals, depths))``.
        scaler: gradient scaler providing ``scale``, ``step`` and ``update``.

    Returns:
        None. Progress and the latest batch loss are shown on a tqdm bar.
    """
    def runmodel(model, imgs, depths):
        # num_layers is read from module scope.
        layers = generate_layers(imgs, depths, num_layers)
        # One forward pass per generated layer; results are stacked on a new last axis.
        outputs = [model(layer)['out'] for layer in layers]
        return torch.stack(outputs, dim=-1)

    loop = tqdm(loader, position=0, leave=True)
    for tensors in loop:
        imgs, normals, depths = tensors_to_device(tensors, DEVICE)

        # forward
        with torch.cuda.amp.autocast():
            predictions = runmodel(model, imgs, depths)
            loss = loss_fn(predictions, (normals, depths))

        # backward: the loss is scaled before backprop and the optimizer is stepped via the scaler
        optimizer.zero_grad()
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # update tqdm loop
        loop.set_postfix(loss=loss.item())
    loop.close()

In [None]:
# Pretrained FCN-ResNet50 whose classifier head is replaced by a 14-channel FCNHead.
fcn = fcn_resnet50(pretrained=True, num_classes=21)
# set_parameter_requires_grad(fcn)  # left disabled, as in the original cell
fcn.classifier = FCNHead(2048, channels=14)
model = fcn.to(DEVICE)

# Continue from the saved checkpoint file.
epoch, model = load_checkpoint(model, "checkpoint-epoch-057.pth", DEVICE)

In [None]:
# Plain SGD over all model parameters, a gradient scaler for the autocast
# training loop, and the project loss.
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=1e-6,
)
scaler = torch.cuda.amp.GradScaler()
loss_fn = LossFunction()

In [None]:
# Fine-tune for a fixed number of passes over the training loader.
fine_tune_epochs = 100
model.train()
for epoch_idx in range(fine_tune_epochs):
    train_fn(train_dataloader, model, optimizer, loss_fn, scaler)

In [26]:
# Timestamped run folder: every run of non-alphanumeric characters in the ISO
# timestamp is collapsed to a single "-".
run_stamp = re.sub("[^0-9a-zA-Z]+", "-", dt.now().isoformat())
output_dir = os.path.join(OUT_PATH, run_stamp)
save_checkpoint(epoch_idx, model, output_dir)

Visual Classification

In [4]:
from train import train_one_epoch_fg
from torchvision.models.resnet import resnet50
from general import set_parameter_requires_grad, tensors_to_device
import torch.nn as nn
from model import FGLossFunction
from metrics import FGMetricFunction
from dataset import create_dataloader_fg
from tqdm import tqdm

In [5]:
# Only the depth map is routed through the pipelines alongside the image here.
depth_target = {'depth': 'depth'}

# Randomly chosen augmentation groups; each group is applied with its own p.
blur_group = A.OneOf(
    [
        A.MotionBlur(p=0.2),
        A.MedianBlur(blur_limit=3, p=0.1),
        A.Blur(blur_limit=3, p=0.1),
    ],
    p=0.2,
)
distortion_group = A.OneOf(
    [
        M.MyOpticalDistortion(p=0.3),
        M.MyGridDistortion(p=0.1),
    ],
    p=0.2,
)
appearance_group = A.OneOf(
    [
        A.IAASharpen(),
        A.IAAEmboss(),
        A.RandomBrightnessContrast(),
    ],
    p=0.3,
)

# Training pipeline: crop and flips, the random groups, then normalise and convert to tensors.
train_steps = [
    M.MyRandomResizedCrop(width=IMAGE_SIZE, height=IMAGE_SIZE),
    M.MyHorizontalFlip(p=0.5),
    M.MyVerticalFlip(p=0.1),
    blur_group,
    distortion_group,
    appearance_group,
    A.Normalize(),
    M.MyToTensorV2(),
]
train_transform = A.Compose(train_steps, additional_targets=dict(depth_target))

# Evaluation pipeline: normalisation and tensor conversion only.
test_steps = [A.Normalize(), M.MyToTensorV2()]
test_transform = A.Compose(test_steps, additional_targets=dict(depth_target))

# Detection pipeline: resize the longest side to IMAGE_SIZE and zero-pad to a square.
detect_steps = [
    M.MyLongestMaxSize(max_size=IMAGE_SIZE),
    M.MyPadIfNeeded(min_height=IMAGE_SIZE, min_width=IMAGE_SIZE, border_mode=cv2.BORDER_CONSTANT, value=0),
    A.Normalize(),
    M.MyToTensorV2(),
]
detect_transform = A.Compose(detect_steps, additional_targets=dict(depth_target))

# Options shared by both loaders; only the split, transform and shuffling differ.
loader_common = dict(batch_size=BATCH_SIZE, workers=WORKERS, pin_memory=True)

_, train_dataloader = create_dataloader_fg(
    DATASET_ROOT, TRAIN_JSON_PATH, transform=train_transform, shuffle=True, **loader_common
)

_, test_dataloader = create_dataloader_fg(
    DATASET_ROOT, TEST_JSON_PATH, transform=test_transform, shuffle=False, **loader_common
)

In [6]:
# Pretrained ResNet-50 with its final fully connected layer replaced by a
# 30-output linear layer.
model = resnet50(pretrained=True)
# NOTE(review): presumably adjusts requires_grad on the pretrained weights; confirm in general.py.
set_parameter_requires_grad(model)
fc_in_features = 512 * 4
model.fc = nn.Linear(fc_in_features, 30)
model = model.to(DEVICE)

In [7]:
# Optimise only the parameters that still require gradients.
trainable_params = [p for p in model.parameters() if p.requires_grad]
solver = torch.optim.Adam(
    trainable_params,
    lr=1e-3,
    betas=BETAS,
    eps=EPS,
    weight_decay=WEIGHT_DECAY,
)
# Multiply the learning rate by GAMMA at each epoch listed in MILESTONES.
lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
    solver,
    milestones=MILESTONES,
    gamma=GAMMA,
)
loss_fn = FGLossFunction()

In [8]:
# Checkpoint resume is disabled for this experiment:
# epoch_idx = 0
# if LOAD_TRAIN_MODEL:
#     epoch_idx, model = load_checkpoint(model, CHECKPOINT_TRAIN_FILE, DEVICE)

model.train()
for epoch_idx in range(NUM_EPOCHS):
    # A new metric accumulator is created each epoch; loss_fn is reused.
    metric_fn = FGMetricFunction(BATCH_SIZE)
    train_one_epoch_fg(model, train_dataloader, loss_fn, metric_fn, solver, epoch_idx)
    epoch_loss = loss_fn.show()
    epoch_metrics = metric_fn.show()
    print_single_error(epoch_idx, epoch_loss, epoch_metrics)
    lr_scheduler.step()

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)
100%|██████████| 2008/2008 [02:41<00:00, 12.44it/s, epoch=0, loss=(total:1.9447)]
  0%|          | 0/2008 [00:00<?, ?it/s]

eval_avg_error
Epoch: 0, loss=(total:1.9447)




100%|██████████| 2008/2008 [02:40<00:00, 12.48it/s, epoch=1, loss=(total:1.3468)]
  0%|          | 0/2008 [00:00<?, ?it/s]

eval_avg_error
Epoch: 1, loss=(total:1.3468)




100%|██████████| 2008/2008 [02:41<00:00, 12.40it/s, epoch=2, loss=(total:1.9700)]
  0%|          | 0/2008 [00:00<?, ?it/s]

eval_avg_error
Epoch: 2, loss=(total:1.9700)




100%|██████████| 2008/2008 [02:41<00:00, 12.47it/s, epoch=3, loss=(total:2.0001)]
  0%|          | 0/2008 [00:00<?, ?it/s]

eval_avg_error
Epoch: 3, loss=(total:2.0001)




100%|██████████| 2008/2008 [02:41<00:00, 12.40it/s, epoch=4, loss=(total:1.1812)]
  0%|          | 0/2008 [00:00<?, ?it/s]

eval_avg_error
Epoch: 4, loss=(total:1.1812)




100%|██████████| 2008/2008 [02:41<00:00, 12.45it/s, epoch=5, loss=(total:2.7733)]
  0%|          | 0/2008 [00:00<?, ?it/s]

eval_avg_error
Epoch: 5, loss=(total:2.7733)




100%|██████████| 2008/2008 [02:41<00:00, 12.43it/s, epoch=6, loss=(total:1.2002)]
  0%|          | 0/2008 [00:00<?, ?it/s]

eval_avg_error
Epoch: 6, loss=(total:1.2002)




100%|██████████| 2008/2008 [02:42<00:00, 12.38it/s, epoch=7, loss=(total:2.0783)]
  0%|          | 0/2008 [00:00<?, ?it/s]

eval_avg_error
Epoch: 7, loss=(total:2.0783)




100%|██████████| 2008/2008 [02:41<00:00, 12.40it/s, epoch=8, loss=(total:3.8091)]
  0%|          | 0/2008 [00:00<?, ?it/s]

eval_avg_error
Epoch: 8, loss=(total:3.8091)




100%|██████████| 2008/2008 [02:41<00:00, 12.43it/s, epoch=9, loss=(total:0.5534)]
  0%|          | 0/2008 [00:00<?, ?it/s]

eval_avg_error
Epoch: 9, loss=(total:0.5534)




100%|██████████| 2008/2008 [02:41<00:00, 12.45it/s, epoch=10, loss=(total:2.4448)]
  0%|          | 0/2008 [00:00<?, ?it/s]

eval_avg_error
Epoch: 10, loss=(total:2.4448)




100%|██████████| 2008/2008 [02:41<00:00, 12.42it/s, epoch=11, loss=(total:2.6979)]
  0%|          | 0/2008 [00:00<?, ?it/s]

eval_avg_error
Epoch: 11, loss=(total:2.6979)




100%|██████████| 2008/2008 [02:41<00:00, 12.47it/s, epoch=12, loss=(total:1.1273)]
  0%|          | 0/2008 [00:00<?, ?it/s]

eval_avg_error
Epoch: 12, loss=(total:1.1273)




100%|██████████| 2008/2008 [02:42<00:00, 12.34it/s, epoch=13, loss=(total:1.5466)]
  0%|          | 0/2008 [00:00<?, ?it/s]

eval_avg_error
Epoch: 13, loss=(total:1.5466)




100%|██████████| 2008/2008 [02:43<00:00, 12.31it/s, epoch=14, loss=(total:2.0506)]
  0%|          | 0/2008 [00:00<?, ?it/s]

eval_avg_error
Epoch: 14, loss=(total:2.0506)




100%|██████████| 2008/2008 [02:45<00:00, 12.12it/s, epoch=15, loss=(total:1.8909)]
  0%|          | 0/2008 [00:00<?, ?it/s]

eval_avg_error
Epoch: 15, loss=(total:1.8909)




100%|██████████| 2008/2008 [02:49<00:00, 11.81it/s, epoch=16, loss=(total:1.7173)]
  0%|          | 0/2008 [00:00<?, ?it/s]

eval_avg_error
Epoch: 16, loss=(total:1.7173)




100%|██████████| 2008/2008 [02:52<00:00, 11.63it/s, epoch=17, loss=(total:0.8449)]
  0%|          | 0/2008 [00:00<?, ?it/s]

eval_avg_error
Epoch: 17, loss=(total:0.8449)




100%|██████████| 2008/2008 [02:50<00:00, 11.75it/s, epoch=18, loss=(total:2.1857)]
  0%|          | 0/2008 [00:00<?, ?it/s]

eval_avg_error
Epoch: 18, loss=(total:2.1857)




100%|██████████| 2008/2008 [02:50<00:00, 11.77it/s, epoch=19, loss=(total:2.4873)]

eval_avg_error
Epoch: 19, loss=(total:2.4873)







In [35]:
# NOTE(review): train_one_epoch_fg_og is not imported or defined in any cell
# visible in this notebook; confirm where it comes from before a fresh run.
model.train()
for epoch_idx in range(NUM_EPOCHS):
    # A new metric accumulator is created each epoch; loss_fn is reused.
    metric_fn = FGMetricFunction(BATCH_SIZE)
    train_one_epoch_fg_og(model, train_dataloader, loss_fn, metric_fn, solver, epoch_idx)
    epoch_loss = loss_fn.show()
    epoch_metrics = metric_fn.show()
    print_single_error(epoch_idx, epoch_loss, epoch_metrics)
    lr_scheduler.step()

100%|██████████| 2008/2008 [01:54<00:00, 17.52it/s, epoch=0, loss=(total:0.6900)]
  0%|          | 0/2008 [00:00<?, ?it/s]

eval_avg_error
Epoch: 0, loss=(total:0.6900)




100%|██████████| 2008/2008 [01:56<00:00, 17.28it/s, epoch=1, loss=(total:2.4825)]
  0%|          | 0/2008 [00:00<?, ?it/s]

eval_avg_error
Epoch: 1, loss=(total:2.4825)




100%|██████████| 2008/2008 [01:56<00:00, 17.23it/s, epoch=2, loss=(total:3.8943)]
  0%|          | 0/2008 [00:00<?, ?it/s]

eval_avg_error
Epoch: 2, loss=(total:3.8943)




100%|██████████| 2008/2008 [01:56<00:00, 17.28it/s, epoch=3, loss=(total:4.2320)]
  0%|          | 0/2008 [00:00<?, ?it/s]

eval_avg_error
Epoch: 3, loss=(total:4.2320)




100%|██████████| 2008/2008 [01:56<00:00, 17.19it/s, epoch=4, loss=(total:2.5225)]
  0%|          | 0/2008 [00:00<?, ?it/s]

eval_avg_error
Epoch: 4, loss=(total:2.5225)




100%|██████████| 2008/2008 [01:56<00:00, 17.17it/s, epoch=5, loss=(total:0.2197)]
  0%|          | 0/2008 [00:00<?, ?it/s]

eval_avg_error
Epoch: 5, loss=(total:0.2197)




100%|██████████| 2008/2008 [01:56<00:00, 17.22it/s, epoch=6, loss=(total:3.0835)]
  0%|          | 0/2008 [00:00<?, ?it/s]

eval_avg_error
Epoch: 6, loss=(total:3.0835)




100%|██████████| 2008/2008 [01:56<00:00, 17.19it/s, epoch=7, loss=(total:1.2616)]
  0%|          | 0/2008 [00:00<?, ?it/s]

eval_avg_error
Epoch: 7, loss=(total:1.2616)




100%|██████████| 2008/2008 [01:57<00:00, 17.14it/s, epoch=8, loss=(total:1.7264)]
  0%|          | 0/2008 [00:00<?, ?it/s]

eval_avg_error
Epoch: 8, loss=(total:1.7264)




100%|██████████| 2008/2008 [01:56<00:00, 17.20it/s, epoch=9, loss=(total:0.9328)]
  0%|          | 0/2008 [00:00<?, ?it/s]

eval_avg_error
Epoch: 9, loss=(total:0.9328)




100%|██████████| 2008/2008 [01:56<00:00, 17.25it/s, epoch=10, loss=(total:0.7943)]
  0%|          | 0/2008 [00:00<?, ?it/s]

eval_avg_error
Epoch: 10, loss=(total:0.7943)




100%|██████████| 2008/2008 [01:56<00:00, 17.27it/s, epoch=11, loss=(total:1.0338)]
  0%|          | 0/2008 [00:00<?, ?it/s]

eval_avg_error
Epoch: 11, loss=(total:1.0338)




  6%|▌         | 125/2008 [00:07<01:56, 16.20it/s, epoch=12, loss=(total:1.0669)]

KeyboardInterrupt: ignored

Test: FG FCN

In [5]:
from train import train_one_epoch_fg_fcn
from torchvision.models.segmentation import fcn_resnet50
from torchvision.models.segmentation.fcn import FCNHead
from general import set_parameter_requires_grad, tensors_to_device
import torch.nn as nn
from model import FGFCNLossFunction
from metrics import FCNFGMetricFunction
from dataset import create_dataloader_fg
from tqdm import tqdm

In [6]:
# Only the depth map is routed through the pipelines alongside the image here.
depth_target = {'depth': 'depth'}

# Randomly chosen augmentation groups; each group is applied with its own p.
blur_group = A.OneOf(
    [
        A.MotionBlur(p=0.2),
        A.MedianBlur(blur_limit=3, p=0.1),
        A.Blur(blur_limit=3, p=0.1),
    ],
    p=0.2,
)
distortion_group = A.OneOf(
    [
        M.MyOpticalDistortion(p=0.3),
        M.MyGridDistortion(p=0.1),
    ],
    p=0.2,
)
appearance_group = A.OneOf(
    [
        A.IAASharpen(),
        A.IAAEmboss(),
        A.RandomBrightnessContrast(),
    ],
    p=0.3,
)

# Training pipeline: crop and flips, the random groups, then normalise and convert to tensors.
train_steps = [
    M.MyRandomResizedCrop(width=IMAGE_SIZE, height=IMAGE_SIZE),
    M.MyHorizontalFlip(p=0.5),
    M.MyVerticalFlip(p=0.1),
    blur_group,
    distortion_group,
    appearance_group,
    A.Normalize(),
    M.MyToTensorV2(),
]
train_transform = A.Compose(train_steps, additional_targets=dict(depth_target))

# Evaluation pipeline: normalisation and tensor conversion only.
test_steps = [A.Normalize(), M.MyToTensorV2()]
test_transform = A.Compose(test_steps, additional_targets=dict(depth_target))

# Detection pipeline: resize the longest side to IMAGE_SIZE and zero-pad to a square.
detect_steps = [
    M.MyLongestMaxSize(max_size=IMAGE_SIZE),
    M.MyPadIfNeeded(min_height=IMAGE_SIZE, min_width=IMAGE_SIZE, border_mode=cv2.BORDER_CONSTANT, value=0),
    A.Normalize(),
    M.MyToTensorV2(),
]
detect_transform = A.Compose(detect_steps, additional_targets=dict(depth_target))

# Options shared by both loaders; only the split, transform and shuffling differ.
loader_common = dict(batch_size=BATCH_SIZE, workers=WORKERS, pin_memory=True)

_, train_dataloader = create_dataloader_fg(
    DATASET_ROOT, TRAIN_JSON_PATH, transform=train_transform, shuffle=True, **loader_common
)

_, test_dataloader = create_dataloader_fg(
    DATASET_ROOT, TEST_JSON_PATH, transform=test_transform, shuffle=False, **loader_common
)

In [7]:
# Pretrained FCN-ResNet50 whose classifier head is replaced by a 31-channel FCNHead.
fcn = fcn_resnet50(pretrained=True, num_classes=21)
# NOTE(review): presumably adjusts requires_grad on the pretrained weights; confirm in general.py.
set_parameter_requires_grad(fcn)
fcn.classifier = FCNHead(2048, channels=31)
model = fcn.to(DEVICE)

Downloading: "https://download.pytorch.org/models/fcn_resnet50_coco-1167a1af.pth" to /root/.cache/torch/hub/checkpoints/fcn_resnet50_coco-1167a1af.pth


HBox(children=(FloatProgress(value=0.0, max=141567418.0), HTML(value='')))




In [8]:
# Optimise only the parameters that still require gradients.
trainable_params = [p for p in model.parameters() if p.requires_grad]
solver = torch.optim.Adam(
    trainable_params,
    lr=1e-3,
    betas=BETAS,
    eps=EPS,
    weight_decay=WEIGHT_DECAY,
)
# Multiply the learning rate by GAMMA at each epoch listed in MILESTONES.
lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
    solver,
    milestones=MILESTONES,
    gamma=GAMMA,
)
loss_fn = FGFCNLossFunction()

In [None]:
# Checkpoint resume is disabled for this experiment:
# epoch_idx = 0
# if LOAD_TRAIN_MODEL:
#     epoch_idx, model = load_checkpoint(model, CHECKPOINT_TRAIN_FILE, DEVICE)

model.train()
for epoch_idx in range(NUM_EPOCHS):
    # A new metric accumulator is created each epoch; loss_fn is reused.
    metric_fn = FCNFGMetricFunction(BATCH_SIZE)
    train_one_epoch_fg_fcn(model, train_dataloader, loss_fn, metric_fn, solver, epoch_idx)
    epoch_loss = loss_fn.show()
    epoch_metrics = metric_fn.show()
    print_single_error(epoch_idx, epoch_loss, epoch_metrics)
    lr_scheduler.step()

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)
100%|██████████| 2008/2008 [06:04<00:00,  5.50it/s, epoch=0, loss=(total:0.4072)]
  0%|          | 0/2008 [00:00<?, ?it/s]

eval_avg_error
Epoch: 0, loss=(total:0.4072)
IOU=0.5109	P=0.0097	R=0.0012	F1=0.0010




100%|██████████| 2008/2008 [06:03<00:00,  5.53it/s, epoch=1, loss=(total:0.1997)]
  0%|          | 0/2008 [00:00<?, ?it/s]

eval_avg_error
Epoch: 1, loss=(total:0.1997)
IOU=0.4839	P=0.0086	R=0.0004	F1=0.0006




100%|██████████| 2008/2008 [06:02<00:00,  5.54it/s, epoch=2, loss=(total:0.1608)]
  0%|          | 0/2008 [00:00<?, ?it/s]

eval_avg_error
Epoch: 2, loss=(total:0.1608)
IOU=0.4790	P=0.0083	R=0.0002	F1=0.0004




100%|██████████| 2008/2008 [06:02<00:00,  5.54it/s, epoch=3, loss=(total:0.8615)]
  0%|          | 0/2008 [00:00<?, ?it/s]

eval_avg_error
Epoch: 3, loss=(total:0.8615)
IOU=0.4780	P=0.0070	R=0.0001	F1=0.0002




100%|██████████| 2008/2008 [06:01<00:00,  5.55it/s, epoch=4, loss=(total:1.2077)]
  0%|          | 0/2008 [00:00<?, ?it/s]

eval_avg_error
Epoch: 4, loss=(total:1.2077)
IOU=0.4758	P=0.0075	R=0.0001	F1=0.0002




 14%|█▍        | 280/2008 [00:50<05:10,  5.56it/s, epoch=5, loss=(total:0.6628)]