In [1]:
# Enable IPython's autoreload extension; mode 2 re-imports every module
# before each cell executes, so edits to local .py files are picked up.
%load_ext autoreload
%autoreload 2

In [2]:
# Side length (pixels) of the square masks: used as InputTarget's default
# train_size, as RotDS's resize target and as the Autoencoder's output size.
im_size = 32
# Multiplier used by RotDS.__len__ (dataset length is bz * n).
bz = 32

In [3]:
__author__ = 'anhvo'

import os
from tqdm import tqdm
import torch
import torchvision
from torch import nn
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import MNIST
from torchvision.utils import save_image
from avcv.visualize import *
from avcv.utils import images_to_video, put_text
import numpy as np
import cv2

In [4]:
# Build a YOLOX-style experiment and its data loader from inside the notebook.
# NOTE(review): `make_parser` and `get_exp` are presumably provided by the
# wildcard import below - confirm, and prefer explicit imports.
from tools.train import *
import mmcv
print('Num of devices:', torch.cuda.device_count())

# parse_known_args() is used so unrecognised command-line arguments are
# returned separately instead of raising; only the parsed namespace ([0]) is
# kept and wrapped in an mmcv.Config.
parser = make_parser()
args = mmcv.Config(parser.parse_known_args()[0].__dict__)

args.exp_file = '../exps/rotated/nano.py'
args.experiment_name = 'nano-test'
args.devices = 1
args.batch_size = 4
args.occupy = True
# NOTE(review): `experiment_name` is assigned above, but `args.name` is what
# gets interpolated here and passed to get_exp(); if the parser leaves `name`
# unset the output directory will not reflect the experiment - confirm intent.
args.output_dir = f'./work_dir/{args.name}'
exp = get_exp(args.exp_file, args.name)
exp.merge(args.opts)

from yolox.data import DataPrefetcher
# NOTE(review): the positional arguments are presumably (batch_size,
# is_distributed); the 3 differs from args.batch_size = 4 - TODO confirm.
loader = exp.get_data_loader(3, False)
prefetcher = DataPrefetcher(loader)


ModuleNotFoundError: No module named 'tools'

In [None]:
# targets

In [None]:
class InputTarget:
    """Couples a rotated-box description with the mask it rasterises to.

    The box is stored in normalised form: ``cx``/``w`` as fractions of
    ``im_w``, ``cy``/``h`` as fractions of ``im_h`` and ``a`` as the angle in
    degrees divided by 180.

    Args:
        cx, cy: Box centre.
        w, h: Box width and height.
        a: Box angle.
        im_h, im_w: Size of the canvas the box is drawn on.
        normalize: When truthy, the five box values are given in pixels /
            degrees and are divided by ``im_w``, ``im_h`` and 180 here.
            Otherwise they are stored unchanged.
        train_size: Size passed to ``cv2.resize`` for the returned mask.
    """

    def __init__(self, cx, cy, w, h, a, im_h=64, im_w=64, normalize=False, train_size=(im_size, im_size)):
        if normalize:
            cx, w = cx / im_w, w / im_w
            cy, h = cy / im_h, h / im_h
            a = a / 180
        self.im_h, self.im_w = im_h, im_w
        self.train_size = train_size
        # Blank canvas; `target` draws on a copy so this stays all zeros.
        self._mask = np.zeros([im_h, im_w], dtype='uint8')
        self.cx, self.cy = cx, cy
        self.w, self.h = w, h
        self.a = a

    @property
    def input(self):
        """The five box values ``[cx, cy, w, h, a]`` as a float32 array."""
        values = [self.cx, self.cy, self.w, self.h, self.a]
        return np.array(values).astype(np.float32)

    def set_input(self, cx, cy, w, h, a):
        """Overwrite the stored box values as given (no normalisation)."""
        self.cx, self.cy = cx, cy
        self.w, self.h = w, h
        self.a = a

    @property
    def target(self):
        """Mask with 1 inside the rotated box, resized to ``train_size``."""
        # Undo the normalisation to get the ((cx, cy), (w, h), angle) triple
        # that cv2.boxPoints expects.
        center = (self.cx * self.im_w, self.cy * self.im_h)
        extent = (self.w * self.im_w, self.h * self.im_h)
        angle = self.a * 180
        corners = np.array(cv2.boxPoints((center, extent, angle))).astype(int)

        # contourIdx=-1 draws every contour; thickness=-1 fills it with value 1.
        canvas = self._mask.copy()
        filled = cv2.drawContours(canvas, [corners], -1, 1, -1)
        return cv2.resize(filled, self.train_size)

In [5]:
from yolox.data import DataPrefetcher
from tools.train import *
import mmcv

class RotDS(torch.utils.data.Dataset):
    """Synthetic dataset of random rotated boxes and their filled masks.

    Every item is generated on the fly from NumPy's global random state, so
    the requested index has no influence on the sample that is returned.

    Args:
        n: Scale factor for the dataset length, which is ``bz * n``.
    """

    def __init__(self, n):
        # The constructor values are placeholders: get_input() overwrites the
        # box through set_input() before every use.
        self.input_target = InputTarget(0.5, 0.5, 0.2, 0.3, 30, 128, 128)
        self.n = n

    def __len__(self):
        return bz * self.n

    def get_input(self, index):
        """Draw a random box and return ``(box_params, mask)`` as float32 arrays."""
        draw = np.random.uniform
        # Draw order (cx, cy, h, w, a) is kept so seeded runs stay identical.
        cx = draw(0., 0.8)
        cy = draw(0., 0.8)
        h = draw(0.08, 0.5)
        w = draw(0.08, 0.5)
        a = draw(0, 1)
        self.input_target.set_input(cx, cy, w, h, a)

        mask = cv2.resize(
            self.input_target.target,
            (im_size, im_size),
            interpolation=cv2.INTER_AREA,
        )
        params = self.input_target.input
        return params.astype(np.float32), mask.astype(np.float32)

    def __getitem__(self, index):
        return self.get_input(index)


ModuleNotFoundError: No module named 'yolox'

In [105]:
from typing import Optional, List

import torch
import torch.nn.functional as F
from torch.nn.modules.loss import _Loss
# from ._functional import soft_dice_score, to_tensor
# from .constants import BINARY_MODE, MULTICLASS_MODE, MULTILABEL_MODE
#: Binary loss mode: use it for a binary segmentation task.
#: There is a single class whose pixels are labeled **1**;
#: all remaining pixels are background and labeled **0**.
#: Target mask shape - (N, H, W), model output mask shape (N, 1, H, W).
BINARY_MODE: str = "binary"

#: Multiclass loss mode: use it for a multi-**class** segmentation task.
#: There are *C = 1..N* classes with unique label values; the classes are
#: mutually exclusive and every pixel is labeled with one of these values.
#: Target mask shape - (N, H, W), model output mask shape (N, C, H, W).
MULTICLASS_MODE: str = "multiclass"

#: Multilabel loss mode: use it for a multi-**label** segmentation task.
#: There are *C = 1..N* classes whose pixels are labeled **1**; the classes
#: are not mutually exclusive and each class has its own *channel*.
#: Pixels in a channel that do not belong to its class are labeled **0**.
#: Target mask shape - (N, C, H, W), model output mask shape (N, C, H, W).
MULTILABEL_MODE: str = "multilabel"

def to_tensor(x, dtype=None) -> torch.Tensor:
    """Convert a tensor, NumPy array, list or tuple to a ``torch.Tensor``.

    Args:
        x: A ``torch.Tensor`` (returned as is), a ``numpy.ndarray`` (wrapped
            with ``torch.from_numpy``) or a list/tuple (converted through
            ``numpy.array`` first).
        dtype: Optional target type handed to ``Tensor.type``; when ``None``
            the converted tensor keeps its own dtype.

    Returns:
        The resulting tensor.

    Raises:
        TypeError: If ``x`` is none of the supported types. Previously the
            function fell through and returned ``None``, contradicting its
            return annotation and deferring the failure to the caller.
    """
    if isinstance(x, torch.Tensor):
        tensor = x
    elif isinstance(x, np.ndarray):
        tensor = torch.from_numpy(x)
    elif isinstance(x, (list, tuple)):
        tensor = torch.from_numpy(np.array(x))
    else:
        raise TypeError(f"Unsupported input type: {type(x).__name__}")

    if dtype is not None:
        tensor = tensor.type(dtype)
    return tensor


def soft_dice_score(
        output: torch.Tensor, target: torch.Tensor, smooth: float = 0.0, eps: float = 1e-7, dims=None
) -> torch.Tensor:
    """Soft Dice coefficient ``(2*|A.B| + smooth) / max(|A| + |B| + smooth, eps)``.

    Args:
        output: Predicted values; must have exactly the same size as ``target``.
        target: Reference values.
        smooth: Constant added to both numerator and denominator.
        eps: Lower bound applied to the denominator to avoid division by zero.
        dims: Dimensions to sum over. ``None`` sums over every element and
            yields a scalar tensor.

    Returns:
        The Dice score, reduced over ``dims``.
    """
    assert output.size() == target.size()

    product = output * target
    total = output + target
    if dims is None:
        overlap, mass = product.sum(), total.sum()
    else:
        overlap, mass = product.sum(dim=dims), total.sum(dim=dims)

    denominator = (mass + smooth).clamp_min(eps)
    return (2.0 * overlap + smooth) / denominator
    
    
class DiceLoss(_Loss):
    """Dice loss for binary, multiclass and multilabel segmentation.

    The score is computed per class, summed over the batch and the flattened
    spatial dimensions (``dims = (0, 2)``), and then averaged over classes.
    """

    def __init__(
        self,
        mode: str,
        classes: Optional[List[int]] = None,
        log_loss: bool = False,
        from_logits: bool = True,
        smooth: float = 0.0,
        ignore_index: Optional[int] = None,
        eps: float = 1e-7,
    ):
        """Implementation of Dice loss for image segmentation task.
        It supports binary, multiclass and multilabel cases

        Args:
            mode: Loss mode 'binary', 'multiclass' or 'multilabel'
            classes:  List of classes that contribute in loss computation. By default, all channels are included.
            log_loss: If True, loss computed as `- log(dice_coeff)`, otherwise `1 - dice_coeff`
            from_logits: If True, assumes input is raw logits
            smooth: Smoothness constant for dice coefficient (a)
            ignore_index: Label that indicates ignored pixels (does not contribute to loss)
            eps: A small epsilon for numerical stability to avoid zero division error
                (denominator will be always greater or equal to eps)

        Shape
             - **y_pred** - torch.Tensor of shape (N, C, H, W)
             - **y_true** - torch.Tensor of shape (N, H, W) or (N, C, H, W)

        Reference
            https://github.com/BloodAxe/pytorch-toolbelt
        """
        assert mode in {BINARY_MODE, MULTILABEL_MODE, MULTICLASS_MODE}
        super(DiceLoss, self).__init__()
        self.mode = mode
        if classes is not None:
            assert mode != BINARY_MODE, "Masking classes is not supported with mode=binary"
            classes = to_tensor(classes, dtype=torch.long)

        self.classes = classes
        self.from_logits = from_logits
        self.smooth = smooth
        self.eps = eps
        self.log_loss = log_loss
        self.ignore_index = ignore_index

    def forward(self, y_pred: torch.Tensor, y_true: torch.Tensor) -> torch.Tensor:
        """Compute the Dice loss.

        Args:
            y_pred: Predictions of shape (N, C, ...); raw logits when
                ``from_logits`` is True, probabilities otherwise.
            y_true: Ground truth with the same batch size as ``y_pred``.

        Returns:
            Scalar tensor: the mean of the per-class losses.
        """
        assert y_true.size(0) == y_pred.size(0)

        if self.from_logits:
            # Apply activations to get [0..1] class probabilities
            # Using Log-Exp as this gives more numerically stable result and does not cause vanishing gradient on
            # extreme values 0 and 1
            if self.mode == MULTICLASS_MODE:
                y_pred = y_pred.log_softmax(dim=1).exp()
            else:
                y_pred = F.logsigmoid(y_pred).exp()

        bs = y_true.size(0)
        num_classes = y_pred.size(1)
        # Reduce over batch (0) and flattened pixels (2); keep the class axis.
        dims = (0, 2)

        if self.mode == BINARY_MODE:
            y_true = y_true.view(bs, 1, -1)
            y_pred = y_pred.view(bs, 1, -1)

            if self.ignore_index is not None:
                mask = y_true != self.ignore_index
                y_pred = y_pred * mask
                y_true = y_true * mask

        if self.mode == MULTICLASS_MODE:
            y_true = y_true.view(bs, -1)
            y_pred = y_pred.view(bs, num_classes, -1)

            if self.ignore_index is not None:
                mask = y_true != self.ignore_index
                y_pred = y_pred * mask.unsqueeze(1)

                y_true = F.one_hot((y_true * mask).to(torch.long), num_classes)  # N,H*W -> N,H*W, C
                y_true = y_true.permute(0, 2, 1) * mask.unsqueeze(1)  # N, C, H*W
            else:
                # F.one_hot only accepts int64 indices; cast here as the
                # ignore_index branch does so float/uint8 label masks work too.
                y_true = F.one_hot(y_true.to(torch.long), num_classes)  # N,H*W -> N,H*W, C
                y_true = y_true.permute(0, 2, 1)  # N, C, H*W

        if self.mode == MULTILABEL_MODE:
            y_true = y_true.view(bs, num_classes, -1)
            y_pred = y_pred.view(bs, num_classes, -1)

            if self.ignore_index is not None:
                mask = y_true != self.ignore_index
                y_pred = y_pred * mask
                y_true = y_true * mask

        scores = self.compute_score(y_pred, y_true.type_as(y_pred), smooth=self.smooth, eps=self.eps, dims=dims)

        if self.log_loss:
            loss = -torch.log(scores.clamp_min(self.eps))
        else:
            loss = 1.0 - scores

        # Dice loss is undefined for empty classes (no true pixels),
        # so we zero the contribution of every channel without true pixels.
        # NOTE: A better workaround would be to use loss term `mean(y_pred)`
        # for this case, however it will be a modified jaccard loss

        mask = y_true.sum(dims) > 0
        loss *= mask.to(loss.dtype)

        if self.classes is not None:
            loss = loss[self.classes]

        return self.aggregate_loss(loss)

    def aggregate_loss(self, loss):
        """Reduce the per-class loss vector to a scalar (mean)."""
        return loss.mean()

    def compute_score(self, output, target, smooth=0.0, eps=1e-7, dims=None) -> torch.Tensor:
        """Similarity score used by the loss; delegates to ``soft_dice_score``."""
        return soft_dice_score(output, target, smooth, eps, dims)

In [99]:
class Autoencoder(nn.Module):
    """Maps a 5-value box vector to a single-channel mask in ``[0, 1]``.

    Input is ``(N, 5)``; output is ``(N, 1, im_size, im_size)`` after a
    sigmoid. The vector is embedded to 64 features, expanded to a 16x16 map,
    upsampled to ``im_size`` and refined by a small convolutional stack.
    """

    def __init__(self):
        super().__init__()
        self.embed = nn.Linear(5, 64)

        # 64 -> 256 values, later viewed as a 1x16x16 map.
        self.decoder = nn.Sequential(
            nn.LeakyReLU(0.2, True),
            nn.Linear(64, 128),
            nn.BatchNorm1d(128),
            nn.LeakyReLU(0.2, True),
            nn.Linear(128, 256),
        )

        # Three Conv-BN-LeakyReLU stages, all producing 64 channels.
        refine_layers = []
        for in_channels in (1, 64, 64):
            refine_layers.append(nn.Conv2d(in_channels, 64, 3, padding=1))
            refine_layers.append(nn.BatchNorm2d(64))
            refine_layers.append(nn.LeakyReLU(0.2, True))
        self.conv_1 = nn.Sequential(*refine_layers)

        self.conv_2 = nn.Conv2d(64, 1, 3, padding=1)

    def forward(self, x):
        embed = self.embed(x)

        # The decoder starts with an in-place LeakyReLU, so `embed` itself
        # holds the activated values when it is reused below.
        coarse = self.decoder(embed).reshape([-1, 1, 16, 16])
        coarse = nn.functional.interpolate(coarse, (im_size, im_size))

        # Broadcast the 64-d embedding over every spatial position.
        features = self.conv_1(coarse) + embed[:, :, None, None]
        return self.conv_2(features).sigmoid()


In [100]:
# Network on the GPU, pixel-wise MSE objective, Adam with light weight decay.
model = Autoencoder()
model = model.cuda()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3, weight_decay=1e-5)

In [101]:
!rm -r imgs_epoch
!mkdir -p imgs_epoch

In [102]:
# NOTE(review): `output2` is not assigned in any cell visible in this notebook;
# this looks like a leftover debugging probe that relies on earlier kernel
# state - confirm, and remove it if so.
output2.shape

torch.Size([1024, 1024])

In [98]:
# Train the box -> mask network on freshly generated synthetic samples.
num_epochs = 100
pbar = tqdm(range(num_epochs))

for epoch in pbar:
    model.train()
    losses = []
    # Re-seed NumPy's global RNG at the start of every epoch.
    np.random.seed(epoch)
    ds = RotDS(n=10000)
    dl = torch.utils.data.DataLoader(ds, 1024, shuffle=True, num_workers=8)
    for inps, otars in dl:
        inps, tars = inps.cuda(), otars.reshape([-1, im_size*im_size]).cuda()
        # ===================forward=====================
        # The model returns (N, 1, im_size, im_size); flatten it to the layout
        # of `tars` and pass the target too, which nn.MSELoss requires.
        output = model(inps).reshape(tars.shape)
        loss = criterion(output, tars)
        # ===================backward====================
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        losses.append(loss.item())
        # ===================log========================
        pbar.set_description('epoch [{}/{}], loss:{:.4f}'
              .format(epoch + 1, num_epochs, np.mean(losses)))
    if epoch % 10 == 0:
        # Visual check on the last training batch: predictions on the first
        # row, ground-truth masks on the second.
        model.eval()
        with torch.no_grad():
            # Keep the whole batch; indexing with [1] would select a single
            # sample because the model returns one tensor, not a tuple.
            outputs = model(inps)
        outputs = outputs.reshape([-1, im_size, im_size]).cpu().numpy()
        plot_images([*outputs[:4], *otars[:4]], dpi=100, mxn=[2,4])
torch.save(model.state_dict(), './sim_autoencoder.pth')

epoch [1/100], loss:0.1541:   0%|          | 0/100 [00:15<?, ?it/s]


KeyboardInterrupt: 

In [None]:
# Build (box vector, mask) pairs from the first batch of the real data loader.
# NOTE(review): `loader` comes from the experiment-setup cell, whose recorded
# run failed with ModuleNotFoundError - confirm it exists before running this.
i = 0
inps = []
otars = []
for data in tqdm(loader):
    i+=1
    # presumably data[1] holds the per-image label tensors - TODO confirm
    targets = data[1]
    for target in targets:
        # Drop rows that sum to zero (looks like padding - confirm).
        target = target[target.sum(1)!=0]
        # assumes columns from index 5 onward are exactly (cx, cy, w, h, angle),
        # since each row is unpacked into InputTarget below - TODO confirm
        target = target[:,5:].cpu().numpy()
        for rbox in target:
            # 640x640 canvas; the trailing 1 is `normalize`, so the values are
            # divided by the image size and by 180 inside InputTarget.
            it = InputTarget(*rbox, 640, 640, 1)
            inps.append(it.input)
            otars.append(it.target)
    # Only the first batch is used.
    break
inps = torch.from_numpy(np.array(inps)).float().cuda()

In [None]:
# Run the trained model on the collected box vectors and compare the first
# four predictions (top row) with their ground-truth masks (bottom row).
model.eval()
with torch.no_grad():
    # The model returns a single (N, 1, im_size, im_size) tensor; indexing it
    # with [1] would keep only one sample instead of the batch.
    outputs = model(inps)
outputs = outputs.reshape([-1, im_size, im_size]).cpu().numpy()
plot_images([*outputs[:4], *otars[:4]], dpi=100, mxn=[2,4])

In [None]:
# input_target.train_size