In [None]:
# Detect whether this notebook is being executed inside Google Colab.
IN_COLAB = 'google.colab' in str(get_ipython())
if IN_COLAB:
  # Colab only: install the nbdev Colab helper, then initialise the notebook for this project.
  !pip install git+https://github.com/pete88b/nbdev_colab_helper.git
  from nbdev_colab_helper.core import *
  project_name = 'nextai'
  init_notebook(project_name)

In [None]:
# default_exp vision_core

# vision_core

> Utility methods used in vision training and inference.

*   These methods are used to manipulate tensors defining bounding boxes, categories, and anchor boxes.
*   Input tensors are of dimension (bs, k, 4) for bounding boxes or (bs, k, 21) for categories, where bs = batch size and k = number of rows representing a given image.
*   Tensors can run on GPU or CPU, depending on the processing environment 



In [None]:
#hide
#from nbdev import *
#from nbdev.showdoc import *

In [None]:
#hide
!pip install fastai --upgrade --quiet

[?25l[K     |█                               | 10kB 26.2MB/s eta 0:00:01[K     |█▉                              | 20kB 3.4MB/s eta 0:00:01[K     |██▊                             | 30kB 4.0MB/s eta 0:00:01[K     |███▊                            | 40kB 4.4MB/s eta 0:00:01[K     |████▋                           | 51kB 3.9MB/s eta 0:00:01[K     |█████▌                          | 61kB 4.3MB/s eta 0:00:01[K     |██████▌                         | 71kB 4.6MB/s eta 0:00:01[K     |███████▍                        | 81kB 5.0MB/s eta 0:00:01[K     |████████▎                       | 92kB 5.4MB/s eta 0:00:01[K     |█████████▎                      | 102kB 5.2MB/s eta 0:00:01[K     |██████████▏                     | 112kB 5.2MB/s eta 0:00:01[K     |███████████                     | 122kB 5.2MB/s eta 0:00:01[K     |████████████                    | 133kB 5.2MB/s eta 0:00:01[K     |█████████████                   | 143kB 5.2MB/s eta 0:00:01[K     |█████████████▉            

In [None]:
%nbdev_export
from fastai.imports import *
from torch import tensor, Tensor
import torch

In [None]:
%nbdev_export
# Pick the compute device once: the GPU when CUDA is usable, otherwise the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

> Convert bounding box coordinates from center/height/width (CxCyHW) to x1, y1, x2, y2 format.<br>
> IMPORTANT: The method expects the input box tensor to be in CxCyHW order.

In [None]:
%nbdev_export
# Helper Functions for Predictor Methods
def ctrhw2tlbr(boxes:Tensor, set_if_input_is_CxCyWH=False):
  ''' Turn centre/size boxes into corner boxes (x1, y1, x2, y2).

      Inputs:
          boxes                  - 3-D tensor indexed as (batch, box, 4). Each box is read as
                                   centre x, centre y, height, width (CxCyHW) by default.
          set_if_input_is_CxCyWH - pass True when the last two values are width, height
                                   instead; they are swapped before the conversion.

      Output:
          Tensor holding one row of x1, y1, x2, y2 per box. The batch and box axes are
          flattened together, so the shape is (batch * boxes, 4), not (batch, boxes, 4).
  '''
  if set_if_input_is_CxCyWH:
    boxes = boxes[:,:,[0,1,3,2]]          # reorder to CxCyHW, the layout used below

  ctr_x, ctr_y = boxes[:,:,0], boxes[:,:,1]
  half_h = torch.true_divide(boxes[:,:,2], 2.)
  half_w = torch.true_divide(boxes[:,:,3], 2.)

  # Width moves the x coordinates, height moves the y coordinates
  corners = torch.stack([ctr_x - half_w, ctr_y - half_h, ctr_x + half_w, ctr_y + half_h], dim=-1)
  return corners.view(-1, 4)

In [None]:
#hide
# Centre/size -> corner conversion for three identical 2x2 boxes centred on the origin.
# The result comes back flattened to one row of x1, y1, x2, y2 per box.
res = ctrhw2tlbr(torch.tensor([[[0,0,2,2],[0,0,2,2],[0,0,2,2]]])); res

tensor([[-1., -1.,  1.,  1.],
        [-1., -1.,  1.,  1.],
        [-1., -1.,  1.,  1.]])

In [None]:
%nbdev_export
def tlbr2cthw(boxes:Tensor, ctrhw=True):
  '''Convert corner boxes (top-left xy, bottom-right xy) into centre/size boxes.
      Input:
          boxes - 3-D tensor indexed as (batch, box, 4), each box given as x1, y1, x2, y2.
                  Values are not required to lie in any particular range.
          ctrhw - True  -> each output box is centre x, centre y, height, width (CxCyHW)
                  False -> each output box is centre x, centre y, width, height (CxCyWH)
      Output:
          Tensor of the same (batch, box, 4) layout holding the centre followed by the
          two (absolute) side lengths in the order selected by `ctrhw`.'''
  top_left, bottom_right = boxes[:,:, :2], boxes[:,:, 2:]

  midpoints = torch.true_divide(top_left + bottom_right, 2)     # centre of every box
  extents = (bottom_right - top_left).abs()                     # x extent (width), y extent (height)

  out = torch.cat((midpoints, extents), dim=2)                  # CxCyWH at this point
  # FASTAI expects height before width, so swap the last two columns on request
  return out[:,:,[0,1,3,2]] if ctrhw else out

In [None]:
#hide
# Corner -> centre/size conversion for a single batch holding three identical boxes.
res = tlbr2cthw(torch.tensor([[[-1,-1,1,1],[-1,-1,1,1],[-1,-1,1,1]]])); res

tensor([[[0., 0., 2., 2.],
         [0., 0., 2., 2.],
         [0., 0., 2., 2.]]])

In [None]:
#hide
# Same conversion with three batches of one box each; the (batch, box, 4) layout is preserved.
res = tlbr2cthw(torch.tensor([[[-1,-1,1,1]],[[-1,-1,1,1]],[[-1,-1,1,1]]])); res

tensor([[[0., 0., 2., 2.]],

        [[0., 0., 2., 2.]],

        [[0., 0., 2., 2.]]])

In [None]:
#hide
# "Return to sender": converting corners to centre/size and back reproduces the original box.
res = ctrhw2tlbr(tlbr2cthw(torch.tensor([[[-1,-1,1,1]]]))); res

tensor([[-1., -1.,  1.,  1.]])

In [None]:
%nbdev_export
# We apply Decoding With Variance to both activation boxes and anchor boxes to calculate the final bounding boxes. 
def activ_decode(p_boxes:Tensor, anchors:Tensor):
  ''' Decode raw box activations into bounding boxes by applying them as scaled offsets to the anchor boxes.
        Input:
            p_boxes - torch.tensor of raw box activations
                      Dim:    (batch, items in batch, 4)
                      Format: top left xy, bottom right xy
            anchors - torch.tensor of anchor boxes
                      Dim:    2-D, (number of anchors, 4); must broadcast against the items of `p_boxes`
                      Format: CxCyWH
        Output:
                      torch.tensor of decoded boxes, placed on the module-level `device`
                      Dim:    same shape as `p_boxes`
                      Format: tlbr - top left xy, bottom right xy, clamped to [-1, 1]'''

  # Scale factors: square roots of the centre (0.1) and size (0.2) variances
  std_xy = torch.sqrt(torch.tensor([0.1])).to(device)
  std_hw = torch.sqrt(torch.tensor([0.2])).to(device)

  anchor_ctr = anchors[:,[0,1]].to(device)
  anchor_wh = anchors[:,[2,3]].to(device)

  squashed = torch.tanh(p_boxes)                   # bring activations into the [-1,1] basis used in Fastai
  act_ctrwh = tlbr2cthw(squashed, ctrhw=False)     # xyxy -> CxCyWH
  act_ctr = act_ctrwh[:,:,[0,1]].to(device)
  act_wh = act_ctrwh[:,:,[2,3]].to(device)

  # Centres (x then y): shift each anchor centre by the activation scaled with the anchor size
  new_ctr = act_ctr * std_xy * anchor_wh + anchor_ctr

  # Sizes (w then h): rescale each anchor size by the exponentiated activation
  new_wh = torch.exp(act_wh * std_hw) * anchor_wh

  # Convert CxCyWH back to corners, restore the batch layout and keep the result inside [-1, 1]
  decoded = ctrhw2tlbr(torch.cat([new_ctr, new_wh], 2), set_if_input_is_CxCyWH=True)
  return torch.clamp(decoded.view(*p_boxes.shape), min=-1, max=+1)

In [None]:
#hide
# Smoke test for activ_decode: two anchors and one batch of two raw box activations.
anchors = torch.tensor([[0.0,0.0,2,2],[0.0,0.0,1,1]]) 
p_boxes = torch.tensor([[[0.0,0.0,2,2],[0.0,0.0,1,1]]]) 
res = activ_decode(p_boxes, anchors); res

tensor([[[-1.0000, -1.0000,  1.0000,  1.0000],
         [-0.5825, -0.5825,  0.8233,  0.8233]]], device='cuda:0')

In [None]:
%nbdev_export
# Transform activations into final bounding boxes by calculating the predicted offsets to the anchor boxes
def activ_encode(p_boxes:Tensor, anchors:Tensor):
  ''' Transforms activations into bounding boxes by calculating offsets relative to the anchor boxes, which are then added to the anchor boxes
        Input:
            p_boxes - torch.tensor of activation bounding boxes
                      dim:    (batch, items in batch, 4)
                      Format: top left xy, bottom right xy
            anchors - torch.tensor of anchor boxes
                      dim:    2-D, (number of anchors, 4); must broadcast against the items of `p_boxes`
                      Format: CxCyWH
        Output:
                      torch.tensor on the module-level `device`
                      dim:    same shape as `p_boxes`
                      Format: top left xy, bottom right xy
  '''
  # Scale factors (square roots of the variances); moved to `device` so they can be
  # combined with the other operands when running on a GPU
  sigma_ctr = torch.sqrt(torch.tensor([0.1])).to(device)
  sigma_hw = torch.sqrt(torch.tensor([0.2])).to(device)

  anchor_ctr = anchors[:,[0,1]].to(device)
  anchor_wh = anchors[:,[2,3]].to(device)

  pb = torch.tanh(p_boxes)                              # Set activations into the basis [-1,1] (as used in Fastai)
  to_ctrwh = tlbr2cthw(pb, ctrhw=False).to(device)      # xyxy -> CxCyWH, which makes the offset calculations below simpler

  # Calculate anchors with offsets to serve as predicted bounding boxes
  # NOTE(review): the arithmetic is kept exactly as originally written; confirm it is the
  # intended counterpart of activ_decode (which multiplies by sigma and by the anchor size).
  offset_center = sigma_ctr * (to_ctrwh[:,:,[0,1]] - anchor_ctr) / anchor_wh
  offset_size = torch.log(to_ctrwh[:,:,[2,3]] / anchor_wh) / sigma_hw
  centers = anchor_ctr + offset_center
  sizes = anchor_wh + offset_size

  # ctrhw2tlbr flattens the batch axis, so restore the original layout (as activ_decode does)
  corners = ctrhw2tlbr(torch.cat([centers, sizes], 2), set_if_input_is_CxCyWH=True)
  return corners.view(*p_boxes.shape)

In [None]:
%nbdev_export
# Strip zero-valued rows from a bounding box tensor
def strip_zero_rows(bboxes:Tensor):
  ''' Split a bounding box tensor into its data rows and its all-zero rows.
      Input:  bboxes   Bounding box tensor indexed as (image, box, coords)
      Output: b_out    Stack of the non-zero rows of every image that has at least one
              z_out    Stack of the all-zero rows of every image that has at least one
              When no image contributes to an output, that output is an empty 1-D
              tensor (shape (0,)), which torch.cat skips when concatenating.
      Note:   torch.stack needs equally sized pieces, so the images that contribute to an
              output must all contribute the same number of rows. '''
  b_out = []; z_out = []

  for image_boxes in bboxes:
    zero_mask = (image_boxes == 0.).all(1)            # True for rows whose coordinates are all zero
    cc = image_boxes[~zero_mask]                      # rows that carry data
    if cc.nelement() != 0: b_out.append(cc)
    zz = image_boxes[zero_mask]                       # all-zero (padding) rows
    if zz.nelement() != 0: z_out.append(zz)           # keyed on zz so mixed images keep their zero rows

  def _stack_or_empty(parts):
    # torch.stack raises on an empty list; fall back to an empty tensor of matching dtype/device
    return torch.stack(parts) if parts else bboxes.new_empty(0)

  return (_stack_or_empty(b_out), _stack_or_empty(z_out))

In [None]:
#hide
# Four images with two boxes each: the first two hold real boxes, the last two are all zeros.
boxxes = torch.tensor([[[1,2,3,4],[1,2,3,4]],[[1,2,3,4],[1,2,3,4]],[[0,0,0,0],[0,0,0,0]],[[0,0,0,0],[0,0,0,0]]]);boxxes
print(boxxes)
print(boxxes.shape)
# strip[0] collects the data rows, strip[1] the all-zero rows.
strip = strip_zero_rows(boxxes)
print(F'non-zero rows: {strip[0]}')
print(F'zero rows: {strip[1]}')

tensor([[[1, 2, 3, 4],
         [1, 2, 3, 4]],

        [[1, 2, 3, 4],
         [1, 2, 3, 4]],

        [[0, 0, 0, 0],
         [0, 0, 0, 0]],

        [[0, 0, 0, 0],
         [0, 0, 0, 0]]])
torch.Size([4, 2, 4])
non-zero rows: tensor([[[1, 2, 3, 4],
         [1, 2, 3, 4]],

        [[1, 2, 3, 4],
         [1, 2, 3, 4]]])
zero rows: tensor([[[0, 0, 0, 0],
         [0, 0, 0, 0]],

        [[0, 0, 0, 0],
         [0, 0, 0, 0]]])


In [None]:
%nbdev_export
# Graft the all-zero rows back to the bounding box array
def graft_zerorows_to_tensor(bboxes:Tensor, zboxes:Tensor):
  ''' Append the all-zero rows back onto the bounding box rows.
      Input   bboxes   Bounding box tensor
              zboxes   Tensor containing the zero-valued rows stripped by strip_zero_rows function
      Output  Tensor holding the rows of `bboxes` followed by the rows of `zboxes`, joined
              along dimension 1 (shape[1] = bboxes.shape[1] + zboxes.shape[1])'''
  # Dimension 1 is the row axis, so the zero rows land after the data rows of each entry
  return torch.cat((bboxes, zboxes), 1)

In [None]:
#hide
# Two entries with two data rows each get two zero rows appended, giving four rows per entry.
res = graft_zerorows_to_tensor(torch.tensor([[[1,2,3,4],[1,2,3,4]],[[1,2,3,4],[1,2,3,4]]]), torch.tensor([[[0,0,0,0],[0,0,0,0]],[[0,0,0,0],[0,0,0,0]]])); res

tensor([[[1, 2, 3, 4],
         [1, 2, 3, 4],
         [0, 0, 0, 0],
         [0, 0, 0, 0]],

        [[1, 2, 3, 4],
         [1, 2, 3, 4],
         [0, 0, 0, 0],
         [0, 0, 0, 0]]])

In [None]:
# Re-attach the zero rows to the data rows returned by the strip_zero_rows example (`strip`).
res = graft_zerorows_to_tensor(strip[0], strip[1]);res

tensor([[[1, 2, 3, 4],
         [1, 2, 3, 4],
         [0, 0, 0, 0],
         [0, 0, 0, 0]],

        [[1, 2, 3, 4],
         [1, 2, 3, 4],
         [0, 0, 0, 0],
         [0, 0, 0, 0]]])

In [None]:
%nbdev_export
# Flip a bounding box along the y axis
def flip_on_y_axis(bboxes:Tensor):                              
    ''' Flip bounding box(es) along the y axis (mirror left/right).
        Input:   bboxes   Bounding box tensor whose last dimension is x1, y1, x2, y2;
                          any number of leading dimensions is accepted
        Output:  Tensor of the same shape on the same device as `bboxes`, each box
                 given as -x2, y1, -x1, y2 '''
    # Build the sign vector on the input's device so GPU tensors do not hit a device mismatch
    signs = torch.tensor([-1.,1.,-1.,1.], device=bboxes.device)
    # Swap x1/x2 before negating so the smaller x stays first
    return bboxes[...,[2,1,0,3]]*signs

In [None]:
#hide
# A unit box spanning x in [0, 1] is mirrored to x in [-1, 0]; the y coordinates are untouched.
res = flip_on_y_axis(torch.tensor([0,0,1,1]));res

tensor([-1.,  0., -0.,  1.])

In [None]:
%nbdev_export
# Flip a bounding box along the x axis
def flip_on_x_axis(bboxes:Tensor):                              
    ''' Flip bounding box(es) along the x axis (mirror top/bottom).
        Input:   bboxes   Bounding box tensor whose last dimension is x1, y1, x2, y2;
                          any number of leading dimensions is accepted
        Output:  Tensor of the same shape on the same device as `bboxes`, each box
                 given as x1, -y2, x2, -y1 '''
    # Build the sign vector on the input's device so GPU tensors do not hit a device mismatch
    signs = torch.tensor([1.,-1.,1.,-1.], device=bboxes.device)
    # Swap y1/y2 before negating so the smaller y stays first
    return bboxes[...,[0,3,2,1]]*signs

In [None]:
#hide
# A unit box spanning y in [0, 1] is mirrored to y in [-1, 0]; the x coordinates are untouched.
res = flip_on_x_axis(torch.tensor([0,0,1,1]));res

tensor([ 0., -1.,  1., -0.])

In [None]:
%nbdev_export
def rotate_90_plus(bb:Tensor):
  ''' Rotate bounding box(es) by 90 degrees clockwise
      Input:   bb   Bounding box tensor whose last dimension is x1, y1, x2, y2;
                    any number of leading dimensions is accepted
      Output:  Tensor of the same shape on the same device as `bb`, each box
               given as -y2, x1, -y1, x2 '''
  # Build the sign vector on the input's device so GPU tensors do not hit a device mismatch
  signs = torch.tensor([-1.,1.,-1.,1.], device=bb.device)
  return bb[...,[3,0,1,2]]*signs

In [None]:
#hide
# Works on a 3-D (batch, box, coords) input: only the last axis is permuted.
rot = rotate_90_plus(torch.tensor([[[0,0,1,1]]])); rot

tensor([[[-1.,  0., -0.,  1.]]])

In [None]:
%nbdev_export
def rotate_90_minus(bb:Tensor):
  ''' Rotate bounding box(es) by 90 degrees counterclockwise
      Input:   bb   Bounding box tensor in xyxy format (last dimension is x1, y1, x2, y2);
                    any number of leading dimensions is accepted
      Output:  Tensor of the same shape on the same device as `bb`, each box
               given as y1, -x2, y2, -x1 '''
  # Build the sign vector on the input's device so GPU tensors do not hit a device mismatch
  signs = torch.tensor([1.,-1.,1.,-1.], device=bb.device)
  return bb[...,[1,2,3,0]]*signs

Function is insensitive to tensor dimensions

In [None]:
#hide
# Rotating clockwise and then counterclockwise restores the original box.
rot = rotate_90_minus(rotate_90_plus(torch.tensor([[[0,0,1,1]]])));rot

tensor([[[0., 0., 1., 1.]]])

In [None]:
#hide
# Insensitive to tensor dimensions: a bare 1-D box is accepted.
rot = rotate_90_minus(torch.tensor([0,0,1,1])); rot

tensor([ 0., -1.,  1., -0.])

In [None]:
#hide
# Insensitive to tensor dimensions: the same box wrapped in batch/box axes gives the same values.
rot = rotate_90_minus(torch.tensor([[[0,0,1,1]]])); rot

tensor([[[ 0., -1.,  1., -0.]]])

In [None]:
#hide
# Run nbdev's notebook-to-script export; the output below lists the notebooks it converted.
from nbdev.export import notebook2script
notebook2script()

Converted 00_core.ipynb.
Converted 00_inference_core.ipynb.
Converted 01_anchor_boxes.ipynb.
Converted index.ipynb.
