In [1]:
# Show all Jupyter output: with "all", every bare expression statement in a
# cell is displayed, not only the last one. Later cells rely on this when they
# echo several values in a row (e.g. a tensor followed by its shape).
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import torch
from torch import nn
from torchvision import models

import sys
sys.path.append('..')

In [27]:
# ROIAlign comes from the project's own `model` package (presumably resolved
# through the '..' entry appended to sys.path in the setup cell).
from model.roi_layers import ROIAlign

# (2, 2) matches the 2x2 spatial size of every pooled output shown below.
# NOTE(review): the two trailing 1s are presumably spatial_scale and
# sampling_ratio -- confirm against model/roi_layers.
roi_align = ROIAlign((2, 2), 1, 1)


In [3]:
# Base 3x3 map holding 0.1 .. 0.9 in row-major order, viewed as (1, 3, 3).
x = torch.tensor([[.1, .2, .3], [.4, .5, .6], [.7, .8, .9]]).view(1, 3, 3)
x.shape

# Tile the map three times along a new axis 1, giving shape (1, 3, 3, 3).
x = x.repeat(1, 3, 1, 1)
x.shape
x

torch.Size([1, 3, 3])

torch.Size([1, 3, 3, 3])

tensor([[[[0.1000, 0.2000, 0.3000],
          [0.4000, 0.5000, 0.6000],
          [0.7000, 0.8000, 0.9000]],

         [[0.1000, 0.2000, 0.3000],
          [0.4000, 0.5000, 0.6000],
          [0.7000, 0.8000, 0.9000]],

         [[0.1000, 0.2000, 0.3000],
          [0.4000, 0.5000, 0.6000],
          [0.7000, 0.8000, 0.9000]]]])

In [4]:
# Multiply slice [0, 1] of `x` by 3 in place (presumably so each slice is
# distinguishable in the pooled outputs). Because this mutates `x`, re-running
# this cell on its own multiplies the slice again.
x[0,1,:,:] *= 3

In [5]:
# Multiply slice [0, 2] of `x` by 2 in place. Same caveat as the previous
# cell: re-running this cell on its own multiplies the slice again.
x[0,2,:,:] *= 2

In [6]:
x

tensor([[[[0.1000, 0.2000, 0.3000],
          [0.4000, 0.5000, 0.6000],
          [0.7000, 0.8000, 0.9000]],

         [[0.3000, 0.6000, 0.9000],
          [1.2000, 1.5000, 1.8000],
          [2.1000, 2.4000, 2.7000]],

         [[0.2000, 0.4000, 0.6000],
          [0.8000, 1.0000, 1.2000],
          [1.4000, 1.6000, 1.8000]]]])

In [7]:
def make_roi(inp, batched=True):
    """Build a float32 ROI tensor from rows of five numbers.

    Each row holds an image id in column 0 followed by four box coordinates.

    Args:
        inp: Nested sequence (anything ``torch.tensor`` accepts) whose total
            element count is a multiple of 5.
        batched: If True (the default, and the original behaviour) the result
            has shape ``(1, K, 5)``. If False the result has shape ``(K, 5)``,
            i.e. ``[rows, coords]`` -- the layout the experiments at the end of
            this notebook found the ROI layers actually need in order to pool
            every row.

    Returns:
        A ``torch.float32`` tensor of shape ``(1, K, 5)`` or ``(K, 5)``.

    Raises:
        RuntimeError: If the values cannot be arranged into rows of five
            (raised by ``Tensor.view``).
    """
    rows = torch.tensor(inp).float().view(-1, 5)
    # unsqueeze(0) on the (K, 5) view reproduces the original view(1, -1, 5).
    return rows.unsqueeze(0) if batched else rows

In [None]:
# A single ROI row; make_roi views it as shape (1, 1, 5).
roi = make_roi([[0, 0, 0, 1, 1]])
roi
roi.shape

In [None]:
# Apply the ROIAlign layer to `x` with the ROI above, then echo the result
# and its shape.
out = roi_align(x, roi)
out
out.shape

In [8]:
# Three rows with identical box values; only column 0 (the image id, per the
# notes further down) differs: 0, 1, 2. make_roi views this as (1, 3, 5).
roi = make_roi([
    [0, 0, 0, 1, 1],
    [1, 0, 0, 1, 1],
    [2, 0, 0, 1, 1],
    
])
out = roi_align(x, roi)
out
out.shape

tensor([[[[0.2000, 0.2500],
          [0.3500, 0.4000]],

         [[0.6000, 0.7500],
          [1.0500, 1.2000]],

         [[0.4000, 0.5000],
          [0.7000, 0.8000]]]])

torch.Size([1, 3, 2, 2])

In [9]:
# Same as the previous experiment, but the second row's id is 0 instead of 1,
# to see whether the ids in later rows change the result.
roi = make_roi([
    [0, 0, 0, 1, 1],
    [0, 0, 0, 1, 1],
    [2, 0, 0, 1, 1],
    
])
out = roi_align(x, roi)
out
out.shape

tensor([[[[0.2000, 0.2500],
          [0.3500, 0.4000]],

         [[0.6000, 0.7500],
          [1.0500, 1.2000]],

         [[0.4000, 0.5000],
          [0.7000, 0.8000]]]])

torch.Size([1, 3, 2, 2])

## What happens if multiple rows share the same image id?

In [10]:
# Three different boxes that all carry image id 0, followed by one row with
# id 2. make_roi views this as (1, 4, 5).
roi = make_roi([
    [0, 0, 0, 1, 1],
    [0, 1, 1, 2, 2],
    [0, 0, 0, 2, 2],
    [2, 0, 0, 1, 1],
    
])

In [11]:
# Pool with the four-row ROI tensor defined in the previous cell and echo the
# result and its shape.
out = roi_align(x, roi)
out
out.shape

tensor([[[[0.2000, 0.2500],
          [0.3500, 0.4000]],

         [[0.6000, 0.7500],
          [1.0500, 1.2000]],

         [[0.4000, 0.5000],
          [0.7000, 0.8000]]]])

torch.Size([1, 3, 2, 2])

In [12]:
# Repeat the experiment with the first row disabled, so [0, 1, 1, 2, 2] is now
# the leading row.
roi = make_roi([
#     [0, 0, 0, 1, 1],
    [0, 1, 1, 2, 2],
    [0, 0, 0, 2, 2],
    [2, 0, 0, 1, 1],
    
])
out = roi_align(x, roi)
out
out.shape

tensor([[[[0.6000, 0.6500],
          [0.7500, 0.8000]],

         [[1.8000, 1.9500],
          [2.2500, 2.4000]],

         [[1.2000, 1.3000],
          [1.5000, 1.6000]]]])

torch.Size([1, 3, 2, 2])

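It seems to use only the first row it finds and ignore the others (this later turns out to be a symptom of the ROI tensor's shape; see the conclusion near the end).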

In [13]:
# Repeat once more with the first two rows disabled, so [0, 0, 0, 2, 2] is now
# the leading row.
roi = make_roi([
#     [0, 0, 0, 1, 1],
#     [0, 1, 1, 2, 2],
    [0, 0, 0, 2, 2],
    [2, 0, 0, 1, 1],
    
])
out = roi_align(x, roi)
out
out.shape

tensor([[[[0.3000, 0.4000],
          [0.6000, 0.7000]],

         [[0.9000, 1.2000],
          [1.8000, 2.1000]],

         [[0.6000, 0.8000],
          [1.2000, 1.4000]]]])

torch.Size([1, 3, 2, 2])

## What happens with a multichannel image?

In [None]:
# Rebuild the plain 3x3 map. This rebinds `x`, replacing the scaled tensor
# used by the cells above.
x = torch.tensor([
    [.1, .2, .3],
    [.4, .5, .6],
    [.7, .8, .9]
])
x = x.view(1, 3, 3)
x.shape

# expand() to (2, 1, 3, 3): size 2 on the leading axis, size 1 on the second.
# NOTE(review): if axis 0 is the batch axis, this is two single-channel images
# rather than one multichannel image -- confirm this matches the heading.
x = x.expand(2, 1, 3, 3)
x.shape
x

In [None]:
x

In [None]:
# One ROI row tagged with image id 0; the row for id 1 is left disabled.
roi = make_roi([
        [0, 0, 0, 2, 2],
#         [1, 0, 0, 1, 1]
    ])
roi
roi_align(x, roi)

# Try with ROIPool

In [None]:
# ROIPool comes from the same project package as ROIAlign.
from model.roi_layers import ROIPool

# Same (2, 2) first argument as the ROIAlign instance created earlier.
# NOTE(review): the trailing 1 is presumably spatial_scale -- confirm against
# model/roi_layers.
roi_pool = ROIPool((2, 2), 1)

In [None]:
x
x.shape

In [None]:
roi
roi.shape

In [None]:
# Calls .cuda() on the layer, so this cell needs a CUDA-capable setup.
# NOTE(review): presumably ROIPool is only usable on the GPU here -- confirm.
roi_pool.cuda()

In [None]:
# Copy both inputs to the GPU before calling the layer, then echo the pooled
# result and its shape.
out = roi_pool(x.cuda(), roi.cuda())
out
out.shape

Conclusion: ROIPool gives the same output as ROIAlign. What I'm still trying to figure out is how to get an individual pooling for each ROI.

In [29]:
# Three ROI rows, all tagged with image id 0, kept as a plain 2-D
# [rows, coords] tensor (no leading batch axis).
rois_value = [[0, 0, 0, 1, 3], [0, 2, 2, 3, 3], [0, 1, 0, 3, 2]]
roi = torch.tensor(rois_value).to(torch.float32)
roi
roi.shape

tensor([[0., 0., 0., 1., 3.],
        [0., 2., 2., 3., 3.],
        [0., 1., 0., 3., 2.]])

torch.Size([3, 5])

In [30]:
# 4x4 single-channel test image. The nested list is laid out as (1, 4, 4, 1)
# and then viewed as (1, 1, 4, 4).
input_value = [[[[v] for v in row] for row in (
    (1, 2, 4, 4),
    (3, 4, 1, 2),
    (6, 2, 1, 7),
    (1, 3, 2, 8),
)]]
inp = torch.tensor(input_value).view(1, 1, 4, 4).float()
inp
inp.shape



tensor([[[[1., 2., 4., 4.],
          [3., 4., 1., 2.],
          [6., 2., 1., 7.],
          [1., 3., 2., 8.]]]])

torch.Size([1, 1, 4, 4])

In [33]:
# Pool the 4x4 image with the 2-D (3, 5) ROI tensor. The echoed shape is
# (3, 1, 2, 2): one pooled 2x2 map per ROI row.
out = roi_align(inp, roi)
out
out.shape

tensor([[[[2.7500, 3.2500],
          [4.1250, 2.8750]]],


        [[[2.7500, 5.7500],
          [3.2500, 6.2500]]],


        [[[2.7500, 2.7500],
          [2.0000, 2.7500]]]])

torch.Size([3, 1, 2, 2])

# Figured it out: my ROI tensor was the wrong shape. Previously the tensor was `[batch, rows, coords]`, but it should just be `[rows, coords]`.

## What about misaligned batch image ids? How can they be fixed?

In [None]:
# Shift column 0 (the image ids) in place so the first row's id becomes 0.
# NOTE(review): `rois` is not assigned in any cell visible in this notebook
# (earlier cells use `roi`); the shift also presumes the first row carries the
# smallest id -- confirm both before running.
rois[:, 0] = rois[:,0] - rois[0,0]   # KEY COMPONENT: RESET THE IMAGE IDS PER BATCH SIZE
rois

In [None]:
# NOTE(review): `x[0]` drops the leading axis of `x`, whereas every executed
# call above passes a 4-D tensor; and `rois` is not assigned in any cell
# visible in this notebook -- confirm both before running.
out = roi_align(x[0], rois=rois)
out.shape
out