In [3]:
import torch
from torch import nn
from torchvision.models import resnet50
from torchvision.models.detection import FasterRCNN
import torch.nn.functional as F
import sys
sys.path.append('core')

import argparse
import os
import cv2
import glob
import numpy as np
import torch
from PIL import Image

from raft import RAFT
from utils import flow_viz
from utils.utils import InputPadder

In [68]:
# Device used by every `.to(DEVICE)` call below. Prefer the GPU when one is
# visible, otherwise fall back to the CPU so the notebook still runs end to end.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

In [69]:
# ResNet-50 with pretrained weights; it is reused below as the detection backbone.
resnet = resnet50(pretrained=True)
resnet = resnet.to(DEVICE)



In [77]:
# List the top-level child modules of the ResNet. The last two names printed
# (avgpool, fc) are the ones removed by the `[:-2]` slicing used further down.
for name, _ in resnet.named_children():
    print(name)

conv1
bn1
relu
maxpool
layer1
layer2
layer3
layer4
avgpool
fc


In [70]:
class FeatureExtractor(nn.Module):
    """Trunk of ``model``: all of its direct children except the last two."""

    def __init__(self, model: nn.Module) -> None:
        super().__init__()
        # Drop the two trailing children (for the ResNet listed above these
        # are `avgpool` and `fc`) and chain the rest in their original order.
        trunk_layers = list(model.children())[:-2]
        self.feature = nn.Sequential(*trunk_layers)

    def forward(self, x):
        """Run ``x`` through the retained layers and return the result."""
        return self.feature(x)

In [17]:
def flow_field(img1, img2, model, iters=20):
    """Run a flow model on an image pair and return its upsampled flow.

    Args:
        img1: First image, passed to ``model`` unchanged.
        img2: Second image, passed to ``model`` unchanged.
        model: Callable invoked as ``model(img1, img2, iters=..., test_mode=True)``
            that returns a pair whose second element is the flow to use.
        iters: Number of refinement iterations forwarded to ``model``.

    Returns:
        The second element of the model output (shape ``(1, 2, H, W)``
        according to the original inline comment).
    """
    # Forward the caller's `iters` instead of a hard-coded 20.
    _, flow_up = model(img1, img2, iters=iters, test_mode=True)
    return flow_up

In [None]:
def load_image(imfile):
    """Load an image file as a float tensor of shape ``(1, 3, H, W)`` on ``DEVICE``.

    Args:
        imfile: Path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        A float tensor with a leading batch dimension of 1, channels first,
        holding the 0-255 pixel values (no normalisation is applied).
    """
    # The context manager closes the underlying file once pixels are decoded.
    with Image.open(imfile) as im:
        # Force three channels: grayscale input decodes to a 2-D array, which
        # makes `permute(2, 0, 1)` fail, and RGBA input would yield 4 channels.
        img = np.array(im.convert('RGB'), dtype=np.uint8)
    # HWC -> CHW, then add the batch dimension.
    img = torch.from_numpy(img).permute(2, 0, 1).float()
    return img[None].to(DEVICE)

In [75]:
def feature_warp(f_k: torch.Tensor, flow: torch.Tensor, scale_flow: bool = False):
    """Warp a feature map with a flow field using bilinear sampling.

    For every output location ``(py, px)`` the result is the bilinear
    interpolation of ``f_k`` at ``(py + dy, px + dx)``, where ``(dx, dy)`` is
    the flow at that location after resizing it to the feature resolution.
    Neighbours that fall outside the feature map contribute zeros, so a zero
    flow returns ``f_k`` unchanged.

    Args:
        f_k: Feature map of shape ``(n, c, h, w)``.
        flow: Flow field of shape ``(n, 2, H, W)``; channel 0 is the x
            displacement and channel 1 the y displacement. It is bilinearly
            resized to ``(h, w)``.
        scale_flow: When True, the resized displacements are additionally
            multiplied by ``w / W`` (x) and ``h / H`` (y), converting flow
            measured in input pixels into feature-map cells. The default
            (False) uses the flow values as they are, i.e. it assumes they
            are already expressed in feature-map cells.

    Returns:
        The warped feature map, with the same shape, dtype and device as
        ``f_k``.
    """
    _, _, h, w = f_k.shape
    flow_h, flow_w = flow.shape[-2:]

    flo = F.interpolate(flow, size=(h, w), mode='bilinear', align_corners=False)
    # Follow the feature tensor's device/dtype rather than a global device.
    flo = flo.to(device=f_k.device, dtype=f_k.dtype)
    if scale_flow:
        flo = flo * flo.new_tensor([w / flow_w, h / flow_h]).view(1, 2, 1, 1)

    # Absolute sampling positions, in feature-map cells, for every output cell.
    xs = torch.arange(w, device=f_k.device, dtype=f_k.dtype).view(1, 1, w)
    ys = torch.arange(h, device=f_k.device, dtype=f_k.dtype).view(1, h, 1)
    sample_x = xs + flo[:, 0]
    sample_y = ys + flo[:, 1]

    # Map cell index 0..size-1 onto [-1, 1], the convention grid_sample uses
    # with align_corners=True. max(..., 1) avoids dividing by zero when a
    # spatial dimension has size 1.
    grid = torch.stack(
        (
            2 * sample_x / max(w - 1, 1) - 1,
            2 * sample_y / max(h - 1, 1) - 1,
        ),
        dim=-1,
    )

    # A single bilinear grid_sample with zero padding weights the floor corner
    # by (1 - dy) * (1 - dx) and the opposite corner by dy * dx.
    return F.grid_sample(f_k, grid, mode='bilinear', padding_mode='zeros', align_corners=True)

In [76]:
# Smoke test for feature_warp: random 7x7 features and a random 224x224 flow.
f_k = torch.randn(64, 2048, 7, 7)
f_k = f_k.to(DEVICE)
flow = torch.randn(64, 2, 224, 224)
flow = flow.to(DEVICE)
f_i = feature_warp(f_k=f_k, flow=flow)

In [60]:
# Inspect the warped features; the recorded output, torch.Size([64, 2048, 7, 7]),
# matches the shape of the input `f_k`.
f_i.shape

torch.Size([64, 2048, 7, 7])

In [None]:
# Build RAFT inside a DataParallel wrapper and load the checkpoint into the
# wrapper (presumably because the checkpoint keys carry the `module.` prefix
# that DataParallel adds — confirm against the checkpoint file).
# NOTE(review): `args` is not defined anywhere in the visible notebook; this
# cell needs an object providing `args.model` (checkpoint path) plus whatever
# RAFT's constructor reads from it.
model = torch.nn.DataParallel(RAFT(args))
model.load_state_dict(torch.load(args.model))

# Unwrap to the underlying RAFT module, move it to DEVICE and switch to eval mode.
model = model.module
model.to(DEVICE)
model.eval()

In [82]:
# Broadcasting check: a per-channel weight of shape (10, 2048, 1, 1) scales
# every spatial position of a (10, 2048, 7, 7) feature map.
w = torch.randn((10, 2048, 1, 1))
f = torch.randn((10, 2048, 7, 7))
a = torch.mul(f, w)

In [80]:
# The broadcast product keeps the feature-map shape: torch.Size([10, 2048, 7, 7]).
a.shape

torch.Size([10, 2048, 7, 7])

In [90]:
from torchvision.ops import MultiScaleRoIAlign
from torchvision.models.detection.anchor_utils import AnchorGenerator

class wrap(nn.Module):
    """Backbone adapter: a model's trunk exposed as a single named feature map.

    Keeps every direct child of ``model`` except the last two and returns the
    trunk output in a dict under the key ``"0"``. ``out_channels`` is the
    attribute torchvision's ``FasterRCNN`` reads from its backbone; it is
    fixed at 2048 here.
    """

    def __init__(self, model: nn.Module) -> None:
        super().__init__()
        trunk_layers = list(model.children())[:-2]
        self.feature = nn.Sequential(*trunk_layers)
        self.out_channels = 2048

    def forward(self, x):
        """Return ``{"0": features}`` for input ``x``."""
        features = self.feature(x)
        return {"0": features}
        
def get_faster_rcnn_headed(n_classes=91, ec_type='vagan', out_ch=256):
    """Build a Faster R-CNN detector on top of the notebook-level ``resnet``.

    Args:
        n_classes: Passed to ``FasterRCNN`` as ``num_classes``. The default of
            91 is the COCO class count including background.
        ec_type: Currently unused; kept so existing callers keep working.
        out_ch: Currently unused; kept so existing callers keep working.

    Returns:
        A ``FasterRCNN`` whose backbone is ``wrap(resnet)``.
    """
    # The backbone emits one feature map (key "0"), so the anchor generator
    # gets exactly one tuple of sizes and one matching tuple of aspect ratios.
    anchor_generator = AnchorGenerator(
        sizes=((32, 64, 128, 256, 512),),
        aspect_ratios=((0.5, 1.0, 2.0),),
    )
    roi_pooler = MultiScaleRoIAlign(
        featmap_names=['0'],
        output_size=7,
        sampling_ratio=2,
    )
    return FasterRCNN(
        wrap(resnet),
        num_classes=n_classes,
        rpn_anchor_generator=anchor_generator,
        box_roi_pool=roi_pooler,
    )

In [91]:
# Instantiate the detector with the default arguments.
# NOTE(review): the name `net` is also bound to an EmbeddingNetwork in another
# cell, so its meaning depends on which cell ran last.
net = get_faster_rcnn_headed()

In [93]:
# List the top-level submodules of `net` (transform, backbone, rpn, roi_heads
# in the recorded output). The loop variable `n` stays bound afterwards.
for n, _ in net.named_children():
    print(n)

transform
backbone
rpn
roi_heads


In [4]:
class EmbeddingNetwork(nn.Module):
    """Three stacked convolutions followed by global average pooling.

    Maps a feature map with ``in_channels`` channels to one 2048-dimensional
    vector per sample: 1x1 conv to 512 channels, 3x3 conv (padding 1) at 512
    channels, 1x1 conv to 2048 channels, then a spatial average.
    """

    def __init__(self, in_channels):
        super().__init__()
        hidden_channels, embed_channels = 512, 2048
        self.conv1 = nn.Conv2d(in_channels, hidden_channels, kernel_size=1)
        self.conv2 = nn.Conv2d(hidden_channels, hidden_channels, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(hidden_channels, embed_channels, kernel_size=1)
        self.global_avg_pool = nn.AdaptiveAvgPool2d(1)

    def forward(self, x):
        """Return the pooled embedding with the two trailing size-1 dims removed."""
        for layer in (self.conv1, self.conv2, self.conv3, self.global_avg_pool):
            x = layer(x)
        # Pooling leaves trailing 1x1 spatial dims; strip them one at a time.
        return x.squeeze(-1).squeeze(-1)

In [5]:
# Smoke test: embed a random batch of 64 feature maps of shape (2048, 7, 7).
net = EmbeddingNetwork(in_channels=2048)
fm = torch.randn((64, 2048, 7, 7))
out = net(fm)

In [6]:
# One 2048-dimensional embedding per sample: torch.Size([64, 2048]).
out.shape

torch.Size([64, 2048])

In [8]:
# Demonstrates that torch.cat needs matching sizes in every dimension except
# the one being concatenated: (1, 4, 4) and (1, 8, 8) differ in dims 1 and 2,
# so concatenating along dim 0 raises the RuntimeError recorded below.
x1 = torch.randn(1,4,4)
x2 = torch.randn(1,8,8)
torch.cat((x1,x2), 0)

RuntimeError: Sizes of tensors must match except in dimension 0. Expected size 4 but got size 8 for tensor number 1 in the list.

In [None]:
# NOTE(review): leftover scratch cell that was never executed. In the visible
# notebook only `n` and `w` are assigned at top level (`c` and `h` are locals
# of feature_warp), so this presumably raises NameError on a fresh run —
# confirm and delete.
n, c, h, w