In [13]:
import torch
import torch.nn as nn
import torch.nn.functional as f
# import torch.utils.data as data
import torch.optim as optim
from torch.autograd import Variable
import torchvision
import numpy as np

In [14]:
from imageio import imread, imsave
import cv2
import os

in_dir='../dataset/color/'
img_paths = [x.path for x in os.scandir(in_dir) if x.name.endswith('.jpg') or x.name.endswith('.png')]
len(img_paths)

5578

In [15]:
photo = imread(img_paths[0])

In [16]:
import matplotlib.pyplot as plt
import PIL 

# plt.figure("dog")
# plt.imshow(photo)
# plt.show()

## 生成一个图片载入程序

In [17]:
imsize = 256
loader = torchvision.transforms.Compose([
#     transforms.Scale(imsize), 
    torchvision.transforms.ToTensor()
])

def image_loader(image_name):
    """load image, returns cuda tensor"""
    image = PIL.Image.open(image_name)
    image = loader(image).float()
    image = Variable(image, requires_grad=True)
    image = image.unsqueeze(0)  #this is for VGG, may not be needed for ResNet
    return image #assumes that you're using GPU

image = image_loader(img_paths[0])


In [18]:
image.shape

torch.Size([1, 3, 968, 1296])

## 定义Detector网络

In [19]:
class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, use_bias=True,downsample=None):
        super(BasicBlock, self).__init__()
        self.bn0=nn.BatchNorm2d(inplanes)
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=5, stride=stride,padding=2, bias=use_bias)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=5, stride=stride,padding=2, bias=use_bias)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.bn0(x)
        out = self.conv1(out)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        if self.downsample is not None:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out

In [29]:
class DetectorModel(torch.nn.Module):
    def __init__(self, num_block=3, num_channels=16,conv_ksize=5,
                 use_bias=True, min_scale=2**-3, max_scale=1, num_scales=9):

        self.inplanes = num_channels
        self.num_blocks=num_block
        self.min_scale = min_scale
        self.max_scale=max_scale
        self.num_scales=num_scales

        super(DetectorModel, self).__init__()
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=conv_ksize, stride=1, padding=2,
                               bias=use_bias)
        self.bn1 = nn.BatchNorm2d(self.inplanes)
        self.layer=BasicBlock(self.inplanes, self.inplanes, stride=1, use_bias=True)
        self.soft_conv=nn.Conv2d(16, 1, kernel_size=conv_ksize, stride=1, padding=2,
                               bias=use_bias)
        self.ori_layer=nn.Conv2d(self.inplanes,2,kernel_size=conv_ksize, stride=1, padding=2,
                                bias=True )
#         ori_b_init=torch.nn.init.constant(np.array([1,0], dtype=np.float32))
#         self.ori_layer.bias.data.fill_(ori_b_init)
        if self.num_scales == 1:
            self.scale_factors = [1.0]
        else:
            scale_log_factors = np.linspace(np.log(self.max_scale), np.log(self.min_scale), self.num_scales)
            self.scale_factors = np.exp(scale_log_factors)
        
    def forward(self, x):
        x=self.conv1(x)
        for i in range(self.num_blocks):
            x=self.layer(x)
            print(1)
        x=self.bn1(x)
        score_maps_list = []
        base_height_f = x.shape[2]
        base_width_f = x.shape[3]
        for i, s in enumerate(self.scale_factors):
            feat_height = (base_height_f * s + 0.5).astype(np.uint32)
            feat_width = (base_width_f * s + 0.5).astype(np.uint32)
            rs_feat_maps=torch.nn.functional.interpolate(x,[feat_height, feat_width])
            score_maps = self.soft_conv(rs_feat_maps)
            score_maps_list.append(score_maps)
#         ori_b_init=torch.nn.init.constant(np.array([1,0], dtype=np.float32))
#         self.ori_layer.bias.data.fill_(ori_b_init)
        ori_maps=self.ori_layer(x)
        norm = ori_maps.norm(p=2, dim=1, keepdim=True)
        ori_maps = ori_maps.div(norm.expand_as(ori_maps))
    
        endpoints={}
        endpoints['ori_maps'] = ori_maps
        endpoints['scale_factors'] = self.scale_factors
        return score_maps_list,endpoints

In [30]:
model = DetectorModel()

In [34]:
optimizer = optim.SGD(model.parameters(),lr=0.001)

optimizer.zero_grad()
score_maps_list,endpoints = model(image)

1
1
1


In [11]:
score_maps_list[0].shape

torch.Size([1, 1, 968, 1296])

In [12]:
endpoints['ori_maps'].shape

torch.Size([1, 2, 968, 1296])

### 可以看出来网络输入输出都是一样的尺寸,这样子根据网络输出可以直接得到想要的特征值位置，尺度，方向．

In [13]:
endpoints['ori_maps']

tensor([[[[-0.0640, -0.2276,  0.1485,  ..., -0.9811,  0.3224, -0.8869],
          [ 0.3082, -0.0917,  0.0193,  ..., -0.3908,  0.3010, -0.7215],
          [ 0.4699,  0.6957,  0.9656,  ...,  0.5958, -0.1470, -0.1650],
          ...,
          [ 0.5943,  0.7603,  0.9874,  ..., -0.8233, -0.3601, -0.4108],
          [ 0.9684,  0.3985,  0.5128,  ...,  0.7687,  0.4237,  0.6660],
          [-0.9906, -0.8950,  0.3963,  ...,  0.4654, -0.5938,  0.9230]],

         [[ 0.9979,  0.9738,  0.9889,  ...,  0.1937, -0.9466, -0.4620],
          [ 0.9513,  0.9958,  0.9998,  ...,  0.9205, -0.9536, -0.6924],
          [ 0.8827,  0.7183,  0.2600,  ..., -0.8031,  0.9891,  0.9863],
          ...,
          [ 0.8042,  0.6496, -0.1583,  ..., -0.5676, -0.9329, -0.9117],
          [ 0.2494,  0.9172,  0.8585,  ..., -0.6396, -0.9058, -0.7459],
          [ 0.1371, -0.4461,  0.9181,  ..., -0.8851, -0.8046,  0.3848]]]],
       grad_fn=<DivBackward0>)

In [19]:
endpoints['ori_maps'][0].shape

torch.Size([2, 968, 1296])

In [27]:
endpoints['ori_maps'][0][0][123][0]

tensor(0.3307, grad_fn=<SelectBackward>)

In [28]:
endpoints['ori_maps'][0][1][123][0]

tensor(0.9437, grad_fn=<SelectBackward>)

In [29]:
0.3307**2+0.9437**2

0.99993218

### L2的目的就是让ori map输出的值是一个真实的角度值

## 定义Descriptor网络

In [None]:
class Descriptor(nn.Module):

    def __init__(self,
            out_dim=128,init_num_channels=64,
            num_conv_layers=3,use_bias=False,
            conv_ksize=3):
        super(Descriptor, self).__init__()
        in_channel=2
        channels_list = [init_num_channels * 2 ** i for i in range(num_conv_layers)]

        self.conv1 = nn.Conv2d(in_channel, channels_list[0], kernel_size=conv_ksize, stride=2,padding=1, bias=use_bias)
        self.bn1 = nn.BatchNorm2d(channels_list[0])
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(channels_list[0], channels_list[1], kernel_size=conv_ksize, stride=2,padding=1, bias=use_bias)
        self.bn2 = nn.BatchNorm2d(channels_list[1])
        self.relu = nn.ReLU(inplace=True)
        self.conv3 = nn.Conv2d(channels_list[1], channels_list[2], kernel_size=conv_ksize, stride=2, padding=1, bias=use_bias)
        self.bn2 = nn.BatchNorm2d(channels_list[2])
        self.relu = nn.ReLU(inplace=True)

        self.fc1 = nn.Linear(512 * 4, 512)
        self.fc2 = nn.Linear(512, out_dim)

        # ori_maps = f.normalize(ori_maps, dim=-1)

    def forward(self, x):
        residual = x

        out = self.bn0(x)
        out = self.conv1(out)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out

In [37]:
import gc
gc.collect()

35