In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as f
# import torch.utils.data as data
import torch.optim as optim
from torch.autograd import Variable
import torchvision
import numpy as np

In [2]:
from imageio import imread, imsave
import cv2
import os

in_dir='../dataset/color/'
img_paths = [x.path for x in os.scandir(in_dir) if x.name.endswith('.jpg') or x.name.endswith('.png')]
len(img_paths)

5578

In [3]:
photo = imread(img_paths[0])

In [4]:
import matplotlib.pyplot as plt
import PIL 

# plt.figure("dog")
# plt.imshow(photo)
# plt.show()

## 生成一个图片载入程序

In [5]:
imsize = 256
loader = torchvision.transforms.Compose([
#     transforms.Scale(imsize), 
    torchvision.transforms.ToTensor()
])

def image_loader(image_name):
    """load image, returns cuda tensor"""
    image = PIL.Image.open(image_name)
    image = loader(image).float()
    image = Variable(image, requires_grad=True)
    image = image.unsqueeze(0)  #this is for VGG, may not be needed for ResNet
    return image #assumes that you're using GPU

image = image_loader(img_paths[0])


In [6]:
image.shape

torch.Size([1, 3, 968, 1296])

## 定义网络

In [8]:
class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, use_bias=True,downsample=None):
        super(BasicBlock, self).__init__()
        self.bn0=nn.BatchNorm2d(inplanes)
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=5, stride=stride,padding=2, bias=use_bias)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=5, stride=stride,padding=2, bias=use_bias)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.bn0(x)
        out = self.conv1(out)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        if self.downsample is not None:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out

In [19]:
class DetectorModel(torch.nn.Module):
    def __init__(self, num_block=3, num_channels=16,conv_ksize=5,
                 use_bias=True, min_scale=2**-3, max_scale=1, num_scales=9):

        self.inplanes = num_channels
        self.num_blocks=num_block
        self.min_scale = min_scale
        self.max_scale=max_scale
        self.num_scales=num_scales

        super(DetectorModel, self).__init__()
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=conv_ksize, stride=1, padding=2,
                               bias=use_bias)
        self.bn1 = nn.BatchNorm2d(self.inplanes)
        self.layer=BasicBlock(self.inplanes, self.inplanes, stride=1, use_bias=True)
        self.soft_conv=nn.Conv2d(16, 1, kernel_size=conv_ksize, stride=1, padding=2,
                               bias=use_bias)
        self.ori_layer=nn.Conv2d(self.inplanes,2,kernel_size=conv_ksize, stride=1, padding=2,
                                bias=True )
        
    def forward(self, x):
        num_conv = 0

        x=self.conv1(x)
        num_conv+=1
        for i in range(self.num_blocks):
            x=self.layer(x)
            print(1)
        x=self.bn1(x)
        if self.num_scales == 1:
            scale_factors = [1.0]
        else:
            scale_log_factors = np.linspace(np.log(self.max_scale), np.log(self.min_scale), self.num_scales)
            scale_factors = np.exp(scale_log_factors)
        score_maps_list = []

        base_height_f = x.shape[2]
        base_width_f = x.shape[3]

        for i, s in enumerate(scale_factors):
            feat_height = (base_height_f * s + 0.5).astype(np.uint32)
            feat_width = (base_width_f * s + 0.5).astype(np.uint32)
            rs_feat_maps=torch.nn.functional.interpolate(x,[feat_height, feat_width])
            score_maps = self.soft_conv(rs_feat_maps)
            score_maps_list.append(score_maps)

#         ori_b_init=torch.nn.init.constant(np.array([1,0], dtype=np.float32))
#         self.ori_layer.bias.data.fill_(ori_b_init)
        ori_maps=self.ori_layer(x)
        norm = ori_maps.norm(p=2, dim=1, keepdim=True)
        ori_maps = ori_maps.div(norm.expand_as(ori_maps))
    
        endpoints={}
        endpoints['ori_maps'] = ori_maps
        endpoints['scale_factors'] = scale_factors
        return score_maps_list,endpoints

In [23]:
model = DetectorModel()

In [24]:
optimizer = optim.SGD(model.parameters(),lr=0.001)

optimizer.zero_grad()
score_maps_list,endpoints = model(image)

1
1
1


In [12]:
score_maps_list[0].shape

torch.Size([1, 1, 968, 1296])

In [13]:
endpoints['ori_maps'].shape

torch.Size([1, 2, 968, 1296])

In [14]:
del(endpoints)

In [15]:
del(score_maps_list)

In [18]:
endpoints['ori_maps']

tensor([[[[ 0.1830, -0.7260, -0.7894,  ...,  0.9956,  0.9804,  0.9982],
          [-0.7578, -0.9286, -0.9655,  ...,  0.2057,  0.6935,  0.5808],
          [-1.0000, -0.9009, -0.9793,  ...,  0.2473,  0.0525,  0.9998],
          ...,
          [-0.2711, -0.9286, -0.9931,  ...,  0.4832, -0.7531,  0.9981],
          [ 0.0145, -0.5627, -0.9723,  ...,  0.9683,  0.0689,  0.2631],
          [ 0.2282, -0.2648, -0.9616,  ..., -0.0847,  0.3065,  0.6306]],

         [[ 0.9831, -0.6877, -0.6139,  ..., -0.0937, -0.1970,  0.0595],
          [-0.6525, -0.3710, -0.2602,  ...,  0.9786,  0.7205,  0.8140],
          [-0.0079, -0.4340, -0.2026,  ...,  0.9689,  0.9986,  0.0220],
          ...,
          [-0.9626, -0.3711, -0.1175,  ...,  0.8755,  0.6579, -0.0617],
          [-0.9999, -0.8266, -0.2336,  ..., -0.2499, -0.9976, -0.9648],
          [-0.9736, -0.9643, -0.2745,  ..., -0.9964, -0.9519, -0.7761]]]],
       grad_fn=<DivBackward0>)

In [31]:
endpoints['ori_maps']

tensor([[[[ 9.5132e-02, -2.0404e-01,  1.6202e-01,  ...,  7.7709e-04,
            1.4454e-02, -6.6126e-03],
          [-3.2322e-01,  1.6059e-01,  1.4056e+00,  ...,  2.4473e-01,
            2.1986e-01,  3.3805e-01],
          [-9.7903e-01, -4.6141e-01,  3.2169e-01,  ..., -2.4291e-01,
           -7.3376e-01,  1.4821e-01],
          ...,
          [-7.8730e-01,  4.6630e-01,  2.9443e-01,  ..., -2.1323e-01,
           -9.8827e-01, -1.2142e-01],
          [-3.2610e-01,  2.3963e-01,  3.6654e-01,  ...,  6.8808e-01,
            4.2732e-02,  5.7086e-01],
          [ 3.2584e-01, -1.6511e-01,  4.7603e-01,  ..., -3.4453e-01,
            2.9609e-02, -4.1296e-01]],

         [[-1.0500e-01,  1.6093e-01,  6.0468e-01,  ...,  8.2428e-01,
            2.4642e-01,  1.1696e-01],
          [-1.4341e-01,  3.7865e-01,  5.7144e-01,  ...,  5.0299e-01,
           -7.0438e-02,  1.3670e-02],
          [ 9.8710e-02,  6.4920e-02,  1.9095e-01,  ...,  8.2882e-01,
           -1.3654e-01, -4.4763e-01],
          ...,
     

In [32]:
norm = endpoints['ori_maps'].norm(p=2, dim=1, keepdim=True)
d = endpoints['ori_maps'].div(norm.expand_as(endpoints['ori_maps']))

In [33]:
d

tensor([[[[ 6.7142e-01, -7.8518e-01,  2.5882e-01,  ...,  9.4275e-04,
            5.8555e-02, -5.6449e-02],
          [-9.1407e-01,  3.9044e-01,  9.2637e-01,  ...,  4.3752e-01,
            9.5232e-01,  9.9918e-01],
          [-9.9496e-01, -9.9025e-01,  8.5992e-01,  ..., -2.8125e-01,
           -9.8312e-01,  3.1432e-01],
          ...,
          [-9.2010e-01,  9.7184e-01,  4.9200e-01,  ..., -8.8956e-01,
           -9.5701e-01, -3.4740e-01],
          [-8.6403e-01,  6.2791e-01,  6.6816e-01,  ...,  7.2830e-01,
            1.4284e-01,  9.5902e-01],
          [ 7.7143e-01, -9.9247e-01,  9.5835e-01,  ..., -5.6780e-01,
            7.6025e-01, -6.6839e-01]],

         [[-7.4107e-01,  6.1927e-01,  9.6592e-01,  ...,  1.0000e+00,
            9.9828e-01,  9.9841e-01],
          [-4.0556e-01,  9.2063e-01,  3.7660e-01,  ...,  8.9921e-01,
           -3.0510e-01,  4.0406e-02],
          [ 1.0032e-01,  1.3933e-01,  5.1042e-01,  ...,  9.5964e-01,
           -1.8294e-01, -9.4932e-01],
          ...,
     

# 可以看出来网络输入输出都是一样的尺寸

In [13]:
x=output
base_height_f = x.shape[2]
base_width_f = x.shape[3]

In [40]:
min_scale=1
max_scale=2**-3
num_scales=5

scale_log_factors = np.linspace(np.log(max_scale), np.log(min_scale), num_scales)
scale_factors = np.exp(scale_log_factors)

for i, s in enumerate(scale_factors):
    
    # scale are defined by extracted patch size (s of s*default_patch_size) so we need use inv-scale for resizing images
#     inv_s=1.0/s
    feat_height = (base_height_f * s + 0.5).astype(np.uint32)
    feat_width = (base_width_f * s + 0.5).astype(np.uint32)
    rs_feat_maps=torch.nn.functional.interpolate(x,[feat_height, feat_width])

In [42]:
rs_feat_maps.shape

torch.Size([1, 16, 968, 1296])

In [37]:
inv_s=1.0/scale_factors[0]

In [38]:
inv_s

7.999999999999998

In [31]:
feat_height

7744

In [21]:
feat_width

10368

In [35]:
scale_factors

array([0.125     , 0.2102241 , 0.35355339, 0.59460356, 1.        ])

In [69]:
import torch
import torch.nn as nn

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1,3, 3)
        nn.init.uniform_(self.conv1.weight,a=0,b=0)
        nn.init.uniform_(self.conv1.bias,a=0,b=0)
        nn.init.constant_(self.conv1.bias,val=4)
#         K = torch.Tensor([[1 ,0, -1],[2, 0 ,-2], [1, 0 ,-1]])
# #        I think I should make the shape/size like this?
#         K = torch.unsqueeze(torch.unsqueeze(K,0),0)
#         #with torch.no_grad():
#         with torch.no_grad():
#             self.conv1.weight = torch.nn.Parameter(K)

    def forward(self, x):
        x = self.conv1(x)
        return x

net = Net()
net.conv1.bias.data

tensor([4., 4., 4.])

In [72]:
w = torch.empty(1, 5)
nn.init.constant_(w,val=torch.tensor([1,2,3,4,5]))
w

RuntimeError: _th_fill_ only supports a 0-dimensional value tensor, but got tensor with 1 dimension(s).

In [64]:
torch.tensor([1,0,3], dtype=torch.float32).shape

torch.Size([3])

In [4]:
input_data = torch.tensor([[1.0,2,3],[4.0,5,6],[7.0,8,9]])

In [5]:
output = l2normalize(input_data)

In [6]:
input_data

tensor([[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])

In [7]:
output

tensor([[0.0592, 0.1185, 0.1777],
        [0.2369, 0.2962, 0.3554],
        [0.4146, 0.4739, 0.5331]])

In [8]:
x = Variable(input_data, requires_grad=True)
norm = x.norm(p=2, dim=1, keepdim=True)
x_normalized = x.div(norm.expand_as(x))

In [9]:
x_normalized

tensor([[0.2673, 0.5345, 0.8018],
        [0.4558, 0.5698, 0.6838],
        [0.5026, 0.5744, 0.6462]], grad_fn=<DivBackward0>)

In [12]:
class L2Norm(nn.Module):
    def __init__(self,n_channels, scale):
        super(L2Norm,self).__init__()
        self.n_channels = n_channels
        self.gamma = scale or None
        self.eps = 1e-10
        self.weight = nn.Parameter(torch.Tensor(self.n_channels))

    def reset_parameters(self):
        init.constant_(self.weight,self.gamma)

    def forward(self, x):
        norm = x.pow(2).sum(dim=1, keepdim=True).sqrt()+self.eps
        #x /= norm
        x = torch.div(x,norm)
        out = self.weight.unsqueeze(0).unsqueeze(2).unsqueeze(3).expand_as(x) * x
        return out

In [22]:
l2=L2Norm(16,None)

In [23]:
l2(x)

tensor([[[[-3.9875e+22, -2.2435e+23, -4.2435e+23,  ..., -4.8818e+23,
           -3.2238e+23, -4.5585e+23],
          [-4.5239e+23, -1.5145e+23, -5.9890e+22,  ..., -2.2507e+23,
           -7.5980e+22, -2.5818e+23],
          [-8.5684e+22, -2.5993e+23, -2.4635e+23,  ..., -4.4587e+23,
           -1.9367e+23, -2.9402e+23],
          ...,
          [-3.8787e+23, -1.3569e+23, -8.5363e+21,  ..., -5.4141e+23,
           -2.6103e+23, -3.7902e+23],
          [-3.8192e+23, -1.2533e+23, -2.5346e+23,  ..., -2.2754e+23,
           -6.0112e+22, -1.1252e+23],
          [-4.2629e+23, -4.0428e+23, -3.3172e+23,  ..., -3.5366e+23,
           -3.9935e+23, -3.7598e+23]],

         [[ 6.3311e-42,  1.3472e-41,  6.5511e-42,  ...,  1.9802e-41,
            7.0205e-43,  9.1295e-42],
          [ 5.9906e-42,  1.1331e-41,  2.7157e-42,  ...,  1.5414e-44,
            2.1193e-41,  1.4981e-41],
          [ 1.9286e-41,  1.5190e-42,  1.5468e-41,  ...,  6.5581e-43,
            1.2217e-41,  1.1835e-41],
          ...,
     

In [21]:
x=torch.rand([1, 16, 968, 1296])

In [24]:
x

tensor([[[[8.4584e-02, 4.4076e-01, 8.6257e-01,  ..., 9.2973e-01,
           7.0214e-01, 9.3593e-01],
          [9.4628e-01, 3.0881e-01, 1.0506e-01,  ..., 5.0020e-01,
           1.3674e-01, 6.7434e-01],
          [1.7106e-01, 5.6026e-01, 5.9777e-01,  ..., 9.2645e-01,
           3.8451e-01, 7.7736e-01],
          ...,
          [7.0813e-01, 2.3430e-01, 1.6523e-02,  ..., 8.7827e-01,
           4.4968e-01, 8.9249e-01],
          [9.8822e-01, 2.7966e-01, 4.9337e-01,  ..., 5.1153e-01,
           1.2078e-01, 2.6221e-01],
          [9.3376e-01, 9.5439e-01, 7.9173e-01,  ..., 8.6261e-01,
           9.0274e-01, 9.5543e-01]],

         [[3.1928e-01, 6.2933e-01, 3.1664e-01,  ..., 8.9669e-01,
           3.6332e-02, 4.4568e-01],
          [2.9792e-01, 5.4931e-01, 1.1329e-01,  ..., 7.9715e-04,
           9.0684e-01, 9.3040e-01],
          [9.1546e-01, 7.7832e-02, 8.9237e-01,  ..., 3.2413e-02,
           5.7670e-01, 7.4403e-01],
          ...,
          [4.5385e-01, 8.1488e-01, 3.4518e-01,  ..., 6.2675

In [25]:
x1 = Variable(x, requires_grad=True)
norm = x1.norm(p=2, dim=1, keepdim=True)
x_normalized1 = x1.div(norm.expand_as(x1))

In [26]:
x_normalized1

tensor([[[[3.6655e-02, 2.0623e-01, 3.9009e-01,  ..., 4.4876e-01,
           2.9635e-01, 4.1904e-01],
          [4.1586e-01, 1.3922e-01, 5.5054e-02,  ..., 2.0689e-01,
           6.9844e-02, 2.3733e-01],
          [7.8766e-02, 2.3894e-01, 2.2646e-01,  ..., 4.0987e-01,
           1.7804e-01, 2.7028e-01],
          ...,
          [3.5655e-01, 1.2473e-01, 7.8470e-03,  ..., 4.9769e-01,
           2.3995e-01, 3.4842e-01],
          [3.5108e-01, 1.1521e-01, 2.3299e-01,  ..., 2.0916e-01,
           5.5258e-02, 1.0343e-01],
          [3.9187e-01, 3.7164e-01, 3.0493e-01,  ..., 3.2510e-01,
           3.6710e-01, 3.4562e-01]],

         [[1.3836e-01, 2.9447e-01, 1.4320e-01,  ..., 4.3281e-01,
           1.5335e-02, 1.9954e-01],
          [1.3093e-01, 2.4764e-01, 5.9369e-02,  ..., 3.2972e-04,
           4.6321e-01, 3.2745e-01],
          [4.2154e-01, 3.3194e-02, 3.3806e-01,  ..., 1.4340e-02,
           2.6702e-01, 2.5869e-01],
          ...,
          [2.2852e-01, 4.3382e-01, 1.6393e-01,  ..., 3.5516