In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
from pycocotools.coco import COCO
import numpy as np
import matplotlib.pyplot as plt
import pylab
pylab.rcParams['figure.figsize'] = (10.0, 8.0)
from scipy.misc import imread
import matplotlib.patches as patches
from torch.utils.data import DataLoader
import cv2
import torch
from torch.autograd import  Variable
from faster_rcnn.utils.images import imshow
from faster_rcnn.utils.cython_bbox import bbox_overlaps

from faster_rcnn.fastrcnn.bbox_transform import bbox_transform, bbox_transform_inv
import cPickle
from faster_rcnn.rpn_msr.proposal_target_layer_2 import  ProposalTargetLayer
from torch.optim import SGD, RMSprop, Adam
from torch.optim.lr_scheduler import StepLR
from faster_rcnn.roi_pooling.modules.roi_pool import RoIPool
import gc
torch.backends.cudnn.enabled = False

In [3]:
with open('sorted_index', 'rb') as fp:
    sorted_index = cPickle.load(fp)

### Đọc dữ liệu từ MS COCO dataset


In [4]:
import os
import torchvision.transforms as transforms
from faster_rcnn.utils.datasets.mscoco.dataset import CocoData
from faster_rcnn.utils.datasets.data_generator import CocoGenerator
from faster_rcnn.utils.datasets.data_generator import Enqueuer

dataDir = './data/mscoco'
dataType = 'train2014'
annFile='%s/annotations/instances_%s.json'%(dataDir,dataType)
pre_proposal_folder = './data/mscoco/coco_proposals/MCG/'
batch_size = 2

images_dir = os.path.join(dataDir,'images', dataType)
cap = CocoData(root = images_dir, annFile = annFile)

data_gen = CocoGenerator(data=cap, sorted_index=sorted_index, batch_size=batch_size)
queue = Enqueuer(generator=data_gen)
queue.start(max_queue_size=10, workers=2)
t = queue.get()


loading annotations into memory...
Done (t=9.97s)
creating index...
index created!


Thử hiển thị ảnh cùng các bounding boxes

In [5]:
from faster_rcnn.faster_rcnn_2 import RPN, FastRCNN


### Tính toán feed-forward


Chúng ta sử dụng một ảnh có kích thước đầu vào là  `(width , height) = (600, 800)`

Input:
    - im_data : 
        kích thước : (batch_size, dim, witdh, height)
    - ground_boxes: 
        kích thước (n, 4)
        

In [6]:
categories = ['__background__'] + [x['name'] for x in cap.coco.loadCats(cap.coco.getCatIds())]

In [7]:
proposal_target_layer = ProposalTargetLayer(len(categories))


In [8]:
net = FastRCNN(categories, debug=False)
net.cuda()
net.train()

  nn.init.xavier_normal(self.conv.weight)
  nn.init.xavier_normal(self.fc.weight)


FastRCNN(
  (rpn): RPN(
    (features): Sequential(
      (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU(inplace)
      (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (3): ReLU(inplace)
      (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (6): ReLU(inplace)
      (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (8): ReLU(inplace)
      (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (11): ReLU(inplace)
      (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (13): ReLU(inplace)
      (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (15): ReLU(inplace)
      (16): MaxPool2d(kernel_size=2, stride=2, p

In [9]:
# train(None, optimizer, exp_lr_scheduler, net.rpn, 1, 300)
# torch.save(net.rpn.state_dict(), 'rpn.pkl')

In [10]:
param = filter(lambda x: x.requires_grad, net.parameters())
optimizer = SGD(param, lr=1e-4, momentum=0.9, weight_decay=0.0005)
exp_lr_scheduler = StepLR(optimizer, step_size=1000, gamma=0.95)
save_interval = 100

In [11]:
def train(data_gen, optimizer, lr_scheduler ,model, epochs, steps_per_epoch):
    model.train()
    train_loss = 0
    cross_entropy = 0
    loss_box = 0
    rpn_loss = 0

    for step in range(1, steps_per_epoch +1):
        lr_scheduler.step()
        blobs = data_gen.next()
        
        max_height = np.max([blob['tensor'].shape[2] for blob in blobs])
        max_width = np.max([blob['tensor'].shape[3] for blob in blobs])
        batch_tensor = torch.Tensor(batch_size, 3, max_height, max_width).fill_(0.)
        total_boxes = 0
        batch_boxes = np.empty((0, 5))
        batch_boxes_index = np.empty((0,), dtype=np.int)
        im_info = np.array([[batch_tensor.shape[2], batch_tensor.shape[3]]])
        for i, blob in enumerate(blobs):
            total_boxes = blob['boxes'].shape[0]
            gt_classes =  blob['gt_classes']
            gt_boxes = np.hstack([blob['boxes'] , gt_classes[:, np.newaxis]])
            batch_boxes = np.vstack((batch_boxes, gt_boxes))
            a =  np.zeros((total_boxes , ), dtype=np.int)
            a.fill(i)
            batch_boxes_index = np.concatenate((batch_boxes_index, a), axis=0)

        im_info = np.array([[batch_tensor.shape[2], batch_tensor.shape[3]]])
        try:
            cls_prob, bbox_pred, rois = model(batch_tensor, im_info, batch_boxes, batch_boxes_index)
        except Exception as e:
            print batch_boxes_index
            continue


        cls_data = cls_prob.data.cpu().numpy()
        max_class_idx = np.argmax(cls_data, axis=1)
        loss = model.loss
        cross_entropy += model.cross_entropy.item()
        loss_box += model.loss_box.item()
        train_loss += loss.item()
        rpn_loss += model.rpn.loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step % 10 == 0:
            log_text = 'step %d,  loss: %.4f' % (
                step, train_loss /(step))
            print(log_text)

            re_cnt = True


        if step % save_interval == 0:
            torch.save(model.state_dict(), 'faster_model_at_step_%s.pkl' % step)

In [12]:
import sys
for obj in gc.get_objects():
    if torch.is_tensor(obj) and torch.is_tensor(obj.data):
        print(type(obj), obj.size())

(<class 'torch.nn.parameter.Parameter'>, torch.Size([64, 3, 3, 3]))
(<class 'torch.nn.parameter.Parameter'>, torch.Size([64]))
(<class 'torch.nn.parameter.Parameter'>, torch.Size([64, 64, 3, 3]))
(<class 'torch.nn.parameter.Parameter'>, torch.Size([64]))
(<class 'torch.nn.parameter.Parameter'>, torch.Size([128, 64, 3, 3]))
(<class 'torch.nn.parameter.Parameter'>, torch.Size([128]))
(<class 'torch.Tensor'>, torch.Size([1, 3, 1802, 600]))
(<class 'torch.Tensor'>, torch.Size([1, 3, 1828, 600]))
(<class 'torch.Tensor'>, torch.Size([1, 3, 1829, 600]))
(<class 'torch.Tensor'>, torch.Size([1, 3, 1929, 600]))
(<class 'torch.Tensor'>, torch.Size([1, 3, 2000, 600]))
(<class 'torch.Tensor'>, torch.Size([1, 3, 1699, 600]))
(<class 'torch.Tensor'>, torch.Size([1, 3, 1754, 600]))
(<class 'torch.nn.parameter.Parameter'>, torch.Size([128, 128, 3, 3]))
(<class 'torch.nn.parameter.Parameter'>, torch.Size([128]))
(<class 'torch.nn.parameter.Parameter'>, torch.Size([256, 128, 3, 3]))
(<class 'torch.nn.par

In [None]:
train(t, optimizer=optimizer,lr_scheduler=exp_lr_scheduler, model=net, epochs=1, steps_per_epoch=2000)

  cls_prob = F.softmax(cls_score)
INFO:faster_rcnn.utils.datasets.mscoco.dataset:need more than 0 values to unpack


step 10,  loss: 7.6980
[0]
step 20,  loss: 7.1992
step 30,  loss: 6.8135
step 40,  loss: 6.5731
step 50,  loss: 6.4120
step 60,  loss: 6.2265
step 70,  loss: 6.1076
step 80,  loss: 5.9470
step 90,  loss: 5.8382
step 100,  loss: 5.7147
step 110,  loss: 5.6187
step 120,  loss: 5.5275
step 130,  loss: 5.5067
step 140,  loss: 5.4674
step 150,  loss: 5.4290
step 160,  loss: 5.4123
step 170,  loss: 5.3635
step 180,  loss: 5.3186
step 190,  loss: 5.2600
step 200,  loss: 5.2152


INFO:faster_rcnn.utils.datasets.mscoco.dataset:need more than 0 values to unpack


step 210,  loss: 5.1615


INFO:faster_rcnn.utils.datasets.mscoco.dataset:need more than 0 values to unpack


[0 0 0]
step 220,  loss: 5.0721
[0 0 0 0 0 0 0 0 0]
step 230,  loss: 4.9926
step 240,  loss: 4.9310
step 250,  loss: 4.9158
step 260,  loss: 4.8582
step 270,  loss: 4.8718
step 280,  loss: 4.8768


INFO:faster_rcnn.utils.datasets.mscoco.dataset:need more than 0 values to unpack


step 290,  loss: 4.8533
[0 0 0 0]
step 300,  loss: 4.8126
step 310,  loss: 4.7819
step 320,  loss: 4.7714


INFO:faster_rcnn.utils.datasets.mscoco.dataset:need more than 0 values to unpack


step 330,  loss: 4.7524
[0 0]
step 340,  loss: 4.7059
step 350,  loss: 4.6869
step 360,  loss: 4.6568
step 370,  loss: 4.6161
step 380,  loss: 4.5873
step 390,  loss: 4.5600
step 400,  loss: 4.5491
step 410,  loss: 4.5329
step 420,  loss: 4.5125
step 430,  loss: 4.4973
step 440,  loss: 4.4857
step 450,  loss: 4.4695
step 460,  loss: 4.4616
step 470,  loss: 4.4450
step 480,  loss: 4.4249
step 490,  loss: 4.4267
step 500,  loss: 4.4078


INFO:faster_rcnn.utils.datasets.mscoco.dataset:need more than 0 values to unpack


step 510,  loss: 4.3911
[0 0 0 0 0 0 0]
step 520,  loss: 4.3740
step 530,  loss: 4.3514
step 540,  loss: 4.3351


INFO:faster_rcnn.utils.datasets.mscoco.dataset:need more than 0 values to unpack


step 550,  loss: 4.3254
[0 0]
step 560,  loss: 4.3079
step 570,  loss: 4.3041
step 580,  loss: 4.2935
step 590,  loss: 4.2877
step 600,  loss: 4.2744
step 610,  loss: 4.2721
step 620,  loss: 4.2578
step 630,  loss: 4.2474
step 640,  loss: 4.2410
step 650,  loss: 4.2499
step 660,  loss: 4.2410
step 670,  loss: 4.2391
step 680,  loss: 4.2309
step 690,  loss: 4.2251
step 700,  loss: 4.2070
step 710,  loss: 4.1953
step 720,  loss: 4.1924
step 730,  loss: 4.1918
step 740,  loss: 4.1801
step 750,  loss: 4.1791
step 760,  loss: 4.1736
step 770,  loss: 4.1706
step 780,  loss: 4.1614
step 790,  loss: 4.1533
step 800,  loss: 4.1385
step 810,  loss: 4.1358
step 820,  loss: 4.1282


INFO:faster_rcnn.utils.datasets.mscoco.dataset:need more than 0 values to unpack


step 830,  loss: 4.1172
[0 0 0 0 0]
step 840,  loss: 4.1099
step 850,  loss: 4.0967
step 860,  loss: 4.0872
step 870,  loss: 4.0960
step 880,  loss: 4.1050
step 890,  loss: 4.0985
step 900,  loss: 4.0953
step 910,  loss: 4.0867
step 920,  loss: 4.0879
step 930,  loss: 4.0860


INFO:faster_rcnn.utils.datasets.mscoco.dataset:need more than 0 values to unpack


step 940,  loss: 4.0815
[0 0 0 0 0 0 0]
step 950,  loss: 4.0793
step 960,  loss: 4.0769
step 970,  loss: 4.0796
step 980,  loss: 4.0700
step 990,  loss: 4.0659


INFO:faster_rcnn.utils.datasets.mscoco.dataset:need more than 0 values to unpack


step 1000,  loss: 4.0690
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
step 1010,  loss: 4.0599
step 1020,  loss: 4.0568


INFO:faster_rcnn.utils.datasets.mscoco.dataset:need more than 0 values to unpack


step 1030,  loss: 4.0506
step 1040,  loss: 4.0474
[0 0]
step 1050,  loss: 4.0446
step 1060,  loss: 4.0421
step 1070,  loss: 4.0400
step 1080,  loss: 4.0306
step 1090,  loss: 4.0284
step 1100,  loss: 4.0265
step 1110,  loss: 4.0207


INFO:faster_rcnn.utils.datasets.mscoco.dataset:need more than 0 values to unpack


step 1120,  loss: 4.0124
[0]
step 1130,  loss: 4.0078
step 1140,  loss: 4.0046
step 1150,  loss: 4.0001
step 1160,  loss: 3.9928


INFO:faster_rcnn.utils.datasets.mscoco.dataset:need more than 0 values to unpack


step 1170,  loss: 3.9852
[0 0 0]


INFO:faster_rcnn.utils.datasets.mscoco.dataset:need more than 0 values to unpack


step 1180,  loss: 3.9784
[0 0]
step 1190,  loss: 3.9721
step 1200,  loss: 3.9670
step 1210,  loss: 3.9589
step 1220,  loss: 3.9554
step 1230,  loss: 3.9538
step 1240,  loss: 3.9540
