In [1]:
import sys
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches

sys.path.append('..')

from dataset import Dataset, DatasetIndex, B, V
from detection_mnist import DetectionMnist
from faster_rcnn import FRCNNModel
%matplotlib inline

In [2]:
IMAGE_SHAPE = (128, 256)

In [3]:
ind = DatasetIndex(np.arange(1000))          # index for images
mnist = Dataset(ind, batch_class=DetectionMnist)   # Dataset with transform actions in NoisedMnist class
mnist.cv_split([0.9, 0.1])                      # divide it into train and test

In [33]:
import sys
import tensorflow as tf

sys.path.append('../task_03')

from dataset.dataset.models.tf.layers import conv_block
from dataset.dataset.models.tf import TFModel
from vgg import VGGModel

class FRCNNModel(TFModel):
    """LinkNet as TFModel"""
    def _build(self, inp1, inp2, *args, **kwargs):

        #n_classes = self.num_channels('masks')
        data_format = self.data_format('images')
        dim = self.spatial_dim('images')
        b_norm = self.get_from_config('batch_norm', True)

        conv = {'data_format': data_format}
        batch_norm = {'momentum': 0.1}

        kwargs = {'conv': conv, 'batch_norm': batch_norm}
        
        inp = inp2['images']
        with tf.variable_scope('FRCNN'): # pylint: disable=not-context-manager
            net = VGGModel.fully_conv_block(dim, inp, b_norm, 'VGG6', **kwargs)
            net = conv_block(dim, net, 512, 3, 'ca', **kwargs)
            reg = conv_block(dim, net, 4*9, 1, 'ca', **kwargs)
            cls = conv_block(dim, net, 1*9, 1, 'ca', **kwargs)

        reg = tf.reshape(reg, [-1, 4608, 4], name='RoI')
        cls = tf.reshape(cls, [-1, 4608], name='IoU')
        true_cls = tf.placeholder(tf.int32, shape = [None, 4608], name='proposal_targets')
        true_reg = tf.placeholder(tf.float32, shape = [None, 4608, 4], name='bbox_targets')
        
        loss = self.rpn_loss(reg, cls, true_reg, true_cls)
        loss = tf.identity(loss, name='loss')
        tf.losses.add_loss(loss)
    
    def rpn_loss(self, reg, cls, true_reg, true_cls):
        print(true_cls, cls)
        cls_loss = tf.nn.softmax_cross_entropy_with_logits(labels=true_cls, logits=cls)
        cls_loss = tf.reduce_mean(cls_loss, axis=-1)
        
        sums = tf.reduce_sum((true_reg - reg) ** 2, axis=-1)
        
        reg_mask = tf.cast(true_cls, dtype=tf.float32)

        reg_mask = tf.reshape(reg_mask, shape=[-1, 4608])

        reg_loss = sums * reg_mask
        reg_loss = tf.reduce_mean(reg_loss, axis=-1)
        
        loss = cls_loss + reg_loss
        loss = tf.reduce_mean(loss)
        return loss

In [34]:
placeholders_config = {
                       'images': {'shape': IMAGE_SHAPE + (1,),
                                 'dtype': 'float32',
                                 'data_format': 'channels_last',
                                 'name': 'reshaped_images'},
                       }

In [35]:
model_config = {'inputs': placeholders_config,
                'batch_norm': False,
                'optimizer': 'Adam'}

In [36]:
train_feed_dict = {'images': B('images'),
                   'proposal_targets': B('clsf'),
                   'bbox_targets': B('reg')}        

test_feed_dict = {'images': B('images'),
                   'proposal_targets': B('clsf'),
                   'bbox_targets': B('reg')}  

In [37]:
IMAGE_SIZE = 128

train_pp = (mnist.train.p
            .init_model('static', FRCNNModel, 'frcnn', config=model_config)
            .init_variable('loss_history', init_on_each_run=list)
            .load_images()
            .generate_multi_mnist(image_shape=IMAGE_SHAPE, max_dig=20)
            .create_anchors(IMAGE_SHAPE, (16,32))
            .create_bbox_batch(n_bboxes=10)
            .create_reg_cls()
            .param_reg()
            .train_model('frcnn', 
                         fetches='loss',
                         feed_dict=train_feed_dict,
                         save_to=V('loss_history'), 
                         mode='a'
            ))

Tensor("proposal_targets:0", shape=(?, 4608), dtype=int32) Tensor("IoU:0", shape=(?, 4608), dtype=float32)


In [38]:
test_pp = (mnist.test.p
            .import_model('frcnn', train_pp)
            .init_variable('RoI_predictions', init_on_each_run=list)
            .init_variable('IoU_predictions', init_on_each_run=list)
            .load_images()
            .generate_multi_mnist(image_shape=IMAGE_SHAPE, max_dig=20)
            .create_anchors(IMAGE_SHAPE, (16,32))
            .create_bbox_batch(n_bboxes=10)
            .create_reg_cls()
            .predict_model('frcnn', 
                           fetches=['RoI','IoU'],
                           feed_dict=test_feed_dict,
                           save_to=[V('RoI_predictions'), V('IoU_predictions')], mode='a'))

In [None]:
for i in range(10):
    train_pp.next_batch(100, shuffle=True)
    print(train_pp.get_variable('loss_history')[-1])

clsf: (100, 4608)
54.6595
clsf: (100, 4608)
52.2776
clsf: (100, 4608)


In [13]:
batch = test_pp.next_batch(10, shuffle=True)

In [17]:
batch.data.labels.shape

AttributeError: 'tuple' object has no attribute 'shape'

In [18]:
np.argmax(batch.data.labels[0], axis=-1)

array([9, 7, 9, 4, 3, 4, 1, 3, 4, 7], dtype=int64)

In [12]:
test_pp.get_variable('IoU_predictions')

[array([[[ 2.17222857,  0.        ],
         [ 1.29921365,  0.        ],
         [ 0.        ,  0.        ],
         ..., 
         [ 2.61857748,  0.        ],
         [ 2.27998948,  0.        ],
         [ 3.14285541,  0.        ]],
 
        [[ 2.17213798,  0.        ],
         [ 1.29915667,  0.        ],
         [ 0.        ,  0.        ],
         ..., 
         [ 2.6142087 ,  0.        ],
         [ 2.27471375,  0.        ],
         [ 3.13602495,  0.        ]],
 
        [[ 2.17213798,  0.        ],
         [ 1.29915667,  0.        ],
         [ 0.        ,  0.        ],
         ..., 
         [ 2.61446524,  0.        ],
         [ 2.27502561,  0.        ],
         [ 3.13647294,  0.        ]],
 
        ..., 
        [[ 2.17213798,  0.        ],
         [ 1.29915667,  0.        ],
         [ 0.        ,  0.        ],
         ..., 
         [ 2.6142087 ,  0.        ],
         [ 2.27471375,  0.        ],
         [ 3.13602495,  0.        ]],
 
        [[ 2.48891139,  0.

In [None]:
im = res.data.images[0]
bboxes = res.data.bboxes[0]

fig, ax = plt.subplots(1)

ax.imshow(im)

for bbox in bboxes:    
    rect = patches.Rectangle((bbox[1], bbox[0]), bbox[2], bbox[3] ,linewidth=1,edgecolor='r',facecolor='none')
    ax.add_patch(rect)
plt.show()

In [None]:
anchors = res.data.anchors.reshape((16*32*9,4))
bboxes_best = res.data.reg.reshape((20, 512 * 9,4))[0]
ious = res.data.clsf.reshape((20, 512*9, 2))[:, :, 1]
import time

for i in range(1000,1010):
    fig, ax = plt.subplots(1)

    ax.imshow(im)
    bbox1 = anchors[i]
    bbox2 = bboxes_best[i]
    for bbox in [bbox1, bbox2]:
        rect = patches.Rectangle((bbox[1], bbox[0]), bbox[3], bbox[2] ,linewidth=1,edgecolor='g',facecolor='none')
        ax.add_patch(rect)
    plt.show()
    print("IoU:", ious[0,i])
    time.sleep(0.01)