**Setting**

In [0]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

In [0]:
import os
os.chdir('/content/drive/My Drive/voc_test')

!pip install tensorflow==2.1.0

# Make Dataset

In [0]:
import pickle
import cv2
import numpy as np

NUM_CLASSES = 21
BATCH_SIZE = 8
IMAGE_SIZE = 224

# path
crack_pkl = './VOC/VOC2007.pkl'
IMAGE_PATH = './VOC/images/'

# .pkl에서 데이터를 불러옵니다.
gt = pickle.load(open(crack_pkl, 'rb'))

# gt의 key는 이미지 이름으로 이루어져 있습니다.
# gt의 value_list는 이미지에 존재하는 객체 수로 이루어져 있다.
# gt의 value는 총 24 길이로 이루어져 있는데,
# 앞의 첫 4개 인덱스는 xmin, xmax, ymin, ymax 좌표입니다.
# 나머지 20개는 클래스를 나타냅니다.
keys = sorted(gt.keys())

# 학습 및 검증 데이터를 8:2로 나누도록 하겠습니다.
num_train = int(round(0.8 * len(keys)))

# 3962
train_keys = keys[:num_train]
# 990
val_keys = keys[num_train:]

num_val = len(val_keys)

In [0]:
# 데이터셋 객체는 이미지(로드)와 레이블을 반환하도록 구성합니다.
# 먼저, 이미지 경로와 해당 값을 리스트에 저장해두도록 하겠습니다.
image_dir_list = list()
value_list = list()

for i in range(BATCH_SIZE):
  image_dir_list.append(train_keys[i])
  value_list.append(gt[train_keys[i]])

image_dir_list = np.array(image_dir_list)
value_list = np.array(value_list)

In [0]:
import tensorflow as tf

image_dir_ds = tf.data.Dataset.from_tensor_slices(image_dir_list)

def get_imageLabel(image_dir):
  image = tf.io.read_file(IMAGE_PATH + image_dir)
  image = tf.image.decode_jpeg(image)
  image = tf.image.convert_image_dtype(image, tf.float32)
  image = tf.image.resize(image, [224, 224])

  return image
  
image_dir_ds = image_dir_ds.map(get_imageLabel)

In [0]:
for i in image_dir_ds.take(1):
  print(i)

000001.jpg
tf.Tensor(
[[[0.         0.00784314 0.        ]
  [0.         0.00784314 0.        ]
  [0.         0.00784314 0.        ]
  ...
  [0.00784314 0.01568628 0.01176471]
  [0.00784314 0.01568628 0.01176471]
  [0.00225854 0.01182193 0.00790036]]

 [[0.         0.00784314 0.        ]
  [0.         0.00784314 0.        ]
  [0.         0.00784314 0.        ]
  ...
  [0.01582528 0.02366842 0.01974685]
  [0.01077799 0.01862112 0.01469956]
  [0.00130068 0.00914382 0.00522225]]

 [[0.         0.00784314 0.        ]
  [0.         0.00784314 0.        ]
  [0.         0.00784314 0.        ]
  ...
  [0.01805985 0.02527273 0.01382315]
  [0.0112843  0.01751523 0.01376951]
  [0.00982129 0.007029   0.00126291]]

 ...

 [[0.06534703 0.14953116 0.2963426 ]
  [0.055067   0.11640386 0.2497711 ]
  [0.14370213 0.20107439 0.3281971 ]
  ...
  [0.17512836 0.2965584  0.55971926]
  [0.17289834 0.28666675 0.55163544]
  [0.11623414 0.22536325 0.48612452]]

 [[0.13348113 0.19181946 0.34077775]
  [0.13535313 0

In [0]:
import matplotlib.pyplot as plt

img_list = None
value_list = None

for i in range(BATCH_SIZE):
    img = cv2.imread(os.path.join(IMAGE_PATH, train_keys[i]))
    img = np.array(img, dtype = np.float64)
    img = cv2.resize(img, (224, 224))
    img = np.expand_dims(img, axis = 0)
    
    values = np.expand_dims(gt[train_keys[i]], axis = 0)
    if img_list is None:
        img_list = img
    else:
        img_list = np.concatenate((img_list, img), axis = 0)
    
    if value_list is None:
        value_list = values
    else:
        value_list.append(np.array(values))

# Model define

In [0]:
import tensorflow as tf
tf.__version__

'2.0.0'

In [0]:
from tensorflow.keras.layers import Layer
from tensorflow.keras import backend as K
import tensorflow as tf

import numpy as np

class PriorBox(Layer):
    def __init__(self, s_min = None, s_max = None,
                 feature_map_number = None, num_box = None ,**kwargs):
        '''

        :param img_size:
        :param s_min:
        :param s_max:
        :param feature_map_number: [1, 2, 3, 4, 5, 6]
        '''

        self.default_boxes = []
        self.num_box = num_box
        if s_min <= 0:
            raise Exception('min_size must be positive')
        self.s_min = s_min
        self.s_max = s_max
        self.feature_map_number = feature_map_number
        self.aspect_ratio = [[1., 1/1, 2., 1/2],
                             [1., 1/1, 2., 1/2],
                             [1., 1/1, 2., 1/2, 3., 1/3],
                             [1., 1/1, 2., 1/2, 3., 1/3],
                             [1., 1/1, 2., 1/2, 3., 1/3],
                             [1., 1/1, 2., 1/2]]

        super().__init__(**kwargs)
    
    def build(self, input_shape):
        self.batch_size = input_shape[0]
        self.width = input_shape[2]
        self.height = input_shape[1]
        
        self.s_k = self.get_sk(self.s_max, self.s_min, 6, self.feature_map_number)
        self.s_k1 = self.get_sk(self.s_max, self.s_min, 6, self.feature_map_number + 1)  
        
        super(PriorBox, self).build(input_shape)

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[1] * input_shape[2], 4)
    
    @tf.function
    def call(self, x):
        feature_map_ratio = self.aspect_ratio[self.feature_map_number - 1]
        s = 0.0
        
        print(x.shape[0])

        default_boxes = None
        for eleh in range(self.height):
            center_y = (eleh + 0.5) / float(self.height)
            for elew in range(self.width):
                center_x = (elew + 0.5) / float(self.width)
                for ratio in feature_map_ratio:
                    s = self.s_k

                    if(ratio == 1.0):
                        s = np.sqrt(self.s_k * self.s_k1)

                    box_width = s * np.sqrt(ratio)
                    box_height = s / np.sqrt(ratio)
                    
                    if default_boxes is None:
                        default_boxes = np.array([center_x, center_y, box_width, box_height]).reshape(-1, 4)
                    else:
                        default_boxes = np.concatenate((default_boxes, np.array([[center_x, center_y, box_width, box_height]])), axis = 0)
        
        boxes_tensor = np.expand_dims(default_boxes, axis = 0)
        boxes_tensor = tf.tile(tf.constant(boxes_tensor, dtype='float32'), (tf.shape(x)[0], 1, 1))
        
        return boxes_tensor

    def get_sk(self, s_max, s_min, m, k):
        '''
        :param s_max:
        :param s_min:
        :param m: number of feature map
        :param k: k-th feature map
        :return:
        '''
        sk_value = s_min + ((s_max - s_min) / (m - 1.0)) * (k - 1)

        return sk_value

In [0]:
from tensorflow.keras.layers import Conv2D, DepthwiseConv2D, ZeroPadding2D
from tensorflow.keras.layers import BatchNormalization, Flatten, add, Activation, Concatenate
from tensorflow.keras.layers import Input, Reshape
from tensorflow.keras.models import Model
import tensorflow as tf

def _conv_block(input_tensor, s,
                c, n, t, stage):
    # s : strides
    # c : channel
    # n : iter
    # t : factor

    conv_name_base = 'res' + str(stage) + '_branch'
    bn_name_base = 'bn' + str(stage) + '_branch'
    x = None

    # Strides == 1 block
    if (s == 1):
        shortcut = None
        for i in range(n):
            x = Conv2D(c, (1, 1), kernel_initializer='he_normal', name=conv_name_base + '2a_' + str(i))(input_tensor)
            x = BatchNormalization(name=bn_name_base + '2a_' + str(i))(x)
            x = Activation('relu')(x)
            x = DepthwiseConv2D((3, 3), depth_multiplier=t, padding='same', name=conv_name_base + '2bdepth_' + str(i))(
                x)
            x = BatchNormalization(name=bn_name_base + '2b_' + str(i))(x)
            x = Activation('relu')(x)
            x = Conv2D(c, (1, 1), kernel_initializer='he_normal', name=conv_name_base + '2c_' + str(i))(x)
            x = BatchNormalization(name=bn_name_base + '2c_' + str(i))(x)
            x = Activation('linear')(x)

            if (shortcut is None):
                shortcut = Conv2D(c, (1, 1), strides=s, padding='same', kernel_initializer='he_normal',
                                  name=conv_name_base + '1_' + str(i))(input_tensor)
            else:
                shortcut = Conv2D(c, (1, 1), strides=s, padding='same', kernel_initializer='he_normal',
                                  name=conv_name_base + '1_' + str(i))(x)

            x = add([x, shortcut], name='c_add_' + str(stage) + '_' + str(i))
    # Strides == 2 block
    elif (s == 2):
        for i in range(n):
            x = Conv2D(c, (1, 1), kernel_initializer='he_normal', name=conv_name_base + '2a_' + str(i))(input_tensor)
            x = BatchNormalization(name=bn_name_base + '2a_' + str(i))(x)
            x = Activation('relu')(x)
            x = DepthwiseConv2D((3, 3), strides=s, depth_multiplier=t, padding='same',
                                name=conv_name_base + '2bdepth_' + str(i))(x)
            x = BatchNormalization(name=bn_name_base + '2b_' + str(i))(x)
            x = Activation('relu')(x)
            x = Conv2D(c, (1, 1), kernel_initializer='he_normal', name=conv_name_base + '2c_' + str(i))(x)
            x = BatchNormalization(name=bn_name_base + '2c_' + str(i))(x)
            x = Activation('linear')(x)
    return x

def _SSD_Conv_fc(x, filter, kernel_size, strides = (1, 1)):
    net = Conv2D(filter, kernel_size = kernel_size, strides = strides)(x)
    x = BatchNormalization()(net)
    x = Activation('relu')(x)

    print(x.shape, 'SSD_Conv_fc')

    return x, net

def _SSD_Conv(x, filter, kernel_size, strides):
    x = Conv2D(filter // 2, kernel_size = (1, 1))(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = DepthwiseConv2D(kernel_size=kernel_size, strides = strides, padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    net = Conv2D(filter, (1, 1), padding='same')(x)
    x = BatchNormalization()(net)
    x = Activation('relu')(x)

    print(x.shape, 'SSD_Conv')

    return x, net

def _detections(x, feature_map_num, bbox_num, min_s, max_s, num_classes):
    # bbox location
    mbox_loc = Conv2D(bbox_num * 4, (3, 3), padding='same', name=str(feature_map_num) + '_mbox_loc')(x)
    mbox_loc_flat = Flatten(name = str(feature_map_num) + '_mbox_loc_flat')(mbox_loc)

    # class confidence
    mbox_conf = Conv2D(bbox_num * num_classes, (3, 3), padding = 'same', name = str(feature_map_num) + '_mbox_conf')(x)
    mbox_conf_flat = Flatten(name = str(feature_map_num) + '_mbox_conf_flat')(mbox_conf)

    # Anchor box candidate
    mbox_priorbox = PriorBox(min_s, max_s, feature_map_num, bbox_num, name = str(feature_map_num) + 'mbox_prior_box')(x)

    return mbox_loc_flat, mbox_conf_flat, mbox_priorbox

def SSD(input_shape, num_classes):
    img_input = Input(shape=input_shape)

    # MobileNetV2
    # init
    print('init')
    x = Conv2D(32, kernel_size=(3, 3), strides=(2, 2), padding='same', 
               kernel_initializer='he_normal', name='conv1')(img_input)  # (112, 112, 32)

    mobile_conv1 = _conv_block(x, c=16, s=1, n=1, t=1, stage=2)  # (112, 112, 16)
    mobile_conv2 = _conv_block(mobile_conv1, c=24, s=2, n=2, t=6, stage=3)  # (56, 56, 24)
    mobile_conv3 = _conv_block(mobile_conv2, c=32, s=2, n=3, t=6, stage=4)  # (28, 28, 32)
    mobile_conv4 = _conv_block(mobile_conv3, c=64, s=2, n=3, t=6, stage=5)  # (14, 14, 64)
    mobile_conv5 = _conv_block(mobile_conv4, c=160, s=1, n=4, t=6, stage=6)  # (14, 14, 96)
    mobile_conv6 = _conv_block(mobile_conv5, c=160, s=2, n=3, t=6, stage=7)  # (7, 7, 160)
    mobile_conv7 = _conv_block(mobile_conv6, c=320, s=1, n=1, t=6, stage=8)  # (7, 7, 320)

    fc6, fc6_for_feature = _SSD_Conv_fc(mobile_conv7, 1024, kernel_size = (3, 3), strides=(2, 2))
    fc7, fc7_for_feature = _SSD_Conv_fc(fc6, 1024, kernel_size= (1, 1))
    conv8_2, conv8_2_for_feature = _SSD_Conv(fc7, 512, kernel_size=(3, 3), strides = (2, 2))
    conv9_2, conv9_2_for_feature = _SSD_Conv(conv8_2, 512, kernel_size=(3, 3), strides = (1, 1))
    conv10_2, conv10_2_for_feature = _SSD_Conv(conv9_2, 512, kernel_size=(3, 3), strides = (1, 1))

    clf1_mbox_loc_flat, clf1_mbox_conf_flat, clf1_mbox_priorbox = _detections(mobile_conv4, 1, 4, 0.2, 0.9, num_classes)
    clf2_mbox_loc_flat, clf2_mbox_conf_flat, clf2_mbox_priorbox = _detections(fc6_for_feature, 2, 4, 0.2, 0.9, num_classes)
    clf3_mbox_loc_flat, clf3_mbox_conf_flat, clf3_mbox_priorbox = _detections(fc7_for_feature, 3, 6, 0.2, 0.9, num_classes)
    clf4_mbox_loc_flat, clf4_mbox_conf_flat, clf4_mbox_priorbox = _detections(conv8_2_for_feature, 4, 6, 0.2, 0.9, num_classes)
    clf5_mbox_loc_flat, clf5_mbox_conf_flat, clf5_mbox_priorbox = _detections(conv9_2_for_feature, 5, 6, 0.2, 0.9, num_classes)
    clf6_mbox_loc_flat, clf6_mbox_conf_flat, clf6_mbox_priorbox = _detections(conv10_2_for_feature, 6, 4, 0.2, 0.9, num_classes)

    mbox_loc = Concatenate(axis = 1, name = 'mbox_loc')([clf1_mbox_loc_flat, clf2_mbox_loc_flat,
                            clf3_mbox_loc_flat, clf4_mbox_loc_flat,
                            clf5_mbox_loc_flat, clf6_mbox_loc_flat])
    mbox_conf = Concatenate(axis = 1, name = 'mbox_conf')([clf1_mbox_conf_flat, clf2_mbox_conf_flat,
                                                           clf3_mbox_conf_flat, clf4_mbox_conf_flat,
                                                           clf5_mbox_conf_flat, clf6_mbox_conf_flat])
    mbox_priorbox = Concatenate(axis = 1, name = 'mbox_priorbox')([clf1_mbox_priorbox, clf2_mbox_priorbox,
                                                                   clf3_mbox_priorbox, clf4_mbox_priorbox,
                                                                   clf5_mbox_priorbox, clf6_mbox_priorbox])
    
    print('Brfore Reshape', mbox_loc.shape, mbox_conf.shape, mbox_priorbox.shape)
    
    num_boxes = mbox_loc.shape[-1] // 4

    mbox_loc = Reshape((num_boxes, 4), name = 'mbox_loc_final')(mbox_loc)
    mbox_conf = Reshape((num_boxes, num_classes), name = 'mbox_conf_logits')(mbox_conf)
    mbox_conf = Activation('softmax', name = 'mbox_conf_final')(mbox_conf)
    print('After Reshape', mbox_loc.shape, mbox_conf.shape, mbox_priorbox.shape)

    predictions = Concatenate(axis = 2, name = 'predictions')([mbox_loc, mbox_conf, mbox_priorbox])
    print('predictions shape ', predictions.shape)

    model = Model(inputs = img_input, outputs = predictions)

    return model

# Make Model

In [0]:
input_shape = (224, 224, 3)
model = SSD(input_shape, num_classes = 21)

init
(None, 3, 3, 1024) SSD_Conv_fc
(None, 3, 3, 1024) SSD_Conv_fc
(None, 2, 2, 512) SSD_Conv
(None, 2, 2, 512) SSD_Conv
(None, 2, 2, 512) SSD_Conv
None
None
None
None
None
None
Brfore Reshape (None, 3752) (None, 19698) (None, 938, 4)
After Reshape (None, 938, 4) (None, 938, 21) (None, 938, 4)
predictions shape  (None, 938, 29)


# computation

In [0]:
import numpy as np


def intersection(rect1, rect2):
    """
    intersecton of units
    compute boarder line top, left, right and bottom.
    rect is defined as [ top_left_x, top_left_y, width, height ]
    """
    top = np.max(rect1[1], rect2[1])
    left = np.max(rect1[0], rect2[0])
    right = np.min(rect1[0] + rect1[2], rect2[0] + rect2[2])
    bottom = np.min(rect1[1] + rect1[3], rect2[1] + rect2[3])

    result = tf.where(tf.math.logical_and(tf.greater(bottom, top), tf.greater(right, left)), (bottom-top)*(right-left), 0)

    return result


def jaccard(rect1, rect2):
    """
    Jaccard index.
    Jaccard index is defined as #(A∧B) / #(A∨B)
    
    len_rect1 : 4
    len_rect2 : 4
    
    """
    
    # len_rect1_ : 4, len_rect2_ : 4
    rect1_ = []
    for i in range(len(rect1)):
        cond_value = tf.where(rect1[i] >= 0, rect1[i], 0)
        rect1_.append(cond_value)
            
    rect2_ = []
    for i in range(len(rect2)):
        cond_value = tf.where(rect2[i] >= 0, rect2[i], 0)
        rect2_.append(cond_value)
    
    s = tf.add(tf.multiply(rect1_[2], rect1_[3]), tf.multiply(rect2_[2], rect2_[3]))

    # rect1 and rect2 => A∧B
    intersect = intersection(rect1_, rect2_)

    # rect1 or rect2 => A∨B
    union = s - intersect

    # A∧B / A∨B
    return tf.divide(intersect, union)


def corner2center(rect):
    """
    rect is defined as [ top_left_x, top_left_y, width, height ]
    """
    center_x = (2 * rect[0] + rect[2]) * 0.5
    center_y = (2 * rect[1] + rect[3]) * 0.5

    return tf.stack([center_x, center_y, abs(rect[2]), abs(rect[3])])


def center2corner(rect):
    """
    rect is defined as [ top_left_x, top_left_y, width, height ]
    """
    corner_x = rect[0] - rect[2] * 0.5
    corner_y = rect[1] - rect[3] * 0.5

    return tf.stack([corner_x, corner_y, tf.math.abs(rect[:, 2]), tf.amth.abs(rect[:, 3])])


def convert2diagonal_points(rect):
    """
    convert rect format
    Args:
        input format is...
        [ top_left_x, top_left_y, width, height ]
    Returns:
        output format is...
        [ top_left_x, top_left_y, bottom_right_x, bottom_right_y ]
    """
    return [rect[0], rect[1], rect[0]+rect[2], rect[1]+rect[3]]


def convert2wh(rect):
    """
    convert rect format
    Args:
        input format is...
        [ top_left_x, top_left_y, bottom_right_x, bottom_right_y ]
    Returns:
        output format is...
        [ top_left_x, top_left_y, width, height ]
    """
    result = tf.stack([rect[0], rect[1], rect[2] - rect[0], rect[3]-rect[1]])
    
    return result

# MultiBoxLoss

In [0]:
import tensorflow as tf
import numpy as np

from tensorflow.keras.utils import to_categorical

class MultiboxLoss(object):
    '''
        loss func defiend as Loss = (Loss_conf + a * Loss_loc) / N
        need for total loss.

        Need list:
            confidence loss
            location loss
            positive list
            negative list
    '''

    def __init__(self, batch_size):
        self.batch_size = batch_size

    # bbox 의 loc에 대한 loss 계산
    def _smooth_L1_Loss(self, y_true, y_pred, pos):
        abs_loss = tf.abs(y_true - y_pred)
        sq_loss = 0.5 * (y_true - y_pred) ** 2
        l1_loss = tf.where(tf.less(abs_loss, 1.0), sq_loss, abs_loss - 0.5)

        # shape : [?, num_boxes, 4] -> [?, num_boxes]
        return tf.reduce_sum(l1_loss, axis=-1) * pos

    # bbox의 class에 대한 loss 계산
    def _softmax_Loss(self, y_true, y_pred, pos, neg):
        y_pred = tf.maximum(tf.minimum(y_pred, 1 - 1e-15), 1e-15)

        # positive는 IOU를 합격한 박스 1 또는 0
        # 맞춘 만큼 loss값을 감소시킴.
        pos_loss = (tf.log(tf.exp(y_pred) / (tf.reduce_sum(tf.exp(y_pred), axis=-1))))

        # IOU는 불합격했지만, 클래스가 있을 확률이 높은 박스 1 또는 0
        # 맞춘 만큼 loss를 감소시킴
        neg_loss = tf.log(y_pred)

        softmax_loss = -tf.reduce_sum((y_true * (pos_loss + neg_loss)), axis = - 1) * (pos + neg)

        return softmax_loss

    # total_loss 계산
    def comute_loss(self, y_true, y_pred):
        """ Compute multibox loss
        # Arguments
            @y_true:
                tensor of shape (?, num_object, 4 + 4) -> [?, ?, 8]
            @y_pred:
                tensor of shape(?, num_boxes, 4 + num_classes(4) + 4)

            @class_num = 4

            @configration of y_pred + y_true:
                y_pred[:, :, :4]:
                    bbox_loc
                y_pred[:, :, 4:8]:
                    class_confidence
                y_pred[:, :, 10:]:
                    mbox_priorbox(cx, cy, w, h)
        """
        # y_true class : <class 'tensorflow.python.framework.ops.Tensor'>
        default_boxes = y_pred[:, :, -4:] # default_boxes shape : (8, 938, 4) --> 8 is batch_size
        
        positives = []
        negatives = []
        ex_gt_labels = []
        ex_gt_boxes = []
        
        num_boxes = y_pred.shape[1]
        matcher = Matcher(num_boxes, default_boxes)
        print('make Matcher=======================')
        actual_locs = []
        actual_labels = []
        for i in range(self.batch_size):
          # y_true[i][:, :, :4], [-1, 4] --> (i, num_box, 4)
          # tf.reshape(y_true[i][:, :, :4], [-1, 4]) --> (num_box, 4)
          locs = tf.reshape(y_true[i][:, :, :4], [-1, 4])
          labels = tf.argmax(y_true[i][:, :, 4:], axis = -1) # (1, num_box)
          labels = tf.reshape(labels, [-1]) # (num_box, )
          for i in range(len(locs)):
            loc = locs[i] # loc : Tensor("TensorArrayV2Read/TensorListGetItem:0", shape=(4,), dtype=float32)
            loc = convert2wh(loc)
            loc = corner2center(loc) # Tensor("PartitionedCall_1:0", shape=(4,), dtype=float32)
            actual_locs.append(loc)
                
          for i in range(len(labels)):
            label = labels[i]
            actual_labels.append(labels)
            
          pred_locs = y_pred[i][:, :4] # <class 'tensorflow.python.framework.ops.Tensor'> (938, 4)
          pred_confs = y_pred[i][:, 4:-4] # <class 'tensorflow.python.framework.ops.Tensor'> (938, 25)
          
          print('go in matcher.matching#####')
          pos_list, neg_list, t_gtl, t_gtb = matcher.matching(pred_confs, pred_locs, actual_labels, actual_locs, i)
          positives.append(pos_list) # (?, default_box_num_pos)
          negatives.append(neg_list) # (?, default_box_num_neg)
          ex_gt_labels.append(t_gtl) # (?, default_box_num_label)
          ex_gt_boxes.append(t_gtb)  # (?, default_box_num, loc)

        ex_gt_labels_to_categorical = to_categorical(ex_gt_labels)
        # 클래스에 대한 손실 함수
        conf_loss = self._softmax_Loss(ex_gt_labels_to_categorical,
                                       y_pred[:, :, 4:8],
                                       positives, negatives)  # [?, 984]

        # 박스 위치에 대한 손실 함수
        loc_loss = self._smooth_L1_Loss(y_true[:, :, :4],
                                        y_pred[:, :, 4], positives)  # [?, 984]

        total_loss = tf.reduce_sum(conf_loss + loc_loss)

        return total_loss

# Matching

In [0]:
classes = 21

In [0]:
import numpy as np

class Matcher:
    
    def __init__(self, num_boxes, default_boxes):
        """
                initializer require feature-map shapes and default boxes
                Args:
                    fmap_shapes: feature-map's shape
                    default_boxes: generated default boxes
                """
        self.num_boxes = num_boxes
        self.default_boxes = default_boxes
    
    def matching(self, pred_confs, pred_locs, actual_labels, actual_locs, batch_size):
        """
                match default boxes and bouding boxes.
                matching computes pos and neg count for the computation of loss.
                now, the most noting point is that it is not important that
                whether class label is correctly predicted.
                class label loss is evaled by loss_conf
                matches variable have some Box instance and most of None.
                if jaccard >= 0.5, that matches box has Box(gt_loc, gt_label).
                then, sort by pred_confs loss and extract 3*pos boxes, which they
                have Box([], classes) => background.
                when compute losses, we need transformed ground truth labels and locations
                because each box has self confidence and location.
                so, we should prepare expanded labels and locations whose size is as same as len(matches).
                Args:
                    pred_confs: predicated confidences
                    pred_locs: predicated locations
                    actual_labels: answer class labels
                    actual_locs: answer box locations
                Returns:
                    postive_list: if pos -> 1 else -> 0
                    negative_list: if neg and label is not classes(not unknown class) 1 else 0
                    expanded_gt_labels: gt_label if pos else classes
                    expanded_gt_locs: gt_locs if pos else [0, 0, 0, 0]
        """
        self.pos = 0
        self.neg = 0
        pos_list = []
        neg_list = []
        expanded_gt_labels = []
        expanded_gt_locs = []
        
        matches = []
        matched = []
        # pred_confs.shsape, pred_locs.shape, len(actual_locs), len(actual_labels)
        # (938, 21) (938, 4) 2 2
        print('In Matcher1!===============================')
        for _ in range(self.num_boxes):
            matches.append(None) # len is 938
            
        
        
        test_int = 0
        for gt_label, gt_box in zip(actual_labels, actual_locs):
          for i in range(len(matches)):
            # jacc : Tensor("PartitionedCall:0", shape=(), dtype=float64) () <class 'tensorflow.python.framework.ops.Tensor'>
            jacc = jaccard(gt_box, self.default_boxes[batch_size, i]) # self.default_boxes[batch_size, i] -> (4, )
            if(tf.math.greater_equal(jacc, 0.5)):
              matches[i] = 4
              self.pos += 1
              matched.append(gt_label)

        print('In Matcher2!===============================')

        # neg, pos 비율 값
        neg_pos = 5

        max_length = tf.multiply(neg_pos, self.pos)

        loss_confs = []
        loss_conf_cnt = 0

        # pred_confs.shape : (938, 21)
        # 각 default box의 confidence에 대해서
        for i in range(pred_confs.get_shape()[0]):
            pred_conf = pred_confs[i] # (21, )
            # 각 예측 값을 소프트 맥스함수를 통해 해당 클래스 인덱스로 치환한다.
            # (num_box, num_class) -> (num_box, )
            pred = tf.reduce_max(tf.divide(tf.math.exp(pred_conf), (tf.reduce_sum(tf.math.exp(pred_conf)) + 1e-5))) # (), Tensor
            loss_confs.append(pred)
            loss_conf_cnt += 1
            
        # loss_conf_cnt : 938, int
        # max_length : tf.multimul(:)
        
        size = tf.math.minimum(loss_conf_cnt, neg_pos * self.pos) # Tensor("Minimum:0", shape=(), dtype=int32)
        
        # TopKV2(values=<tf.Tensor 'TopKV2:0' shape=(None,) dtype=float64>, indices=<tf.Tensor 'TopKV2:1' shape=(None,) dtype=int32>)
        indices = tf.math.top_k(loss_confs, size) # class : <clas`s 'tensorflow.python.ops.gen_nn_ops.TopKV2'>
        indice_values = indices[1]
        print(indice_values.__class__, indice_values.dtype, indice_values.shape, indice_values)
        print('In Matcher3!===============================')

        for i in range(indice_values.get_shape()[0]):
          # i : 0 ~
          temp_index = indice_values[i]
          print(temp_index, 'temp_index')

          # negative를 적당히 사용해야 되는데,
          # positive * neg_pos 비율보다 높으면 좋지 않으므로 break 시킨다.
          if self.neg > self.pos * neg_pos:
              break

          maches_index = tf.gather(matches, temp_index)
          pred_confs_index = tf.gather(pred_confs, temp_index)
          pred_conf = tf.argmax(pred_confs_index)

          # classes - 1은 배경을 의미함.
          # 박스가 안겹치면서 배경이 아닌 경우
          # False Negative -> 박스가 없다고 판단했지만 객체가 있을 수도 있는 부분 -하종우
          if(matches_index is None and (classes - 1 != pred_conf)):
              matches[temp_index] = 1
              self.neg += 1
                
        print('In Matcher4!===============================')

        # matches는 None이거나 Box instance가 들어있는 array이다.
        for box in matches:
            # 박스가 없으면
            if box is None:
                pos_list.append(0)
                neg_list.append(0)
                expanded_gt_labels.append(classes - 1)
                expanded_gt_locs.append([0] * 4)
            # False Negative 부분
            elif(0 == len(box.loc)):
                pos_list.append(0)
                neg_list.append(1)
                expanded_gt_labels.append(classes - 1)
                expanded_gt_locs.append([0] * 4)
            # 박스가 존재한다면
            else:
                pos_list.append(1)
                neg_list.append(0)
                expanded_gt_labels.append(box.index)
                expanded_gt_locs.append(box.loc)

        return pos_list, neg_list, expanded_gt_labels, expanded_gt_locs

# Run

In [0]:
tf.keras.backend.set_floatx('float32')

In [0]:
test_pred = model(img_list) # [batch_size, 938, 29]
test_pred = tf.cast(test_pred, tf.float64)



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

8
8
8
8
8
8


In [0]:
loss_fn = MultiboxLoss(NUM_CLASSES)

with tf.GradientTape() as tape:
    logits = model(img_list)
    
    loss = loss_fn.comute_loss(value_list, test_pred.numpy())

go in matcher.matching#####


AxisError: ignored

# 테스트



<tf.Tensor: id=90638, shape=(), dtype=float64, numpy=0.0>

In [0]:
import numpy as np

@tf.function
def intersection(rect1, rect2):
    print('intersection==================================')
    top = tf.math.maximum(rect1[1], rect2[1])
    left = tf.math.maximum(rect1[0], rect2[0])
    right = tf.math.minimum(rect1[0] + rect1[2], rect2[0] + rect2[2])
    bottom = tf.math.minimum(rect1[1] + rect1[3], rect2[1] + rect2[3])

    result = tf.where(tf.math.logical_and(tf.greater(bottom, top), tf.greater(right, left)), (bottom-top)*(right-left), 0)

    return result

@tf.function
def jaccard(rect1, rect2):
    # len_rect1_ : 4, len_rect2_ : 4
    rect1_ = []
    for i in range(len(rect1)):
        cond_value = tf.where(rect1[i] >= 0, rect1[i], 0)
        rect1_.append(cond_value)
            
    rect2_ = []
    for i in range(len(rect2)):
        cond_value = tf.where(rect2[i] >= 0, rect2[i], 0)
        rect2_.append(cond_value)
    
    s = tf.add(tf.multiply(rect1_[2], rect1_[3]), tf.multiply(rect2_[2], rect2_[3]))
    # rect1 and rect2 => A∧B
    intersect = intersection(rect1_, rect2_)
    # rect1 or rect2 => A∨B
    union = s - intersect
    # A∧B / A∨B
    return tf.divide(intersect, union)

@tf.function
def test():
  number = 0
  
  jacc = jaccard([10, 10, 10, 10], [20, 20, 20, 20])
  
  for i in range(10):
    if(tf.math.greater_equal(jacc, 0.5)):
      number += 1
  
  print(number)
  print(number * 2) # Tensor("mul:0", shape=(), dtype=int32)
  
  # number을 이용해서 여러 연산을 하려고 합니다. 
  # 그런데 Tensor 형태라서 제한이 있습니다. 어떻게 해야하나요?
  for k in range(number * 2):
    print(k) # Tensor("placeholder:0", shape=(), dtype=int32)
    
test()

Tensor("cond_9/Identity:0", shape=(), dtype=int32)
Tensor("mul:0", shape=(), dtype=int32)
Tensor("placeholder:0", shape=(), dtype=int32)


In [0]:
@tf.function
def test():
  number = 0
  
  for i in range(10):
    for j in range(2):
      if(tf.math.greater_equal(1., 0.5)):
        number += 1
        
  print(number)
test()

Tensor("cond_19/Identity:0", shape=(), dtype=int32)


In [0]:
@tf.function
def test():
  a = tf.constant([1, 2, 3, 4])
  
  matches = []
  # pred_confs.shsape, pred_locs.shape, len(actual_locs), len(actual_labels)
  # (938, 21) (938, 4) 2 2
  print('In Matcher1!===============================')
  for _ in range(10):
      matches.append(999) # len is 938
  
  loss_conf_cnt = 0
  neg_pos = 5
  reduce_max_list = []
  for i in range(10):
    reduce_max_list.append(tf.reduce_max(a))
    loss_conf_cnt += 1
    
  size = tf.math.minimum(2, loss_conf_cnt)
  
  # TopKV2(values=<tf.Tensor 'TopKV2:0' shape=(2,) dtype=int32>, 
  # indices=<tf.Tensor 'TopKV2:1' shape=(2,) dtype=int32>)
  test_top_k = tf.math.top_k(reduce_max_list, size)
  
  # test_top_k[1].get_shape()[0] : 2
  for i in range(test_top_k[1].get_shape()[0]):
    temp_index = test_top_k[1][i]
    print(temp_index) # Tensor("strided_slice:0", shape=(), dtype=int32)
    
    print(tf.gather(matches, temp_index))
    
    

test()

In [0]:
indices

In [0]:
@tf.function
def test():
  
  tf.print('aaaa')
  a = tf.constant(938)
  b = tf.multiply(a, 2)
  test()
  
  if(a > b):
    tf.print('aa')
  else:
    tf.print('bb')

test()

In [0]:
a = 0
tf.where(True, )