# BB regression
## Custom dataset

https://towardsdatascience.com/getting-started-with-bounding-box-regression-in-tensorflow-743e22d0ccb3

### loading

In [23]:
import os
import glob
import numpy as np

from PIL import Image, ImageDraw


# Side length (in pixels) that every training image is resized to.
input_dim = 228

# Filled by the loading cell below with one normalised array per image.
images = []
# glob returns matches in arbitrary, filesystem-dependent order; sorting makes
# the image order deterministic across runs and machines.
image_paths = sorted(glob.glob('training_images/*.jpg'))

In [24]:
# Start from an empty list so re-running this cell does not append duplicates.
images = []
for imagefile in image_paths:
    # Image.open is lazy and keeps the file open; the context manager releases
    # the handle once the pixel data has been copied into the resized image.
    with Image.open(imagefile) as raw_image:
        # Force three channels so grayscale/CMYK JPEGs produce the same array
        # shape as RGB ones, then resize to the square network input size.
        image = raw_image.convert('RGB').resize((input_dim, input_dim))
    # Scale pixel values from [0, 255] to [0.0, 1.0].
    images.append(np.asarray(image) / 255.0)

### process the XML annotations
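- The annotations are in PASCAL VOC format: one XML file per image that stores, for each labelled object, its class `name` and its bounding box (`xmin`, `ymin`, `xmax`, `ymax`) in pixels.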
- use the ```xmltodict``` package to transform the XML files to a dict



- 아 미리 labelIMG를 통해서 xml 파일을 만든 후, 불러오는 거구나

- bboxes: bounding box가 있는 위치
- classes_raw: 해당 이미지의 클래스

In [62]:
import xmltodict
import os

bboxes = []
classes_raw = []
# Derive each annotation path from its image path so that bboxes[i] and
# classes_raw[i] always describe the image loaded from image_paths[i]; two
# independent glob() calls give no guarantee of matching order.
annotations_paths = [os.path.splitext(path)[0] + '.xml' for path in image_paths]
for xmlfile in annotations_paths:
    # Close the file as soon as it has been parsed.
    with open(xmlfile, 'rb') as xml_handle:
        x = xmltodict.parse(xml_handle)

    obj = x['annotation']['object']
    # xmltodict returns a list when the file contains several <object> tags;
    # only one box per image is used here, so keep the first one.
    if isinstance(obj, list):
        obj = obj[0]

    # The coordinates are stored as strings in the parsed dict.
    bndbox = obj['bndbox']
    corners = np.array([
        int(bndbox['xmin']),
        int(bndbox['ymin']),
        int(bndbox['xmax']),
        int(bndbox['ymax']),
    ])

    # NOTE(review): dividing by input_dim only yields [0, 1] coordinates when the
    # annotated images are input_dim pixels wide and tall - confirm for this dataset.
    bboxes.append(corners / input_dim)
    classes_raw.append(obj['name'])

### bboxes 와 classes_raw 변수를 하나로 합쳐서 Final training & testing dataset 생성

- Label: 크기(vector7), [x_min, y_min, x_max, y_max, one_hot_encoding(3) ]

In [69]:
# Inspect the first label vector. Y is built in the next code cell, so that cell must be run first.
Y[0]

array([0.10087719, 0.18421053, 0.90350877, 0.87280702, 1.        ,
       0.        , 0.        ])

In [65]:
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split

# Every image needs exactly one box and one class name; fail early with a clear
# message instead of letting a mismatch surface inside train_test_split.
assert len(images) == len(bboxes) == len(classes_raw), \
    'images and annotations are misaligned'

boxes = np.array( bboxes )
encoder = LabelBinarizer()
classes_onehot = encoder.fit_transform( classes_raw )

# Label vector per image: the four box coordinates followed by the encoded class.
Y = np.concatenate( [boxes, classes_onehot ], axis=1)
X = np.array( images )

# Fixed seed so the train/test split is the same on every run.
x_train, x_test, y_train, y_test = train_test_split( X, Y, test_size= 0.1, random_state=42)

### Creating Keras model

##### 모델을 만들기 전에 . . .
- custom loss 함수와 평가 지표를 정의해준다.
- loss function은 MSE와 IOU 을 사용한다.
- 평가지표 또한 IOU score를 accuracy로 사용한다.

$$ L(x, x') = MSE(x, x') + (1 - IOU(x, x')) $$

- IOU

![image.png](attachment:image.png)
- 저자가 예시로 만들어낸 로스함수이다.


In [78]:
import tensorflow as tf
import tensorflow.keras as keras
import tensorflow.keras.backend as K

In [164]:
# Hyper-parameters shared by the model definition.
alpha = 0.2          # slope coefficient passed to every LeakyReLU layer
dropout_rate = 0.5   # NOTE(review): not referenced by any visible cell - confirm it is still needed
input_shape = (input_dim, input_dim, 3)  # square image with 3 channels

def calculate_iou( target_boxes, pred_boxes ):
    """Intersection-over-union of axis-aligned boxes.

    Both arguments are indexed along their last axis as
    [x_min, y_min, x_max, y_max, ...]. Only the first four entries are read,
    so full label vectors with trailing class scores can be passed as they are.

    Args:
        target_boxes: ground-truth boxes.
        pred_boxes: predicted boxes.

    Returns:
        A tensor holding one IoU value per pair of boxes.
    """
    # Corners of the intersection rectangle.
    xA = K.maximum( target_boxes[ ..., 0], pred_boxes[ ..., 0])  # x_min
    yA = K.maximum( target_boxes[ ..., 1], pred_boxes[ ..., 1])  # y_min
    xB = K.minimum( target_boxes[ ..., 2], pred_boxes[ ..., 2])  # x_max
    yB = K.minimum( target_boxes[ ..., 3], pred_boxes[ ..., 3])  # y_max

    # Clamp at zero so boxes that do not overlap contribute no intersection area.
    interArea = K.maximum( 0.0, xB - xA) * K.maximum( 0.0, yB - yA)

    # boxA is the ground-truth box, boxB is the predicted box.
    boxAArea = (target_boxes[ ..., 2] - target_boxes[ ..., 0]) * (target_boxes[ ... , 3] - target_boxes[ ... , 1])
    boxBArea = (pred_boxes[ ... , 2] - pred_boxes[ ... , 0]) * (pred_boxes[ ... , 3] - pred_boxes[ ... , 1])

    # The intersection is counted once in each box area, so subtract it once to
    # get the union. The epsilon avoids a 0/0 = NaN result (which would poison
    # the loss) when both boxes are degenerate and the union area is zero.
    iou = interArea / (boxAArea + boxBArea - interArea + K.epsilon())

    return iou

def custom_loss( y_true, y_pred ):
    """Training loss: MSE(y_true, y_pred) + (1 - IoU(y_true, y_pred)).

    The MSE term is taken over the whole label vector, while the IoU term is
    computed by calculate_iou from the first four (box) entries.
    """
    regression_term = tf.losses.mean_squared_error( y_true, y_pred )
    # The more the two boxes overlap, the smaller this penalty becomes.
    overlap_penalty = 1 - calculate_iou( y_true, y_pred )
    return regression_term + overlap_penalty

# score metric?
def iou_metric( y_true, y_pred ):
    """Metric passed to model.compile: the IoU between true and predicted boxes."""
    iou = calculate_iou( y_true, y_pred )
    return iou

In [165]:
# Sanity check of calculate_iou on a single held-out sample.
# NOTE: relies on `model`, which is created in a later cell of this notebook,
# so this cell only works after that cell has been run.
sample_index = 7
separator = '=============='

# Add a leading batch axis because model.predict expects a batch of images.
y_pred = model.predict(x_test[sample_index][np.newaxis, ...])
y_true = y_test[sample_index]

print(y_pred)
print(separator)
print(y_true)
print(separator)

calculate_iou(y_true, y_pred)

[[0.11073848 0.16060784 0.7712057  0.69288087 0.21294107 0.28534427
  0.26936674]]
[0.24122807 0.20175439 0.63157895 0.93859649 0.         0.
 1.        ]


<tf.Tensor: id=16391, shape=(1,), dtype=float64, numpy=array([0.42844017])>

### Create CNN model

- conv2D layer 쌓은다음, flatten

In [81]:
num_classes = 3
# Output vector: 4 box coordinates followed by one score per class.
pred_vector_length = 4 + num_classes


def _conv_block( filters, **first_conv_kwargs ):
    """Return two (3x3 Conv2D + LeakyReLU) pairs followed by 2x2 max-pooling.

    first_conv_kwargs is forwarded to the first Conv2D only; it is used to
    hand input_shape to the very first layer of the network.
    """
    return [
        keras.layers.Conv2D(filters, kernel_size=(3, 3), strides=1, **first_conv_kwargs),
        keras.layers.LeakyReLU( alpha=alpha ),
        keras.layers.Conv2D(filters, kernel_size=(3, 3), strides=1),
        keras.layers.LeakyReLU( alpha=alpha ),
        keras.layers.MaxPooling2D( pool_size=( 2 , 2 ) ),
    ]


def _dense_block( units ):
    """Return a Dense layer with `units` outputs followed by a LeakyReLU."""
    return [
        keras.layers.Dense( units ),
        keras.layers.LeakyReLU( alpha=alpha ),
    ]


# Convolutional feature extractor: five blocks with a growing number of filters.
model_layers = _conv_block( 16, input_shape=input_shape )
for filters in ( 32, 64, 128, 256 ):
    model_layers += _conv_block( filters )

model_layers.append( keras.layers.Flatten() )

# Fully connected head that narrows down to the prediction vector.
for units in ( 1240, 640, 480, 120, 62 ):
    model_layers += _dense_block( units )

# Output layer (kept with the same LeakyReLU activation as the original design).
model_layers += _dense_block( pred_vector_length )

model = keras.Sequential( model_layers )
model.compile(
            # `lr` is a deprecated alias; `learning_rate` is the supported keyword.
            optimizer = keras.optimizers.Adam( learning_rate=0.0001 ),
            loss = custom_loss,
            metrics = [ iou_metric ])

In [95]:
# Print the layer-by-layer output shapes and parameter counts of the model.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_20 (Conv2D)           (None, 226, 226, 16)      448       
_________________________________________________________________
leaky_re_lu_32 (LeakyReLU)   (None, 226, 226, 16)      0         
_________________________________________________________________
conv2d_21 (Conv2D)           (None, 224, 224, 16)      2320      
_________________________________________________________________
leaky_re_lu_33 (LeakyReLU)   (None, 224, 224, 16)      0         
_________________________________________________________________
max_pooling2d_10 (MaxPooling (None, 112, 112, 16)      0         
_________________________________________________________________
conv2d_22 (Conv2D)           (None, 110, 110, 32)      4640      
_________________________________________________________________
leaky_re_lu_34 (LeakyReLU)   (None, 110, 110, 32)      0

### Training the model

In [84]:
# Train on the training split; the held-out split is used as validation data
# so loss and IoU are reported for it after every epoch.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=3,
    epochs=50,
    validation_data=(x_test, y_test),
)

# Persist the trained network to disk in HDF5 format.
model.save('model.h5')

Train on 167 samples, validate on 19 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [85]:
# Create the output folder from Python rather than through a shell call: this
# behaves the same on every platform and does not fail when the cell is
# re-run and the folder already exists.
os.makedirs('inference_images', exist_ok=True)

In [93]:
# Predict a label vector for every held-out image and save each image with its
# predicted bounding box drawn on top.
boxes = model.predict( x_test )

for i in range( boxes.shape[0] ):

    # The first four outputs are the normalised box; scale them back to pixels.
    b = boxes[i, 0 : 4] * input_dim

    print(b)

    # Undo the /255 normalisation. Round before casting because astype(uint8)
    # truncates, so float round-off just below an integer would darken the pixel.
    img = np.rint( x_test[i] * 255 )

    source_img = Image.fromarray( img.astype( np.uint8 ), 'RGB' )

    draw = ImageDraw.Draw( source_img )

    # Hand Pillow plain Python floats with the corners ordered (x0 <= x1, y0 <= y1).
    # The network may predict an inverted box, which newer Pillow releases
    # reject with a ValueError.
    x0, x1 = sorted( ( float( b[0] ), float( b[2] ) ) )
    y0, y1 = sorted( ( float( b[1] ), float( b[3] ) ) )
    draw.rectangle( [ x0, y0, x1, y1 ], outline='black' )

    source_img.save( 'inference_images/image_{}.png'.format( i+1 ), 'png' )

[ 21.709675  73.480995 192.96307  175.39508 ]
[ 12.919448  55.20157  188.26834  159.54427 ]
[ 23.72744  38.07049 198.56694 167.26326]
[ 22.803608  47.63307  174.05794  180.01195 ]
[ 13.0738    70.543594 199.25116  180.50168 ]
[ 34.21758   60.136635 180.44727  165.52232 ]
[ 53.695892  26.568632 173.53105  189.40585 ]
[ 25.248367  36.618584 175.83492  157.97684 ]
[ 41.92196  27.61482 166.17519 194.75533]
[  7.3946576  42.43762   195.77031   182.79451  ]
[ 32.0622   34.7135  205.96686 157.4669 ]
[ 25.299051  40.987236 198.53769  189.40019 ]
[ 38.298088  66.00753  151.2573   176.68599 ]
[ 33.94955   55.118134 188.91434  159.00462 ]
[ 30.7967   75.15745 195.80046 172.83195]
[ 30.907898  23.016546 181.66924  182.8038  ]
[ 34.814816  50.774178 155.98125  197.72162 ]
[  8.440704  73.11122  207.66293  157.18407 ]
[ -0.7434522  42.47475   215.3929    190.8309   ]


In [111]:
# Predicted class index per test image: argmax over the class scores (columns 4 onward).
tf.argmax(model.predict(x_test)[:, 4:], axis=1)

<tf.Tensor: id=14521, shape=(19,), dtype=int64, numpy=
array([0, 1, 1, 1, 1, 0, 2, 1, 2, 0, 1, 1, 2, 1, 1, 1, 2, 0, 0],
      dtype=int64)>

In [115]:
# Ground-truth class index per test image, taken from the one-hot part of the labels (columns 4 onward).
tf.argmax(y_test[:, 4:], axis=1)

<tf.Tensor: id=14527, shape=(19,), dtype=int64, numpy=
array([0, 1, 1, 1, 1, 0, 2, 2, 2, 0, 1, 1, 2, 1, 1, 1, 2, 0, 2],
      dtype=int64)>