## Train

Faster RCNN 모델 훈련 과정입니다.

In [1]:
from __future__ import division
import random
import pprint
import sys
import time
import numpy as np
import pickle
from six.moves import range

from keras import backend as K
from keras.optimizers import Adam, SGD, RMSprop
from keras.layers import Input
from keras.models import Model
from keras_frcnn import config, data_generators
from keras_frcnn import losses as losses
import keras_frcnn.roi_helpers as roi_helpers
from keras.utils import generic_utils
from keras_frcnn.pascal_voc_parser import get_data
from keras_frcnn import resnet as nn

Using TensorFlow backend.


### 데이터셋 설정

아래는 데이터셋 및 훈련 설정에 필요한 환경 변수입니다. 사용하는 데이터셋 및 모델에 맞춰서 변경하시기 바랍니다.

 - `dataset_path` : 데이터셋이 존재하는 경로를 지정합니다.
 - `train_dataset_name` : 데이터셋 내에서 훈련용으로 사용할 ImageSets 묶음의 이름입니다.
 - `val_dataset_name` : 데이터셋 내에서 검증용으로 사용할 ImageSets 묶음의 이름입니다.
 - `weight_file_path` : 결과 weight 파일이 출력될 경로 및 이름을 지정합니다.
 - `config_file_path` : 모델 정보를 담은 pickle 파일이 출력될 경로 및 이름을 지정합니다.
 - `epochs` : 총 몇 epoch를 실행할 건지를 지정할 수 있습니다.
 - `epoch_len` : 한 epoch당 몇 샘플을 훈련할 것인지를 지정할 수 있습니다. 이 값이 1보다 작으면 한 epoch마다 훈련용 데이터셋 전체를 순회합니다.

In [2]:
# --- Dataset location and the ImageSets splits to read from it ---
dataset_path = './dataset/PascalVOC2012'
train_dataset_name, val_dataset_name = 'train', 'val'

# --- Output artifacts written by this notebook ---
config_file_path = './config.pickle'   # pickled Config object
weight_file_path = './frcnn.hdf5'      # model weights

# --- Training schedule ---
epoch_len = 1000   # training iterations per epoch
epochs = 10        # number of epochs to run

### 데이터셋 로딩

훈련 및 검증에 필요한 데이터셋을 로드합니다.

In [3]:
# Configuration object shared by the data generators and the model cells.
C = config.Config()

# Augmentation switches read by the data generators.
C.use_horizontal_flips = True
C.use_vertical_flips = True
C.rot_90 = False

# get_data returns the image records, the per-class counts and the
# class-name -> index mapping.
all_imgs, classes_count, class_mapping = get_data(dataset_path, train_dataset_name, val_dataset_name)

# Make sure a 'bg' entry exists in both dictionaries. Each dictionary is
# checked on its own so that an index the parser already assigned to 'bg' is
# never overwritten: previously the mapping was reassigned unconditionally,
# which could push the 'bg' index past the range implied by
# len(classes_count).
if 'bg' not in classes_count:
    classes_count['bg'] = 0
if 'bg' not in class_mapping:
    class_mapping['bg'] = len(class_mapping)

C.class_mapping = class_mapping

# Reverse lookup: class index -> class name.
inv_map = {v: k for k, v in class_mapping.items()}

# Shuffle in place before splitting so each split is in random order.
random.shuffle(all_imgs)

num_imgs = len(all_imgs)

# Split the records by the 'imageset' tag attached to each image.
train_imgs = [s for s in all_imgs if s['imageset'] == train_dataset_name]
val_imgs = [s for s in all_imgs if s['imageset'] == val_dataset_name]

# Generators producing the inputs/targets consumed by the training cell.
data_gen_train = data_generators.get_anchor_gt(train_imgs, classes_count, C, nn.get_img_output_length, K.image_dim_ordering(), mode='train')
data_gen_val = data_generators.get_anchor_gt(val_imgs, classes_count, C, nn.get_img_output_length, K.image_dim_ordering(), mode='val')

Parsing annotation files


In [4]:
# Summarise the parsed dataset: per-class counts first, then the number of
# classes and the size of each split.
print('Training images per class:')
pprint.pprint(classes_count)
for template, count in (
        ('Num classes (including bg) = {}', len(classes_count)),
        ('Num train samples {}', len(train_imgs)),
        ('Num val samples {}', len(val_imgs)),
):
    print(template.format(count))

Training images per class:
{'aeroplane': 1002,
 'bg': 0,
 'bicycle': 837,
 'bird': 1271,
 'boat': 1059,
 'bottle': 1561,
 'bus': 685,
 'car': 2492,
 'cat': 1277,
 'chair': 3056,
 'cow': 771,
 'diningtable': 800,
 'dog': 1598,
 'horse': 803,
 'motorbike': 801,
 'person': 17401,
 'pottedplant': 1202,
 'sheep': 1084,
 'sofa': 841,
 'train': 704,
 'tvmonitor': 893}
Num classes (including bg) = 21
Num train samples 11302
Num val samples 5823


### 모델 빌드

Faster RCNN with ResNet50 모델을 빌드합니다.

In [5]:
# Record the model-specific settings on the shared config object before it
# is serialised.
C.network = 'resnet50'
C.num_rois = 32
C.base_net_weights = './keras_frcnn/resnet50_weights_tf_dim_ordering_tf_kernels.h5'
C.model_path = weight_file_path

# Persist the full configuration so it can be reloaded later.
with open(config_file_path, 'wb') as config_f:
    pickle.dump(C, config_f)

print(
    'Config has been written to {}, and can be loaded when testing to ensure correct results'.format(
        config_file_path
    )
)

Config has been written to ./config.pickle, and can be loaded when testing to ensure correct results


In [6]:
# Image input: the first two axes are left unspecified (None) so images of
# any size are accepted; the last axis has 3 values.
img_input = Input(shape=(None, None, 3))
# RoI input: a variable number of rows with 4 values each
# (presumably box coordinates -- confirm against roi_helpers).
roi_input = Input(shape=(None, 4))
# Base network built on the image input; its output feeds both the RPN and
# the classifier below.
shared_layers = nn.nn_base(img_input, trainable=True)

# One anchor per (scale, ratio) combination configured on C.
num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
rpn = nn.rpn(shared_layers, num_anchors)

# Classifier head over the shared features and the RoI input. nb_classes
# counts every entry of classes_count, which includes 'bg'.
classifier = nn.classifier(shared_layers, roi_input, C.num_rois, nb_classes=len(classes_count), trainable=True)

# Only the first two RPN outputs become model outputs; they are paired with
# the two RPN losses in compile() below.
model_rpn = Model(img_input, rpn[:2])
model_classifier = Model([img_input, roi_input], classifier)

# Combined model holding the RPN and classifier outputs together; the
# training cell uses it to save all weights to a single file.
model_all = Model([img_input, roi_input], rpn[:2] + classifier)

# by_name=True loads weights only into layers whose names match those
# stored in the weight file.
print('loading weights from {}'.format(C.base_net_weights))
model_rpn.load_weights(C.base_net_weights, by_name=True)
model_classifier.load_weights(C.base_net_weights, by_name=True)

# Each sub-model gets its own optimizer instance.
optimizer = Adam(lr=1e-5)
optimizer_classifier = Adam(lr=1e-5)
model_rpn.compile(optimizer=optimizer, loss=[losses.rpn_loss_cls(num_anchors), losses.rpn_loss_regr(num_anchors)])
# The metrics key must match an output layer name of the classifier
# (presumably 'dense_class_<n>' is the class-probability output -- confirm
# in keras_frcnn.resnet). The regression loss is built for
# len(classes_count) - 1 classes.
model_classifier.compile(optimizer=optimizer_classifier, loss=[losses.class_loss_cls, losses.class_loss_regr(len(classes_count)-1)], metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
# NOTE(review): model_all is never trained in this notebook, so the
# optimizer/loss here look like placeholders -- confirm.
model_all.compile(optimizer='sgd', loss='mae')

loading weights from ./keras_frcnn/resnet50_weights_tf_dim_ordering_tf_kernels.h5


### 훈련 시작

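데이터셋에 맞게 모델의 훈련을 시작합니다. 각 epoch이 끝날 때 해당 epoch의 평균 훈련 손실(RPN 손실과 분류기 손실의 합)이 이전 최저값보다 낮으면 weight가 자동으로 저장됩니다. 검증용 데이터셋(`data_gen_val`)은 이 훈련 루프에서 사용되지 않으므로, 저장 기준은 val_loss가 아니라 훈련 손실입니다.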

In [7]:
# Number of training iterations that make up one epoch. As described for
# `epoch_len` in the settings section, a value below 1 means "one pass over
# the training set"; that is taken here as one iteration per training image
# (assumes data_gen_train yields one image per call -- confirm).
steps_per_epoch = int(epoch_len) if epoch_len >= 1 else len(train_imgs)
if steps_per_epoch < 1:
    raise ValueError('No training samples available: cannot determine the epoch length')

iter_num = 0

# Per-iteration metrics of the current epoch, one row per iteration:
# [rpn_cls, rpn_regr, detector_cls, detector_regr, detector_acc].
# Deliberately not named `losses`: that name is the keras_frcnn.losses module
# used when the models are compiled, and rebinding it here would break
# re-running the model-building cell in the same kernel.
loss_history = np.zeros((steps_per_epoch, 5))
rpn_accuracy_rpn_monitor = []
rpn_accuracy_for_epoch = []
start_time = time.time()

# np.inf instead of the np.Inf alias, which was removed in NumPy 2.0.
best_loss = np.inf

class_mapping_inv = {v: k for k, v in class_mapping.items()}
print('Starting training')

for epoch_num in range(epochs):

    progbar = generic_utils.Progbar(steps_per_epoch)
    print('Epoch {}/{}'.format(epoch_num + 1, epochs))

    while True:
        try:
            # Periodic report on how many RPN proposals overlapped ground truth.
            if len(rpn_accuracy_rpn_monitor) == steps_per_epoch and C.verbose:
                mean_overlapping_bboxes = float(sum(rpn_accuracy_rpn_monitor))/len(rpn_accuracy_rpn_monitor)
                rpn_accuracy_rpn_monitor = []
                print('Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'.format(mean_overlapping_bboxes, steps_per_epoch))
                if mean_overlapping_bboxes == 0:
                    print('RPN is not producing bounding boxes that overlap the ground truth boxes. Check RPN settings or keep training.')

            # --- Train the RPN on the next sample ---
            X, Y, img_data = next(data_gen_train)

            loss_rpn = model_rpn.train_on_batch(X, Y)

            # Run the freshly updated RPN to obtain proposals for the classifier.
            P_rpn = model_rpn.predict_on_batch(X)

            R = roi_helpers.rpn_to_roi(P_rpn[0], P_rpn[1], C, K.image_dim_ordering(), use_regr=True, overlap_thresh=0.7, max_boxes=300)
            X2, Y1, Y2, IouS = roi_helpers.calc_iou(R, img_data, C, class_mapping)

            # No usable RoIs for this image: record zero overlaps and move on.
            if X2 is None:
                rpn_accuracy_rpn_monitor.append(0)
                rpn_accuracy_for_epoch.append(0)
                continue

            # The last column of Y1 separates negatives (== 1) from positives
            # (== 0); presumably it is the 'bg' class, which is appended last
            # to class_mapping. np.where on a 1-D condition always returns a
            # 1-tuple, so the index array can be taken directly.
            neg_samples = np.where(Y1[0, :, -1] == 1)[0]
            pos_samples = np.where(Y1[0, :, -1] == 0)[0]

            rpn_accuracy_rpn_monitor.append(len(pos_samples))
            rpn_accuracy_for_epoch.append(len(pos_samples))

            # --- Train the classifier on a sampled subset of RoIs ---
            if C.num_rois > 1:
                # At most half of the batch is positive; the rest is negative.
                if len(pos_samples) < C.num_rois//2:
                    selected_pos_samples = pos_samples.tolist()
                else:
                    selected_pos_samples = np.random.choice(pos_samples, C.num_rois//2, replace=False).tolist()
                try:
                    selected_neg_samples = np.random.choice(neg_samples, C.num_rois - len(selected_pos_samples), replace=False).tolist()
                except ValueError:
                    # Fewer negatives than requested: sample with replacement.
                    selected_neg_samples = np.random.choice(neg_samples, C.num_rois - len(selected_pos_samples), replace=True).tolist()

                sel_samples = selected_pos_samples + selected_neg_samples
            else:
                # Single-RoI mode: pick one negative or one positive at random.
                # The index is wrapped in a list so the slicing below keeps
                # the RoI axis; a bare integer index would drop it and no
                # longer match the 3-D RoI input of the classifier.
                if np.random.randint(0, 2):
                    sel_samples = [random.choice(neg_samples)]
                else:
                    sel_samples = [random.choice(pos_samples)]

            loss_class = model_classifier.train_on_batch([X, X2[:, sel_samples, :]], [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])

            # Index 0 of each train_on_batch result is skipped; indices 1..
            # hold the per-output losses (and the accuracy metric for the
            # classifier).
            loss_history[iter_num, 0] = loss_rpn[1]
            loss_history[iter_num, 1] = loss_rpn[2]

            loss_history[iter_num, 2] = loss_class[1]
            loss_history[iter_num, 3] = loss_class[2]
            loss_history[iter_num, 4] = loss_class[3]

            iter_num += 1

            progbar.update(iter_num, [('rpn_cls', np.mean(loss_history[:iter_num, 0])), ('rpn_regr', np.mean(loss_history[:iter_num, 1])),
                                      ('detector_cls', np.mean(loss_history[:iter_num, 2])), ('detector_regr', np.mean(loss_history[:iter_num, 3]))])

            # --- End of epoch: aggregate, report and checkpoint ---
            if iter_num == steps_per_epoch:
                loss_rpn_cls = np.mean(loss_history[:, 0])
                loss_rpn_regr = np.mean(loss_history[:, 1])
                loss_class_cls = np.mean(loss_history[:, 2])
                loss_class_regr = np.mean(loss_history[:, 3])
                class_acc = np.mean(loss_history[:, 4])

                mean_overlapping_bboxes = float(sum(rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                rpn_accuracy_for_epoch = []

                if C.verbose:
                    print('Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'.format(mean_overlapping_bboxes))
                    print('Classifier accuracy for bounding boxes from RPN: {}'.format(class_acc))
                    print('Loss RPN classifier: {}'.format(loss_rpn_cls))
                    print('Loss RPN regression: {}'.format(loss_rpn_regr))
                    print('Loss Detector classifier: {}'.format(loss_class_cls))
                    print('Loss Detector regression: {}'.format(loss_class_regr))
                    print('Elapsed time: {}'.format(time.time() - start_time))

                # Checkpoint criterion: sum of the four mean *training* losses.
                curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
                iter_num = 0
                start_time = time.time()

                if curr_loss < best_loss:
                    if C.verbose:
                        print('Total loss decreased from {} to {}, saving weights'.format(best_loss,curr_loss))
                    best_loss = curr_loss
                    model_all.save_weights(C.model_path)

                break

        except Exception as e:
            # Best effort: a failure on one sample is reported and the loop
            # moves on to the next one. KeyboardInterrupt is not an Exception
            # subclass, so interrupting the cell still works.
            print('Exception: {}'.format(e))
            continue

Starting training
Epoch 1/10
Mean number of bounding boxes from RPN overlapping ground truth boxes: 13.5343811395
Classifier accuracy for bounding boxes from RPN: 0.68409375
Loss RPN classifier: 2.75721625919
Loss RPN regression: 0.169774478639
Loss Detector classifier: 1.12017383945
Loss Detector regression: 0.415453916684
Elapsed time: 870.557617903
Total loss decreased from inf to 4.46261849396, saving weights
Epoch 2/10
Mean number of bounding boxes from RPN overlapping ground truth boxes: 14.7216699801
Classifier accuracy for bounding boxes from RPN: 0.6989375
Loss RPN classifier: 2.07846359299
Loss RPN regression: 0.169896317174
Loss Detector classifier: 0.872685098105
Loss Detector regression: 0.393101106316
Elapsed time: 724.95783186
Total loss decreased from 4.46261849396 to 3.51414611458, saving weights
Epoch 3/10

KeyboardInterrupt: 