In [1]:
# Reference repositories for DBNet / Differentiable Binarization:
# https://github.com/MhLiao/DB
# https://github.com/zonasw/DBNet
# https://github.com/xuannianz/DifferentiableBinarization
import tensorflow as tf
# Restrict TensorFlow's Python logger to messages of level ERROR and above.
tf.get_logger().setLevel('ERROR')
# NOTE(review): not referenced in the cells visible here - presumably used
# later to name saved artefacts; confirm.
APPROACH_NAME = 'DBNet'

# Check that the GPU is working

In [2]:
physical_devices = tf.config.list_physical_devices('GPU') 
tf.config.experimental.set_memory_growth(physical_devices[0], True)
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0': raise SystemError('GPU device not found')
print('Found GPU at:', device_name)
!nvcc -V

Found GPU at: /device:GPU:0
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2020 NVIDIA Corporation
Built on Mon_Nov_30_19:15:10_Pacific_Standard_Time_2020
Cuda compilation tools, release 11.2, V11.2.67
Build cuda_11.2.r11.2/compiler.29373293_0


# Data input pipeline

In [3]:
# Configuration shared by the training and validation data generators.
# Batch geometry: samples per batch and the image size handed to the generator.
BATCH_SIZE, IMAGE_SIZE = 4, 640

# NOTE(review): presumably the lower/upper bounds of the threshold map and the
# text-polygon shrink ratio used by Differentiable Binarization - confirm
# against loader.DBNetDataGenerator.
THRESH_MIN, THRESH_MAX = 0.3, 0.7
SHRINK_RATIO = 0.4

In [4]:
from loader import DataImporter, DBNetDataGenerator
# Build the dataset index from the 'Datasets' directory.
# NOTE(review): pattern='*.txt' presumably selects the annotation files to
# read - confirm against loader.DataImporter.
dataset = DataImporter('Datasets', pattern='*.txt')
# Printing the importer shows its sample-count summary (images and boxes).
print(dataset)

Samples count (1 image can have multiple bounding boxes):
- Number of images found: 951
- Number of image bounding boxes: 951
- Number of bounding boxes in all images: 15741


In [5]:
# Split the imported samples into training and validation subsets. With 0.9
# the recorded run keeps 855 of the 951 images for training and 96 for
# validation. The call returns image paths and bounding boxes for each subset.
(
    train_img_paths,
    all_train_bboxes,
    valid_img_paths,
    all_valid_bboxes,
) = dataset.split(0.9)

print('Number of training samples:', len(train_img_paths))
print('Number of validate samples:', len(valid_img_paths))

Number of training samples: 855
Number of validate samples: 96


In [6]:
# Batch generator over the training split; it receives the image paths, their
# bounding boxes and the shared pipeline constants defined above.
train_generator = DBNetDataGenerator(
    train_img_paths, all_train_bboxes, BATCH_SIZE, IMAGE_SIZE, 
    THRESH_MIN, THRESH_MAX, SHRINK_RATIO
)
# Batch generator over the validation split, built with the same constants.
# NOTE(review): the extra positional False presumably turns off shuffling
# and/or augmentation for validation - confirm against the
# DBNetDataGenerator signature.
valid_generator = DBNetDataGenerator(
    valid_img_paths, all_valid_bboxes, BATCH_SIZE, IMAGE_SIZE, 
    THRESH_MIN, THRESH_MAX, SHRINK_RATIO, False
)

# Define the model

In [7]:
from models import DBNet
# Instantiate the project's DBNet wrapper with its default arguments.
dbnet = DBNet()
# Print the layer table of the wrapped Keras model; line_length=120 widens the
# table so the "Connected to" column fits. The output is long - consider
# clearing it before sharing the notebook.
dbnet.model.summary(line_length=120)

Model: "DBNet"
________________________________________________________________________________________________________________________
 Layer (type)                          Output Shape               Param #       Connected to                            
 image (InputLayer)                    [(None, None, None, 3)]    0             []                                      
                                                                                                                        
 conv1 (Conv2D)                        (None, None, None, 64)     9408          ['image[0][0]']                         
                                                                                                                        
 bn_conv1 (BatchNormalization)         (None, None, None, 64)     256           ['conv1[0][0]']                         
                                                                                                                        
 conv1_relu (Acti

                                                                                                                        
 padding2c_branch2b (ZeroPadding2D)    (None, None, None, 64)     0             ['res2c_branch2a_relu[0][0]']           
                                                                                                                        
 res2c_branch2b (Conv2D)               (None, None, None, 64)     36864         ['padding2c_branch2b[0][0]']            
                                                                                                                        
 bn2c_branch2b (BatchNormalization)    (None, None, None, 64)     256           ['res2c_branch2b[0][0]']                
                                                                                                                        
 res2c_branch2b_relu (Activation)      (None, None, None, 64)     0             ['bn2c_branch2b[0][0]']                 
                                

 res3c_branch2a (Conv2D)               (None, None, None, 128)    65536         ['res3b_relu[0][0]']                    
                                                                                                                        
 bn3c_branch2a (BatchNormalization)    (None, None, None, 128)    512           ['res3c_branch2a[0][0]']                
                                                                                                                        
 res3c_branch2a_relu (Activation)      (None, None, None, 128)    0             ['bn3c_branch2a[0][0]']                 
                                                                                                                        
 padding3c_branch2b (ZeroPadding2D)    (None, None, None, 128)    0             ['res3c_branch2a_relu[0][0]']           
                                                                                                                        
 res3c_branch2b (Conv2D)        

 res4a (Add)                           (None, None, None, 1024)   0             ['bn4a_branch2c[0][0]',                 
                                                                                 'bn4a_branch1[0][0]']                  
                                                                                                                        
 res4a_relu (Activation)               (None, None, None, 1024)   0             ['res4a[0][0]']                         
                                                                                                                        
 res4b_branch2a (Conv2D)               (None, None, None, 256)    262144        ['res4a_relu[0][0]']                    
                                                                                                                        
 bn4b_branch2a (BatchNormalization)    (None, None, None, 256)    1024          ['res4b_branch2a[0][0]']                
                                

                                                                                                                        
 res4d (Add)                           (None, None, None, 1024)   0             ['bn4d_branch2c[0][0]',                 
                                                                                 'res4c_relu[0][0]']                    
                                                                                                                        
 res4d_relu (Activation)               (None, None, None, 1024)   0             ['res4d[0][0]']                         
                                                                                                                        
 res4e_branch2a (Conv2D)               (None, None, None, 256)    262144        ['res4d_relu[0][0]']                    
                                                                                                                        
 bn4e_branch2a (BatchNormalizati

 res5a_branch1 (Conv2D)                (None, None, None, 2048)   2097152       ['res4f_relu[0][0]']                    
                                                                                                                        
 bn5a_branch2c (BatchNormalization)    (None, None, None, 2048)   8192          ['res5a_branch2c[0][0]']                
                                                                                                                        
 bn5a_branch1 (BatchNormalization)     (None, None, None, 2048)   8192          ['res5a_branch1[0][0]']                 
                                                                                                                        
 res5a (Add)                           (None, None, None, 2048)   0             ['bn5a_branch2c[0][0]',                 
                                                                                 'bn5a_branch1[0][0]']                  
                                

 out4_up (UpSampling2D)                (None, None, None, 256)    0             ['out4[0][0]']                          
                                                                                                                        
 out3 (Add)                            (None, None, None, 256)    0             ['in3[0][0]',                           
                                                                                 'out4_up[0][0]']                       
                                                                                                                        
 in2 (ConvBnRelu)                      (None, None, None, 256)    66816         ['res2c_relu[0][0]']                    
                                                                                                                        
 out3_up (UpSampling2D)                (None, None, None, 256)    0             ['out3[0][0]']                          
                                

# Callbacks

In [8]:
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Learning-rate schedule: when val_loss has not improved for 2 epochs, multiply
# the learning rate by `factor` (new_lr = lr * 0.5), never going below 1e-6.
reduce_lr_callback = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=2,
    min_lr=1e-6,
    verbose=1,
)

# Early stopping: end training after 5 epochs without improvement and restore
# the best weights seen. 'val_loss' is the Keras default monitor, spelled out
# here so both callbacks visibly track the same quantity.
early_stopping_callback = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
    verbose=1,
)

# Training

In [9]:
from tensorflow.keras.optimizers import Adam
from losses import DBLoss

# Training hyper-parameters: initial learning rate for Adam and the maximum
# number of epochs passed to fit().
LEARNING_RATE = 2e-4
EPOCHS = 100

# Compile the DBNet wrapper with Adam and the project's DBLoss. The learning
# rate is Adam's first argument; it is passed by keyword for readability.
dbnet.compile(
    optimizer=Adam(learning_rate=LEARNING_RATE),
    loss=DBLoss(),
)

In [None]:
%%time
history = dbnet.fit(
    train_generator,
    validation_data = valid_generator,
    validation_steps = len(valid_generator),
    steps_per_epoch = len(train_generator),
    epochs = EPOCHS,
    callbacks = [reduce_lr_callback, early_stopping_callback],
    verbose = 1
).history

Epoch 1/100
Epoch 2/100
  9/214 [>.............................] - ETA: 57s - loss: 1.3553