In [1]:
# Mount Google Drive at /content/drive (Colab-only helper).
from google.colab import drive
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
%cd drive/My\ Drive/YOLOv3

/content/drive/My Drive/YOLOv3


In [3]:
from tensorflow.keras.backend import clear_session
#clear_session()
import sys, time, pickle
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras import layers, Model, metrics
from class_yolov3 import YOLOv3, decode, compute_loss
from dataset_encoder import LabelEncoder
import utils 

# Report the TensorFlow version and refuse to continue without a GPU.
print(tf.__version__)
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
    raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

#----------------------------------------------------------------------------
#   Assemble the Keras model: input -> YOLOv3 -> decoded per-scale outputs
#----------------------------------------------------------------------------
input_shape = [416, 416, 3]
inputs = layers.Input(shape=input_shape)

# YOLOv3 instance (per the original note: uses darknet53 as its default backbone).
yolov3 = YOLOv3()
# Raw per-scale outputs, noted in the original as [conv52, conv26, conv13].
yolo_outputs = yolov3(inputs)

# Decode every raw output together with its scale index.
y_preds = [decode(conv, i) for i, conv in enumerate(yolo_outputs)]
outputs = y_preds
model = Model(inputs, outputs)
# Uncomment to print the layer summary:
##model.summary()


2.4.1
Found GPU at: /device:GPU:0


In [4]:
#@tf.function
def train_batch(images, targets):
    """Run one optimisation step on a batch, then advance the LR schedule.

    Uses notebook-level globals: ``model``, ``optimizer``, ``train_loss``,
    ``train_accuracy``, ``global_steps``, ``warmup_steps``, ``total_steps``,
    ``lr_init`` and ``lr_end``.

    Args:
        images:  batch of images passed to ``model`` in training mode.
        targets: per-scale ground truth; ``targets[i]`` is handed to
                 ``compute_loss`` and unpacked as ``(label, boxes_all)``.

    Returns:
        None. Side effects: applies gradients, updates both metrics, prints a
        per-step log line, increments ``global_steps`` and assigns the next
        learning rate to the optimizer.
    """
    with tf.GradientTape() as tape:
        predictions = model(images, training=True)
        cls_sum = iou_sum = conf_sum = 0
        # Three detection scales (52, 26, 13 per the original comment).
        for scale in range(3):
            pred, truth = predictions[scale], targets[scale]
            scale_losses = compute_loss(pred, truth)
            cls_sum  += scale_losses[0]
            iou_sum  += scale_losses[1]
            conf_sum += scale_losses[2]
            # Accuracy is computed against the label half of the target pair.
            label, _ = truth
            train_accuracy(label, pred)
        total_loss = cls_sum + iou_sum + conf_sum

    trainable = model.trainable_variables
    grads = tape.gradient(total_loss, trainable)
    optimizer.apply_gradients(zip(grads, trainable))
    train_loss(total_loss)

    # The learning rate printed here is the one used for this step.
    step_report = (global_steps, optimizer.lr.numpy(),
                   cls_sum, iou_sum, conf_sum, total_loss)
    print("=> STEP %4d   lr: %.6f   loss_cls: %4.2f   loss_iou: %4.2f   "
             "loss_conf: %4.2f   total_loss: %4.2f" % step_report)

    # Advance the step counter and set the learning rate for the next step:
    # linear warm-up first, cosine decay from lr_init to lr_end afterwards.
    global_steps.assign_add(1)
    if global_steps < warmup_steps:
        lr = global_steps / warmup_steps * lr_init
    else:
        progress = (global_steps - warmup_steps) / (total_steps - warmup_steps)
        lr = lr_end + 0.5 * (lr_init - lr_end) * (1 + tf.cos(progress * np.pi))
    optimizer.lr.assign(lr.numpy())
  


In [5]:
#----------------------------------------------------------------------------
#   Build the batch-wise split list (opt='train') and load VOC with it
#----------------------------------------------------------------------------
opt = 'train'
start = 0
num_samples = 2501
batch_size = 16

# utils.dataset_split returns one entry per batch (its length is printed below).
train_split = utils.dataset_split(start, num_samples, batch_size, opt)
print('len(train_split) :', len(train_split))

dataset = tfds.load(name="voc", data_dir='data', split=train_split)


len(train_split) : 157


In [None]:
#----------------------------------------------------------------------------
#
#   Set up hyperparameters and run training
#
#----------------------------------------------------------------------------
# SparseCategoricalAccuracy is for integer labels 0,1,2,3,...
# CategoricalAccuracy is for one-hot encoded labels (used here).
train_loss     = metrics.Mean(name='train_loss')
train_accuracy = metrics.CategoricalAccuracy(name='train_acc')
train_hist = {'train_loss':[], 'train_acc':[], 'val_loss':[],'val_acc':[]}

EPOCHS = 200
warmup_epoch = 10
lr_init = 1e-3 ; lr_end = 5e-6
steps_per_epoch = len(dataset)
global_steps = tf.Variable(1, trainable=False, dtype=tf.int64)
warmup_steps = warmup_epoch * steps_per_epoch
total_steps = EPOCHS * steps_per_epoch

# train_batch only assigns the scheduled learning rate *after* a step, so the
# optimizer must start at the schedule's value for step 1. With a bare Adam()
# the very first update would run at the default 1e-3 and bypass warm-up.
first_step_lr = lr_init / warmup_steps if warmup_steps > 0 else lr_init
optimizer = tf.keras.optimizers.Adam(learning_rate=first_step_lr)

# History file used later for plotting.
fname = 'train_history.pickle'

for epoch in range(1, EPOCHS+1):
    # Push any buffered output out before the epoch starts.
    sys.stdout.flush()
    epochStart = time.time()

    # Metrics accumulate across calls, so reset them for every epoch.
    train_loss.reset_states()
    train_accuracy.reset_states()

    # A fresh encoder is built each epoch and iterated batch by batch.
    train_ds = LabelEncoder(dataset, num_samples, batch_size)
    for x_batch, y_batch in train_ds:
        train_batch(x_batch, y_batch)

    # Convert to plain floats so the history pickles without TensorFlow objects.
    epoch_loss = float(train_loss.result())
    epoch_acc = float(train_accuracy.result())

    # Per-epoch summary.
    template = 'Epoch {}/{}, train_loss: {:0.2f}, train_acc: {:0.2f}%'
    print(template.format(epoch, EPOCHS, epoch_loss, epoch_acc*100))

    train_hist['train_loss'].append(epoch_loss)
    train_hist['train_acc'].append(epoch_acc)

    # Time taken by this epoch.
    epochEnd = time.time()
    elapsed = (epochEnd - epochStart) / 60.0
    print("took {:.4} minutes".format(elapsed))

    model.save_weights("yolov3_weights_iou.h5")

    # Rewrite the whole history file every epoch. Dumping repeatedly into one
    # open handle would concatenate pickles, and a single pickle.load() would
    # then return only the first epoch's history.
    with open(fname, 'wb') as f:
        pickle.dump(train_hist, f, protocol=pickle.HIGHEST_PROTOCOL)



Streaming output truncated to the last 5000 lines.
=> STEP 2421   lr: 0.000998   loss_cls: 25.88   loss_iou: 18.14   loss_conf: 17.03   total_loss: 61.05
=> STEP 2422   lr: 0.000998   loss_cls: 18.70   loss_iou: 18.08   loss_conf: 15.66   total_loss: 52.44
=> STEP 2423   lr: 0.000998   loss_cls: 24.28   loss_iou: 17.67   loss_conf: 16.44   total_loss: 58.39
=> STEP 2424   lr: 0.000998   loss_cls: 13.90   loss_iou: 10.39   loss_conf: 9.39   total_loss: 33.69
=> STEP 2425   lr: 0.000998   loss_cls: 19.33   loss_iou: 13.68   loss_conf: 10.83   total_loss: 43.84
=> STEP 2426   lr: 0.000998   loss_cls: 27.40   loss_iou: 18.88   loss_conf: 15.96   total_loss: 62.24
=> STEP 2427   lr: 0.000998   loss_cls: 22.81   loss_iou: 11.14   loss_conf: 9.83   total_loss: 43.78
=> STEP 2428   lr: 0.000998   loss_cls: 30.55   loss_iou: 21.36   loss_conf: 18.84   total_loss: 70.75
=> STEP 2429   lr: 0.000998   loss_cls: 15.37   loss_iou: 11.52   loss_conf: 11.21   total_loss: 38.10
=> STEP 24