In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

from maptd_model import maptd_model
from pipeline_v2 import get_dataset

In [None]:
# Dataset locations.
# NOTE(review): these are absolute, machine-specific paths -- adjust them
# before running the notebook on another machine.
json_dir = 'D:/Gerasimos/Toponym_Recognition/MapTD_General/MapTD/data/general_dataset/json'
img_dir = 'D:/Gerasimos/Toponym_Recognition/MapTD_General/MapTD/data/general_dataset/images'

# Tiling and training settings.
TRAINING_STEPS = 2000  # number of batches taken from the dataset for training
BATCH_SIZE = 1         # forwarded to get_dataset(batch_size=...)
TILE_SIZE = 512        # forwarded to get_dataset(tile_size=...) and maptd_model(input_size=...)

In [None]:
def dice_coefficient(y_true_cls, y_pred_cls, training_mask):
    """
    Dice loss (one minus the Sorensen-Dice coefficient) between a ground
    truth and a predicted score map. See Eq. (1) Weinman et al. (ICDAR 2019)
    and Milletari et al. (3DV 2016).

    Parameters
      y_true_cls   : binary ground truth score map (1==text, 0==non-text),
                       [batch_size, tile_size/4, tile_size/4, 1]
      y_pred_cls   : predicted score map in range (0,1), same size as y_true_cls
      training_mask: binary tensor selecting the locations that take part in
                       the calculation, same size as y_true_cls
    Returns
      loss: scalar tensor between 0 and 1
    """
    eps = 1e-5  # keeps the denominator non-zero

    # Masked overlap between ground truth and prediction.
    overlap = tf.math.reduce_sum(y_true_cls * y_pred_cls * training_mask)

    # Masked sums of squares of each map.
    true_sq_sum = tf.math.reduce_sum(tf.square(y_true_cls) * training_mask)
    pred_sq_sum = tf.math.reduce_sum(tf.square(y_pred_cls) * training_mask)
    denominator = true_sq_sum + pred_sq_sum + eps

    return 1. - (2 * overlap / denominator)

In [None]:
def calculate_loss(y_true_cls, y_pred_cls,
         y_true_geo, y_pred_geo,
         training_mask):
    # NOTE: Gerasimos renamed this function from 'loss' to 'calculate_loss'.
    '''
    Total training loss: a weighted sum of the score loss (Dice loss), the
    rbox loss (IoU loss) and the angle loss (cosine loss). See Eq. (6) in
    Weinman et al. (ICDAR 2019).

    Parameters
     y_true_cls   : binary ground truth score map (1==text, 0==non-text),
                    [batch_size,tile_size/4,tile_size/4, 1]
     y_pred_cls   : predicted score map in range (0,1), same size as y_true_cls
     y_true_geo   : ground truth box geometry map with shape
                      [batch_size,tile_size/4,tile_size/4, 5]
     y_pred_geo   : predicted box geometry map, same size as y_true_geo
     training_mask: binary tensor selecting the locations that take part in
                      the loss calculations, same size as y_true_cls

    Returns
     total_loss: a scalar

    '''
    # Score term, scaled down to match the magnitude of the IoU term.
    score_loss = dice_coefficient(y_true_cls, y_pred_cls, training_mask) * 0.01

    # Geometry channels along axis 3: distances to the top, right, bottom
    # and left box edges, followed by the rotation angle.
    top_gt, right_gt, bottom_gt, left_gt, angle_gt = tf.split(
        y_true_geo, 5, axis=3)
    top_pred, right_pred, bottom_pred, left_pred, angle_pred = tf.split(
        y_pred_geo, 5, axis=3)

    # Box areas implied by the four edge distances.
    gt_area = (top_gt + bottom_gt) * (right_gt + left_gt)
    pred_area = (top_pred + bottom_pred) * (right_pred + left_pred)

    # Width and height of the overlap between the two boxes.
    overlap_w = tf.math.minimum(right_gt, right_pred) \
                + tf.math.minimum(left_gt, left_pred)
    overlap_h = tf.math.minimum(top_gt, top_pred) \
                + tf.math.minimum(bottom_gt, bottom_pred)

    intersect_area = overlap_w * overlap_h
    union_area = gt_area + pred_area - intersect_area

    # IoU loss (with +1 smoothing) and cosine angle loss, per location.
    iou_loss = -tf.math.log((intersect_area + 1.0) / (union_area + 1.0))
    angle_loss = 1 - tf.math.cos(angle_pred - angle_gt)
    geometry_loss = iou_loss + 20 * angle_loss

    # The geometry term only counts on ground-truth text locations that are
    # enabled by the training mask.
    masked_geometry = tf.math.reduce_mean(
        geometry_loss * y_true_cls * training_mask)

    return masked_geometry + score_loss

In [None]:
def show_score_map(tile, gt_score_map, pred_score_map, threshold=None):
    """
    Display an input tile next to its ground-truth and predicted score maps.

    Parameters
      tile          : image tile; cast to uint8 and squeezed before display
      gt_score_map  : ground truth score map; squeezed before display
      pred_score_map: predicted score map; squeezed before display
      threshold     : if given, the prediction is binarized (values strictly
                        greater than threshold become 1, all others 0); if
                        None (default) the raw prediction is shown unchanged
    Returns
      None; a three-panel matplotlib figure is shown and then closed.
    """
    # Comparing against None raises, so only binarize when a threshold is set.
    if threshold is not None:
        pred_score_map = np.where(pred_score_map > threshold, 1, 0)

    COLORMAP = 'gray'
    fig, ax = plt.subplots(1, 3)

    ax[0].imshow(np.squeeze(tf.cast(tile, tf.uint8)))
    ax[0].set_title('Tile')
    ax[1].imshow(np.squeeze(gt_score_map), cmap=COLORMAP)
    ax[1].set_title('Ground truth')
    ax[2].imshow(np.squeeze(pred_score_map), cmap=COLORMAP)
    ax[2].set_title('Prediction')

    plt.show()
    # Release the figure: this function is called repeatedly while training.
    plt.close(fig)
    
    

In [None]:
# Build the training dataset from every file matching the '*' pattern.
ds = get_dataset(
    img_dir,
    json_dir,
    '*',
    tile_size=TILE_SIZE,
    batch_size=BATCH_SIZE,
)

In [None]:
# Settings for this training run.
LEARNING_RATE = 0.01     # Adam step size
DISPLAY_EVERY = 5        # show a score-map preview every N steps
SCORE_THRESHOLD = 0.8    # binarization threshold used for the preview

model = maptd_model(input_size=TILE_SIZE)
optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
loss_history = []  # one Python float per training step

for step, (tile, score_map, geo_map, training_mask) in \
            ds.repeat().take(TRAINING_STEPS).enumerate():
    # Only the forward pass and the loss need to be recorded on the tape.
    with tf.GradientTape() as tape:
        pred_score_map, pred_geo_map = model(tile, training=True)
        loss = calculate_loss(score_map, pred_score_map, geo_map,
                              pred_geo_map, training_mask)

    # Back-propagate and update the weights outside the tape context.
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))

    # Store a plain float so the history does not keep one tensor per step.
    loss_history.append(float(loss))

    # Periodic visual check of the predicted score map.
    if ((step.numpy() + 1) % DISPLAY_EVERY) == 0:
        show_score_map(tile, score_map, pred_score_map,
                       threshold=SCORE_THRESHOLD)
        

        
        

In [None]:
# Recompute the loss for the last batch processed by the training loop.
# The loop binds `score_map` / `geo_map`; the previous `score_maps` /
# `geo_maps` names are not defined anywhere in this notebook and raise
# NameError on a fresh kernel.
loss = calculate_loss(score_map, pred_score_map, geo_map, pred_geo_map, training_mask)

In [None]:
# Shape of the ground-truth score map from the last training batch.
# (`score_maps` is not defined in this notebook; the loop variable is `score_map`.)
score_map.shape

In [None]:
# Value of the `step` loop variable left over after the training loop finished.
step.numpy()