Imports

In [None]:
import os, re, time, json
import PIL.Image, PIL.ImageFont, PIL.ImageDraw
import numpy as np
import tensorflow as tf
from matplotlib import pyplot as plt
import tensorflow_datasets as tfds
import cv2
# Directory passed to tfds.load() as data_dir; the dataset is read from here with download=False.
data_dir = 'TF3 C3 W1 Data'


Visualization of Images

In [None]:
def draw_bounding_box_on_image(image, ymin, xmin, ymax, xmax, color=(255,0,0), thickness=5):
    """Draw a single rectangle on `image` in place.

    Args:
        image: Array that cv2.rectangle can draw on; it is modified in place.
        ymin: Top edge of the box in pixels.
        xmin: Left edge of the box in pixels.
        ymax: Bottom edge of the box in pixels.
        xmax: Right edge of the box in pixels.
        color: Colour forwarded to cv2.rectangle.
        thickness: Line thickness forwarded to cv2.rectangle.

    Returns:
        None. The rectangle is drawn directly on `image`.
    """
    # The two corners must be passed as separate points. A single 4-tuple is
    # read by OpenCV as a rect (x, y, width, height), which would turn the
    # max corner into a size and draw the wrong box.
    top_left = (int(xmin), int(ymin))
    bottom_right = (int(xmax), int(ymax))
    cv2.rectangle(image, top_left, bottom_right, color, thickness)
    
def draw_bounding_boxes_on_image(image, boxes, color=(), thickness=5):
    """Draw every box in `boxes` on `image` in place.

    Args:
        image: Array forwarded to draw_bounding_box_on_image; modified in place.
        boxes: Array with one row per box; the columns are read as
            (xmin, ymin, xmax, ymax). An array with zero rows draws nothing.
        color: Sequence holding one colour per box. A box with no matching
            entry is drawn with draw_bounding_box_on_image's default colour
            instead of raising IndexError.
        thickness: Line thickness used for every box.

    Returns:
        None.
    """
    colors = () if color is None else color
    for index in range(boxes.shape[0]):
        # Only forward a colour when the caller supplied one for this box.
        color_kwargs = {"color": colors[index]} if index < len(colors) else {}
        # Rows are x-first, whereas the single-box helper takes y-first arguments.
        draw_bounding_box_on_image(
            image,
            boxes[index, 1],
            boxes[index, 0],
            boxes[index, 3],
            boxes[index, 2],
            thickness=thickness,
            **color_kwargs,
        )
        
def draw_bounding_boxes_on_image_array(image, boxes, color=[], thickness=5):
    """Draw `boxes` on `image` and return that same image object.

    Args:
        image: Array to draw on; forwarded to draw_bounding_boxes_on_image.
        boxes: Array of boxes, forwarded unchanged.
        color: Per-box colours, forwarded unchanged (never mutated here).
        thickness: Line thickness, forwarded unchanged.

    Returns:
        The `image` argument that was passed in, after drawing.
    """
    draw_bounding_boxes_on_image(image, boxes, color, thickness)
    return image


Data and visualization

In [None]:
# Notebook-wide matplotlib defaults: grayscale colormap, no grid lines or tick
# marks, light backgrounds, and dark-red text.
plt.rcParams.update({
    'image.cmap': 'gray',
    'grid.linewidth': 0,
    'xtick.top': False,
    'xtick.bottom': False,
    'xtick.labelsize': 'large',
    'ytick.left': False,
    'ytick.right': False,
    'ytick.labelsize': 'large',
    'axes.facecolor': 'F8F8F8',
    'axes.titlesize': "large",
    'axes.edgecolor': 'white',
    'text.color': 'a8151a',
    'figure.facecolor': 'F0F0F0',
})

In [None]:
# Path to "mpl-data/fonts/ttf" inside the installed matplotlib package: plt is
# matplotlib.pyplot, so dirname(plt.__file__) is the package directory itself.
# NOTE(review): presumably the folder with matplotlib's bundled TrueType fonts —
# confirm it exists for the installed matplotlib version.
MATPLOTLIB_FONT_DIR = os.path.join(os.path.dirname(plt.__file__), "mpl-data/fonts/ttf")

def display_digits_with_boxes(images,pred_bboxes, bboxes, iou, title, bboxes_normalized=False):
    """Show a row of images with predicted and reference boxes drawn on them.

    Args:
        images: Indexable collection of image arrays. Boxes are drawn on these
            arrays in place.
        pred_bboxes: Predicted boxes, one per image, given as fractions of the
            image size in (xmin, ymin, xmax, ymax) order. They are always
            scaled to pixels. May be empty.
        bboxes: Reference boxes, one per image, in (xmin, ymin, xmax, ymax)
            order. May be empty.
        iou: Per-image IoU values, read as iou[i][0]. May be empty. When it is
            not empty, the module-level name `iou_threshold` must be defined;
            values below it are labelled in red.
        title: Title placed on the figure.
        bboxes_normalized: When truthy, `bboxes` are fractions of the image
            size and are scaled to pixels; otherwise they are used as given.

    Returns:
        None. The figure is built through matplotlib's pyplot state.
    """
    n = len(images)
    # Keep the original 10-slot row, but widen the grid when more images are
    # passed so add_subplot does not reject an index beyond the last column.
    n_columns = max(n, 10)

    fig = plt.figure(figsize=(20,4))
    plt.title(title)
    plt.yticks([])
    plt.xticks([])

    for i in range(n):
        ax = fig.add_subplot(1, n_columns, i+1)
        bboxes_to_plot = []

        if len(pred_bboxes) > i:
            height, width = images[i].shape[0], images[i].shape[1]
            bbox = pred_bboxes[i]
            bboxes_to_plot.append(
                [bbox[0] * width, bbox[1] * height, bbox[2] * width, bbox[3] * height]
            )

        if len(bboxes) > i:
            bbox = bboxes[i]
            if bboxes_normalized:
                height, width = images[i].shape[0], images[i].shape[1]
                bbox = [bbox[0] * width, bbox[1] * height, bbox[2] * width, bbox[3] * height]
            bboxes_to_plot.append(bbox)

        # Colours are assigned by position: the first drawn box is red and the
        # second is green, so a lone reference box is drawn in red.
        img_to_draw = draw_bounding_boxes_on_image_array(
            image=images[i],
            boxes=np.asarray(bboxes_to_plot),
            color=[(255,0,0), (0, 255, 0)],
        )
        ax.set_xticks([])
        ax.set_yticks([])
        ax.imshow(img_to_draw)

        if len(iou) > i:
            color = "red" if iou[i][0] < iou_threshold else "black"
            ax.text(0.2, -0.3, "iou: %s" %(iou[i][0]), color=color, transform=ax.transAxes)
        
def plot_metrics(metric_name, title, ylim=5):
    """Plot a training metric and its validation counterpart on the current axes.

    Reads the module-level `history` object, which must expose a `history`
    mapping containing both `metric_name` and 'val_' + `metric_name`.

    Args:
        metric_name: Key of the training metric in history.history.
        title: Title for the plot.
        ylim: Upper limit of the y axis; the lower limit is 0.
    """
    plt.title(title)
    plt.ylim(0, ylim)

    recorded = history.history
    plt.plot(recorded[metric_name], color='blue', label=metric_name)

    validation_name = 'val_' + metric_name
    plt.plot(recorded[validation_name], color='green', label=validation_name)

Pre-Processing of Images

In [None]:
def read_image_tfds(image, bbox):
    """Resize an image to 224x224, rescale its values, and normalise its box.

    Args:
        image: Image tensor; its first two dimensions are read as
            (height, width).
        bbox: Four box values indexed 0..3. Indices 0 and 2 are divided by the
            original width, indices 1 and 3 by the original height.

    Returns:
        A pair (image, bbox_list): the float32 image resized to 224x224 with
        values mapped by x / 127.5 - 1 (so 0 -> -1 and 255 -> 1), and a list
        of the four divided box values.
    """
    pixels = tf.cast(image, tf.float32)

    # Measure the size before resizing: the box is divided by the original dimensions.
    original_shape = tf.shape(pixels)
    width = tf.cast(original_shape[1], tf.float32)
    height = tf.cast(original_shape[0], tf.float32)

    pixels = tf.image.resize(pixels, (224, 224,))
    pixels = pixels / 127.5 - 1

    normalized_bbox = [
        bbox[0] / width,
        bbox[1] / height,
        bbox[2] / width,
        bbox[3] / height,
    ]
    return pixels, normalized_bbox

In [None]:
def read_image_with_shape(image,bbox):
    """Run read_image_tfds while also keeping the untouched input image.

    Args:
        image: Image tensor forwarded to read_image_tfds.
        bbox: Box values forwarded to read_image_tfds.

    Returns:
        A triple (original image, processed image, processed box list).
    """
    processed_image, processed_bbox = read_image_tfds(image, bbox)
    return image, processed_image, processed_bbox

In [None]:
def read_image_tfds_with_original_bbox(data):
    """Extract the image and a reordered, size-scaled box from a dataset record.

    Args:
        data: Mapping with an "image" entry and a "bbox" entry of four values.
            NOTE(review): presumably the TFDS layout (ymin, xmin, ymax, xmax)
            as fractions of the image size — confirm against the dataset.

    Returns:
        A pair (image, bbox_list). The image is returned unchanged. The list
        holds the box entries in index order (1, 0, 3, 2), with entries 1 and
        3 multiplied by the image width and entries 0 and 2 by its height.
    """
    picture, relative_box = data["image"], data["bbox"]

    dims = tf.shape(picture)
    width = tf.cast(dims[1], tf.float32)
    height = tf.cast(dims[0], tf.float32)

    scaled_box = [
        relative_box[1] * width,
        relative_box[0] * height,
        relative_box[3] * width,
        relative_box[2] * height,
    ]
    return picture, scaled_box

In [None]:
def dataset_to_numpy_util(dataset, batch_size=0, N=0):
    """Collect (images, boxes) pairs from a dataset into NumPy arrays.

    The dataset is shuffled with a 1024-element buffer, then optionally
    batched and truncated before being iterated.

    Args:
        dataset: Dataset yielding (images, bboxes) pairs whose members expose
            `.numpy()`.
        batch_size: If greater than 0, batch the shuffled dataset to this size.
        N: If greater than 0, keep only the first N elements (N batches when
            batching is enabled).

    Returns:
        A tuple (images, boxes) of NumPy arrays. Items that all share one
        shape are stacked into a regular array; otherwise a 1-D object array
        holding one item per element is returned.

    Raises:
        RuntimeError: If TensorFlow is not executing eagerly, since the
            dataset cannot then be iterated here.
    """
    def _as_array(items):
        # Same-shaped items stack into one regular array.
        if len({np.shape(item) for item in items}) <= 1:
            return np.array(items)
        # Differently sized items (e.g. un-resized images) cannot be stacked;
        # recent NumPy raises on that, so keep them in a 1-D object array.
        ragged = np.empty(len(items), dtype=object)
        for position, item in enumerate(items):
            ragged[position] = item
        return ragged

    if not tf.executing_eagerly():
        raise RuntimeError("dataset_to_numpy_util requires eager execution")

    take_dataset = dataset.shuffle(1024)
    if batch_size > 0:
        take_dataset = take_dataset.batch(batch_size)
    if N > 0:
        take_dataset = take_dataset.take(N)

    ds_images, ds_boxes = [], []
    for images, bboxes in take_dataset:
        ds_images.append(images.numpy())
        ds_boxes.append(bboxes.numpy())

    return (_as_array(ds_images), _as_array(ds_boxes))

In [None]:
def dataset_to_numpy_with_original_bboxes_util(dataset, batch_size=0, N=0):
    """Collect original images, processed images, and boxes as NumPy arrays.

    Each dataset element is mapped through read_image_with_shape, then the
    dataset is optionally batched and truncated before being iterated.

    Args:
        dataset: Dataset whose elements can be passed to read_image_with_shape
            as (image, bbox).
        batch_size: If greater than 0, batch the mapped dataset to this size.
        N: If greater than 0, keep only the first N elements (N batches when
            batching is enabled).

    Returns:
        A tuple (original_images, images, bboxes) of NumPy arrays. Items that
        all share one shape are stacked into a regular array; otherwise a 1-D
        object array holding one item per element is returned.

    Raises:
        RuntimeError: If TensorFlow is not executing eagerly, since the
            dataset cannot then be iterated here.
    """
    def _as_array(items):
        # Same-shaped items stack into one regular array.
        if len({np.shape(item) for item in items}) <= 1:
            return np.array(items)
        # Differently sized items (e.g. un-resized originals) cannot be
        # stacked; recent NumPy raises on that, so keep a 1-D object array.
        ragged = np.empty(len(items), dtype=object)
        for position, item in enumerate(items):
            ragged[position] = item
        return ragged

    if not tf.executing_eagerly():
        raise RuntimeError(
            "dataset_to_numpy_with_original_bboxes_util requires eager execution"
        )

    normalized_dataset = dataset.map(read_image_with_shape)
    if batch_size > 0:
        normalized_dataset = normalized_dataset.batch(batch_size)
    if N > 0:
        normalized_dataset = normalized_dataset.take(N)

    ds_original_images, ds_images, ds_bboxes = [], [], []
    for original_images, images, bboxes in normalized_dataset:
        ds_original_images.append(original_images.numpy())
        ds_images.append(images.numpy())
        ds_bboxes.append(bboxes.numpy())

    return (_as_array(ds_original_images), _as_array(ds_images), _as_array(ds_bboxes))

In [None]:
def get_visualization_training_dataset():
    """Load the "caltech_birds2010" train split prepared for visualisation.

    The split is read from the module-level `data_dir` without downloading,
    and every record is mapped through read_image_tfds_with_original_bbox.

    Returns:
        The mapped dataset.
    """
    # with_info=True makes tfds.load return (dataset, info); only the dataset is used.
    train_split, _ = tfds.load(
        "caltech_birds2010",
        split="train",
        with_info=True,
        data_dir=data_dir,
        download=False,
    )
    return train_split.map(read_image_tfds_with_original_bbox, num_parallel_calls=16)

visualization_training_dataset = get_visualization_training_dataset()

In [None]:
# Take 10 samples from the visualisation dataset and show them with their
# boxes only; no predicted boxes and no IoU values are supplied.
visualization_training_images, visualization_training_bboxes = dataset_to_numpy_util(
    visualization_training_dataset, N=10
)
display_digits_with_boxes(
    np.array(visualization_training_images),
    np.array([]),
    np.array(visualization_training_bboxes),
    np.array([]),
    "training images and their bboxes",
)