<a href="https://colab.research.google.com/github/emilyrlong/oddy-test/blob/main/Dissertation_1_8_Visualising_Box_Labels.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Dissertation: Visualising the Box Labels

Using the trained models to plot predictions of box labels on images.

In [None]:
# Connect colab to Google Drive
from google.colab import drive
drive.mount('/content/drive')

## **Step 1**: Installation


In [None]:
# !pip install tensorflow
import tensorflow as tf
print(tf.__version__)

In [None]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')
else:
  print(gpu_info)

In [None]:
#!pip install tensorflow-gpu
#device_name = tf.test.gpu_device_name()
#if device_name != '/device:GPU:0':
#  raise SystemError('GPU device not found')
#print('Found GPU at: {}'.format(device_name))

In [None]:
# Report the total RAM of this runtime and whether it counts as high-RAM
from psutil import virtual_memory
ram_gb = virtual_memory().total / 1e9
print(f'Your runtime has {ram_gb:.1f} gigabytes of available RAM\n')

# 20 GB is the cut-off this notebook uses for a high-RAM runtime
if ram_gb >= 20:
  print('You are using a high-RAM runtime!')
else:
  print('To enable a high-RAM runtime, select the Runtime > "Change runtime type"')
  print('menu, and then select High-RAM in the Runtime shape dropdown. Then, ')
  print('re-execute this cell.')

Install the Tensorflow 2 [Object Detection API](https://github.com/tensorflow/models/tree/master/research/object_detection):

In [None]:
# delete any existing models directory so the clone below starts from a clean state
# (comment this line out if you want to keep an existing checkout)
!rm -rf ./models/

# clone the Tensorflow Model Garden (--depth 1: shallow clone, latest commit only)
!git clone --depth 1 https://github.com/tensorflow/models/

In [None]:
# install the Object Detection API
!cd models/research/ && protoc object_detection/protos/*.proto --python_out=. && cp object_detection/packages/tf2/setup.py . && python -m pip install .

## **Step 2**: Import Packages

Import the packages used throughout this notebook.

In [None]:
!pip install dfply
from dfply import *

In [None]:
import matplotlib
import matplotlib.pyplot as plt

import os
import random
import zipfile
import io
import scipy.misc
import numpy as np
import pandas as pd

import glob
import imageio
from six import BytesIO
from PIL import Image, ImageDraw, ImageFont
from IPython.display import display, Javascript
from IPython.display import Image as IPyImage

try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  pass

# import tensorflow as tf
tf.get_logger().setLevel('ERROR')

### **Step 2.1**: Import Object Detection API packages

In [None]:
### START CODE HERE (Replace Instances of `None` with your code) ###
# import the label map utility module
from object_detection.utils import label_map_util

# import module for reading and updating configuration files.
from object_detection.utils import config_util

# import module for visualization. use the alias `viz_utils`
from object_detection.utils import visualization_utils as viz_utils

# import module for building the detection model
from object_detection.builders import model_builder
### END CODE HERE ###

# import module for utilities in Colab
from object_detection.utils import colab_utils

In [None]:
from object_detection.utils import visualization_utils as viz_utils

## **Step 3**: Import and Clean Label Data
The labeller MakeSense.ai outputted (xmin, ymin, xdiff, ydiff) where xdiff and ydiff are equal to the difference between the minimum and maximum coordinates, so we need to make some new columns.

In [None]:
# Load in the csv from the labels folder in drive
label_df = pd.read_csv('/content/drive/MyDrive/Dissertation/labels/Fulldata_Aug12.csv')
# label_df = pd.read_csv('/content/drive/MyDrive/Dissertation/labels/UnanimousAug22.csv')

In [None]:
label_df

### **Step 3.1**: Getting Integer Class Values
We need to make a column with the mapped integer values for the classes.

In [None]:
# Load label map from file
# Function found here: https://github.com/tensorflow/models/blob/master/research/object_detection/utils/label_map_util.py
label_map = label_map_util.load_labelmap('/content/drive/MyDrive/Dissertation/labels/StringIntLabelMap.pbtxt')

In [None]:
# Convert to dictionary
label_dict = label_map_util.get_label_map_dict(label_map,use_display_name=True)
label_dict

In [None]:
# Map the label dictionary to a column to populate the corresponding class integer values
# https://kanoki.org/2019/04/06/pandas-map-dictionary-values-with-dataframe-columns/
label_df['classInt'] = label_df['class'].map(label_dict)

### **Step 3.2**: Define the category index dictionary + NumClasses


In [None]:
# Dictionary describing the corrosion classes, keyed by the 1-based class id.
# Each entry holds the id again under 'id' and the class label under 'name'.
category_index = {
    class_id: {'id': class_id, 'name': class_name}
    for class_id, class_name in enumerate(
        ['Ag-P', 'Ag-T', 'Ag-U',
         'Cu-P', 'Cu-T', 'Cu-U',
         'Pb-P', 'Pb-T', 'Pb-U'],
        start=1)
}

In [None]:
# Testing grabbing the categories
print(category_index[1])

In [None]:
# Specify the number of classes that the model will predict
num_classes = 9

## **Step 4**: Defining Functions

This section defines the utility functions used to load images, prepare the labels, and plot detections.

### **Function 1**: `plot_detections`

In [None]:
def plot_detections(image_np,
                    boxes,
                    classes,
                    scores,
                    category_index,
                    figsize=(12, 16),
                    image_name=None,
                    max_boxes_to_draw=10,
                    min_score_thresh=0.3,
                    line_thickness=10):
    """Wrapper function to visualize detections.

    The boxes are drawn on a copy of the image, so `image_np` itself is left
    untouched. The annotated copy is either saved to disk or shown with
    `plt.imshow` on the current matplotlib axes.

    Args:
    image_np: uint8 numpy array with shape (img_height, img_width, 3)
    boxes: a numpy array of shape [N, 4], in normalized coordinates
          (the call passes `use_normalized_coordinates=True`).
    classes: a numpy array of shape [N]. Note that class indices are 1-based,
          and match the keys in the label map.
    scores: a numpy array of shape [N] or None.  If scores=None, then
          this function assumes that the boxes to be plotted are groundtruth
          boxes and plot all boxes as black with no classes or scores.
    category_index: a dict containing category dictionaries (each holding
          category index `id` and category name `name`) keyed by category indices.
    figsize: kept for backward compatibility; it is not used by this function,
          the caller controls the figure size with `plt.figure`.
    image_name: a name for the image file. When given, the annotated image is
          saved there instead of being shown.
    max_boxes_to_draw: forwarded to the visualization utility; limits how many
          boxes are drawn (default 10).
    min_score_thresh: forwarded to the visualization utility; boxes scoring
          below it are not drawn (default 0.3).
    line_thickness: forwarded to the visualization utility; width of the box
          outline (default 10).
    """

    # Draw on a copy: the visualization utility modifies the array it is given
    image_np_with_annotations = image_np.copy()

    viz_utils.visualize_boxes_and_labels_on_image_array(
        image_np_with_annotations,
        boxes,
        classes,
        scores,
        category_index,
        use_normalized_coordinates=True,
        max_boxes_to_draw=max_boxes_to_draw,
        min_score_thresh=min_score_thresh,
        line_thickness=line_thickness)

    if image_name:
        plt.imsave(image_name, image_np_with_annotations)
    else:
        plt.imshow(image_np_with_annotations)

### **Function 2**: `load_npy_set`

Get the training images from the Google Drive folder and their file names. The images are quite large, so the step which converts them into numpy arrays will take a while.


In [None]:
# A FUNCTION FOR LOADING IMAGES
def load_npy_set(npy_dir):
    """Load a folder of numpy arrays corresponding to images.

    Args:
      npy_dir: a path to a folder of training, validation, or test images
        saved as `.npy` files.

    Returns:
      A tuple `(images_np, files)`. `images_np` is the list of loaded numpy
      arrays and `files` is the list of their file names, in the same order.
    """
    # os.listdir makes no promise about ordering, and the rest of the notebook
    # refers to images by position, so sort the names to keep those positions
    # stable between runs. Anything that is not a .npy file is left out so a
    # stray file in the folder cannot break np.load.
    files = sorted(name for name in os.listdir(npy_dir) if name.endswith('.npy'))
    images_np = []
    for idx, file in enumerate(files):
        images_np.append(np.load(os.path.join(npy_dir, file)))
        # Progress message for every tenth file
        if idx % 10 == 0:
            print('Loading',str(idx),':',file)
    # When finished, print message and return
    print('Done Loading!')
    return images_np, files

### **Function 3**: `box_lister`
Converting the box coordinates and class labels into a list of numpy arrays. These can be visualised on top of the images and further converted into tensors. 

In [None]:
def box_lister(files, labels=None):
    """Collect normalized ground-truth boxes and class integers per image.

    Args:
      files: iterable of image file names. A `.npy` extension is swapped for
        `.jpg` before the name is looked up in the label table.
      labels: optional DataFrame with the columns `filename`, `height`,
        `width`, `ymin`, `xmin`, `ymax`, `xmax` and `classInt`. When omitted,
        the notebook-level `label_df` is used.

    Returns:
      A tuple `(gt_boxes, classes)` of lists with one entry per image that has
      labels: an array of shape [N, 4] holding (ymin, xmin, ymax, xmax)
      divided by the image height/width, and an array of N class integers.

      NOTE: an image without labels is reported and skipped, so both lists can
      be shorter than `files`; callers that pair them with the images by
      position should watch for that message.
    """
    if labels is None:
        labels = label_df
    # Define a list of ground truth boxes
    gt_boxes = []
    # Define a list of class integers
    classes = []
    for file in files:
        # Swap only the extension: a plain str.replace('npy', 'jpg') would also
        # rewrite any 'npy' that happens to occur inside the file name itself
        stem, ext = os.path.splitext(file)
        if ext == '.npy':
            file = stem + '.jpg'
        # A smaller dataframe to hold the labels for that particular image
        image_labels = labels[labels['filename'] == file]
        # Report images that do not have any labels
        if len(image_labels) == 0:
            print('Error: file ' + file + ' has no corresponding labels')
            continue
        # Image height and width (first of the unique values recorded for the image)
        height = np.unique(image_labels['height'].to_numpy())[0]
        width = np.unique(image_labels['width'].to_numpy())[0]
        # Box array: (ymin, xmin, ymax, xmax)
        box_arr = image_labels[['ymin','xmin','ymax','xmax']].to_numpy()
        # Normalizing boxes by width and height
        box_arr = np.divide(box_arr, [height,width,height,width])
        gt_boxes.append(box_arr)
        # Getting the class integers as an array and adding to list
        classes.append(image_labels['classInt'].to_numpy())
    return gt_boxes, classes

### **Function 4:** `data_preprocess`
Need some data preprocessing so it is formatted properly for the model:
- Convert the class labels to one-hot representations
- Convert everything (i.e. train images, gt boxes and class labels) to tensors.

In [None]:
def data_preprocess(train_images_np, gt_boxes, classes):
    """Convert images, boxes and class integers into tensors for the model.

    The three inputs are walked in parallel with `zip`. Each image becomes a
    float32 tensor with a leading batch dimension, each box array a float32
    tensor, and each class array a one-hot tensor with `num_classes` columns
    (`num_classes` is the notebook-level constant).

    Returns:
      A tuple of three lists: image tensors, box tensors, one-hot class tensors.
    """
    # Class integers are 1-based; one-hot encoding needs them 0-based
    label_id_offset = 1

    image_tensors, box_tensors, one_hot_tensors = [], [], []
    for image_np, boxes_np, class_ids in zip(train_images_np, gt_boxes, classes):
        # Image -> float32 tensor with a batch dimension in front
        image_tensor = tf.convert_to_tensor(image_np, dtype=tf.float32)
        image_tensors.append(tf.expand_dims(image_tensor, axis=0))
        # Boxes -> float32 tensor
        box_tensors.append(tf.convert_to_tensor(boxes_np, dtype=tf.float32))
        # Classes -> zero-indexed tensor -> one-hot representation
        zero_based_ids = tf.convert_to_tensor(class_ids - label_id_offset)
        one_hot_tensors.append(tf.one_hot(zero_based_ids, num_classes))

    print('Done prepping data.')
    return image_tensors, box_tensors, one_hot_tensors

### **Function 5**: `whole_image_prep`
This function wraps Functions 2–4 in a single call: it loads the images and returns the images, boxes, and classes both as numpy arrays and as tensors.

In [None]:
def whole_image_prep(npy_dir):
    """Load one image folder and prepare it in every format the notebook uses.

    Chains `load_npy_set`, `box_lister` and `data_preprocess`.

    Returns:
      A 7-tuple: the image arrays, their file names, the ground-truth box
      arrays, the class arrays, and then the three lists of tensors returned
      by `data_preprocess` (images, boxes, one-hot classes).
    """
    print('Starting Image Loading:')
    images_np, files = load_npy_set(npy_dir)

    print('Starting box coordinate and class lists:')
    gt_boxes, classes = box_lister(files)

    print('Converting images, boxes, and classes to tensors:')
    tensors = data_preprocess(images_np, gt_boxes, classes)

    return (images_np, files, gt_boxes, classes, *tensors)

### **Function 6**: `plot_image_sample`

In [None]:
def plot_image_sample(images_np, gt_boxes, classes, n_images=8):
    """Plot the first few images with their ground-truth boxes as a visual check.

    Args:
      images_np: list of image arrays.
      gt_boxes: list of box arrays, one per image.
      classes: list of class-integer arrays, one per image.
      n_images: maximum number of images to show (default 8). Fewer are shown
        when the inputs hold fewer entries.

    The images are laid out four per row. Every box is drawn with a score of 1
    so that none is hidden by the score threshold in `plot_detections`. The
    figure is rendered with whichever matplotlib backend the notebook has
    active.
    """
    # Never index past the end of the shortest input
    n_images = min(n_images, len(images_np), len(gt_boxes), len(classes))
    n_cols = 4
    # Ceiling division; keep at least one row so the figure is always valid
    n_rows = max(1, -(-n_images // n_cols))
    # define the figure size
    plt.figure(figsize=(15, 7))
    # using the plot_detections function to draw the ground truth boxes
    for idx in range(n_images):
        plt.subplot(n_rows, n_cols, idx + 1)
        plot_detections(
          images_np[idx],
          gt_boxes[idx],
          classes[idx],
          np.ones(classes[idx].shape), # scores set to 1
          category_index = category_index,
        )
    plt.show()

## **Step 5:** Loading and Testing Data

In [None]:
# LOADING VALIDATION SET
val_dir = '/content/drive/MyDrive/Dissertation/new_val_npy'
# val_dir = '/content/drive/MyDrive/Dissertation/non_met_images/unanimous_npy'
# whole_image_prep returns the images, file names, boxes and classes, followed by their tensor versions
(val_images_np, val_files, val_gt_boxes, val_classes,
 val_image_T, val_gt_box_T, val_gt_classes_OHT) = whole_image_prep(val_dir)
# 191 images in 3m 7s, then 4m 49s, then 5m 6s
# 220 images at 960 x 640 in 2m 12s

In [None]:
# LOADING TEST DATA SET
test_npy_dir = '/content/drive/MyDrive/Dissertation/new_test_npy'
# Use whole_image_prep to load the test set as numpy arrays and as tensors
(test_images_np, test_files, test_gt_boxes, test_classes,
 test_image_T, test_gt_box_T, test_gt_classes_OHT) = whole_image_prep(test_npy_dir)
# 190 test data only took 2m 55s to load!
# 960 x 640: 380 images took 3m 41s to load and process!

In [None]:
plot_image_sample(val_images_np,val_gt_boxes,val_classes)

## **Step 6**: Configure the model

In [None]:
# Clears old models
tf.keras.backend.clear_session()

# EfficientDet V1:
# pipeline_config = '/content/drive/MyDrive/Dissertation/models_workspace/eff_det/v1/eff_det_d1_pipeline_v1_test.config'

# EfficientDet V3:
pipeline_config = '/content/drive/MyDrive/Dissertation/models_workspace/eff_det/v3/eff_det_d1_pipeline_5.config'

# Load the configuration file into a dictionary
configs = config_util.get_configs_from_pipeline_file(pipeline_config)

In [None]:
# Read in the object stored at the key 'model' of the configs dictionary
model_config = configs['model']

In [None]:
# Use the model_builder build function from the config above.
# This notebook only restores a trained checkpoint and runs predictions, so the
# model is built in inference mode (is_training=False) rather than training mode.
detection_model = model_builder.build(model_config = model_config, is_training = False)

## **Step 7:** Build the Model with Checkpoints

In [None]:
# checkpoint_path = '/content/drive/MyDrive/Dissertation/models_workspace/eff_det/v1/ckpt-39'
checkpoint_path = '/content/drive/MyDrive/Dissertation/models_workspace/eff_det/v3/ckpt-49'

# Define a checkpoint
checkpoint = tf.compat.v2.train.Checkpoint(model=detection_model)

# Restore the checkpoint to the checkpoint path
checkpoint.restore(checkpoint_path).expect_partial()

Run a dummy image through the model so that variables are created.

In [None]:
# use the detection model's `preprocess()` method and pass a dummy image
tmp_image, tmp_shapes = detection_model.preprocess(tf.zeros([1, 640, 640, 3]))

# run a prediction with the preprocessed image and shapes
tmp_prediction_dict = detection_model.predict(tmp_image, tmp_shapes)

# postprocess the predictions into final detections
tmp_detections = detection_model.postprocess(tmp_prediction_dict, tmp_shapes)

print('Weights restored!')

## **Step 8**: Define detect and IoU functions to process images

Define a function that returns the detection boxes, classes, and scores.

In [None]:
# Comment out this decorator if you want to run inference eagerly
@tf.function
def detect(input_tensor):
    """Run the detection model on one input image.

    Args:
    input_tensor: A [1, height, width, 3] Tensor of type tf.float32.
      Height and width can be anything: the model's own `preprocess()` step
      is applied first inside this function.

    Returns:
    The dict produced by the model's `postprocess()` step. The notebook reads
      `detection_boxes`, `detection_classes` and `detection_scores` from it.
    """
    # preprocess -> predict -> postprocess, all on the notebook-level model
    model_input, true_shapes = detection_model.preprocess(input_tensor)
    raw_predictions = detection_model.predict(model_input, true_shapes)
    return detection_model.postprocess(raw_predictions, true_shapes)

In [None]:
def intersection_over_union(pred_box, true_box):
    """Intersection over union of two boxes given as (ymin, xmin, ymax, xmax).

    Each box is cut into four equal parts with `np.split`, so for a flat
    length-4 array every intermediate value, and the returned IoU, is an array
    of shape (1,). A small constant is added to both the numerator and the
    denominator so the ratio stays defined when the union area is zero.
    """
    eps = 1e-10

    p_top, p_left, p_bottom, p_right = np.split(pred_box, 4)
    t_top, t_left, t_bottom, t_right = np.split(true_box, 4)

    # Side lengths of the overlap rectangle, clamped at zero for disjoint boxes
    inter_w = np.maximum(np.minimum(p_right, t_right) - np.maximum(p_left, t_left), 0)
    inter_h = np.maximum(np.minimum(p_bottom, t_bottom) - np.maximum(p_top, t_top), 0)
    intersection = inter_w * inter_h

    area_pred = (p_right - p_left) * (p_bottom - p_top)
    area_true = (t_right - t_left) * (t_bottom - t_top)
    union = (area_pred + area_true) - intersection

    return (intersection + eps) / (union + eps)

## **Step 9**: Analysis

### Method 1: Finding boxes that match the ground truth
Looks like the model is definitely outputting at least one correct bounding box per ground truth coupon, but the scores are not necessarily the highest per coupon. 

In [None]:
# We want the boxes that have a high IoU and the same classes as the ground truth

# Run an image through the model
i = 40
detection_test = detect(val_image_T[i])
gt_classes = val_classes[i]
gt_boxes = val_gt_boxes[i]

# Get predicted classes (shifted by one to give 1-based class ids), scores, and boxes
class_array = detection_test['detection_classes'][0].numpy().astype('int') + 1
scores = detection_test['detection_scores'][0].numpy()
new_boxes = detection_test['detection_boxes'][0].numpy()

# One record per matched (ground truth, prediction) pair. The dataframe is built
# once at the end because DataFrame.append was removed in pandas 2.0; this also
# removes the need for a dummy first row.
box_records = []

# Iterating over the ground truth classes
for j, gt in enumerate(gt_classes):
  # Iterating over the predicted class values
  for k, pc in enumerate(class_array):
    # If the ground truth (gt) and predicted class (pc) labels are the same, find IoU
    if gt == pc:
      pred_box = new_boxes[k]
      true_box = gt_boxes[j]
      # intersection_over_union returns a one-element array for a single pair of boxes
      iou = np.round(intersection_over_union(pred_box, true_box),decimals=3)[0]
      # Keep only pairs that overlap strongly
      if iou > 0.7:
        box_records.append({'GT_BoxNum':j, 'Pred_BoxNum':k,
                            'GT_Class':gt_classes[j], 'Pred_Class':class_array[k],
                            'IoU': iou, 'Scores':scores[k]})

box_data = pd.DataFrame(box_records,
                        columns=['GT_BoxNum','Pred_BoxNum','GT_Class','Pred_Class','IoU','Scores'])
# Sort by ground truth box number, highest score first within each box
box_data = box_data.sort_values(['GT_BoxNum','Scores'], ascending = [True, False])

In [None]:
box_data

In [None]:
box_data['Pred_BoxNum'].to_numpy()

### Method 2: Finding the breakdown of scores for each ground truth coupon

In [None]:
np.round(intersection_over_union(pred_box, true_box),decimals=3)[0]

In [None]:
# We want to find all the boxes that have a high IoU with the ground truth

# Run an image through the model
i = 70
detection_test = detect(val_image_T[i])
gt_classes = val_classes[i]
gt_boxes = val_gt_boxes[i]

# Get predicted classes (shifted by one to give 1-based class ids), scores, and boxes
class_array = detection_test['detection_classes'][0].numpy().astype('int') + 1
scores = detection_test['detection_scores'][0].numpy()
new_boxes = detection_test['detection_boxes'][0].numpy()

# One record per matched (ground truth, prediction) pair. The dataframe is built
# once at the end because DataFrame.append was removed in pandas 2.0; this also
# removes the need for a dummy first row.
box_records = []

# Iterating over the ground truth boxes
for j, true_box in enumerate(gt_boxes):
  # Iterating over the predicted boxes
  for k, pred_box in enumerate(new_boxes):
    # Calculate the IoU for these two boxes
    iou = np.round(intersection_over_union(pred_box, true_box),decimals=3)[0]
    # If the IoU is high and the scores are above a certain threshold, keep the pair
    if (iou > 0.8) and (scores[k] > 0.1):
      box_records.append({'GT_BoxNum':j, 'Pred_BoxNum':k,
                          'GT_Class':gt_classes[j], 'Pred_Class':class_array[k],
                          'IoU': iou, 'Scores':scores[k]})

box_data = pd.DataFrame(box_records,
                        columns=['GT_BoxNum','Pred_BoxNum','GT_Class','Pred_Class','IoU','Scores'])
# Sort by ground truth box number, highest score first within each box
box_data = box_data.sort_values(['GT_BoxNum','Scores'], ascending = [True, False])

In [None]:
box_data

### Method 3: Getting the percentage of the highest scoring predictions that are correct for each ground truth coupon

In [None]:
def find_max_scores_for_gt(i, val_image_T, val_classes, val_gt_boxes,
                           iou_thresh=0.6, score_thresh=0.1):
    """Find the highest-scoring overlapping prediction for each ground-truth box.

    Args:
      i: index of the image to process.
      val_image_T: list of image tensors; entry `i` is passed to `detect`.
      val_classes: list of ground-truth class arrays, one per image.
      val_gt_boxes: list of ground-truth box arrays, one per image.
      iou_thresh: a prediction is considered for a ground-truth box only when
        their IoU (rounded to 3 decimals) is above this value (default 0.6).
      score_thresh: predictions scoring at or below this value are ignored
        (default 0.1).

    Returns:
      A tuple `(detection_test, max_vals)`. `detection_test` is the raw output
      of `detect`. `max_vals` is a DataFrame with the columns GT_BoxNum,
      Scores, Pred_BoxNum, GT_Class, Pred_Class and IoU, holding the
      top-scoring qualifying prediction of every ground-truth box that has
      one. If several predictions tie for the top score of a box, all of them
      are returned. The frame is empty when nothing qualifies.
    """
    columns = ['GT_BoxNum', 'Scores', 'Pred_BoxNum', 'GT_Class', 'Pred_Class', 'IoU']

    # Get the ground truth classes and boxes for this image
    gt_classes = val_classes[i]
    gt_boxes = val_gt_boxes[i]
    # Run the image through the model
    detection_test = detect(val_image_T[i])

    # Get predicted classes (shifted by one to give 1-based class ids), scores, and boxes
    class_array = detection_test['detection_classes'][0].numpy().astype('int') + 1
    scores = detection_test['detection_scores'][0].numpy()
    new_boxes = detection_test['detection_boxes'][0].numpy()

    # Collect plain records and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every call
    box_records = []
    for j, true_box in enumerate(gt_boxes):
        for k, pred_box in enumerate(new_boxes):
            # intersection_over_union returns a one-element array for a single pair
            iou = np.round(intersection_over_union(pred_box, true_box), decimals=3)[0]
            if (iou > iou_thresh) and (scores[k] > score_thresh):
                box_records.append({'GT_BoxNum': j, 'Scores': scores[k],
                                    'Pred_BoxNum': k,
                                    'GT_Class': gt_classes[j],
                                    'Pred_Class': class_array[k],
                                    'IoU': iou})

    # If there aren't any overlapping boxes, output an empty max dataframe
    if not box_records:
        print('Image ' + str(i) + ' did not have any suitable predictions')
        return detection_test, pd.DataFrame(columns=columns)

    box_data = pd.DataFrame(box_records, columns=columns)
    # Max score per ground-truth box, then join back to recover the full rows
    best_scores = box_data.groupby('GT_BoxNum', as_index=False)['Scores'].max()
    max_vals = best_scores.merge(box_data, on=['GT_BoxNum', 'Scores'], how='left')

    return detection_test, max_vals

In [None]:
# Iterating over the full validation set
cor_data = {'FileNum':[0],'Correct':[0],'NumGT':[0],'NumPred':[0]}
cor_data = pd.DataFrame(cor_data)
for i in range(len(val_classes)):
  detection_test, max_vals = find_max_scores_for_gt(i,val_image_T,val_classes,val_gt_boxes)
  # We divide by val_classes[i].shape[0] because that's the number of ground truth boxes
  cor_num = sum(max_vals['GT_Class'] == max_vals['Pred_Class'])/val_classes[i].shape[0]
  # ^ There may be coupons that don't receive a prediction from this output
  cor_data = cor_data.append({'FileNum':i, 'Correct':cor_num,
                              'NumGT':val_classes[i].shape[0],
                              'NumPred':max_vals.shape[0]}, ignore_index=True)

# Drop the row of all zeros from box_data
cor_data = cor_data.drop([0])
# Turn data into percentages
cor_data['Correct'] = np.round(cor_data['Correct']*100)

In [None]:
cor_data

In [None]:
cor_data[cor_data['NumGT'] != cor_data['NumPred']]

In [None]:
# Mean percentage of correct predictions over the validation images.
# Uses the Series method rather than a bare `mean`, which only exists here
# through the `from dfply import *` star import.
cor_data['Correct'].mean()

In [None]:
for i in np.unique(cor_data['Correct']):
  count = sum(cor_data['Correct'] == i)
  print(str(i) + ': ' + str(count))

In [None]:
sum(cor_data['Correct'] < 100)

### Iterating Over Test Set

In [None]:
# Iterating over the full test set.
# Records are collected in a list and turned into a dataframe once at the end
# (DataFrame.append was removed in pandas 2.0), so no dummy first row is needed.
cor_test_records = []
for i in range(len(test_classes)):
  detection_test, max_vals = find_max_scores_for_gt(i,test_image_T,test_classes,test_gt_boxes)
  # Number of ground truth boxes for this image
  num_gt = test_classes[i].shape[0]
  # Share of ground truth boxes whose top-scoring prediction has the right class
  cor_num = (max_vals['GT_Class'] == max_vals['Pred_Class']).sum()/num_gt
  # ^ There may be coupons that don't receive a prediction from this output
  cor_test_records.append({'FileNum':i, 'Correct':cor_num,
                           'NumGT':num_gt,
                           'NumPred':max_vals.shape[0]})

cor_test = pd.DataFrame(cor_test_records, columns=['FileNum','Correct','NumGT','NumPred'])
# Turn data into percentages
cor_test['Correct'] = np.round(cor_test['Correct'].astype(float)*100)

In [None]:
cor_test[cor_test['NumGT'] != cor_test['NumPred']]

In [None]:
# Mean percentage of correct predictions over the test images.
# Uses the Series method rather than a bare `mean`, which only exists here
# through the `from dfply import *` star import.
cor_test['Correct'].mean()

In [None]:
for i in np.unique(cor_test['Correct']):
  count = sum(cor_test['Correct'] == i)
  print(str(i) + ': ' + str(count))

In [None]:
sum(cor_test['Correct'] < 100)

### Method 4: Displaying images with the Exceptions in EfficientDet V2

In [None]:
'''
# For validation
exceptions = cor_data[cor_data['Correct'] < 100]
exc_list = exceptions['FileNum'].to_numpy().astype('int')
exceptions
'''
# For testing
exceptions = cor_test[cor_test['Correct'] < 100]
exc_list = exceptions['FileNum'].to_numpy().astype('int')
exceptions

In [None]:
exceptions.shape

#### Code for the Test Set:

In [None]:
# For Validation - Outputting the Summarised Predictions 
i = exc_list[18]
# Get the predictions for this exception image
detection_test, max_vals = find_max_scores_for_gt(i,test_image_T,test_classes,test_gt_boxes)
# The index numbers of the predicted boxes we want to use
pred_nums = max_vals['Pred_BoxNum'].to_numpy().astype('int')

# Adding one to the array of predicted classes to get 1-9 classes
class_array = detection_test['detection_classes'][0].numpy().astype(np.uint32) + 1
class_array = class_array[pred_nums]
det_boxes = detection_test['detection_boxes'][0].numpy()[pred_nums]
det_scores = detection_test['detection_scores'][0].numpy()[pred_nums]

%matplotlib inline

input_tensor = val_image_T[i]
# define the figure size
plt.figure(figsize=(80, 50))
# using the plot_detections function to draw the ground truth boxes
plot_detections(
    test_images_np[i],
    det_boxes,
    class_array,
    det_scores,
    category_index = category_index,
)
plt.show()

In [None]:
# Plotting the Ground Truth Values for a particular test
i = exc_list[18]

%matplotlib inline
# define the figure size
plt.figure(figsize=(80, 50))
plot_detections(
    test_images_np[i],
    test_gt_boxes[i],
    test_classes[i],
    np.ones(test_classes[i].shape), # scores set to 1
    category_index = category_index,
    )
plt.show()

#### Code for the Validation Set:

In [None]:
# For Validation - Outputting the Summarised Predictions 
i = 7 # i = exc_list[0]
# Get the predictions for this exception image
detection_test, max_vals = find_max_scores_for_gt(i,val_image_T,val_classes,val_gt_boxes)
# The index numbers of the predicted boxes we want to use
pred_nums = max_vals['Pred_BoxNum'].to_numpy().astype('int')

# Adding one to the array of predicted classes to get 1-9 classes
class_array = detection_test['detection_classes'][0].numpy().astype(np.uint32) + 1
class_array = class_array[pred_nums]
det_boxes = detection_test['detection_boxes'][0].numpy()[pred_nums]
det_scores = detection_test['detection_scores'][0].numpy()[pred_nums]

%matplotlib inline

input_tensor = val_image_T[i]
# define the figure size
plt.figure(figsize=(80, 50))
# using the plot_detections function to draw the ground truth boxes
plot_detections(
    val_images_np[i],
    det_boxes,
    class_array,
    det_scores,
    category_index = category_index,
)
plt.show()

In [None]:
# Plotting the ground truth boxes for one validation image
# NOTE(review): `exc_list` is built from the test-set results (`cor_test`) in the
# cell that defines `exceptions`, yet it is used here to index the validation
# data - confirm this index refers to the intended image.
i = exc_list[42]

%matplotlib inline
# define the figure size
plt.figure(figsize=(80, 50))
# every ground truth box gets a score of 1 so the score threshold does not hide it
plot_detections(
    val_images_np[i],
    val_gt_boxes[i],
    val_classes[i],
    np.ones(val_classes[i].shape), # scores set to 1
    category_index = category_index,
    )
plt.show()

In [None]:
i = exc_list[5]

%matplotlib inline
# define the figure size
plt.figure(figsize=(80, 50))
plot_detections(
    val_images_np[i],
    detection_test['detection_boxes'][0].numpy(),
    detection_test['detection_classes'][0].numpy().astype(np.uint32) + 1,
    detection_test['detection_scores'][0].numpy(), # scores set to 1
    category_index = category_index,
    )
plt.show()

In [None]:
detection_test['detection_classes'][0].numpy().astype(np.uint32) + 1

In [None]:
# Iterating over the full validation set.
# Here the share of correct classes is taken over the selected predictions
# (not over the ground truth boxes). Note that this overwrites `cor_data`.
cor_records = []
for i in range(len(val_classes)):
  # find_max_scores_for_gt returns (raw detections, best-prediction table)
  detection_test, max_vals = find_max_scores_for_gt(i,val_image_T,val_classes,val_gt_boxes)
  num_pred = max_vals.shape[0]
  if num_pred > 0:
    cor_num = (max_vals['GT_Class'] == max_vals['Pred_Class']).sum()/num_pred
  else:
    # No suitable prediction for this image: the share is undefined
    cor_num = np.nan
  cor_records.append({'FileNum':i, 'Correct':cor_num})

# Build the dataframe once (DataFrame.append was removed in pandas 2.0)
cor_data = pd.DataFrame(cor_records, columns=['FileNum','Correct'])
# Turn data into percentages
cor_data['Correct'] = np.round(cor_data['Correct'].astype(float)*100)

### Method 5: Getting a contingency table for overall P/T/U classifications

In [None]:
def find_overall_class_pred(i,val_image_T,val_classes,val_gt_boxes,
                            iou_threshold=0.6,score_threshold=0.1):
  """Return the overall ground truth and predicted P/T/U rating for image ``i``.

  Every predicted box is compared with every ground truth box. A pair is kept
  when its IoU exceeds ``iou_threshold`` and the prediction score exceeds
  ``score_threshold``; for each ground truth box the highest-scoring kept
  prediction is then selected. Class ids are collapsed to their P/T/U letter
  and the worst letter (P < T < U) is reported for each side.

  Only ground truth boxes that received a kept prediction contribute to
  either rating.

  Args:
    i: index of the image within the three collections below.
    val_image_T: indexable collection of input tensors; element ``i`` is
      passed to ``detect``.
    val_classes: indexable collection of per-image ground truth class ids
      (ids 1-9 are mapped to a rating).
    val_gt_boxes: indexable collection of per-image ground truth boxes.
    iou_threshold: a pair is kept only if its IoU is strictly greater.
    score_threshold: a pair is kept only if its score is strictly greater.

  Returns:
    A tuple ``(GT_OC, Pred_OC)`` of rating letters, or ``(None, None)`` when
    no prediction passes both thresholds for any ground truth box.
  """
  # Get the ground truth classes and boxes for this image
  gt_classes = val_classes[i]
  gt_boxes = val_gt_boxes[i]
  # Run the image through the model
  detection_test = detect(val_image_T[i])

  # Get predicted classes, scores, and boxes
  class_array = detection_test['detection_classes'][0].numpy().astype('int') + 1
  scores = detection_test['detection_scores'][0].numpy()
  new_boxes = detection_test['detection_boxes'][0].numpy()

  # Collect the matched-up boxes as plain records and build the frame once;
  # DataFrame.append was removed in pandas 2.0 and is quadratic inside a loop
  box_columns = ['GT_BoxNum','Pred_BoxNum','GT_Class','Pred_Class','IoU','Scores']
  box_records = []
  for j, true_box in enumerate(gt_boxes):
    for k, pred_box in enumerate(new_boxes):
      # Calculate the IoU for these two boxes
      iou = np.round(intersection_over_union(pred_box, true_box),decimals=3)[0]
      # Keep the pair only if both the overlap and the confidence are high enough
      if (iou > iou_threshold) and (scores[k] > score_threshold):
        box_records.append({'GT_BoxNum':j, 'Pred_BoxNum':k,
                            'GT_Class':gt_classes[j], 'Pred_Class':class_array[k],
                            'IoU': iou, 'Scores':scores[k]})
  box_data = pd.DataFrame(box_records, columns=box_columns)

  # Without any overlapping box there is nothing to rate: report it and return
  # early rather than calling max() on an empty sequence
  if box_data.empty:
    print('Image ' + str(i) + ' did not have any suitable predictions')
    return None, None

  # Sort by ground truth box number, best score first
  box_data = box_data.sort_values(['GT_BoxNum','Scores'], ascending=[True, False])

  # Finding the max score for each GT box
  max_vals = box_data.groupby('GT_BoxNum', as_index=False)['Scores'].max()
  # Merging with the full dataset to get all of the info for the winning rows
  max_vals = max_vals.merge(box_data, on=['GT_BoxNum','Scores'], how='left')

  # - - - Overall P/T/U ratings for the GT and predicted classes - - -

  # A dictionary to convert class integers to P/T/U ratings
  reverse_dict = {1:'P', 2:'T', 3:'U', 4:'P', 5:'T', 6:'U', 7:'P', 8:'T', 9:'U'}
  # Mapping classes for ground truth and predicted classes
  max_vals['GT_OC'] = max_vals['GT_Class'].map(reverse_dict)
  max_vals['Pred_OC'] = max_vals['Pred_Class'].map(reverse_dict)
  # The alphabetical maximum happens to be the worst corrosion level: P < T < U
  GT_OC = max(max_vals['GT_OC'])
  Pred_OC = max(max_vals['Pred_OC'])

  return GT_OC, Pred_OC

Code for test classes:

In [None]:
# Collect the overall ground truth / predicted rating of every test image
overall_records = []
# Iterate over the test set
for i in range(len(test_classes)):
  GT_OC, Pred_OC = find_overall_class_pred(i,test_image_T,test_classes,test_gt_boxes)
  overall_records.append({'FileNum':i, 'GT_OC':GT_OC,'Pred_OC':Pred_OC})

# Build the frame once from the records (DataFrame.append was removed in pandas 2.0).
# Row labels run from 1, i.e. FileNum + 1, in case later cells index by label.
dataO = pd.DataFrame(overall_records, columns=['FileNum','GT_OC','Pred_OC'],
                     index=range(1, len(overall_records) + 1))

# Get contingency table for the classes (rows: ground truth, columns: predicted)
data_crosstab = pd.crosstab(dataO['GT_OC'],
                            dataO['Pred_OC'],
                            margins = False)

In [None]:
# Show the overall P/T/U contingency table (the test-set table when cells are run in order)
data_crosstab

Code for validation classes:

In [None]:
# Collect the overall ground truth / predicted rating of every validation image
overall_records = []
# Iterate over the validation set
for i in range(len(val_classes)):
  GT_OC, Pred_OC = find_overall_class_pred(i,val_image_T,val_classes,val_gt_boxes)
  overall_records.append({'FileNum':i, 'GT_OC':GT_OC,'Pred_OC':Pred_OC})

# Build the frame once from the records (DataFrame.append was removed in pandas 2.0).
# Row labels run from 1, i.e. FileNum + 1, in case later cells index by label.
dataO = pd.DataFrame(overall_records, columns=['FileNum','GT_OC','Pred_OC'],
                     index=range(1, len(overall_records) + 1))

# Get contingency table for the classes (rows: ground truth, columns: predicted)
data_crosstab = pd.crosstab(dataO['GT_OC'],
                            dataO['Pred_OC'],
                            margins = False)

In [None]:
# Show the per-image overall ratings (the validation-set frame when cells are run in order)
dataO

### Method 6: Getting contingency tables for class-specific classifications

In [None]:
def find_class_pred(image_T,classes,gt_boxes):
  """Build a class-by-class contingency table for a whole image set.

  For every image, ``find_max_scores_for_gt`` supplies the matched ground
  truth / predicted class pairs. All pairs are pooled, the class ids 1-9 are
  translated to their metal-rating label (e.g. 'Ag-P') and cross-tabulated.

  Args:
    image_T: indexable collection of input tensors, forwarded to
      ``find_max_scores_for_gt``.
    classes: per-image ground truth classes; its length sets the number of
      images processed.
    gt_boxes: per-image ground truth boxes, forwarded unchanged.

  Returns:
    A ``pd.crosstab`` DataFrame with ground truth labels as rows and predicted
    labels as columns.
  """
  # Collect plain records and build the frame once; DataFrame.append was removed
  # in pandas 2.0 and is quadratic when called inside a loop
  con_records = []
  # Iterating over the full image set
  for i in range(len(classes)):
    # Get the bounding box predictions (the raw detections are not needed here)
    _, max_vals = find_max_scores_for_gt(i,image_T,classes,gt_boxes)
    # There may be coupons that don't receive a prediction from this output, so
    # max_vals can hold fewer rows than the image has ground truth boxes.
    # Pair the columns positionally rather than by row label.
    for gt_class, pred_class in zip(max_vals['GT_Class'], max_vals['Pred_Class']):
      con_records.append({'FileNum':i, 'GT_Class':gt_class, 'Pred_Class':pred_class})

  con_data = pd.DataFrame(con_records, columns=['FileNum','GT_Class','Pred_Class'])

  # A dictionary to convert class integers to metal-rating labels
  reverse_dict = {1:'Ag-P', 2:'Ag-T', 3:'Ag-U', 4:'Cu-P', 5:'Cu-T', 6:'Cu-U', 7:'Pb-P', 8:'Pb-T', 9:'Pb-U'}
  # Mapping classes for ground truth and predicted classes
  con_data['GT_OC'] = con_data['GT_Class'].map(reverse_dict)
  con_data['Pred_OC'] = con_data['Pred_Class'].map(reverse_dict)

  # Get contingency table for the classes
  data_crosstab = pd.crosstab(con_data['GT_OC'], con_data['Pred_OC'], margins = False)

  return data_crosstab

In [None]:
# Class-specific contingency table for the validation set
val_con = find_class_pred(val_image_T,val_classes,val_gt_boxes)

In [None]:
# Show the validation contingency table (rows: ground truth, columns: predicted)
val_con

In [None]:
# Class-specific contingency table for the test set
test_con = find_class_pred(test_image_T,test_classes,test_gt_boxes)

In [None]:
# Show the test contingency table (rows: ground truth, columns: predicted)
test_con

In [None]:
# Number of correctly classified coupons in the test set: the diagonal of the
# contingency table. Cells are looked up by label (ground truth == predicted) so a
# class missing from the rows or columns cannot shift the positions being summed.
sum(test_con.at[label, label]
    for label in test_con.index.intersection(test_con.columns))

In [None]:
# Overall misclassification rate on the test set: every coupon that does not sit
# on the diagonal (ground truth label == predicted label) of the contingency table
test_npy = test_con.to_numpy()
test_total = test_npy.sum()
# Take the diagonal by label. Treating "cells smaller than 10" as the errors would
# miss large off-diagonal cells and wrongly include small diagonal ones.
shared_labels = test_con.index.intersection(test_con.columns)
correct_count = sum(test_con.at[label, label] for label in shared_labels)
error_count = test_total - correct_count
error_p = error_count / test_total

In [None]:
# Show the test-set error proportion computed in the previous cell
error_p