<a href="https://colab.research.google.com/github/aubricot/computer_vision_with_eol_images/blob/master/object_detection_for_image_cropping/multitaxa/multitaxa_generate_crops_tf2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Use Faster-RCNN ResNet 50 and Inception v2 in Tensorflow to automatically crop images of snakes & lizards (Squamata), beetles (Coleoptera), frogs (Anura), and carnivores (Carnivora)
---   
*Last Updated 15 March 2023*  
-Now runs in Python 3 with Tensorflow 2.0-     

Use trained object detection models to automatically crop images of snakes & lizards (Squamata), beetles (Coleoptera), frogs (Anura), and carnivores (Carnivora) to square dimensions centered around animal(s). 

Models were trained and saved to Google Drive in [multitaxa_train_tf2_rcnns.ipynb](https://github.com/aubricot/computer_vision_with_eol_images/blob/master/object_detection_for_image_cropping/multitaxa/multitaxa_train_tf2_rcnns.ipynb).

***Models were trained in Python 2 and TF 1 in April 2020: Faster RCNN ResNet 50 trained for 12 hours to 200,000 steps and Faster RCNN Inception v2 for 18 hours to 200,000 steps.***

Notes:   
* Before you start: change the runtime to "GPU" with "High RAM"
* Change parameters using the form fields on the right (or wherever you see 'TO DO' in the code)
* For each 24 hour period on Google Colab, you have up to 12 hours of free GPU access. 

References:     
* [Official Tensorflow Object Detection API Instructions](https://tensorflow-object-detection-api-tutorial.readthedocs.io/en/latest/training.html)   
* [Medium Blog on training using Tensorflow Object Detection API in Colab](https://medium.com/analytics-vidhya/training-an-object-detection-model-with-tensorflow-api-using-google-colab-4f9a688d5e8b)

## Installs & Imports
---

In [None]:
#@title Choose where to save results & set up directory structure
# Purpose: pick where results are stored, optionally mount Google Drive,
# build the working-directory path and install the notebook's dependencies.
# Use dropdown menu on right
save = "in Colab runtime (files deleted after each session)" #@param ["in my Google Drive", "in Colab runtime (files deleted after each session)"]

# Mount google drive to export image cropping coordinate file(s)
# (Drive is only mounted when the "in my Google Drive" option is selected)
if 'Google Drive' in save:
    from google.colab import drive
    drive.mount('/content/drive', force_remount=True)

# Type in the path to your project wd in form field on right
# NOTE(review): the default path lives under /content/drive even when the
# "in Colab runtime" option is selected - confirm this is intended.
basewd = "/content/drive/MyDrive/train" #@param ["/content/drive/MyDrive/train"] {allow-input: true}
# Type in the folder that you want to contain TF2 files
folder = "tf2" #@param ["tf2"] {allow-input: true}
# Define current working directory using form field inputs
cwd = basewd + '/' + folder

# Install dependencies
# The archive fetched by gdown is unpacked into the current directory;
# requirements.txt is presumably part of that archive - TODO confirm.
!pip3 install --upgrade gdown
!gdown 1fIEf387CNrWk0ziPY-ltvwN9VrRXrRkY # Download helper_funcs folder
!tar -xzvf helper_funcs.tar.gz -C .
!pip install -r requirements.txt

In [None]:
# Set up directory structure and clone TF Object Detection API
# setup.py is run with the working directory as its only argument
# (presumably the script unpacked from helper_funcs - TODO confirm).
!python setup.py $cwd

# Build TF Object Detection API
# Steps chained below: compile the protobuf definitions to Python, copy the
# TF2 packaging script into the research folder, then pip-install the package.
%cd $cwd
!cd tf_models/models/research/ && protoc object_detection/protos/*.proto --python_out=. && cp object_detection/packages/tf2/setup.py . && python -m pip install .

In [None]:
# For object detection
import tensorflow as tf 
import tensorflow_hub as hub
import sys
sys.path.append("tf_models/models/research/")
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util

# For downloading and displaying images
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
import cv2
import tempfile
import urllib
from six.moves.urllib.request import urlopen
from six import BytesIO
from collections import defaultdict
from io import StringIO
from IPython.display import display

# For drawing onto images
from PIL import Image
from PIL import ImageColor
from PIL import ImageDraw
from PIL import ImageFont
from PIL import ImageOps

# For measuring inference time
import time

# For working with data
import subprocess
import sys
import numpy as np
import pandas as pd
import os
import pathlib
import csv
import tarfile
import zipfile

# Report the Tensorflow version in use
print(f'\nTensorflow Version: {tf.__version__}')

# Report which GPU device (if any) Tensorflow can see
print(f'The following GPU devices are available: {tf.test.gpu_device_name()}')

## Generate cropping coordinates for images
---
Run EOL 20k image bundles through pre-trained object detection models and save results in 4 batches (A-D). 

### Prepare object detection functions and settings

In [None]:
#@title Choose model and parameters for running inference
# Purpose: select the frozen inference graph to use (EOL pre-trained or a
# custom one), download it if needed, and define the label-map settings.
%matplotlib inline
# Make the helper module wrangle_data importable from /content
sys.path.append('/content')
from wrangle_data import read_datafile, display_image

# Use EOL pre-trained model for object detection or your own custom trained model?
model = "pre-trained EOL model" #@param ["pre-trained EOL model", "my custom model"]

# Use EOL pre-trained model & download needed files
if 'EOL' in model: 
    PATH_TO_CKPT = 'tf_models/train_demo/rcnn_i/finetuned_model' + '/frozen_inference_graph.pb'
    # Files are only downloaded when the frozen graph is not already on disk
    if not os.path.exists(PATH_TO_CKPT):
        # Download labelmap.pbtxt
        # (saved to whatever the current directory is at this point)
        !gdown 1DAwX6gj77r3YHLEgaTJgfn8VMLIf3ruw
        # Download frozen_inference_graph.pb
        !mkdir -p tf_models/train_demo/rcnn_i/finetuned_model
        %cd tf_models/train_demo/rcnn_i/finetuned_model
        !gdown 1hb4LI9nq1eHGQZ1d84Rj4VQpeu0UUPQZ
        # Same relative path as above; it resolves again once we cd back to cwd
        PATH_TO_CKPT = 'tf_models/train_demo/rcnn_i/finetuned_model' + '/frozen_inference_graph.pb'
        %cd $cwd

# Use your own custom trained model
else:
    # Change path to saved model checkpoint
    my_custom_model_path = "tf_models/train_demo/rcnn_i/finetuned_model" #@param {type:"string"}
    PATH_TO_CKPT = my_custom_model_path + '/frozen_inference_graph.pb'

# Label Map for model (maps model output # to text label)
# Note: You can modify "filters" to choose detection results for any class of interest the model is trained on
# NOTE(review): PATH_TO_CKPT and PATH_TO_LABELS are relative paths, so they
# assume the notebook's current directory is the project working directory.
filters = ["Anura", "Carnivora", "Coleoptera", "Squamata"] #@param ["[\"Anura\", \"Carnivora\", \"Coleoptera\", \"Squamata\"]"] {type:"raw", allow-input: true}
PATH_TO_LABELS = "labelmap.pbtxt" #@param {type:"string"}
NUM_CLASSES = 4 #@param
# Define functions

# Load the frozen inference graph (trained model) into its own tf.Graph
print("\nLoading trained model from: \n", PATH_TO_CKPT)
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.compat.v1.GraphDef()
    # Read the serialized protobuf from disk, then parse and import it
    with tf.io.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
    od_graph_def.ParseFromString(serialized_graph)
    tf.import_graph_def(od_graph_def, name='')

# Build the lookup from numeric class id to category info
print(f"\nLoading label map for {NUM_CLASSES} class(es) from: \n{PATH_TO_LABELS}")
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(
    label_map,
    max_num_classes=NUM_CLASSES,
    use_display_name=True,
)
category_index = label_map_util.create_category_index(categories)

# For handling bounding boxes
def draw_bounding_box_on_image(image, ymin, xmin, ymax, xmax,
                               color, font, thickness=4, display_str_list=()):
    """Draw one labeled bounding box onto a PIL image, in place.

    Args:
        image: PIL image to draw on.
        ymin, xmin, ymax, xmax: box corners as fractions of the image size.
        color: fill color for the box outline and the label background.
        font: PIL font used to measure and render the label text.
        thickness: outline width in pixels.
        display_str_list: label strings stacked at the top-left of the box.
    """
    canvas = ImageDraw.Draw(image)
    width_px, height_px = image.size
    left, right = xmin * width_px, xmax * width_px
    top, bottom = ymin * height_px, ymax * height_px
    outline = [(left, top), (left, bottom), (right, bottom), (right, top),
               (left, top)]
    canvas.line(outline, width=thickness, fill=color)

    def _text_size(label):
        # Width and height of the rendered label, from the font's bounding box
        x0, y0, x1, y1 = font.getbbox(label)
        return x1 - x0, y1 - y0

    # Each label has a top and bottom margin of 0.05x its height.
    stacked_height = (1 + 2 * 0.05) * sum(
        [_text_size(label)[1] for label in display_str_list])
    # Put the labels above the box when they fit, otherwise inside it
    if top > stacked_height:
        text_bottom = top
    else:
        text_bottom = top + stacked_height

    # Labels are rendered from the last one to the first, bottom to top.
    for label in reversed(display_str_list):
        text_width, text_height = _text_size(label)
        margin = np.ceil(0.05 * text_height)
        background = [(left, text_bottom - text_height - 2 * margin),
                      (left + text_width, text_bottom)]
        canvas.rectangle(background, fill=color)
        canvas.text((left + margin, text_bottom - text_height - margin),
                    label, fill="black", font=font)
        text_bottom -= text_height - 2 * margin

# TO DO: Set the maximum number of detections to keep per image
max_boxes = 10 #@param {type:"slider", min:0, max:100, step:10}

# TO DO: Set the minimum confidence score for detections to keep per image
min_score = 0.1 #@param {type:"slider", min:0, max:0.9, step:0.1}

def draw_boxes(image, boxes, class_names, scores, max_boxes=10, min_score=0.1):
    """Overlay labeled boxes on an image with formatted scores and label names.

    Only detections that (a) are among the first `max_boxes` results,
    (b) score at least `min_score` and (c) match one of the module-level
    `filters` are drawn.

    Args:
        image: batched image array; it is modified in place and `image[0]`
            is returned.
        boxes: detection boxes, indexed as boxes[0][i] -> (ymin, xmin, ymax, xmax).
        class_names: detected class ids, indexed as class_names[0][i].
        scores: detection scores, indexed as scores[0][i].
        max_boxes: maximum number of detections to consider.
        min_score: minimum score for a detection to be drawn.

    Returns:
        The first image of the batch with boxes drawn on it.
    """
    colors = list(ImageColor.colormap.values())

    try:
        font = ImageFont.truetype("/usr/share/fonts/truetype/liberation/LiberationSansNarrow-Regular.ttf",
                              25)
    except IOError:
        print("Font not found, using default font.")
        font = ImageFont.load_default()

    # Never index past the detections that were actually returned
    num_candidates = min(max_boxes, len(scores[0]))

    # Draw up to N-max boxes with confidence > score threshold
    for i in range(num_candidates):
        if scores[0][i] < min_score:
            continue
        display_str = "{}: {}%".format(category_index[class_names[0][i]]['name'],
                                 int(100 * scores[0][i]))
        # Only the filtered class is shown on images. This check must stay
        # inside the score test: otherwise a low-scoring detection would
        # reuse the previous detection's label/box (or hit a NameError when
        # no detection has passed the threshold yet).
        if not any(fil in display_str for fil in filters):
            continue
        ymin, xmin, ymax, xmax = tuple(boxes[0][i])
        color = colors[hash(class_names[0][i]) % len(colors)]
        image_pil = Image.fromarray(np.squeeze(image))
        draw_bounding_box_on_image(
            image_pil,
            ymin, xmin, ymax, xmax,
            color, font, display_str_list=[display_str])
        np.copyto(image, np.array(image_pil))

    return image[0]

# For uploading an image from url
# Modified from https://www.pyimagesearch.com/2015/03/02/convert-url-to-image-with-python-and-opencv/
def url_to_image(url):
    """Download an image and return it as a batched RGB array.

    Args:
        url: address of the image to download.

    Returns:
        (image_np, im_h, im_w): the RGB image with a leading batch axis
        added, followed by the image height and width in pixels.

    Raises:
        ValueError: if the downloaded bytes cannot be decoded as an image.
        urllib.error.URLError: if the URL cannot be opened.
    """
    # Close the HTTP response as soon as the bytes are read
    with urllib.request.urlopen(url) as resp:
        raw = np.asarray(bytearray(resp.read()), dtype="uint8")

    image = cv2.imdecode(raw, cv2.IMREAD_COLOR)
    # cv2.imdecode returns None instead of raising when decoding fails
    if image is None:
        raise ValueError("Could not decode an image from URL: {}".format(url))

    # OpenCV decodes to BGR; convert to RGB before further use
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_np = np.expand_dims(image, axis=0)
    im_h, im_w = image.shape[:2]

    return image_np, im_h, im_w

# Define start and stop indices in EOL bundle for running inference
def set_start_stop(run):
    """Return the (start, stop) row range of the bundle to run inference on.

    Args:
        run: run-mode string from the form field. If it contains
            "tiny subset", a random window of 5 rows is used.

    Returns:
        (start, stop) row positions. For full runs the range is chosen from
        the batch letter (a-d) found in the module-level `outfpath`.

    Raises:
        ValueError: if a full run is requested but `outfpath` does not
            contain a batch suffix "_a." to "_d.".
    """
    # To test with a tiny subset, use 5 random bundle images
    if "tiny subset" in run:
        start = np.random.choice(a=1000, size=1)[0]
        return start, start + 5

    # To run inference on 4 batches of 5k images each
    batch_ranges = {
        "_a.": (0, 5000),       # batch a is from 0-5000
        "_b.": (5000, 10000),   # batch b is from 5000-10000
        "_c.": (10000, 15000),  # batch c is from 10000-15000
        "_d.": (15000, 20000),  # batch d is from 15000-20000
    }
    for suffix, (start, stop) in batch_ranges.items():
        if suffix in outfpath:
            return start, stop

    # Without this, an unexpected file name surfaced as an UnboundLocalError
    raise ValueError(
        "Cannot determine batch from output path {!r}: expected the file name "
        "to end in _a, _b, _c or _d before the extension".format(outfpath))

# For running inference
def run_detector_tf(image_url):
    """Run the detector on one image, export its boxes and return the drawing.

    Downloads the image, runs the frozen detection graph on it, appends one
    row per kept detection to the module-level `outfpath` (tab-separated)
    and returns the image with the detection boxes drawn on it.

    Uses the module-level settings `max_boxes`, `min_score` and `filters`.

    Args:
        image_url: address of the image to run inference on.

    Returns:
        The image with detection boxes drawn on it.
    """
    image_np, im_h, im_w = url_to_image(image_url)
    with detection_graph.as_default():
        with tf.compat.v1.Session(graph=detection_graph) as sess:
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            output_tensors = [
                detection_graph.get_tensor_by_name('detection_boxes:0'),
                detection_graph.get_tensor_by_name('detection_scores:0'),
                detection_graph.get_tensor_by_name('detection_classes:0'),
                detection_graph.get_tensor_by_name('num_detections:0'),
            ]

            # Actual detection
            start_time = time.time()
            boxes, scores, classes, num = sess.run(
                output_tensors, feed_dict={image_tensor: image_np})
            end_time = time.time()

            result = {"detection_boxes": boxes, "detection_scores": scores,
                      "detection_classes": classes, "num_detections": num}

            # Number of detections considered for this image
            num_kept = min(int(result["num_detections"][0]), max_boxes)
            # Count only detections that really meet the confidence threshold
            num_confident = int(
                (result["detection_scores"][0][:num_kept] >= min_score).sum())
            print("Found %d objects with > %s confidence" % (num_confident, min_score))
            print("Inference time: %s sec" % format(end_time-start_time, '.2f'))

            # Draw detection boxes on image, honoring the form-field settings
            # (previously the function defaults were silently used instead)
            image_with_boxes = draw_boxes(image_np, result["detection_boxes"],
                                  result["detection_classes"], result["detection_scores"],
                                  max_boxes=max_boxes, min_score=min_score)

            # Export bounding boxes to file in Google Drive
            # NOTE(review): rows are filtered by class only, not by min_score;
            # confirm whether low-confidence detections should be exported.
            with open(outfpath, 'a', newline='') as out_file:
                tsv_writer = csv.writer(out_file, delimiter='\t')
                img_id = os.path.splitext((os.path.basename(image_url)))[0]
                # Write one row per detected object with bounding box coordinates
                for i in range(num_kept):
                    class_name = category_index[result["detection_classes"][0][i]]['name']
                    if any(fil in class_name for fil in filters): # Only writes rows for filtered class
                        # Boxes are stored as (ymin, xmin, ymax, xmax)
                        ymin, xmin, ymax, xmax = result["detection_boxes"][0][i]
                        tsv_writer.writerow([img_id, class_name,
                                  xmin, ymin, xmax, ymax, im_w, im_h, image_url])

    return image_with_boxes

print('\nModel loaded and functions defined! \nGo to next steps to run inference on images.')

#### Test: Run inference on a couple images from URLs

In [None]:
# TO DO: Type in image URLs 1-3 using form fields to right
url_1 = "https://content.eol.org/data/media/80/26/c6/542.5058695994.jpg" #@param {type:"string"}
url_2 = "https://content.eol.org/data/media/80/16/40/542.4836766484.jpg" #@param {type:"string"}
url_3 = "https://content.eol.org/data/media/9f/84/64/776.27478384.jpg" #@param {type:"string"}
image_urls = [url_1, url_2, url_3]

# Display detection results on images
display_results = True #@param {type:"boolean"}

# Set temporary outfile for tagging results
outfpath = "temp_outfile.tsv"

# Loop through the test image URLs to add bounding boxes to images
print("Running inference on images\n")
for im_num, image_url in enumerate(image_urls, start=1):
    try:
        image_wboxes = run_detector_tf(image_url)
        if display_results:
            display_image(image_wboxes)
        # Display progress message after each image
        print('Inference complete for image {} of {}\n'.format(im_num, len(image_urls)))

    # Report the underlying error too: not every failure is a bad URL
    except Exception as err:
        print('Check if URL from {} is valid ({}: {})\n'.format(
            image_url, type(err).__name__, err))

# Delete temporary outfile once, after all images. It does not exist when
# every image failed before any row was written, so check first.
if os.path.exists(outfpath):
    os.remove(outfpath)

### Generate crops: Run inference on EOL images & save results for cropping - Run 4X for batches A-D
Use 20K EOL Anura, Carnivora, Coleoptera, Squamata image bundles to get bounding boxes of detected animals. Results are saved to [crops_file].tsv. Run this section 4 times (to make batches A-D) of 5K images each to incrementally save in case of Colab timeouts.

In [None]:
#@title Enter EOL image bundle to run inference on
# Purpose: load the list of image URLs (one per line) that inference runs on.
# So URL's don't get truncated in display
pd.set_option('display.max_colwidth',1000)

# Read in EOL image bundle dataframe
# Type in image bundle address using form field to right
bundle = "https://editors.eol.org/other_files/bundle_images/files/images_for_Squamata_20K_breakdown_download_000001.txt" #@param ["https://editors.eol.org/other_files/bundle_images/files/images_for_Squamata_20K_breakdown_download_000001.txt", "https://editors.eol.org/other_files/bundle_images/files/images_for_Coleoptera_20K_breakdown_download_000001.txt", "https://editors.eol.org/other_files/bundle_images/files/images_for_Anura_20K_breakdown_download_000001.txt", "https://editors.eol.org/other_files/bundle_images/files/images_for_Carnivora_20K_breakdown_download_000001.txt"]
# NOTE(review): read_datafile comes from the wrangle_data helper module; if it
# forwards `sep` to pandas.read_csv, check that sep='\n' is accepted by the
# installed pandas version - TODO confirm.
df = read_datafile(bundle, sep='\n', header=None)
# The bundle has a single, unnamed column holding the image URLs
df.columns = ['url']
print('\n EOL image bundle head:\n{}'.format(df.head()))

In [None]:
#@title Enter path for saving results and output crops file name (change **crops_file** for each batch A-D)

# Build the output path for this batch: <basewd><name>_rcnn_<batch letter>.tsv
basewd = "/content/drive/MyDrive/train/tf2/results/" #@param ["/content/drive/MyDrive/train/results/"] {allow-input: true}
crops_file = "multitaxa_cropcoords_tf2_a" #@param ["multitaxa_cropcoords_tf2_a", "multitaxa_cropcoords_tf2_b", "multitaxa_cropcoords_tf2_c", "multitaxa_cropcoords_tf2_d"] {allow-input: true}
batch_prefix, batch_letter = crops_file.rsplit('_', 1)
outfpath = basewd + batch_prefix + '_rcnn' + '_' + batch_letter + '.tsv'
print('\n Cropping file *batch {}* will be saved to:\n{}'.format(batch_letter, outfpath))

# Write header row of output tag file, but only when the file is new or empty.
# The file is opened in append mode, so re-running this cell used to add a
# second header row in the middle of the results.
if not os.path.exists(outfpath) or os.path.getsize(outfpath) == 0:
    with open(outfpath, 'a', newline='') as out_file:
        tsv_writer = csv.writer(out_file, delimiter='\t')
        tsv_writer.writerow(["img_id", "class_name",
                             "xmin", "ymin", "xmax", "ymax", "im_width", "im_height", "url"])

In [None]:
#@title Choose settings to run inference on image batches A-D

# Test pipeline with a smaller subset than 5k images?
run = "test with tiny subset" #@param ["test with tiny subset", "for all images"]

# Display detection results on images?
display_results = "yes (use this option if testing tiny subsets; only works for \u003C50 images)" #@param ["yes (use this option if testing tiny subsets; only works for \u003C50 images)", "no (use this option if running batches)"]
# The form value is a non-empty string either way, so test its content
# instead of its truthiness (otherwise "no" would still display images)
show_images = display_results.startswith("yes")

# Loop through EOL image bundle to add bounding boxes to images
print("Running inference on images")
start, stop = set_start_stop(run)
for i, (row_label, row) in enumerate(df.iloc[start:stop].iterrows()):
    # Take the URL from the selected row. Indexing df['url'] with the loop
    # counter always read rows 0..n, regardless of the batch's start offset.
    image_url = row['url']
    try:
        image_wboxes = run_detector_tf(image_url)
        if (i+1<=50) and show_images:
            display_image(image_wboxes)

        # Display progress message after each image
        print('{}) Inference complete for image {} of {}\n'.format(row_label, i+1, (stop-start)))

    # Report the underlying error too: not every failure is a bad URL
    except Exception as err:
        print('Check if URL from {} is valid ({}: {})\n'.format(
            image_url, type(err).__name__, err))

print("Run inference using these two code blocks for all batches A-D before proceeding\n")

## Post-process detection results
--- 
Combine output files for batches A-D. Then, convert detection boxes into square, centered thumbnail cropping coordinates.

#### Merge batch output files A-D

In [None]:
#@title Enter path to any inference result batch file A-D

# So URL's don't get truncated in display
pd.set_option('display.max_colwidth',1000)
pd.options.display.max_columns = None

# If you just ran "Generate crops" above, you do not need to enter anything
# If you ran "Generate crops" during a previous session, enter the path for ONE output file
# The form value is only used when this session has no usable `outfpath`.
# (The previous test, "not in locals() or globals()", was always true because
# globals() is a non-empty dict, so the session's path was always overwritten.)
if 'outfpath' not in globals() or not os.path.exists(outfpath):
    outfpath = "/content/drive/MyDrive/train/tf2/results/multitaxa_cropcoords_tf2_d.tsv" #@param ["/content/drive/MyDrive/train/tf2/results/multitaxa_cropcoords_tf2_rcnn_i_d.tsv"] {allow-input: true}

# Combine 4 batches of detection box coordinates to one dataframe
# Strip the extension and the trailing batch letter to get the shared prefix
base_path =  os.path.splitext(outfpath)[0].rsplit('_',1)[0] + '_'
exts = ['a.tsv', 'b.tsv', 'c.tsv', 'd.tsv']
all_filenames = [base_path + e for e in exts]

# Fail early with the full list of missing batches instead of on the first one
missing_files = [f for f in all_filenames if not os.path.exists(f)]
if missing_files:
    raise FileNotFoundError(
        "Missing batch result file(s): {}".format(", ".join(missing_files)))

df = pd.concat([pd.read_csv(f, sep='\t', header=0, na_filter = False) for f in all_filenames], ignore_index=True)

# Write results to tsv
print("New concatenated dataframe with all 4 batches: \n", df.head())
concat_outfpath = base_path + 'concat.tsv'
df.to_csv(concat_outfpath, sep='\t', index=False)

#### Combine individual detection boxes into one "superbox" per image

In [None]:
# Define functions

from functools import reduce
from urllib.error import HTTPError
# So URL's don't get truncated in display
pd.set_option('display.max_colwidth',1000)
pd.options.display.max_columns = None

# Convert normalized detection coordinates (scaled to 0,1) to pixel values
def denormalize_coords(crops):
    """Scale normalized box coordinates to pixels, rounded to 2 decimals.

    Args:
        crops: dataframe with columns xmin, ymin, xmax, ymax (scaled to 0-1)
            and im_width, im_height (pixels). It is not modified.

    Returns:
        A new dataframe with the four coordinates multiplied by the image
        width/height and all numeric columns rounded to 2 decimal places.
    """
    # Work on a copy so the caller's dataframe is left untouched
    crops = crops.copy()
    crops['xmin'] = crops['xmin'] * crops['im_width']
    crops['ymin'] = crops['ymin'] * crops['im_height']
    crops['xmax'] = crops['xmax'] * crops['im_width']
    crops['ymax'] = crops['ymax'] * crops['im_height']
    # Round results to 2 decimal places. DataFrame.round returns a new frame,
    # so its result has to be kept (it used to be discarded).
    crops = crops.round(2)
    #print("De-normalized cropping coordinates: \n", crops.head())

    return crops

# For images with >1 detection, make a 'super box' that contains all boxes
def make_superboxes(crops):
    """Collapse all detection boxes of an image into one enclosing box.

    Args:
        crops: dataframe with one row per detection and the columns url,
            xmin, ymin, xmax, ymax, im_height, im_width and class_name.

    Returns:
        A dataframe indexed by url with one row per image and the columns
        im_height, im_width, xmin, ymin, xmax, ymax, class_name.
    """
    per_image = crops.groupby(['url'])

    # The superbox spans from the smallest xmin/ymin to the largest xmax/ymax.
    # im_height and im_width hold a single value per image, so taking the max
    # returns them unchanged; class_name is reduced with max as well.
    superbox_df = per_image.agg(
        im_height=('im_height', 'max'),
        im_width=('im_width', 'max'),
        xmin=('xmin', 'min'),
        ymin=('ymin', 'min'),
        xmax=('xmax', 'max'),
        ymax=('ymax', 'max'),
        class_name=('class_name', 'max'),
    )

    return superbox_df

# Add EOL img identifying info from breakdown file to cropping data
def add_identifiers(*, bundle_info, crops):
    """Attach EOL identifier columns to the per-image cropping data.

    Args:
        bundle_info: EOL breakdown dataframe. Its first two columns and its
            second-to-last column are used; one of them must be named
            'eolMediaURL'.
        crops: per-image cropping dataframe whose index (or, after
            resetting the index, a column) is named 'url'. It is not
            modified.

    Returns:
        A dataframe with an 'eolMediaURL' column, followed by the columns of
        `crops` and then the identifier columns, for every URL present in
        both inputs.
    """
    # Get dataObjectVersionIDs, identifiers, and eolMediaURLS from indexed cols
    ids = bundle_info.iloc[:, np.r_[0:2,-2]].set_index('eolMediaURL', drop=True)
    #print("Bundle identifying info head: \n", ids.head())

    # Set up the cropping data for mapping to bundle_info. Use the `crops`
    # argument (the global `superboxes` was used before, ignoring the
    # parameter) and build new frames instead of editing the input in place.
    boxes = (crops.reset_index()
                  .rename(columns={'url': 'eolMediaURL'})
                  .set_index('eolMediaURL', drop=True))

    # Map dataObjectVersionIDs to the crops using eolMediaURL as the index
    crops_w_identifiers = pd.DataFrame(boxes.merge(ids, left_index=True, right_index=True))
    crops_w_identifiers.reset_index(inplace=True)
    print("\n Crops with added EOL identifiers: \n", crops_w_identifiers.head())

    return crops_w_identifiers

In [None]:
#@title Enter EOL image bundle used for running inference

# For images with >1 detection, make a 'super box' that contains all boxes

# Read in crop file exported from "Combine output files A-D" block above
crops = read_datafile(concat_outfpath, sep='\t', header=0, disp_head=False)

# De-normalize cropping coordinates to pixel values
crops = denormalize_coords(crops)

# Make 1 superbox per image [coordinates: bottom left (smallest xmin, ymin) and top right (largest xmax, ymax)]
superboxes = make_superboxes(crops)

# Read in EOL image "breakdown" bundle dataframe from "breakdown_download" bundle used for cropping
# The form value is only used when no bundle was chosen earlier in this session.
# (The previous test, "not in locals() or globals()", was always true because
# globals() is a non-empty dict, so the session's bundle was always overwritten.)
if 'bundle' not in globals():
    bundle = "https://editors.eol.org/other_files/bundle_images/files/images_for_Squamata_20K_breakdown_download_000001.txt" #@param ["https://editors.eol.org/other_files/bundle_images/files/images_for_Squamata_20K_breakdown_download_000001.txt", "https://editors.eol.org/other_files/bundle_images/files/images_for_Coleoptera_20K_breakdown_download_000001.txt", "https://editors.eol.org/other_files/bundle_images/files/images_for_Anura_20K_breakdown_download_000001.txt", "https://editors.eol.org/other_files/bundle_images/files/images_for_Carnivora_20K_breakdown_download_000001.txt"]
breakdown = bundle.replace("download_", "") # Get EOL breakdown bundle url from "breakdown_download" address
bundle_info = read_datafile(breakdown, sep='\t', header=0, disp_head=False)

# Add EOL img identifying info from breakdown file to cropping data
crops_w_identifiers = add_identifiers(bundle_info=bundle_info, crops=superboxes)

#### Make superbox dimensions square

In [None]:
# Define functions

# Suppress pandas warning about writing over a copy of data
pd.options.mode.chained_assignment = None  # default='warn'

# Check if dimensions are out of bounds
def are_dims_oob(dim):
    """Return True when `dim` exceeds the smaller side of the current image.

    The image size is read from the module-level `im_h` and `im_w`.
    A plain Python bool is returned because callers compare the result
    with `is True` / `is False`.
    """
    shorter_side = min(im_h, im_w)
    return bool(dim > shorter_side)

# Center padded, square coordinates around object midpoint
def center_coords(coord_a, coord_b, crop_w, crop_h, im_dim_a, im_dim_b, pad):
    """Return the adjusted (bl_coord_a, bl_coord_b) origin of a squared crop.

    Args:
        coord_a, coord_b: original minimum coordinates of the box along the
            two axes, in the order chosen by the caller.
        crop_w, crop_h: crop width and height; only their absolute
            difference (and, in two branches, crop_w) is used.
        im_dim_a, im_dim_b: image size limits the "tr" coordinates are
            compared against.
        pad: padding added around the crop.

    Returns:
        Tuple (bl_coord_a, bl_coord_b) with the shifted minimum coordinates.
    """
    # Centered, padded top-right coordinates
    tr_coord_a = coord_a + 0.5*(abs(crop_h - crop_w)) + pad
    # NOTE(review): tr_coord_b is derived from coord_a, not coord_b - this
    # looks like it may be a typo for `coord_b + pad`; confirm before changing.
    tr_coord_b = coord_a + pad
    # Adjust coordinate positions if OOB (out of bounds)
    if crop_h != crop_w: # for cond 1 and 2
        # Both coords not OOB
        if (tr_coord_a <= im_dim_a) and (tr_coord_b <= im_dim_b):
            bl_coord_a = coord_a - 0.5*(abs(crop_h - crop_w)) - pad
            bl_coord_b = coord_b - pad
        # Topright coord_a OOB (+), shift cropping box down/left a-axis 
        elif (tr_coord_a > im_dim_a) and (tr_coord_b <= im_dim_b):
            bl_coord_a = 0.5*(abs(im_dim_a - crop_w))
            bl_coord_b = coord_b - pad
        # Topright coord_b OOB (+), shift cropping box down/left b-axis    
        elif (tr_coord_a <= im_dim_a) and (tr_coord_b > im_dim_b):
            bl_coord_a = coord_a - 0.5*(abs(crop_h - crop_w)) - pad
            bl_coord_b = coord_b - (tr_coord_b - im_dim_b + pad)
        # Both coords OOB (+), shift cropping box down/left both axes     
        elif (tr_coord_a > im_dim_a) and (tr_coord_b > im_dim_b):
            bl_coord_a = 0.5*(abs(im_dim_a - crop_w))
            bl_coord_b = coord_b - (tr_coord_b - im_dim_b + pad)
    else: # for cond 3
        # Same four cases as above; with crop_h == crop_w the centering
        # offset is zero, so only the padding and OOB shifts remain.
        # Both coords not OOB
        if (tr_coord_a <= im_dim_a) and (tr_coord_b <= im_dim_b):
            bl_coord_a = coord_a - pad
            bl_coord_b = coord_b - pad
        # Topright coord_a OOB (+), shift cropping box down/left a-axis 
        elif (tr_coord_a > im_dim_a) and (tr_coord_b <= im_dim_b):
            bl_coord_a = coord_a - (tr_coord_a - im_dim_a + pad)
            bl_coord_b = coord_b - pad
        # Topright coord_b OOB (+), shift cropping box down/left b-axis    
        elif (tr_coord_a <= im_dim_a) and (tr_coord_b > im_dim_b):
            bl_coord_a = coord_a - pad
            bl_coord_b = coord_b - (tr_coord_b - im_dim_b + pad)
        # Both coords OOB (+), shift cropping box down/left both axes     
        elif (tr_coord_a > im_dim_a) and (tr_coord_b > im_dim_b):
            bl_coord_a = coord_a - (tr_coord_a - im_dim_a + pad)
            bl_coord_b = coord_b - (tr_coord_b - im_dim_b + pad)
    
    # Results may be negative here; the caller clamps negative values to 0
    return bl_coord_a, bl_coord_b

# Set square dimensions = larger bounding box side
def make_large_square(dim):
    """Return the side length of a square crop, equal to the given (larger) side."""
    return dim

# Set square dimensions = smaller bounding box side
def make_small_square(dim):
    """Return the side length of a square crop, equal to the given (smaller) side."""
    return dim

# Add x% padding to bounding box dimensions
def add_padding(dim):
    """Return `dim` enlarged by the module-level `percent_pad` on both sides.

    NOTE(review): `percent_pad` is applied as a plain multiplier
    (percent_pad=2 adds 2*2*dim), not divided by 100 - confirm the
    intended units of the form-field value.
    """
    # Padding is added on all sides of the square, i.e. twice per dimension
    both_sides = 2*percent_pad*dim
    return dim + both_sides

# Make square crops that are within image bounds for different scenarios
def make_square_crops(df):
    """Convert each row's bounding box into a square crop, editing `df` in place.

    Adds the columns crop_height and crop_width, then walks over the rows and
    rewrites xmin, ymin, crop_width and crop_height depending on whether the
    box is taller than wide, wider than tall, or already square, and on which
    candidate square sizes fit inside the image.

    Reads the module-level `percent_pad` and publishes the current row's image
    size through the globals `im_h` / `im_w`, which are_dims_oob() reads.

    NOTE(review): values are written with chained assignment
    (`df.col[i] = ...`); this relies on the column Series sharing data with
    `df` and will not update `df` under pandas copy-on-write - TODO confirm
    the pandas version this is meant to run on.

    Args:
        df: dataframe with the columns xmin, ymin, xmax, ymax, im_height and
            im_width (pixel values).

    Returns:
        The same dataframe object, with the crop columns added/updated.
    """
    print("Before making square: \n", df.head())
    start_time = time.time()
    df['crop_height'] = round(df['ymax'] - df['ymin'], 1)
    df['crop_width'] = round(df['xmax'] - df['xmin'], 1)
    for i, row in df.iterrows():
        # Define variables for use filtering data through loops below
        crop_h0 = df['crop_height'][i]
        crop_w0 = df['crop_width'][i]
        #print("crop_h0: {}, crop_w0: {}".format(crop_h0, crop_w0))
        pad = percent_pad * max(crop_h0, crop_w0)  
        # im_h / im_w are globals because are_dims_oob() reads them
        global im_h, im_w
        im_h = df.im_height[i]
        im_w = df.im_width[i]
        xmin0 = df.xmin[i]
        ymin0 = df.ymin[i]
        xmax0 = df.xmax[i]
        ymax0 = df.ymax[i]
        
        # Conditions determine how rectangle bounding boxes are made square
        cond1 = crop_h0 > crop_w0 # crop height > width
        cond2 = crop_h0 < crop_w0 # crop width > height
        cond3 = crop_h0 == crop_w0 # crop height = width (already square)

        # Crop Height > Crop Width
        # See project wiki "Detailed explanation with drawings: convert_bboxdims.py", Scenario 1
        if cond1:
            lg_sq = make_large_square(crop_h0)
            lg_padded_sq = add_padding(lg_sq)
            sm_sq = make_small_square(crop_w0)
            sm_padded_sq = add_padding(sm_sq)

            # Where padded crop height is within image dimensions
            if are_dims_oob(lg_padded_sq) is False:
                # Make new crop dims equal to large padded square dims
                df.crop_width[i] = df.crop_height[i] = crop_h1 = lg_padded_sq  
                # Center position of new crop dims (adjust xmin, ymin)
                df.xmin[i], df.ymin[i] = center_coords(xmin0, ymin0, crop_w0, crop_h1, im_w, im_h, pad)

            # Where unpadded crop height is within image dimensions
            # NOTE(review): this branch requires the padded square to be in
            # bounds, which the `if` above already handles, so it can never
            # run; the comment suggests the two tests may be swapped.
            elif (are_dims_oob(lg_padded_sq) is False) and (are_dims_oob(lg_sq) is True):
                # Make new crop dims equal to large padded square dims
                df.crop_width[i] = df.crop_height[i] = crop_h1 = lg_sq  
                # Center position of new crop dims (adjust xmin, ymin)
                df.xmin[i] = xmin0 - 0.5*(min(im_h, im_w) - crop_w0)
                df.ymin[i] = 0

            # Where padded crop width is within image dimensions
            # NOTE(review): the condition tests that the padded small square
            # is OUT of bounds, which does not match the comment above.
            elif (are_dims_oob(lg_sq) is False) and (are_dims_oob(sm_padded_sq) is True):
                # Make new crop dimensions equal to small padded square dims
                df.crop_width[i] = df.crop_height[i] = crop_w1 = sm_padded_sq
                # Center position of new crop dims (adjust xmin, ymin)
                df.xmin[i] = xmin0 - 0.5*pad
                df.ymin[i] = ymin0 + 0.5*(crop_h0 - crop_w0) - pad   

            # Where unpadded crop width is within image dimensions
            elif (are_dims_oob(sm_padded_sq) is False) and (are_dims_oob(sm_sq) is True):
                # Make new crop dimensions equal to small padded square dims
                df.crop_width[i] = df.crop_height[i] = crop_w1 = sm_sq

            # Where crop width and height are both OOB
            # NOTE(review): the condition tests that the small square is NOT
            # out of bounds, which does not match the comment above. Also,
            # only crop_height is reset here (Scenario 2 resets both dims).
            elif are_dims_oob(sm_sq) is False:
                # Do not crop, set values equal to image dimensions
                df.crop_height[i] = crop_h1 = im_h 
                df.ymin[i] = 0
                df.xmin[i] = 0 
    
        # Crop Width > Crop Height
        # See project wiki "Detailed explanation with drawings: convert_bboxdims.py", Scenario 2
        elif cond2:
            lg_sq = make_large_square(crop_w0)
            lg_padded_sq = add_padding(lg_sq)
            sm_sq = make_small_square(crop_h0)
            sm_padded_sq = add_padding(sm_sq)

            # Where padded crop width is within image dimensions
            if are_dims_oob(lg_padded_sq) is False:
                # Make new crop dims equal to large padded square dims
                df.crop_width[i] = df.crop_height[i] = crop_w1 = lg_padded_sq  
                # Center position of new crop dims (adjust xmin, ymin)
                # Axes are passed y-first here, mirroring Scenario 1
                df.ymin[i], df.xmin[i] = center_coords(ymin0, xmin0, crop_w1, crop_h0, im_w, im_h, pad)

            # Where unpadded crop width is within image dimensions
            # NOTE(review): unreachable for the same reason as in Scenario 1.
            elif (are_dims_oob(lg_padded_sq) is False) and (are_dims_oob(lg_sq) is True):
                # Make new crop dims equal to large padded square dims
                df.crop_width[i] = df.crop_height[i] = crop_w1 = lg_sq  
                # Center position of new crop dims (adjust xmin, ymin)
                df.ymin[i] = ymin0 - 0.5*(min(im_h, im_w) - crop_h0)
                df.xmin[i] = 0

            # Where padded crop height is within image dimensions
            elif (are_dims_oob(lg_sq) is False) and (are_dims_oob(sm_padded_sq) is True):
                # Make new crop dimensions equal to small padded square dims
                df.crop_width[i] = df.crop_height[i] = crop_h1 = sm_padded_sq
                # Center position of new crop dims (adjust xmin, ymin)
                df.ymin[i] = ymin0 - pad
                df.xmin[i] = xmin0 + 0.5*(crop_w0 - crop_h0) - pad   

            # Where unpadded crop height is within image dimensions
            elif (are_dims_oob(sm_padded_sq) is False) and (are_dims_oob(sm_sq) is True):
                # Make new crop dimensions equal to small padded square dims
                df.crop_width[i] = df.crop_height[i] = crop_h1 = sm_sq

            # Where crop width and height are both OOB
            # NOTE(review): condition/comment mismatch as in Scenario 1.
            elif are_dims_oob(sm_sq) is False:
                # Do not crop, set values equal to image dimensions
                df.crop_width[i] = crop_w1 = im_w
                df.crop_height[i] = crop_h1 = im_h 
                df.ymin[i] = 0
                df.xmin[i] = 0 

        # Crop Width == Crop Height
        # See project wiki "Detailed explanation with drawings: convert_bboxdims.py", Scenario 3
        elif cond3: 
            lg_sq = make_large_square(crop_w0)
            lg_padded_sq = add_padding(lg_sq)
            sm_sq = make_small_square(crop_h0)
            sm_padded_sq = add_padding(sm_sq)
        
            # Where padded crop width/height is within image dimensions
            if are_dims_oob(lg_padded_sq) is False:            
                # Make new crop dims equal to large padded square dims
                df.crop_width[i] = df.crop_height[i] = crop_w1 = crop_h1 = lg_padded_sq
                # Center position of new crop dims (adjust xmin, ymin)
                df.xmin[i], df.ymin[i] = center_coords(xmin0, ymin0, crop_w0, crop_w1, im_w, im_h, pad)
                
            # Where unpadded crop width/height is within image dimensions
            elif (are_dims_oob(lg_padded_sq) is True) and (are_dims_oob(lg_sq) is False):
                # Both coords not OOB, no changes needed
                if (ymax0 <= im_h) and (xmax0 <= im_w):
                    pass
                
                # Topright X coord OOB (+), shift cropping box left
                elif (ymax0 <= im_h) and (xmax0 > im_w):  
                    df.xmin[i] = xmin0 - (xmax0 - im_w)
                # Topright Y coord OOB (+), shift cropping box down
                elif (ymax0 > im_h) and (xmax0 <= im_w):
                    df.ymin[i] = ymin0 - (ymax0 - im_h)
                # X and Y coords OOB (+), shift cropping box down and left   
                elif (ymax0 > im_h) and (xmax0 > im_w):
                    df.ymin[i] = ymin0 - (ymax0 - im_h)
                    df.xmin[i] = xmin0 - (xmax0 - im_w)

    # Image coordinates should be positive, set negative xmin and ymin values to 0
    df.xmin[df.xmin < 0] = 0
    df.ymin[df.ymin < 0] = 0
    print("Cropping coordinates, made square and with {}% padding: \n{}".format(percent_pad, df.head()))

    # Print time to run script
    print ('Run time: {} seconds'.format(format(time.time()- start_time, '.2f')))

    return df

# Format cropping dimensions to EOL standards
def format_crops_for_eol(df):
    """Add an EOL-formatted crop string per row and return the export columns.

    A 'crop_dimensions' column is appended to `df` (in place). Each value is
    a JSON-like string such as:
    {"height":"423","width":"640","crop_x":123.712,"crop_y":53.4249,"crop_width":352,"crop_height":0}

    Args:
        df: dataframe with the columns im_height, im_width, xmin, ymin,
            crop_width and crop_height. Columns are also selected by
            position below, so the column order of `df` matters.

    Returns:
        A new dataframe holding the columns at positions -5, -4, -6, 0 and -1
        of `df` (after 'crop_dimensions' has been appended as the last one).
    """
    template = ('{{"height":"{}","width":"{}","crop_x":{},"crop_y":{},'
                '"crop_width":{},"crop_height":{}}}')
    # Build the whole column at once. Writing strings row by row into a
    # NaN-filled float column through chained assignment is fragile and is
    # silently lost under pandas copy-on-write.
    df['crop_dimensions'] = [
        template.format(img_h, img_w, crop_x, crop_y, crop_w, crop_h)
        for img_h, img_w, crop_x, crop_y, crop_w, crop_h in zip(
            df.im_height, df.im_width, df.xmin, df.ymin,
            df.crop_width, df.crop_height)
    ]
    #print("\n EOL formatted cropping dimensions: \n", df.head())

    # Add other dataframe elements from cols: identifier, dataobjectversionid, eolmediaurl, im_class, crop_dimensions
    eol_crops = pd.DataFrame(df.iloc[:,np.r_[-5,-4,-6,0,-1]])
    print("\n EOL formatted cropping dimensions: \n", eol_crops.head())

    return eol_crops

In [None]:
# Make crops square and within image bounds
# This cell: squares the crops, saves a "_displaytest" TSV for visual checks,
# then formats the crops for EOL and saves the final TSV.

# Optional TO DO: Pad by xx% larger crop dimension
# NOTE(review): make_square_crops is called without percent_pad but reports it in
# its output, so it presumably reads this notebook-level variable — confirm.
percent_pad = 0 #@param {type:"slider", min:0, max:10, step:2}

# Make crops square and within bounds
df = make_square_crops(crops_w_identifiers)

# Export crop coordinates to display_test.tsv to visualize results in next code block and confirm crop transformations
# Path is concat_outfpath with its extension replaced by '_displaytest.tsv'
display_test_fpath = os.path.splitext(concat_outfpath)[0] + '_displaytest' + '.tsv'
print("\n File for displaying square crops on images will be saved to: \n", display_test_fpath)
df.to_csv(display_test_fpath, sep='\t', index=False)

# Format image and cropping dimensions for EOL standards
# (also adds a 'crop_dimensions' column to df itself)
eol_crops = format_crops_for_eol(df)

# Write results to tsv
# rsplit('_',2)[0] drops the last two '_'-separated pieces of the display-test
# file stem (one of them is 'displaytest') before '_20k_final.tsv' is appended
eol_crops_fpath = os.path.splitext(display_test_fpath)[0].rsplit('_',2)[0] + '_20k_final' + '.tsv'
# NOTE(review): `columns` receives a DataFrame holding every column except the last.
# That looks like it leaves the last column of eol_crops (crop_dimensions, per
# format_crops_for_eol) out of the written file — confirm this is intended.
eol_crops.to_csv(eol_crops_fpath, columns = eol_crops.iloc[:,:-1], sep='\t', index=False)

## Display cropping results on images
---

In [None]:
#@title Read in cropping file and display results on images

# Define functions

import cv2

# If you just ran "Generate crops" above, you do not need to enter anything
# If you ran "Generate crops" during a previous session, enter the path for ONE output file
# Only fall back to the form-field path when this session has no 'outfpath' yet.
# The earlier test `'outfpath' not in locals() or globals()` parsed as
# `('outfpath' not in locals()) or globals()`; globals() is a non-empty dict,
# so the condition was always true and an existing outfpath was always overwritten.
if 'outfpath' not in globals():
    outfpath = "/content/drive/MyDrive/train/tf2/results/multitaxa_cropcoords_tf2_d.tsv" #@param ["/content/drive/MyDrive/train/tf2/results/multitaxa_cropcoords_tf2_d.tsv"] {allow-input: true}
# Load the tab-separated crop file (first row is the header) and preview it
df = pd.read_csv(outfpath, sep="\t", header=0)
print(df.head())

# For uploading an image from url
# Modified from https://www.pyimagesearch.com/2015/03/02/convert-url-to-image-with-python-and-opencv/
def url_to_image(url):
    """Download an image and return it as an RGB array with a leading batch axis.

    Args:
        url: Address of the image to fetch.

    Returns:
        The decoded image, converted from OpenCV's BGR order to RGB, with one
        extra axis added in front (np.expand_dims(..., axis=0)).

    Raises:
        ValueError: If the downloaded bytes cannot be decoded as an image.
    """
    # Context manager closes the HTTP response once the payload has been read
    with urllib.request.urlopen(url) as resp:
        raw = np.asarray(bytearray(resp.read()), dtype="uint8")

    image = cv2.imdecode(raw, cv2.IMREAD_COLOR)
    # imdecode signals an undecodable buffer by returning None; fail with the
    # offending URL instead of an opaque error from cvtColor
    if image is None:
        raise ValueError("Could not decode image from URL: {}".format(url))

    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_np = np.expand_dims(image, axis=0)

    return image_np

# For handling bounding boxes
def draw_bounding_box_on_image(image, ymin, xmin, ymax, xmax,
                               color, font, thickness=4, display_str_list=()):
    """Draw a box outline and its text labels onto `image` in place.

    The box coordinates are multiplied by the image width/height before
    drawing, i.e. they are treated as fractions of the image size.

    Args:
        image: Image object accepted by ImageDraw.Draw; it is drawn on directly.
        ymin, xmin, ymax, xmax: Box edges, scaled by the image size as above.
        color: Fill used for the outline and the label background.
        font: Font object providing getbbox(); used to measure and render labels.
        thickness: Outline width passed to the line drawing call.
        display_str_list: Label strings, stacked from the bottom entry upwards.
    """
    def _text_size(text):
        # (width, height) of the rendered text, from the font's bounding box
        bbox = font.getbbox(text)
        return bbox[2] - bbox[0], bbox[3] - bbox[1]

    canvas = ImageDraw.Draw(image)
    width_px, height_px = image.size
    left, right = xmin * width_px, xmax * width_px
    top, bottom = ymin * height_px, ymax * height_px

    # Closed outline: start at top-left and return to it
    outline = [(left, top), (left, bottom), (right, bottom), (right, top),
               (left, top)]
    canvas.line(outline, width=thickness, fill=color)

    # Total label height, counting a 0.05x margin above and below each string
    stacked_height = (1 + 2 * 0.05) * sum(
        [_text_size(label)[1] for label in display_str_list])

    # Labels sit above the box when there is room, otherwise inside its top edge
    text_bottom = top if top > stacked_height else top + stacked_height

    # Walk the labels last-to-first so the first one ends up on top
    for label in reversed(display_str_list):
        text_width, text_height = _text_size(label)
        margin = np.ceil(0.05 * text_height)
        background = [(left, text_bottom - text_height - 2 * margin),
                      (left + text_width, text_bottom)]
        canvas.rectangle(background, fill=color)
        canvas.text((left + margin, text_bottom - text_height - margin),
                    label, fill="black", font=font)
        # NOTE(review): the step is text_height - 2 * margin (unchanged here);
        # only matters when more than one label is drawn — confirm the sign.
        text_bottom -= text_height - 2 * margin

# Draw bounding box on image
def draw_boxes(image, df, row_idx=None):
    """Overlay one labeled box from `df` on an image.

    Args:
        image: Image array; it is squeezed before drawing, overwritten in
            place with the result, and its first element is returned
            (presumably a (1, H, W, 3) array as built by url_to_image — confirm).
        df: DataFrame with "ymin", "xmin", "xmax", "ymax" and "class_name" columns.
        row_idx: Index label of the row in `df` to draw. When omitted, the
            notebook-level loop variable `i` is used, which is what this
            function silently relied on before the parameter existed.

    Returns:
        (image[0], box_dims), where box_dims holds the values read from the
        "ymin", "xmin", "xmax", "ymax" columns, in that order.
    """
    # Backward-compatible fallback to the global loop index
    if row_idx is None:
        row_idx = i

    # Set font
    try:
        font = ImageFont.truetype("/usr/share/fonts/truetype/liberation/LiberationSansNarrow-Regular.ttf",
                              25)
    except IOError:
        print("Font not found, using default font.")
        font = ImageFont.load_default()
    colors = list(ImageColor.colormap.values())

    # Format box and label design
    # NOTE(review): `ymax` is read from the "xmax" column and `xmax` from the
    # "ymax" column. Kept unchanged because the meaning of the columns in the
    # input file cannot be confirmed from here — verify against the file that
    # produced them before swapping.
    ymin, xmin, ymax, xmax = (df["ymin"][row_idx], df["xmin"][row_idx],
                              df["xmax"][row_idx], df["ymax"][row_idx])
    box_dims = [ymin, xmin, ymax, xmax]
    class_name = df["class_name"][row_idx]
    display_str = "{}".format(class_name)
    # Pick a color from the class name's hash
    color = colors[hash(class_name) % len(colors)]
    image_pil = Image.fromarray(np.squeeze(image))

    # Draw the box
    draw_bounding_box_on_image(
                image_pil,
                ymin, xmin, ymax, xmax,
                color, font, display_str_list=[display_str])
    # Write the annotated pixels back into the caller's array
    np.copyto(image, np.array(image_pil))

    return image[0], box_dims

In [None]:
#@title Choose starting index for crops to display

# Adjust line to right to see up to 50 images displayed at a time
start = 0 #@param {type:"slider", min:0, max:5000, step:50}
stop = start+50

# Loop through images
# The loop variable must stay named `i`: draw_boxes(img, df) looks up its row through it
for i, row in df.iloc[start:stop].iterrows():
    # Read in image
    url = df['url'][i]
    img = url_to_image(url)

    # Draw bounding box on image
    image_wbox, box_dims = draw_boxes(img, df)

    # Plot cropping box on image
    _, ax = plt.subplots(figsize=(20, 15))
    ax.imshow(image_wbox)

    # Display image URL and coordinates above image
    # Helps with fine-tuning data transforms in post-processing steps above
    # draw_boxes returns the values of the ymin, xmin, xmax, ymax columns in that
    # order, so pick indices that match each label (index 0 was shown as xmin before)
    plt.title('{} \n xmin: {}, ymin: {}, xmax: {}, ymax: {}'.format(url, box_dims[1], box_dims[0], box_dims[2], box_dims[3]))