<a href="https://colab.research.google.com/github/aubricot/computer_vision_with_eol_images/blob/master/object_detection_for_image_cropping/multitaxa/multitaxa_generate_crops_tf2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Use Faster-RCNN ResNet 50 and Inception v2 in Tensorflow to automatically crop images of snakes & lizards (Squamata), beetles (Coleoptera), frogs (Anura), and carnivores (Carnivora)
---   
*Last Updated 1 February 2025*  
-Now runs in Python 3 with Tensorflow 2.0-     

Use trained object detection models to automatically crop images of snakes & lizards (Squamata), beetles (Coleoptera), frogs (Anura), and carnivores (Carnivora) to square dimensions centered around animal(s).

Models were trained and saved to Google Drive in [multitaxa_train_tf2_rcnns.ipynb](https://github.com/aubricot/computer_vision_with_eol_images/blob/master/object_detection_for_image_cropping/multitaxa/multitaxa_train_tf2_rcnns.ipynb).

***Models were trained in Python 2 and TF 1 in April 2020: Faster RCNN ResNet 50 trained for 12 hours to 200,000 steps and Faster RCNN Inception v2 for 18 hours to 200,000 steps.***

Notes:   
* Run code blocks by pressing play button in brackets on left
* Before you start: change the runtime to "GPU" with "High RAM"
* Change parameters using form fields on right (find details at corresponding lines of code by searching '#@param')

References:     
* [Official Tensorflow Object Detection API Instructions](https://tensorflow-object-detection-api-tutorial.readthedocs.io/en/latest/training.html)   
* [Medium Blog on training using Tensorflow Object Detection API in Colab](https://medium.com/analytics-vidhya/training-an-object-detection-model-with-tensorflow-api-using-google-colab-4f9a688d5e8b)

## Installs & Imports
---

In [None]:
#@title Choose where to save results
# Purpose of this cell: pick the storage location, define the project working
# directory (cwd) used by every later cell, and download the EOL helper modules.
# Use dropdown menu on right
save = "in Colab runtime (files deleted after each session)" #@param ["in my Google Drive", "in Colab runtime (files deleted after each session)"]

# Mount google drive to export image cropping coordinate file(s)
# Drive is only mounted when the "Google Drive" option is selected above
if 'Google Drive' in save:
    from google.colab import drive
    drive.mount('/content/drive', force_remount=True)

# Note: You can modify "filter" to choose detection results for any class of interest the model is trained on
# NOTE(review): this name shadows the Python builtin filter(); the class filtering
# in export_results and draw_boxes reads the "filters" list instead, not this value.
filter = "Multitaxa" # @param ["Multitaxa"] {"allow-input":true}

# Type in the path to your project wd in form field on right
# NOTE(review): the default path is inside Google Drive, which is not mounted when
# saving "in Colab runtime" - presumably setup_dirs() creates it locally; confirm.
basewd = "/content/drive/MyDrive/train" #@param ["/content/drive/MyDrive/train"] {allow-input: true}
# Type in the folder that you want to contain TF2 files
folder = "tf2" #@param ["tf2"] {allow-input: true}
# Define current working directory using form field inputs
cwd = basewd + '/' + folder

# Install dependencies
# gdown fetches the helper_funcs archive, which is then unpacked into the
# current directory
!pip3 install --upgrade gdown
!gdown 1-0F-zKYOAV1qk2hu7kqKvlMo5Xsj6Uut # Download helper_funcs folder
!tar -xzvf multitaxa_helper_funcs.tar.gz -C .
#!pip install -r requirements.txt

In [None]:
# TO DO figure out requirements.txt and fix warnings
# Until requirements.txt is sorted out, numpy and protobuf are pinned by hand.
# NOTE(review): presumably these are the versions compatible with the Tensorflow
# Object Detection API build in the next cell - confirm before changing them.
#!pip install -r requirements.txt
!pip install numpy==1.24.3
!pip install protobuf==3.20.3

In [None]:
#@title Choose model parameters, set up directory structure, and build Tensorflow Object Detection API

# Use EOL pre-trained model for object detection?
use_EOL_model = True #@param {type: "boolean"}

# If using your own trained model, change values to match your trained model
# filters: a detection is kept when any of these strings occurs in its class name
filters = ["Anura", "Carnivora", "Coleoptera", "Squamata"] #@param ["[\"Anura\", \"Carnivora\", \"Coleoptera\", \"Squamata\"]"] {type:"raw", allow-input: true}
# PATH_TO_LABELS and saved_models_dir are relative paths; the notebook changes
# into cwd (%cd $cwd) later in this cell
PATH_TO_LABELS = "labelmap.pbtxt" #@param {type:"string"}
# NUM_CLASSES is passed as max_num_classes when the label map is converted to categories
NUM_CLASSES = 4 #@param
saved_models_dir = "tf_models/train_demo/rcnn_i/finetuned_model/" #@param ["tf_models/train_demo/rcnn/finetuned_model/"] {allow-input: true}
# mod_abbv is inserted into the results filename by set_outpath()
# NOTE(review): this is not a form field - keep it in sync with saved_models_dir
# by hand (the default directory is "rcnn_i", the dropdown alternative is "rcnn").
mod_abbv = "rcnn_i"

# For working with directories
import os
import pathlib

# For downloading and displaying images
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
import io
import scipy.misc
import numpy as np
import pandas as pd
from six import BytesIO
from PIL import Image, ImageDraw, ImageFont
from six.moves.urllib.request import urlopen

# For object detection
import tensorflow as tf
import tensorflow_hub as hub
tf.get_logger().setLevel('ERROR')

# Import EOL custom helper_funcs
from setup import *
from wrangle_data import *

# Clone Tensorflow Object Detection Github Repo
# setup_dirs comes from the EOL helper modules star-imported above
# (presumably setup.py - confirm); its effects are not visible in this notebook
setup_dirs(cwd)

# Build Tensorflow Object Detection API
!sudo apt install -y protobuf-compiler
# %cd changes the notebook's working directory, so the relative paths used in
# later cells (tf_models/..., labelmap.pbtxt) resolve against cwd
%cd $cwd
# The "cd" below runs inside a subshell and does not move the notebook itself:
# compile the protos, copy the TF2 setup.py into place and pip-install the package
!cd tf_models/models/research/ && protoc object_detection/protos/*.proto --python_out=. && cp object_detection/packages/tf2/setup.py . && python -m pip install .

In [None]:
# Build saved model

# For object detection
import sys
#sys.path.append("tf_models/models/research/")
sys.path.append('/content')
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util

# For downloading and displaying images
import cv2
import tempfile
import urllib
from collections import defaultdict
from io import StringIO
from IPython.display import display

# For drawing onto images
from PIL import Image
from PIL import ImageColor
from PIL import ImageDraw
from PIL import ImageFont
from PIL import ImageOps

# For measuring inference time
import time

# For working with data
import subprocess
import csv
import tarfile
import zipfile

# Print Tensorflow version
print('\nTensorflow Version: %s' % tf.__version__)

# Check available GPU devices
print('The following GPU devices are available: %s' % tf.test.gpu_device_name())

# Unpack EOL saved model
# PATH_TO_CKPT points at the frozen inference graph inside saved_models_dir
PATH_TO_CKPT = saved_models_dir + 'frozen_inference_graph.pb'
# Both names are bound to the same object returned by the helper; run_detector_tf
# uses it as a graph (as_default() / Session(graph=...)).
# NOTE(review): presumably a tf.Graph loaded from PATH_TO_CKPT - confirm in unpack_EOL_model.
detector = detection_graph = unpack_EOL_model(use_EOL_model, saved_models_dir, PATH_TO_CKPT, cwd)

# Load saved model and label map
print("\nLoading label map for {} class(es) from: \n{}".format(NUM_CLASSES, PATH_TO_LABELS))
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
# category_index is looked up by detected class id and read via its 'name' entry
# in export_results and draw_boxes
category_index = label_map_util.create_category_index(categories)

## Generate crops: Run inference on EOL images & save resulting coordinates for cropping - Run 4X for batches A-D
---
Use 20K EOL image bundle to generate bounding boxes around each object with pre-trained object detection models. Results are saved to [crops_file].tsv. Run this section 4 times (to make batches A-D) of 5K images each to incrementally save in case of Colab timeouts.

In [None]:
#@title Define functions
%matplotlib inline

# Set the maximum number of detections to keep per image
# Read as a global by export_results and passed explicitly to run_detector_tf
max_boxes = 10 #@param {type:"slider", min:0, max:100, step:10}

# Set the minimum confidence score for detections to keep per image
# NOTE(review): min_score is only used by draw_boxes and by the log message in
# run_detector_tf; export_results does not filter the rows it writes by score.
min_score = 0.6 #@param {type:"slider", min:0, max:0.9, step:0.1}

# Set filename for saving classification results
def set_outpath(crops_file, cwd):
    """Build, announce and return the path of the results file for one batch.

    The model abbreviation (global ``mod_abbv``) is inserted in front of the
    last underscore-separated token of *crops_file*, and the file is placed in
    the ``results`` folder under *cwd*, e.g. ``name_a`` -> ``name_<mod_abbv>_a.tsv``.

    Args:
        crops_file: Base name of the crops file; must contain an underscore
            (an IndexError is raised otherwise).
        cwd: Project working directory.

    Returns:
        The output path as a string.
    """
    name_parts = crops_file.rsplit('_', 1)
    outpath = ''.join([cwd, '/', 'results/', name_parts[0], '_', mod_abbv,
                       '_', name_parts[1], '.tsv'])
    print("\nSaving results to: \n", outpath)
    return outpath

# Export object detection results
def export_results(image_url, result, outfpath, im_h, im_w, filter=filters):
    """Append one TSV row per kept detection of an image to *outfpath*.

    At most ``max_boxes`` (global) detections are considered, and a row is
    written only when one of the strings in the global ``filters`` list occurs
    in the detection's class name (looked up in the global ``category_index``).

    NOTE(review): the ``filter`` argument is accepted but never read; the
    global ``filters`` list is used instead. Detections are also not filtered
    by ``min_score`` here - confirm whether that is intended.

    Args:
        image_url: URL of the image; its basename without extension is the id.
        result: Mapping with "num_detections", "detection_classes",
            "detection_boxes" and "detection_scores", each indexed as [0][i].
        outfpath: TSV file to append to.
        im_h: Value written to the 8th column of each row.
        im_w: Value written to the 9th column of each row.
        filter: Unused (see note above).

    Returns:
        The image id derived from *image_url*.
    """
    with open(outfpath, 'a') as out_file:
        writer = csv.writer(out_file, delimiter='\t')
        img_id = os.path.splitext(os.path.basename(image_url))[0]
        # Never look at more than max_boxes detections per image
        n_rows = min(int(result["num_detections"][0]), max_boxes)
        for det in range(n_rows):
            class_name = category_index[result["detection_classes"][0][det]]['name']
            if not any(fil in class_name for fil in filters):
                continue  # detection is not of a class of interest
            # Box entries are stored as [ymin, xmin, ymax, xmax]
            box = result["detection_boxes"][0][det]
            ymin, xmin, ymax, xmax = box[0], box[1], box[2], box[3]
            confidence = result["detection_scores"][0][det]
            writer.writerow([img_id, class_name, confidence,
                             xmin, ymin, xmax, ymax, im_h, im_w, image_url])
        print("\nObject detection results for Image {} saved to: {}".format(image_url, outfpath))

    return img_id

# Format cropping dimensions to EOL standards
def format_crops_for_eol(df):
    """Add EOL-formatted crop strings to *df* and return the EOL export columns.

    Each row gets a ``crop_dimensions`` string of the form
    {"height":"423","width":"640","crop_x":123.712,"crop_y":53.4249,"crop_width":352,"crop_height":0}
    built from the ``im_height``, ``im_width``, ``xmin``, ``ymin``,
    ``crop_width`` and ``crop_height`` columns.

    The column is assigned in one step as strings. The previous version first
    created a float (NaN) column and then wrote strings into it row by row,
    which newer pandas versions deprecate, and it looked rows up by index label,
    which breaks when labels are not unique.

    Args:
        df: Crop dataframe. It is modified in place: the ``crop_dimensions``
            column is added (or overwritten).

    Returns:
        A new dataframe with the columns at positions -5, -4, -6, 0 and -1 of
        *df* (after ``crop_dimensions`` has been added), expected to be:
        identifier, dataobjectversionid, eolmediaurl, im_class, crop_dimensions.
    """
    template = ('{{"height":"{}","width":"{}","crop_x":{},"crop_y":{},'
                '"crop_width":{},"crop_height":{}}}')
    source_cols = ['im_height', 'im_width', 'xmin', 'ymin',
                   'crop_width', 'crop_height']
    # Iterate positionally over the raw column values so the result does not
    # depend on the dataframe's index labels
    value_rows = zip(*(df[col].to_numpy() for col in source_cols))
    df['crop_dimensions'] = [template.format(*values) for values in value_rows]

    # Add other dataframe elements from cols: identifier, dataobjectversionid, eolmediaurl, im_class, crop_dimensions
    eol_crops = pd.DataFrame(df.iloc[:, np.r_[-5, -4, -6, 0, -1]])
    print("\n EOL formatted cropping dimensions: \n", eol_crops.head())

    return eol_crops

# Status message only: this cell defines parameters and helper functions; the
# model itself is loaded in the "Build saved model" cell above.
print('Model loaded and functions defined! \nGo to next steps to run inference on images.')

### Generate crops: Run inference on EOL images & save results for cropping - Run 4X for batches A-D
Use 20K EOL Anura, Carnivora, Coleoptera, Squamata image bundle to get bounding boxes of detected animals. Results are saved to [crops_file].tsv. Run this section 4 times (to make batches A-D) of 5K images each to incrementally save in case of Colab timeouts.

In [None]:
#@title Enter EOL image bundle and choose inference settings (change **crops_file** for each batch A-D)

# Load the EOL image bundle: a headerless, tab-separated file read into a
# single column that is named 'url'
bundle = "https://editors.eol.org/other_files/bundle_images/files/images_for_Squamata_20K_breakdown_download_000001.txt" #@param ["https://editors.eol.org/other_files/bundle_images/files/images_for_Squamata_20K_breakdown_download_000001.txt", "https://editors.eol.org/other_files/bundle_images/files/images_for_Coleoptera_20K_breakdown_download_000001.txt", "https://editors.eol.org/other_files/bundle_images/files/images_for_Anura_20K_breakdown_download_000001.txt", "https://editors.eol.org/other_files/bundle_images/files/images_for_Carnivora_20K_breakdown_download_000001.txt"]
df = read_datafile(bundle, sep='\t', header=None, disp_head=False)
df.columns = ['url']
print('\n EOL image bundle head:\n{}'.format(df.head()))

# Test pipeline with a smaller subset than 5k images?
run = "test with tiny subset" #@param ["test with tiny subset", "for all images"]

# Detection results are only displayed on images for tiny test runs
display_results = 'tiny subset' in run

# Results file for this batch of the bundle
# Change filename for each batch
crops_file = "multitaxa_cropcoords_tf2_a" #@param ["multitaxa_cropcoords_tf2_a", "multitaxa_cropcoords_tf2_b", "multitaxa_cropcoords_tf2_c", "multitaxa_cropcoords_tf2_d"] {allow-input: true}
outfpath = set_outpath(crops_file, cwd)

# Start a new results file with a header row; an existing file is left as-is
# so that export_results keeps appending detections to it
# NOTE(review): the header names the 8th/9th columns im_width, im_height, while
# export_results writes im_h, im_w in those positions - confirm the order that
# url_to_image returns and align header and data if they disagree.
header_row = ["img_id", "class_name", "confidence",
              "xmin", "ymin", "xmax", "ymax", "im_width",
              "im_height", "url"]
if not os.path.isfile(outfpath):
    with open(outfpath, 'a') as out_file:
        csv.writer(out_file, delimiter='\t').writerow(header_row)

In [None]:
# TO DO: Refactor draw bounding boxes on an image; multitaxa has errors
# To draw bounding boxes on an image
# Modified from TF Hub https://www.tensorflow.org/hub/tutorials/object_detection
def draw_bounding_box_on_image(image, ymin, xmin, ymax, xmax,
                               color, font, thickness=4, display_str_list=()):
    """Draw one box outline and its text label(s) onto a PIL image, in place.

    Args:
        image: PIL image to draw on.
        ymin, xmin, ymax, xmax: Box corners; each is multiplied by the image
            height or width to obtain pixel positions.
        color: Colour of the outline and of the label background.
        font: PIL font used to measure and render the labels.
        thickness: Outline width.
        display_str_list: Label strings stacked at the top-left box corner.
    """
    canvas = ImageDraw.Draw(image)
    img_w, img_h = image.size
    left, right = xmin * img_w, xmax * img_w
    top, bottom = ymin * img_h, ymax * img_h
    outline = [(left, top), (left, bottom), (right, bottom), (right, top),
               (left, top)]
    canvas.line(outline, width=thickness, fill=color)

    # Measure every label once; getbbox returns (left, top, right, bottom)
    label_sizes = []
    for label in display_str_list:
        bbox = font.getbbox(label)
        label_sizes.append((bbox[2] - bbox[0], bbox[3] - bbox[1]))

    # Each label has a top and bottom margin of 0.05x its height
    stacked_height = (1 + 2 * 0.05) * sum(height for _, height in label_sizes)
    # Labels go above the box when they fit there, otherwise inside its top edge
    text_bottom = top if top > stacked_height else top + stacked_height

    # Labels are rendered in reverse order, starting at text_bottom
    for label, (text_width, text_height) in zip(display_str_list[::-1],
                                                label_sizes[::-1]):
        margin = np.ceil(0.05 * text_height)
        label_top = text_bottom - text_height
        canvas.rectangle([(left, label_top - 2 * margin),
                          (left + text_width, text_bottom)],
                         fill=color)
        canvas.text((left + margin, label_top - margin),
                    label, fill="black", font=font)
        text_bottom -= text_height - 2 * margin

# Filter detections and annotate images with results
# Modified from TF Hub https://www.tensorflow.org/hub/tutorials/object_detection
def draw_boxes(image, boxes, class_names, scores, max_boxes, min_score, filter, label_map, category_index):
    """Annotate an image with the detections that pass the score and class filters.

    Args:
        image: Image array that is updated in place.
            NOTE(review): assumed to carry a leading batch axis of size 1,
            since it is squeezed for drawing and ``image[0]`` is returned.
        boxes: Detection boxes, indexed as ``boxes[0][i]`` ->
            (ymin, xmin, ymax, xmax).
        class_names: Detected class ids, indexed as ``class_names[0][i]``.
        scores: Detection scores, indexed as ``scores[0][i]``.
        max_boxes: Maximum number of detections to consider.
        min_score: Detections scoring below this value are skipped.
        filter: Unused; class filtering reads the global ``filters`` list.
        label_map: Unused.
        category_index: Mapping from class id to a dict with a 'name' entry.

    Returns:
        ``image[0]``, with the boxes and labels drawn.
    """
    # Format text above boxes
    colors = list(ImageColor.colormap.values())
    try:
        font = ImageFont.truetype("/usr/share/fonts/truetype/liberation/LiberationSansNarrow-Regular.ttf", 25)
    except IOError:
        print("Font not found, using default font.")
        font = ImageFont.load_default()

    # Never index past the detections that were actually returned
    n_candidates = min(max_boxes, len(scores[0]))

    # The PIL copy is created lazily and written back once, instead of
    # converting the whole image back and forth for every detection
    image_pil = None
    for i in range(n_candidates):
        if not scores[0][i] >= min_score:
            continue
        class_id = class_names[0][i]
        class_name = category_index[class_id]['name']
        # Only the filtered classes are shown on images; match on the class
        # name (as export_results does) rather than on the label text that
        # also contains the score
        if not any(fil in class_name for fil in filters):
            continue
        if image_pil is None:
            image_pil = Image.fromarray(np.squeeze(image))
        ymin, xmin, ymax, xmax = tuple(boxes[0][i])
        display_str = "{}: {}%".format(class_name, int(100 * scores[0][i]))
        color = colors[hash(class_id) % len(colors)]
        draw_bounding_box_on_image(
            image_pil,
            ymin, xmin, ymax, xmax,
            color, font, display_str_list=[display_str])

    if image_pil is not None:
        np.copyto(image, np.array(image_pil))
    return image[0]

# For running inference
# Modified from TF Hub https://www.tensorflow.org/hub/tutorials/object_detection
def run_detector_tf(detection_graph, image_url, outfpath, filter, label_map, max_boxes, min_score, category_index):
    """Run the detection graph on one image and return the raw results.

    Args:
        detection_graph: Graph holding the tensors 'image_tensor:0',
            'detection_boxes:0', 'detection_scores:0', 'detection_classes:0'
            and 'num_detections:0'.
        image_url: URL of the image, loaded through ``url_to_image``.
        outfpath, filter, label_map, category_index: Unused here; kept so
            existing calls keep working (they were passed on to the currently
            disabled draw_boxes step).
        max_boxes: Upper bound on the detections counted in the log message.
        min_score: Score threshold used in the log message.

    Returns:
        Tuple ``(result, im_h, im_w)`` where *result* is a dict with the keys
        "detection_boxes", "detection_scores", "detection_classes" and
        "num_detections", and *im_h*, *im_w* are the last two values returned
        by ``url_to_image``.
    """
    # NOTE(review): image_np is fed to 'image_tensor:0' unchanged, so it is
    # presumably already batched by url_to_image - confirm.
    image_np, im_h, im_w = url_to_image(image_url)
    output_names = ("detection_boxes", "detection_scores",
                    "detection_classes", "num_detections")
    with detection_graph.as_default():
        # NOTE(review): a new session is opened for every image; reusing one
        # session across images would presumably be faster, but needs a
        # change on the caller's side.
        with tf.compat.v1.Session(graph=detection_graph) as sess:
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            fetches = [detection_graph.get_tensor_by_name(name + ':0')
                       for name in output_names]

            # Actual detection
            start_time = time.time()
            outputs = sess.run(fetches, feed_dict={image_tensor: image_np})
            end_time = time.time()

    result = dict(zip(output_names, outputs))

    # Report how many of the considered detections really clear the threshold.
    # The count is taken from explicit scalars: the former
    # min(result["num_detections"], max_boxes) compared an array with an int
    # and did not look at the scores at all.
    n_considered = min(int(result["num_detections"][0]), max_boxes)
    n_confident = int(np.sum(result["detection_scores"][0][:n_considered] > min_score))
    print("Found %d objects with > %s confidence" % (n_confident, min_score))
    print("Inference time: %s sec" % format(end_time-start_time, '.2f'))

    # Drawing detections onto the image with draw_boxes is currently disabled,
    # so only the raw result and the image dimensions are returned.
    return result, im_h, im_w

In [None]:
#@title Run inference on image batches A-D

# Run EOL bundle images through trained model and save results
print("Running inference on images")
all_predictions = []
# The subset of the bundle to process comes from the "run" setting chosen above.
# A leftover debugging override (start = 0, stop = 5) used to replace these
# values, so "for all images" never ran on more than five images.
start, stop, cutoff = set_start_stop(run, df)
for i, image_url in enumerate(df['url'].iloc[start:stop]):
    try:
        # Run image through object detector and export result
        result, im_h, im_w = run_detector_tf(detector, image_url, outfpath, filters, label_map, max_boxes, min_score, category_index)
        img_id = export_results(image_url, result, outfpath, im_h, im_w)
    except Exception as err:
        # Skip images that fail (e.g. broken URLs) but report which image and
        # why; KeyboardInterrupt is no longer swallowed, so the run can be stopped
        print('Check if URL from {} is valid\n'.format(image_url))
        print('  ({}: {})\n'.format(type(err).__name__, err))
        continue

    # Optional display of detections on images is disabled while draw_boxes is
    # being refactored (see the TO DO above draw_bounding_box_on_image)

    # Display progress message after each image
    all_predictions.append(img_id)
    print('\033[92m {}) Inference complete for image {} of {} \033[0m \n'.format(i+1, i+1, cutoff))
    # Stop once the requested number of images has been processed successfully
    if len(all_predictions) >= cutoff:
        break

print("\n\n~~~\n\033[92m Inference complete!\033[0m \033[93m Run these steps for remaining batches A-D before proceeding.\033[0m\n~~~")

## Post-process detection results
---
Combine output files for batches A-D. Then, convert detection boxes into square, centered thumbnail cropping coordinates.

In [None]:
#@title Merge 5k image batch output files A-D

# Enter path to any inference result batch file A-D

# If you just ran "Generate crops" above, you do not need to enter anything
# If you ran "Generate crops" during a previous session, enter the path for ONE output file
# (The former test "'outfpath' not in locals() or globals()" was always true,
# because a non-empty globals() dict is truthy, so the form value always
# replaced the path from the current session.)
if 'outfpath' not in globals():
    crops_file = "multitaxa_cropcoords_tf2_a" #@param ["multitaxa_cropcoords_tf2_a", "multitaxa_cropcoords_tf2_b", "multitaxa_cropcoords_tf2_c", "multitaxa_cropcoords_tf2_d"] {allow-input: true}
    outfpath = set_outpath(crops_file, cwd)

# Combine 4 batches of detection box coordinates to one dataframe
# Strip the batch letter to get the prefix shared by all batch files
# NOTE(review): this rebinds "basewd", the name used for the project base
# directory in the first cell.
basewd =  os.path.splitext(outfpath)[0].rsplit('_',1)[0] + '_'
exts = ['a.tsv', 'b.tsv', 'c.tsv', 'd.tsv']
all_filenames = [basewd + e for e in exts]

# Merge the batches that exist, so a test run with a single batch can continue,
# and say clearly which ones are missing
found_filenames = [f for f in all_filenames if os.path.isfile(f)]
missing_filenames = [f for f in all_filenames if f not in found_filenames]
if not found_filenames:
    raise FileNotFoundError("No batch result files found; expected: {}".format(all_filenames))
if missing_filenames:
    print("\033[93m Warning: batch file(s) not found and left out of the merge: {} \033[0m".format(missing_filenames))
df = pd.concat([pd.read_csv(f, sep='\t', header=0, na_filter = False) for f in found_filenames], ignore_index=True)

# Write results to tsv
concat_outfpath = basewd + 'concat.tsv'
df.to_csv(concat_outfpath, sep='\t', index=False)
print("New concatenated dataframe with {} of {} batches saved to: {} \n{}".format(len(found_filenames), len(all_filenames), concat_outfpath, df.head()))

In [None]:
#@title Combine individual detection boxes into one "superbox" per image

# For images with >1 detection, make a 'super box' that contains all boxes

# Read in crop file exported from "Combine output files A-D" block above
crops = read_datafile(concat_outfpath, sep='\t', header=0, disp_head=False)

# De-normalize cropping coordinates to pixel values
crops = denormalize_coords(crops)

# Make 1 superbox per image [coordinates: bottom left (smallest xmin, ymin) and top right (largest xmax, ymax)]
superboxes = make_superboxes(crops)

# Read in EOL image "breakdown" bundle dataframe from "breakdown_download" bundle used for cropping
# Only fall back to the form value when no bundle was chosen earlier in this
# session. (The former test "'bundle' not in locals() or globals()" was always
# true, so the bundle selected for inference was always replaced by this
# default and the breakdown file could belong to a different taxon.)
if 'bundle' not in globals():
    bundle = "https://editors.eol.org/other_files/bundle_images/files/images_for_Squamata_20K_breakdown_download_000001.txt" #@param ["https://editors.eol.org/other_files/bundle_images/files/images_for_Squamata_20K_breakdown_download_000001.txt", "https://editors.eol.org/other_files/bundle_images/files/images_for_Coleoptera_20K_breakdown_download_000001.txt", "https://editors.eol.org/other_files/bundle_images/files/images_for_Anura_20K_breakdown_download_000001.txt", "https://editors.eol.org/other_files/bundle_images/files/images_for_Carnivora_20K_breakdown_download_000001.txt"]
breakdown = bundle.replace("download_", "") # Get EOL breakdown bundle url from "breakdown_download" address
bundle_info = read_datafile(breakdown, sep='\t', header=0, disp_head=False)

# Add EOL img identifying info from breakdown file to cropping data
crops_w_identifiers = add_identifiers(superboxes, bundle_info)

In [None]:
#@title Make superbox square and within image bounds (Optional: add padding)

# Pad by xx% larger crop dimension
pad = 2 #@param {type:"slider", min:0, max:10, step:2}
pad = pad/100 # Convert to percentage

# Make crops square and within bounds
df = make_square_crops(crops_w_identifiers, pad)

# Export crop coordinates to display_test.tsv to visualize results in next code block and confirm crop transformations
concat_stem = os.path.splitext(concat_outfpath)[0]
display_test_fpath = concat_stem + '_displaytest' + '.tsv'
print("\n File for displaying square crops on images will be saved to: \n", display_test_fpath)
df.to_csv(display_test_fpath, sep='\t', index=False)

# Format image and cropping dimensions for EOL standards
eol_crops = format_crops_for_eol(df)

# Write results to tsv
# The final file name drops the last two "_" separated tokens of the display
# file stem and appends "_20k_final"
final_stem = os.path.splitext(display_test_fpath)[0].rsplit('_',2)[0]
eol_crops_fpath = final_stem + '_20k_final' + '.tsv'
# All columns except the last one are written
# NOTE(review): the last column of eol_crops is crop_dimensions, which is
# therefore left out of the exported file - confirm this is intended.
export_columns = list(eol_crops.columns[:-1])
eol_crops.to_csv(eol_crops_fpath, columns=export_columns, sep='\t', index=False)
print("EOL formatted crops dataset saved to: {} \n{}".format(eol_crops_fpath, eol_crops.head()))

## Display cropping results on images
---

In [None]:
#@title Read in cropping file and display results on images
from wrangle_data import *
import cv2

# If you just ran "Post-process results" above, you do not need to enter anything
# If you ran "Generate crops" during a previous session, enter the path for desired cropping file
# (The former test "'display_test_fpath' not in locals() or globals()" was
# always true, so the path from the current session was always replaced by
# the one derived from the form value.)
if 'display_test_fpath' not in globals():
    crops_file = "multitaxa_cropcoords_tf2_a" #@param ["multitaxa_cropcoords_tf2_a", "multitaxa_cropcoords_tf2_b", "multitaxa_cropcoords_tf2_c", "multitaxa_cropcoords_tf2_d"] {allow-input: true}
    outfpath = set_outpath(crops_file, cwd)
    # Display file written by the post-processing step: <prefix>_concat_displaytest.tsv
    display_test_fpath =  os.path.splitext(outfpath)[0].rsplit('_',1)[0] + '_concat_displaytest' + '.tsv'
    print(display_test_fpath)
df = pd.read_csv(display_test_fpath, sep="\t", header=0)
print(df.head())

In [None]:
#@title Choose starting index for crops to display

# Adjust line to right to see up to 50 images displayed at a time
start = 0 #@param {type:"slider", min:0, max:5000, step:50}
stop = start+50

# One figure per image in the selected window of the crops dataframe
for i, row in df.iloc[start:stop].iterrows():
    # Read in image
    url = row['eolMediaURL']
    image, im_h, im_w = url_to_image(url)

    # Draw bounding box on image
    image_wbox, boxcoords = draw_box_on_image(df, i, image, filters)

    # Plot cropping box on image
    _, ax = plt.subplots(figsize=(10, 10))
    ax.imshow(image_wbox)

    # Display image URL and coordinates above image
    # Helps with fine-tuning data transforms in post-processing steps above
    box_xmin, box_ymin, box_xmax, box_ymax = (boxcoords[k] for k in range(4))
    ax.set_title('{} \n xmin: {}, ymin: {}, xmax: {}, ymax: {}'.format(url, box_xmin, box_ymin, box_xmax, box_ymax))