<a href="https://colab.research.google.com/github/aubricot/computer_vision_with_eol_images/blob/master/object_detection_for_image_cropping/aves/aves_generate_crops_tf2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Using Faster-RCNN and SSD in Tensorflow to detect birds from images   
---   
*Last Updated 7 May 2021*.  
-Runs in Python 3 with Tensorflow 2.0-   
This notebook uses Faster-RCNN and SSD for customized, large-scale image processing with Tensorflow. Using the location and dimensions of the detected birds, images will be cropped to square dimensions that are centered and padded around the detection box. Pre-trained models are used for "out of the box" inference on images of birds of varying dimensions and resolutions.

It is modified from [here](https://medium.com/@nickbortolotti/tensorflow-object-detection-api-in-5-clicks-from-colaboratory-843b19a1edf1). The [Tensorflow Object Detection API Tutorial](https://github.com/tensorflow/models/tree/master/research/object_detection) was also used as a reference. Tensorflow Object Detection API is meant for building models for custom object detection, see more information here: [Tensorflow Object Detection API](https://tensorflow-object-detection-api-tutorial.readthedocs.io/en/latest/install.html#tensorflow-models-installation). 

# Installs & Imports
---

In [None]:
# Mount Google Drive so the image cropping coordinate file(s) can be exported to it
from google.colab import drive
drive.mount(mountpoint='/content/drive', force_remount=True)

In [None]:
# For running inference on the TF-Hub module.
import tensorflow as tf
import tensorflow_hub as hub

# For downloading the image.
import matplotlib.pyplot as plt
import tempfile
from six.moves.urllib.request import urlopen
from six import BytesIO

# For drawing onto the image.
import numpy as np
from PIL import Image
from PIL import ImageColor
from PIL import ImageDraw
from PIL import ImageFont
from PIL import ImageOps

# For measuring the inference time.
import time

# Report the Tensorflow version in use
tf_version = tf.__version__
print('Tensorflow Version: %s' % tf_version)

# Report the name of the GPU device Tensorflow can see (if any)
gpu_name = tf.test.gpu_device_name()
print('The following GPU devices are available: %s' % gpu_name)

### Model Preparation
--- 
Define functions needed for running inference using chosen pre-trained model (SSD MobileNet v2 or Faster RCNN ResNet 50).

In [None]:
import csv
import os

# Load pre-trained model from Tensorflow Hub
# TO DO: Choose model to run inference with (both trained on MS COCO 2017)
model = "SSD MobileNet v2" #@param ["SSD MobileNet v2", "Faster RCNN Resnet 50"]
if 'SSD' in model: 
  module_handle = "https://tfhub.dev/tensorflow/ssd_mobilenet_v2/2"
elif 'RCNN' in model:
  module_handle = "https://tfhub.dev/tensorflow/faster_rcnn/resnet50_v1_640x640/1"
else:
  # Stop here with a clear message: module_handle was never assigned, so
  # hub.load() below would otherwise fail with an unrelated NameError
  raise ValueError(
      'Error: Model name does not correspond to module_handle: {!r}'.format(model))
print('Loading {} from TF-Hub...'.format(model))
detector = hub.load(module_handle)

# For handling images
def display_image(image):
  """Show `image` with matplotlib on a new 20x15 figure with the grid turned off."""
  plt.figure(figsize=(20, 15))
  plt.grid(False)
  plt.imshow(image)

def load_img(path): #From file
  """Read the file at `path` and decode it as a 3-channel JPEG image tensor."""
  raw_bytes = tf.io.read_file(path)
  return tf.image.decode_jpeg(raw_bytes, channels=3)

def download_and_resize_image(url, new_width=256, new_height=256, #From URL
                              display=False):
  """Download an image, fit it to the requested size and save it as a JPEG.

  Args:
    url: Address of the image to download.
    new_width: Width in pixels of the saved image.
    new_height: Height in pixels of the saved image.
    display: If True, show the resized image with display_image().

  Returns:
    Tuple (filename, im_h, im_w): path of the temporary JPEG file that was
    written (it is not deleted here, so the caller should remove it when
    done) and the height and width in pixels of the image as downloaded,
    before resizing.
  """
  # Download and open the image first, so a bad URL does not leave an empty
  # temporary file behind; the `with` closes the HTTP response
  with urlopen(url) as response:
    image_data = BytesIO(response.read())
  pil_image = Image.open(image_data)
  # PIL reports size as (width, height)
  im_w, im_h = pil_image.size
  # Image.LANCZOS is the same filter as the former Image.ANTIALIAS alias,
  # which was removed in Pillow 10
  pil_image = ImageOps.fit(pil_image, (new_width, new_height), Image.LANCZOS)
  pil_image_rgb = pil_image.convert("RGB")
  # Reserve a temporary file name; leaving the `with` closes the handle so no
  # file descriptor is leaked per image, and delete=False keeps the file
  with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp_file:
    filename = tmp_file.name
  pil_image_rgb.save(filename, format="JPEG", quality=90)
  if display:
    display_image(pil_image)
  return filename, im_h, im_w

# MS COCO 2017 label map: numeric class ids output by the models -> text labels
# Note: You can modify "filter" to choose detection results for any of these categories.
# "filter" is matched as a substring of the label text (see draw_boxes and run_detector).
# NOTE(review): the name "filter" shadows the Python builtin of the same name.
filter = "bird" #@param {type:"string"}
label_map = {1: 'person', 2: 'bicycle', 3: 'car', 4: 'motorcycle', 5: 'airplane',
              6: 'bus', 7: 'train', 8: 'truck', 9: 'boat', 10: 'traffic light',
              11: 'fire hydrant', 13: 'stop sign', 14: 'parking meter', 15: 'bench',
              16: 'bird', 17: 'cat', 18: 'dog', 19: 'horse', 20: 'sheep', 21: 'cow',
              22: 'elephant', 23: 'bear', 24: 'zebra', 25: 'giraffe', 27: 'backpack',
              28: 'umbrella', 31: 'handbag', 32: 'tie', 33: 'suitcase', 34: 'frisbee',
              35: 'skis', 36: 'snowboard', 37: 'sports ball', 38: 'kite',
              39: 'baseball bat', 40: 'baseball glove', 41: 'skateboard', 42: 'surfboard',
              43: 'tennis racket', 44: 'bottle', 46: 'wine glass', 47: 'cup', 48: 'fork',
              49: 'knife', 50: 'spoon', 51: 'bowl', 52: 'banana', 53: 'apple',
              54: 'sandwich', 55: 'orange', 56: 'broccoli', 57: 'carrot', 58: 'hot dog',
              59: 'pizza', 60: 'donut', 61: 'cake', 62: 'chair', 63: 'couch',
              64: 'potted plant', 65: 'bed', 67: 'dining table', 70: 'toilet',
              72: 'tv', 73: 'laptop', 74: 'mouse', 75: 'remote', 76: 'keyboard',
              77: 'cell phone', 78: 'microwave', 79: 'oven', 80: 'toaster', 81: 'sink',
              82: 'refrigerator', 84: 'book', 85: 'clock', 86: 'vase', 87: 'scissors',
              88: 'teddy bear', 89: 'hair drier', 90: 'toothbrush'}

# For handling bounding boxes
def _text_size(font, text):
  """Return the (width, height) used to lay out `text` drawn with `font`."""
  # ImageFont.getsize() was removed in Pillow 10; getbbox() is its replacement.
  # Fall back to getsize() for older Pillow versions that lack getbbox().
  if hasattr(font, "getbbox"):
    _, _, right, bottom = font.getbbox(text)
    return right, bottom
  return font.getsize(text)


def draw_bounding_box_on_image(image,
                               ymin,
                               xmin,
                               ymax,
                               xmax,
                               color,
                               font,
                               thickness=4,
                               display_str_list=()):
  """Adds a bounding box with stacked text labels to an image, in place.

  Args:
    image: PIL image to draw on.
    ymin, xmin, ymax, xmax: Box edges as fractions of the image height/width
      (they are multiplied by the image size to get pixel positions).
    color: Fill color of the box outline and of the label backgrounds.
    font: PIL font used for the labels.
    thickness: Line width of the box outline.
    display_str_list: Strings to show at the box; the last string is drawn
      lowest and the others are stacked above it.
  """
  draw = ImageDraw.Draw(image)
  im_width, im_height = image.size
  (left, right, top, bottom) = (xmin * im_width, xmax * im_width,
                                ymin * im_height, ymax * im_height)
  draw.line([(left, top), (left, bottom), (right, bottom), (right, top),
             (left, top)],
            width=thickness,
            fill=color)

  # If the total height of the display strings added to the top of the bounding
  # box exceeds the top of the image, stack the strings below the bounding box
  # instead of above.
  display_str_heights = [_text_size(font, ds)[1] for ds in display_str_list]
  # Each display_str has a top and bottom margin of 0.05x.
  total_display_str_height = (1 + 2 * 0.05) * sum(display_str_heights)
  if top > total_display_str_height:
    text_bottom = top
  else:
    text_bottom = top + total_display_str_height
  # Reverse list and print from bottom to top.
  for display_str in display_str_list[::-1]:
    text_width, text_height = _text_size(font, display_str)
    margin = np.ceil(0.05 * text_height)
    draw.rectangle([(left, text_bottom - text_height - 2 * margin),
                    (left + text_width, text_bottom)],
                   fill=color)
    draw.text((left + margin, text_bottom - text_height - margin),
              display_str,
              fill="black",
              font=font)
    # Move up by the full height of the label just drawn (text plus both
    # margins) so the next label sits directly above it without overlapping
    text_bottom -= text_height + 2 * margin

# TO DO: Set the maximum number of detections to keep per image
# (read by run_detector() to cap how many detections per image are considered for export)
max_boxes = 10 #@param {type:"slider", min:0, max:100, step:10}

def draw_boxes(image, boxes, class_names, scores, max_boxes=10, min_score=0.1):
  """Overlay labeled boxes on an image with formatted scores and label names.

  Only detections whose label text contains the module-level `filter` string
  are drawn.

  Args:
    image: Image as a numpy array; drawn boxes are copied back into it.
    boxes: Detection boxes, read as boxes[0][i] = (ymin, xmin, ymax, xmax).
    class_names: Class ids, read as class_names[0][i]; keys of `label_map`.
    scores: Detection scores, read as scores[0][i].
    max_boxes: Only the first `max_boxes` detections are considered; None
      means all of them.
    min_score: Detections scoring below this value are skipped.

  Returns:
    The same `image` array, with the boxes drawn on it.
  """
  colors = list(ImageColor.colormap.values())

  try:
    font = ImageFont.truetype("/usr/share/fonts/truetype/liberation/LiberationSansNarrow-Regular.ttf",
                              25)
  except IOError:
    print("Font not found, using default font.")
    font = ImageFont.load_default()

  # Never index past the detections that are actually present
  num_available = len(scores[0])
  num_boxes = num_available if max_boxes is None else min(max_boxes, num_available)

  # Convert to a PIL image once, draw every box on it, then copy back once
  image_pil = Image.fromarray(np.uint8(image)).convert("RGB")
  drew_box = False
  for i in range(num_boxes):
    score = scores[0][i]
    if score < min_score:
      continue
    class_id = class_names[0][i]
    display_str = "{}: {}%".format(label_map[class_id], int(100 * score))
    if filter not in display_str: # Only the filtered class is shown on images
      continue
    ymin, xmin, ymax, xmax = tuple(boxes[0][i])
    color = colors[hash(class_id) % len(colors)]
    draw_bounding_box_on_image(
      image_pil,
      ymin,
      xmin,
      ymax,
      xmax,
      color,
      font,
      display_str_list=[display_str])
    drew_box = True

  if drew_box:
    np.copyto(image, np.array(image_pil))
  return image
  
# For running inference
def run_detector(image_url):
  """Run the detector on one image URL, draw the filtered boxes and export them.

  The image is downloaded and fit to 640x480, passed through the module-level
  `detector`, and one row per detection whose label contains the module-level
  `filter` string is appended to the tab-separated file at the module-level
  `outfpath`. At most the first `max_boxes` (module-level) detections are
  considered, both for drawing and for export.

  Note: exported box coordinates are the values output by the model for the
  640x480 fitted image, not pixel positions in the original image.

  Args:
    image_url: Address of the image to run inference on.

  Returns:
    Numpy array of the fitted image with boxes drawn for the filtered class.
  """
  image_path, _, _ = download_and_resize_image(image_url, 640, 480)
  try:
    img = load_img(image_path)
  finally:
    # Only the decoded tensor is used from here on; delete the temporary JPEG
    # so long runs do not fill the disk with downloaded images
    os.remove(image_path)

  converted_img  = tf.image.convert_image_dtype(img, tf.uint8)[tf.newaxis, ...]
  start_time = time.time()
  result = detector(converted_img)
  end_time = time.time()

  result = {key:value.numpy() for key,value in result.items()}
  # num_detections is an array; take its single element as a plain int
  num_found = int(result["num_detections"][0])

  print("Found %d objects." % num_found)
  print("Inference time: %s" % format(end_time-start_time, '.2f'))

  # Pass max_boxes along so the boxes drawn match the rows exported below
  image_with_boxes = draw_boxes(img.numpy(), result["detection_boxes"],
      result["detection_classes"], result["detection_scores"],
      max_boxes=max_boxes)

  # Export bounding boxes to file in Google Drive
  # (newline='' is the file mode the csv module expects for its writers)
  with open(outfpath, 'a', newline='') as out_file:
    tsv_writer = csv.writer(out_file, delimiter='\t')
    img_id = os.path.splitext(os.path.basename(image_url))[0]
    # Write one row per detected object with bounding box coordinates
    num_detections = min(num_found, max_boxes)
    for i in range(num_detections):
      class_id = str(label_map[result["detection_classes"][0][i]])
      if filter in class_id: # Only writes rows for filtered class
        ymin, xmin, ymax, xmax = result["detection_boxes"][0][i]
        tsv_writer.writerow([img_id, class_id,
                             xmin, ymin, xmax, ymax, image_url])
  return image_with_boxes
print('Model loaded and functions defined! \nGo to next steps for running inference on images.')

## Test running inference on a couple images from URLs
--- 
Try running inference on any images from URL to get bounding boxes of detected birds. Results are displayed on images. No detected object (bird) coordinates are saved.

In [None]:
# TO DO: Type in image URLs 1-3 using form fields to right
url_1 = "https://content.eol.org/data/media/7e/9c/7a/542.15445377044.jpg" #@param {type:"string"}
url_2 = "https://content.eol.org/data/media/81/1c/0d/542.7816025222.jpg" #@param {type:"string"}
url_3 = "https://content.eol.org/data/media/7e/3c/0b/542.10578857864.jpg" #@param {type:"string"}
image_urls = [url_1, url_2, url_3]

# Display detection results on images
display_results = True

# Set temporary outfile for tagging results
outfpath = "temp_outfile.tsv"

# Loop through the test image URLs to add bounding boxes to images
print("Running inference on images")
for im_num, image_url in enumerate(image_urls, start=1):
  try:
    image_wboxes = run_detector(image_url)
    if display_results:
      display_image(image_wboxes)
    # Display progress message after each image
    print('Inference complete for image {} of {}'.format(im_num, len(image_urls)))

  except Exception as err:
    # Continue with the next URL, but also report what actually went wrong.
    # Catching Exception instead of using a bare except keeps the cell interruptible.
    print('Check if URL from {} is valid'.format(image_url))
    print('  ({}: {})'.format(type(err).__name__, err))

# Delete temporary outfile once all images are done; it does not exist if
# every image failed before any result was written
if os.path.exists(outfpath):
  os.remove(outfpath)

## Run inference on EOL 20k image bundles & save results for cropping
---
Use 20K EOL Aves image bundle to get bounding boxes of detected birds. Results are saved to [crops_file].tsv. Running in 4 batches of 5K images is recommended in case of unexpected Colab timeouts.

Optional: Display detection boxes on up to 50 images. Each image is shown with bounding boxes around the detected objects (birds), labeled with class type and confidence score, and the inference time is printed for each image. 

In [None]:
import pandas as pd
import numpy as np
# So URLs don't get truncated in display
pd.set_option('display.max_colwidth',1000)

# Read in EOL image bundle dataframe
# TO DO: Type in image bundle address using form field to right
bundle = "https://editors.eol.org/other_files/bundle_images/files/images_for_Aves_20K_breakdown_download_000001.txt" #@param {type:"string"}
# The bundle has one URL per line. Recent pandas versions reject '\n' as a
# separator, so read with a tab separator instead (assumes URLs contain no tabs)
df = pd.read_csv(bundle, sep='\t', header=None)
df.columns = ['url']
print('EOL image bundle head:\n{}'.format(df.head()))

# Build the path of the output tagging file
# TO DO: Change file name for each bundle/run
# Note: If running in 4 batches of 5k images per 20k image bundle (recommended), use a/b/c/d for each batch
base = '/content/drive/My Drive/train/results/'
crops_file = "aves_cropcoords_tf2_a" #@param ["aves_cropcoords_tf2_a", "aves_cropcoords_tf2_b", "aves_cropcoords_tf2_c", "aves_cropcoords_tf2_d"] {allow-input: true}
if 'SSD' in model:
  mod_abbv = '_ssd'
elif 'RCNN' in model:
  mod_abbv = '_rcnn'
else:
  # mod_abbv would be undefined below, so stop with a clear message instead
  raise ValueError('Error: Model name does not correspond to module_handle')
# Insert the model abbreviation before the batch letter,
# e.g. aves_cropcoords_tf2_a -> aves_cropcoords_tf2_ssd_a.tsv
crops_prefix, underscore, batch_id = crops_file.rpartition('_')
if underscore:
  outfpath = base + crops_prefix + mod_abbv + '_' + batch_id + '.tsv'
else:
  # Custom file name without a batch suffix: just append the model abbreviation
  batch_id = ''
  outfpath = base + crops_file + mod_abbv + '.tsv'
print('Cropping file will be saved to:\n{}'.format(outfpath))

# Run in 4 batches of 5k images each (batch a is from 0-5000, b from 5000 to 10000, etc)
# The batch is chosen from the letter after the last underscore of crops_file
batch_ranges = {'a': (0, 5000), 'b': (5000, 10000),
                'c': (10000, 15000), 'd': (15000, 20000)}
if batch_id in batch_ranges:
  a, b = batch_ranges[batch_id]
else:
  # No a/b/c/d suffix (custom file name): fall back to the whole bundle
  a, b = 0, len(df)
  print('No batch a/b/c/d found in "{}", using all {} images'.format(crops_file, len(df)))

# Write header row of output tag file, unless the file already has content
# (e.g. when this cell is re-run), so the header is never repeated mid-file
if not os.path.exists(outfpath) or os.path.getsize(outfpath) == 0:
  with open(outfpath, 'a', newline='') as out_file:
    tsv_writer = csv.writer(out_file, delimiter='\t')
    tsv_writer.writerow(["img_id", "class_id",
                         "xmin", "ymin", "xmax", "ymax", "url"])

In [None]:
# Test with a smaller subset than 5k images?
# TO DO: Check test_with_tiny_subset box if "Yes"
# Then choose start_index and num_test_images values using sliders
test_with_tiny_subset = True #@param {type: "boolean"}
if test_with_tiny_subset:
  start_index = 0 #@param {type:"slider", min:0, max:50, step:5}
  a = start_index
  num_test_imgs = 5 #@param {type:"slider", min:0, max:50, step:5}
  b = start_index + num_test_imgs

# Display detection results on images?
# TO DO: Check display_results box if "Yes"
# Note: Only run for <50 images at a time
display_results = True #@param {type:"boolean"}

# Loop through EOL image bundle to add bounding boxes to images
print("Running inference on images")
# Rows a (inclusive) to b (exclusive) of the bundle; shorter if the bundle ends first
batch_urls = df['url'].iloc[a:b]
for im_num, image_url in enumerate(batch_urls, start=1):
  try:
    image_wboxes = run_detector(image_url)
    if display_results:
      display_image(image_wboxes)

    # Display progress message after each image, counted within this batch
    print('Inference complete for image {} of {}'.format(im_num, len(batch_urls)))

  except Exception as err:
    # Continue with the next image, but also report what actually went wrong.
    # Catching Exception instead of using a bare except keeps the cell interruptible.
    print('Check if URL from {} is valid'.format(image_url))
    print('  ({}: {})'.format(type(err).__name__, err))