<a href="https://colab.research.google.com/github/aubricot/object_detection_for_image_cropping/blob/master/aves_tf_ssd_rcnn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Using Faster-RCNN and SSD in Tensorflow to detect birds from images   
---   
*Last Updated 9 December 2019*   
Using Faster-RCNN and SSD as methods to do customized, large-scale image processing with Tensorflow. Using the location and dimensions of the detected birds, images will be cropped to square dimensions that are centered and padded around the detection box. Pre-trained models are used for "out of the box" inference on images of birds of varying dimensions and resolutions, but will be modified and fine-tuned in future efforts for other taxonomic groups.

This notebook is meant to be run entirely in Google Colab and doesn't require any software installations or downloads to your local machine. To get started, just click the "Open in Colab" button. 

It is modified from [here](https://medium.com/@nickbortolotti/tensorflow-object-detection-api-in-5-clicks-from-colaboratory-843b19a1edf1). The [Tensorflow Object Detection API Tutorial](https://github.com/tensorflow/models/tree/master/research/object_detection) was also used as a reference. Tensorflow Object Detection API is meant for building models for custom object detection, see more information here: [Tensorflow Object Detection API](https://tensorflow-object-detection-api-tutorial.readthedocs.io/en/latest/install.html#tensorflow-models-installation). 

## Installs
---
Install the Tensorflow Object Detection API directly to this Colab notebook.

In [0]:
!git clone https://github.com/tensorflow/models.git
!apt-get -qq install libprotobuf-java protobuf-compiler
!protoc ./models/research/object_detection/protos/string_int_label_map.proto --python_out=.
!cp -R models/research/object_detection/ object_detection/
!rm -rf models

In [0]:
# Mount Google Drive at /content/drive so the image cropping coordinate file(s)
# written later in this notebook can be exported there.
# NOTE(review): force_remount=True presumably re-mounts even when Drive is already mounted — confirm
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

### Imports   
---

In [0]:
%tensorflow_version 1.0

import tensorflow as tf 
tf.compat.v1.enable_eager_execution()

# For importing/exporting files, working with arrays, etc
import os
import pathlib
import six.moves.urllib as urllib
import sys
import tarfile
import zipfile
import numpy as np 
import csv
import matplotlib
import time
import pandas as pd

# For downloading the images
import tempfile
from six.moves.urllib.request import urlopen
from six import BytesIO
from collections import defaultdict
from io import StringIO

# For drawing onto and plotting the images
import matplotlib.pyplot as plt
from PIL import Image
from PIL import ImageColor
from PIL import ImageDraw
from PIL import ImageFont
from PIL import ImageOps

import cv2

from IPython.display import display

from object_detection.utils import ops as utils_ops
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util

### Model Preparation
--- 
Configure the model to use and select needed elements to use the Object Detection API.

In [0]:
# What model to download. Can choose between SSD or Faster RCNN by commenting out/in the different MODEL_NAME(s) below
# SSD Model
#MODEL_NAME = 'ssd_resnet50_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03'

# Faster RCNN Model
MODEL_NAME = 'faster_rcnn_resnet50_coco_2018_01_28'

MODEL_FILE = MODEL_NAME + '.tar.gz'
DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'

# Path to frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'

# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = os.path.join('object_detection/data', 'mscoco_label_map.pbtxt')

NUM_CLASSES = 90

# Download and unpack the archive only when the frozen graph is not on disk yet,
# so re-running this cell does not fetch the model again.
if not os.path.exists(PATH_TO_CKPT):
  urllib.request.urlretrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)
  # The context manager closes the archive even if extraction fails
  with tarfile.open(MODEL_FILE) as tar_file:
    for member in tar_file.getmembers():
      # Only the frozen inference graph is needed from the archive
      if 'frozen_inference_graph.pb' in os.path.basename(member.name):
        tar_file.extract(member, os.getcwd())

# Load the frozen graph into its own tf.Graph, which the detection functions run against
detection_graph = tf.Graph()
with detection_graph.as_default():
  od_graph_def = tf.compat.v1.GraphDef()
  with tf.io.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
    serialized_graph = fid.read()
  od_graph_def.ParseFromString(serialized_graph)
  # name='' keeps the original tensor names (e.g. 'image_tensor:0') without a prefix
  tf.import_graph_def(od_graph_def, name='')

# Build the class-id -> category lookup from the label map file
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)

## Run test images through object detector
--- 

### Prepare object detection functions and settings

In [0]:
# For loading images into computer-readable format
def load_image_into_numpy_array(image):
  """Convert a PIL image into a (height, width, 3) uint8 numpy array.

  Args:
    image: PIL image. It is converted to RGB first so that grayscale,
      palette or RGBA images do not break the 3-channel reshape below.

  Returns:
    numpy array of dtype uint8 with shape (im_height, im_width, 3).
  """
  (im_width, im_height) = image.size
  rgb_image = image.convert('RGB')
  return np.array(rgb_image.getdata()).reshape(
      (im_height, im_width, 3)).astype(np.uint8)

# Function for loading images from urls
def url_to_image(url):
  """Download an image from a URL and return it as an RGB numpy array.

  Args:
    url: address of the image to download.

  Returns:
    Image array decoded with cv2.imdecode and converted from BGR to RGB
    channel order.

  Raises:
    ValueError: if the downloaded bytes cannot be decoded as an image.
  """
  resp = urllib.request.urlopen(url)
  try:
    raw_bytes = resp.read()
  finally:
    # Always release the connection, even when the read fails
    resp.close()
  encoded = np.asarray(bytearray(raw_bytes), dtype="uint8")
  image = cv2.imdecode(encoded, cv2.IMREAD_COLOR)
  # cv2.imdecode signals an undecodable buffer by returning None instead of raising
  if image is None:
    raise ValueError('Could not decode an image from {}'.format(url))
  return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

def show_inference(image_np_expanded):
  """Run the detection graph on one image and draw its first detection box.

  Args:
    image_np_expanded: batch containing a single image, i.e. the image array
      with a leading axis added (np.expand_dims(image_np, axis=0)).

  Returns:
    Copy of the input image with the first returned box (boxes[0][0]) drawn on it.
  """
  # Take the image from the argument rather than from a global variable, so the
  # function works no matter what the caller named its image.
  image_np = image_np_expanded[0]

  with detection_graph.as_default():
    with tf.compat.v1.Session(graph=detection_graph) as sess:
      # Definite input and output Tensors for detection_graph
      image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
      # Each box represents a part of the image where a particular object was detected.
      detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
      # Each score represents the level of confidence for each of the objects.
      detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
      detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
      num_detections = detection_graph.get_tensor_by_name('num_detections:0')
      # Optional: adjust score confidence threshold for display
      #min_score_thresh = .7

      # Actual detection.
      (boxes, scores, classes, num) = sess.run(
          [detection_boxes, detection_scores, detection_classes, num_detections],
          feed_dict={image_tensor: image_np_expanded})

  # Visualization of the results of a detection
  # Modified from https://github.com/tensorflow/models/issues/4682
  im_height, im_width = image_np.shape[:2]
  # Box values are fractions of the image size, ordered [ymin, xmin, ymax, xmax];
  # scale them to pixel coordinates.
  box_ymin, box_xmin, box_ymax, box_xmax = boxes[0][0]
  ymin = int(box_ymin * im_height)
  xmin = int(box_xmin * im_width)
  ymax = int(box_ymax * im_height)
  xmax = int(box_xmax * im_width)
  newImage = np.copy(image_np)
  newImage = cv2.rectangle(newImage, (xmin, ymax), (xmax, ymin), (255, 0, 157), 3)
  # Add labels to boxes
  #newImage = cv2.putText(newImage, label, (xmin, ymax-5), cv2.FONT_HERSHEY_SIMPLEX, fontScale, (153, 255, 255), 5, cv2.LINE_AA)

  return newImage

### Run images (from individual URLs) through object detector
Load in images by URL and run the image detector for all images. Plotted results include the image with bounding box around detected objects (birds), class type, and confidence score. Inference times are printed above images.

In [0]:
image_urls = ["https://content.eol.org/data/media/7e/9c/7a/542.15445377044.jpg",
              "https://content.eol.org/data/media/81/1c/0d/542.7816025222.jpg",
              "https://content.eol.org/data/media/7e/3c/0b/542.10578857864.jpg"]

for im_num, image_url in enumerate(image_urls, start=1):
    # Load in image
    image_np = url_to_image(image_url)
    image_np_expanded = np.expand_dims(image_np, axis=0)
    # Record inference time
    start_time = time.time()
    # Detection and draw boxes on image; keep the result so the model is
    # not run a second time just for plotting
    detection_image = show_inference(image_np_expanded)
    end_time = time.time()
    # Display progress message after each image
    print('Detection complete in {} of {} images'.format(im_num, len(image_urls)))

    # Plot and show detection boxes on images
    # If running detection on >50 images, comment out this portion
    _, ax = plt.subplots(figsize=(10, 10))
    ax.imshow(detection_image)
    ax.set_title('{}) Inference time: {}'.format(im_num, format(end_time-start_time, '.2f')))

## Run EOL image bundles through the trained object detector & save results for cropping
---
Display resulting detection boxes on images and save their coordinates to aves_det_crops.tsv for use cropping EOL images. 

Plotted results include the image with bounding box around detected objects (birds), class type, and confidence score. Inference times are printed above images. 

### Prepare object detection functions and settings

In [0]:
# For loading images into computer-readable format
def load_image_into_numpy_array(image):
  """Convert a PIL image into a (height, width, 3) uint8 numpy array.

  Args:
    image: PIL image. It is converted to RGB first so that grayscale,
      palette or RGBA images do not break the 3-channel reshape below.

  Returns:
    numpy array of dtype uint8 with shape (im_height, im_width, 3).
  """
  (im_width, im_height) = image.size
  rgb_image = image.convert('RGB')
  return np.array(rgb_image.getdata()).reshape(
      (im_height, im_width, 3)).astype(np.uint8)

# Function for loading images from urls
def url_to_image(url):
  """Download an image from a URL and return it as an RGB numpy array.

  Args:
    url: address of the image to download.

  Returns:
    Image array decoded with cv2.imdecode and converted from BGR to RGB
    channel order.

  Raises:
    ValueError: if the downloaded bytes cannot be decoded as an image.
  """
  resp = urllib.request.urlopen(url)
  try:
    raw_bytes = resp.read()
  finally:
    # Always release the connection, even when the read fails
    resp.close()
  encoded = np.asarray(bytearray(raw_bytes), dtype="uint8")
  image = cv2.imdecode(encoded, cv2.IMREAD_COLOR)
  # cv2.imdecode signals an undecodable buffer by returning None instead of raising
  if image is None:
    raise ValueError('Could not decode an image from {}'.format(url))
  return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

def show_inference(image_np_expanded):
  """Run detection on one image, draw its first box and append the box to the crops TSV.

  Args:
    image_np_expanded: batch containing a single image, i.e. the image array
      with a leading axis added (np.expand_dims(image_np, axis=0)).

  Returns:
    Copy of the input image with the first returned box (boxes[0][0]) drawn on it.

  Side effects:
    Appends one row (image_url, im_height, im_width, xmin, ymin, xmax, ymax) to
    aves_det_crops_1000.tsv on Google Drive. The URL written is the module-level
    `image_url`, which the calling loop must set before each call.
  """
  # Take the image from the argument rather than from a global variable, so the
  # pixel dimensions always belong to the image that was actually run.
  image_np = image_np_expanded[0]

  with detection_graph.as_default():
    with tf.compat.v1.Session(graph=detection_graph) as sess:
      # Definite input and output Tensors for detection_graph
      image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
      # Each box represents a part of the image where a particular object was detected.
      detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
      # Each score represents the level of confidence for each of the objects.
      detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
      detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
      num_detections = detection_graph.get_tensor_by_name('num_detections:0')
      # Optional: adjust score confidence threshold for display
      #min_score_thresh = .7

      # Actual detection.
      (boxes, scores, classes, num) = sess.run(
          [detection_boxes, detection_scores, detection_classes, num_detections],
          feed_dict={image_tensor: image_np_expanded})

  # Visualization of the results of a detection
  # Modified from https://github.com/tensorflow/models/issues/4682
  im_height, im_width = image_np.shape[:2]
  # Box values are fractions of the image size, ordered [ymin, xmin, ymax, xmax];
  # scale them to pixel coordinates.
  box_ymin, box_xmin, box_ymax, box_xmax = boxes[0][0]
  ymin = int(box_ymin * im_height)
  xmin = int(box_xmin * im_width)
  ymax = int(box_ymax * im_height)
  xmax = int(box_xmax * im_width)
  newImage = np.copy(image_np)
  newImage = cv2.rectangle(newImage, (xmin, ymax), (xmax, ymin), (255, 0, 157), 3)
  # Add labels to boxes
  #newImage = cv2.putText(newImage, label, (xmin, ymax-5), cv2.FONT_HERSHEY_SIMPLEX, fontScale, (153, 255, 255), 5, cv2.LINE_AA)

  # Export bounding boxes to drive
  with open('/content/drive/My Drive/fall19_smithsonian_informatics/train/aves_det_crops_1000.tsv', 'a') as out_file:
    tsv_writer = csv.writer(out_file, delimiter='\t')
    tsv_writer.writerow([image_url, im_height, im_width,
                         xmin, ymin, xmax, ymax])
  return newImage

In [0]:
# Use URLs from EOL image URL bundles
# Comment out to use either 1000 or 20000 image bundles
#1000 images
# Source of image URLs: EOL image URL bundles.
# Keep exactly one `urls` assignment active to choose the 1000 or 20000 image bundle.
#1000 images
urls = 'https://editors.eol.org/other_files/bundle_images/files/images_for_Aves_breakdown_download_000001.txt'
#20000 images
#urls = 'https://editors.eol.org/other_files/bundle_images/files/images_for_Aves_20K_breakdown_download_000001.txt'

# NOTE(review): read_csv treats the first line of the file as a header, which is then
# overwritten by "link" below. If the bundle has no header row, the first URL is
# dropped — confirm against the file and pass header=None if so.
df = pd.read_csv(urls)
df.columns = ["link"]
# Preview the first rows
df.head()

In [0]:
# Prepare cropping coordinates output file
# cd to train/
%cd drive/My Drive/fall19_smithsonian_informatics/train

# Write header row of output cropping coordinates file
with open('/content/drive/My Drive/fall19_smithsonian_informatics/train/aves_det_crops_1000.tsv', 'a') as out_file:
                  tsv_writer = csv.writer(out_file, delimiter='\t')
                  tsv_writer.writerow(["image_url", "im_height", "im_width", 
                            "xmin", "ymin", "xmax", "ymax"])

In [0]:
# Run images through model for object detection
from PIL import Image

# Set number of seconds to timeout if image url taking too long to open
import socket
socket.setdefaulttimeout(10)

# Image urls to process: the first 5 from the text file.
# For ranges of rows or all rows, slice with .iloc instead (e.g. df["link"].iloc[:5000]);
# this can be useful if running detection in batches.
batch_urls = df["link"].head(5)

for im_num, image_url in enumerate(batch_urls, start=1):
  try:
    # Load in image
    image_np = url_to_image(image_url)
    image_np_expanded = np.expand_dims(image_np, axis=0)
    # Record inference time
    start_time = time.time()
    # Detection and draw boxes on image
    show_inference(image_np_expanded)
    end_time = time.time()
    # Display progress message after each image
    print('Detection complete in {} of {} images'.format(im_num, len(batch_urls)))

    # Plot and show detection boxes on images
    # If running detection on >50 images, comment out this portion
    #_, ax = plt.subplots(figsize=(10, 10))
    #ax.imshow(show_inference(image_np_expanded))
    #plt.title('{}) Inference time: {}'.format(im_num, format(end_time-start_time, '.2f')))

  except Exception as err:
    # Catch Exception rather than everything, so interrupting the kernel still stops
    # the loop; report the underlying error instead of assuming a bad URL
    print('Check if URL from {} is valid ({})'.format(image_url, err))

### Get inference info for test images to compare object detection model times for YOLO, SSD, and Faster-RCNN

In [0]:
# Reload the 1000 image EOL bundle used for the inference-time comparison
urls = 'https://editors.eol.org/other_files/bundle_images/files/images_for_Aves_breakdown_download_000001.txt'
# NOTE(review): read_csv treats the first line of the file as a header, which is then
# overwritten by "link" below. If the bundle has no header row, the first URL is
# dropped — confirm against the file and pass header=None if so.
df = pd.read_csv(urls)
df.columns = ["link"]
# Preview the first rows
df.head()

In [0]:
from PIL import Image
import os

# For exporting inference times
inf_time = []
img_urls = []
im_dims = []

# Image urls to time: the first 145 from the text file
test_urls = df["link"].head(145)

# NOTE(review): if the show_inference in scope is the version that appends to the
# crops TSV, the recorded times include that file write and rows are added to the TSV.
for im_num, image_url in enumerate(test_urls, start=1):
  try:
    # Load in image with the notebook's own url_to_image helper
    image_np = url_to_image(image_url)
    image_np_expanded = np.expand_dims(image_np, axis=0)
    # Record inference time
    start_time = time.time()
    # Actual detection
    show_inference(image_np_expanded)
    end_time = time.time()
    # Display progress message after each image
    print('Detection complete in {} of {} test images'.format(im_num, len(test_urls)))

    # Record inference time, image name and image dimensions to export
    inf_time.append(end_time-start_time)
    img_urls.append(image_url)
    im_dims.append(image_np.shape)

  except Exception as err:
    # Catch Exception rather than everything, so interrupting the kernel still stops
    # the loop; report the underlying error instead of assuming a bad URL
    print('Error: check if web address {} is valid ({})'.format(image_url, err))

# One row per successfully processed image, with named columns
inf_times = pd.DataFrame(
    {"time (sec)": inf_time,
     "filepath": img_urls,
     "image_dims (h, w, d)": im_dims},
    columns=["time (sec)", "filepath", "image_dims (h, w, d)"])
inf_times.to_csv("aves_inference_times_rcnn.csv", index=False)
print(inf_times.head())