# Import Libraries

In [None]:
# Install the third-party packages that the import cell of this notebook needs.
# NOTE(review): versions are not pinned, so a re-run may pull different releases — consider pinning.
%pip install tensorflow
%pip install tensorflow_hub
%pip install Js2Py

In [None]:
import os
import pathlib
import cv2
import PIL

import matplotlib
import matplotlib.pyplot as plt

import io
import scipy.misc
import numpy as np
from six import BytesIO
from PIL import Image, ImageDraw, ImageFont

import tensorflow as tf
import tensorflow_hub as hub

from IPython.display import display, Javascript
from js2py import eval_js
from base64 import b64decode, b64encode
import html
import time

# Install TensorFlow Object Detection API

In [None]:

%%bash
# Stop at the first failing command so a broken step is not hidden by later output.
set -euo pipefail

# Clone the tensorflow models repository.
# The clone is skipped when the directory already exists so the cell can be re-run
# (git refuses to clone into an existing, non-empty directory).
if [ ! -d models ]; then
  git clone --depth 1 https://github.com/tensorflow/models
fi

# API Installation
# apt-get is used instead of apt because apt's command-line interface is not meant for scripts.
sudo apt-get install -y protobuf-compiler
cd models/research
# Generate the Python modules for the .proto definitions, then install the package.
protoc object_detection/protos/*.proto --python_out=.
cp object_detection/packages/tf2/setup.py .
python -m pip install .

In [None]:
%cd models/research
# Test the installation.
''' If the output shows, [OK] then we're good to go'''
!python object_detection/builders/model_builder_tf2_test.py

In [None]:
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils
from object_detection.utils import ops as utils_ops

%matplotlib inline

In [None]:
%pip install protobuf
!sudo apt-get update
!sudo apt-get install protobuf-compiler
!protoc object_detection/protos/*.proto --python_out=.

# Get video

In [None]:

# Read a video file that is already saved next to the notebook and play it back inline.
from IPython.display import HTML
from base64 import b64encode

# Width of the embedded player (value of the HTML `width` attribute).
video_width = 300

# Path of the video file to play.
video_path = "video.mp4"

# Read the raw bytes. Read-only mode is enough because nothing is written back,
# and the context manager closes the file handle once the bytes are loaded.
with open(video_path, "rb") as video_fh:
    video_file = video_fh.read()

# Embed the bytes as a base64 data URL and render a <video> player for it.
video_url = f"data:video/mp4;base64,{b64encode(video_file).decode()}"
HTML(f"""<video width={video_width} controls><source src="{video_url}"></video>""")

# Detector Function

In [None]:
def run_inference_for_single_image(model, image, live_cam):
    """Run the detector once on a single image and return its detections.

    Args:
        model: Callable taking a batched tensor and returning a dict of batched
            outputs that contains a 'num_detections' entry and a
            'detection_classes' entry.
        image: Image data accepted by `np.asarray`
            (presumably an H x W x C frame — TODO confirm against callers).
        live_cam: When falsy, the inference time of this call is printed.

    Returns:
        dict: The model outputs converted to numpy arrays with the batch axis
        removed and truncated to the first `num_detections` entries, plus
        'num_detections' as an int. 'detection_classes' is cast to int64.
    """
    # Convert the image into a numpy array, then into a tensor.
    image = np.asarray(image)
    input_tensor = tf.convert_to_tensor(image)

    # The model expects a batch of images, so add an axis with `tf.newaxis`.
    input_tensor = input_tensor[tf.newaxis, ...]

    # Run inference exactly once; the same call is timed and used for the result
    # (running the model a second time just to time it would double the cost per frame).
    start_time = time.time()
    output_dict = model(input_tensor)
    end_time = time.time()
    if not live_cam:
        print(f"Inference time: {np.ceil(end_time-start_time)} seconds per frame")

    num_detections = int(output_dict.pop('num_detections'))

    # All outputs are batched tensors: take index [0] to remove the batch
    # dimension and keep only the first num_detections entries.
    output_dict = {key: value[0, :num_detections].numpy()
                   for key, value in output_dict.items()}

    output_dict['num_detections'] = num_detections

    # detection_classes should be ints.
    output_dict['detection_classes'] = output_dict['detection_classes'].astype(np.int64)

    return output_dict

def run_inference_video(model, video_path, live_cam,
                        output_path='../../detected_output.avi', fps=None):
    """Run detection on every frame of a video and save the annotated result.

    Each frame is passed to `run_inference_for_single_image`, the detections are
    drawn onto the frame in place, and the frame is appended to an XVID-encoded
    output video.

    Args:
        model: Detection model forwarded to `run_inference_for_single_image`.
        video_path: Source passed to `cv2.VideoCapture`.
        live_cam: Forwarded to `run_inference_for_single_image`.
        output_path: Where the annotated video is written.
        fps: Frame rate of the output video. When None, the frame rate reported
            by the source is used, falling back to 20.0 if the source does not
            report a positive value.

    Returns:
        None. If the source cannot be opened a message is printed and nothing
        is written.

    Note:
        Reads the module-level `category_index` for the label names.
    """
    # Give up after this many read errors in a row instead of retrying forever.
    max_read_errors = 10

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print(f"Could not open video source: {video_path}")
            return

        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        if fps is None:
            source_fps = cap.get(cv2.CAP_PROP_FPS)
            # The comparison is False for 0 and NaN, which both mean "unknown".
            fps = source_fps if source_fps and source_fps > 0 else 20.0

        # Initialize the video writer for the annotated output.
        fourcc = cv2.VideoWriter_fourcc(*"XVID")  # codec
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        try:
            read_errors = 0
            while True:
                try:
                    is_success, image_np = cap.read()
                except cv2.error:
                    # Skip a frame that fails to decode, but bound the retries
                    # so a persistently failing source cannot loop forever.
                    read_errors += 1
                    if read_errors >= max_read_errors:
                        print(f"Stopping after {read_errors} consecutive read errors")
                        break
                    continue
                read_errors = 0

                # No more frames.
                if not is_success:
                    break

                # Actual detection.
                output_dict = run_inference_for_single_image(model, image_np, live_cam)

                # Visualization of the results of a detection (draws on image_np in place).
                viz_utils.visualize_boxes_and_labels_on_image_array(
                    image_np,
                    output_dict['detection_boxes'],
                    output_dict['detection_classes'],
                    output_dict['detection_scores'],
                    category_index,
                    instance_masks=output_dict.get('detection_masks_reframed', None),
                    use_normalized_coordinates=True,
                    line_thickness=8)

                out.write(image_np)
        finally:
            # Always finalize the output file, even if a frame fails mid-run.
            out.release()
    finally:
        cap.release()

def run_inference_in_anaconda(model):
    """Run live detection on the default camera and show it in an OpenCV window.

    Frames are read from camera index 0, annotated with the detections and
    displayed in a window named 'object_detection' until 'q' is pressed, the
    camera closes, or a frame cannot be read.

    Args:
        model: Detection model forwarded to `run_inference_for_single_image`.

    Note:
        Reads the module-level `category_index` for the label names.
    """
    cap = cv2.VideoCapture(0)
    try:
        while cap.isOpened():
            ret, image_np = cap.read()
            # A failed read yields no usable frame; stop instead of running inference on it.
            if not ret:
                break

            # Actual detection. live_cam=True: this is the live path, so the
            # per-frame timing printout is skipped.
            output_dict = run_inference_for_single_image(model, image_np, True)

            # Visualization of the results of a detection.
            viz_utils.visualize_boxes_and_labels_on_image_array(
                image_np,
                output_dict['detection_boxes'],
                output_dict['detection_classes'],
                output_dict['detection_scores'],
                category_index,
                instance_masks=output_dict.get('detection_masks_reframed', None),
                use_normalized_coordinates=True,
                line_thickness=8)

            cv2.imshow('object_detection', cv2.resize(image_np, (800, 600)))

            # Mask to the low byte so the key comparison ignores any higher bits.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the camera and close the window however the loop ends.
        cap.release()
        cv2.destroyAllWindows()

In [None]:
# Build the category index from the MS COCO label map file. The visualization
# calls in this notebook read it as the global `category_index`.
PATH_TO_LABELS = 'object_detection/data/mscoco_label_map.pbtxt'
category_index = label_map_util.create_category_index_from_labelmap(
    PATH_TO_LABELS,
    use_display_name=True,
)

# TensorFlow Hub handle of the detector to load.
model_handle = 'https://tfhub.dev/tensorflow/faster_rcnn/resnet50_v1_640x640/1'

# Loading can take a while, so print a message before and after.
print('loading model...')
hub_model = hub.load(model_handle)
print('model loaded!')
