## Imports and Setup

In [None]:
# This Colab requires TF 2.5.
!pip install -U tensorflow>=2.5

In [None]:
import os
import pathlib

import matplotlib
import matplotlib.pyplot as plt

import io
import scipy.misc
import numpy as np
from six import BytesIO
from PIL import Image, ImageDraw, ImageFont
from six.moves.urllib.request import urlopen

import tensorflow as tf
import tensorflow_hub as hub
import cv2

tf.get_logger().setLevel('ERROR')

## Utilities

Run the following cell to create some utils that will be needed later:

- Helper method to load an image
- Map of Model Name to TF Hub handle
- List of tuples with Human Keypoints for the COCO 2017 dataset. This is needed for models with keypoints.

In [None]:
# @title Run this!!

def load_image_into_numpy_array(path):
  """Load an image from a file or URL into a batched numpy array.

  Puts image into numpy array to feed into tensorflow graph.
  Note that by convention we put it into a numpy array with shape
  (batch, height, width, channels), where batch=1 and channels=3 for RGB.

  Images that are not already RGB (for example RGBA, greyscale or palette
  images) are converted to RGB so that the result always has three channels.

  Args:
    path: the file path of the image, or an http(s) URL to download it from

  Returns:
    uint8 numpy array with shape (1, img_height, img_width, 3)
  """
  if path.startswith('http'):
    # Close the HTTP response as soon as its bytes have been read.
    with urlopen(path) as response:
      image_data = response.read()
  else:
    # Close the file handle as soon as its bytes have been read.
    with tf.io.gfile.GFile(path, 'rb') as image_file:
      image_data = image_file.read()

  # convert('RGB') guarantees exactly three channels, whatever the source mode.
  with Image.open(BytesIO(image_data)) as image:
    rgb_image = image.convert('RGB')

  # np.array makes a writable copy; later cells draw into the returned array.
  return np.array(rgb_image, dtype=np.uint8)[np.newaxis, ...]


# Map of model display name (as offered in the model-selection form) to the
# TF Hub handle that is later passed to hub.load().
ALL_MODELS = {
'CenterNet HourGlass104 512x512' : 'https://tfhub.dev/tensorflow/centernet/hourglass_512x512/1',
'CenterNet HourGlass104 Keypoints 512x512' : 'https://tfhub.dev/tensorflow/centernet/hourglass_512x512_kpts/1',
'CenterNet HourGlass104 1024x1024' : 'https://tfhub.dev/tensorflow/centernet/hourglass_1024x1024/1',
'CenterNet HourGlass104 Keypoints 1024x1024' : 'https://tfhub.dev/tensorflow/centernet/hourglass_1024x1024_kpts/1',
'CenterNet Resnet50 V1 FPN 512x512' : 'https://tfhub.dev/tensorflow/centernet/resnet50v1_fpn_512x512/1',
'CenterNet Resnet50 V1 FPN Keypoints 512x512' : 'https://tfhub.dev/tensorflow/centernet/resnet50v1_fpn_512x512_kpts/1',
'CenterNet Resnet101 V1 FPN 512x512' : 'https://tfhub.dev/tensorflow/centernet/resnet101v1_fpn_512x512/1',
'CenterNet Resnet50 V2 512x512' : 'https://tfhub.dev/tensorflow/centernet/resnet50v2_512x512/1',
'CenterNet Resnet50 V2 Keypoints 512x512' : 'https://tfhub.dev/tensorflow/centernet/resnet50v2_512x512_kpts/1',
'EfficientDet D0 512x512' : 'https://tfhub.dev/tensorflow/efficientdet/d0/1',
'EfficientDet D1 640x640' : 'https://tfhub.dev/tensorflow/efficientdet/d1/1',
'EfficientDet D2 768x768' : 'https://tfhub.dev/tensorflow/efficientdet/d2/1',
'EfficientDet D3 896x896' : 'https://tfhub.dev/tensorflow/efficientdet/d3/1',
'EfficientDet D4 1024x1024' : 'https://tfhub.dev/tensorflow/efficientdet/d4/1',
'EfficientDet D5 1280x1280' : 'https://tfhub.dev/tensorflow/efficientdet/d5/1',
'EfficientDet D6 1280x1280' : 'https://tfhub.dev/tensorflow/efficientdet/d6/1',
'EfficientDet D7 1536x1536' : 'https://tfhub.dev/tensorflow/efficientdet/d7/1',
'SSD MobileNet v2 320x320' : 'https://tfhub.dev/tensorflow/ssd_mobilenet_v2/2',
'SSD MobileNet V1 FPN 640x640' : 'https://tfhub.dev/tensorflow/ssd_mobilenet_v1/fpn_640x640/1',
'SSD MobileNet V2 FPNLite 320x320' : 'https://tfhub.dev/tensorflow/ssd_mobilenet_v2/fpnlite_320x320/1',
'SSD MobileNet V2 FPNLite 640x640' : 'https://tfhub.dev/tensorflow/ssd_mobilenet_v2/fpnlite_640x640/1',
'SSD ResNet50 V1 FPN 640x640 (RetinaNet50)' : 'https://tfhub.dev/tensorflow/retinanet/resnet50_v1_fpn_640x640/1',
'SSD ResNet50 V1 FPN 1024x1024 (RetinaNet50)' : 'https://tfhub.dev/tensorflow/retinanet/resnet50_v1_fpn_1024x1024/1',
'SSD ResNet101 V1 FPN 640x640 (RetinaNet101)' : 'https://tfhub.dev/tensorflow/retinanet/resnet101_v1_fpn_640x640/1',
'SSD ResNet101 V1 FPN 1024x1024 (RetinaNet101)' : 'https://tfhub.dev/tensorflow/retinanet/resnet101_v1_fpn_1024x1024/1',
'SSD ResNet152 V1 FPN 640x640 (RetinaNet152)' : 'https://tfhub.dev/tensorflow/retinanet/resnet152_v1_fpn_640x640/1',
'SSD ResNet152 V1 FPN 1024x1024 (RetinaNet152)' : 'https://tfhub.dev/tensorflow/retinanet/resnet152_v1_fpn_1024x1024/1',
'Faster R-CNN ResNet50 V1 640x640' : 'https://tfhub.dev/tensorflow/faster_rcnn/resnet50_v1_640x640/1',
'Faster R-CNN ResNet50 V1 1024x1024' : 'https://tfhub.dev/tensorflow/faster_rcnn/resnet50_v1_1024x1024/1',
'Faster R-CNN ResNet50 V1 800x1333' : 'https://tfhub.dev/tensorflow/faster_rcnn/resnet50_v1_800x1333/1',
'Faster R-CNN ResNet101 V1 640x640' : 'https://tfhub.dev/tensorflow/faster_rcnn/resnet101_v1_640x640/1',
'Faster R-CNN ResNet101 V1 1024x1024' : 'https://tfhub.dev/tensorflow/faster_rcnn/resnet101_v1_1024x1024/1',
'Faster R-CNN ResNet101 V1 800x1333' : 'https://tfhub.dev/tensorflow/faster_rcnn/resnet101_v1_800x1333/1',
'Faster R-CNN ResNet152 V1 640x640' : 'https://tfhub.dev/tensorflow/faster_rcnn/resnet152_v1_640x640/1',
'Faster R-CNN ResNet152 V1 1024x1024' : 'https://tfhub.dev/tensorflow/faster_rcnn/resnet152_v1_1024x1024/1',
'Faster R-CNN ResNet152 V1 800x1333' : 'https://tfhub.dev/tensorflow/faster_rcnn/resnet152_v1_800x1333/1',
'Faster R-CNN Inception ResNet V2 640x640' : 'https://tfhub.dev/tensorflow/faster_rcnn/inception_resnet_v2_640x640/1',
'Faster R-CNN Inception ResNet V2 1024x1024' : 'https://tfhub.dev/tensorflow/faster_rcnn/inception_resnet_v2_1024x1024/1',
'Mask R-CNN Inception ResNet V2 1024x1024' : 'https://tfhub.dev/tensorflow/mask_rcnn/inception_resnet_v2_1024x1024/1'
}

# Test images, keyed by the display name offered in the image-selection form.
# Values are either http(s) URLs or local paths relative to the directory the
# models repository was cloned into; load_image_into_numpy_array accepts both.
# All local paths are relative so the notebook does not depend on the clone
# living under /content.
# NOTE(review): veh1, twov1, twocars and carbus are presumably user-supplied
# files; confirm they are present in test_images after cloning.
IMAGES_FOR_TEST = {
  'Beach' : 'models/research/object_detection/test_images/image2.jpg',
  'Dogs' : 'models/research/object_detection/test_images/image1.jpg',
  # By Heiko Gorski, Source: https://commons.wikimedia.org/wiki/File:Naxos_Taverna.jpg
  'Naxos Taverna' : 'https://upload.wikimedia.org/wikipedia/commons/6/60/Naxos_Taverna.jpg',
  # Source: https://commons.wikimedia.org/wiki/File:The_Coleoptera_of_the_British_islands_(Plate_125)_(8592917784).jpg
  'Beatles' : 'https://upload.wikimedia.org/wikipedia/commons/1/1b/The_Coleoptera_of_the_British_islands_%28Plate_125%29_%288592917784%29.jpg',
  # By Américo Toledano, Source: https://commons.wikimedia.org/wiki/File:Biblioteca_Maim%C3%B3nides,_Campus_Universitario_de_Rabanales_007.jpg
  'Phones' : 'https://upload.wikimedia.org/wikipedia/commons/thumb/0/0d/Biblioteca_Maim%C3%B3nides%2C_Campus_Universitario_de_Rabanales_007.jpg/1024px-Biblioteca_Maim%C3%B3nides%2C_Campus_Universitario_de_Rabanales_007.jpg',
  # Source: https://commons.wikimedia.org/wiki/File:The_smaller_British_birds_(8053836633).jpg
  'Birds' : 'https://upload.wikimedia.org/wikipedia/commons/0/09/The_smaller_British_birds_%288053836633%29.jpg',
  'veh1' : 'models/research/object_detection/test_images/veh1.jpg',
  'twov1' : 'models/research/object_detection/test_images/twov1.jpg',
  'twocars' : 'models/research/object_detection/test_images/twocars.jpg',
  'carbus' : 'models/research/object_detection/test_images/carbus.jpg',
}

# Tuples of human keypoint indices for the COCO 2017 dataset; only needed
# when a model with keypoints is used.
COCO17_HUMAN_POSE_KEYPOINTS = [
    (0, 1), (0, 2), (1, 3), (2, 4),
    (0, 5), (0, 6), (5, 7), (7, 9),
    (6, 8), (8, 10), (5, 6), (5, 11),
    (6, 12), (11, 12), (11, 13), (13, 15),
    (12, 14), (14, 16),
]

## Installing the Object Detection API

In [None]:
# Clone the tensorflow models repository (shallow clone, latest commit only).
# Later cells read the Object Detection API sources, the label map and the
# test images from the resulting ./models directory.
!git clone --depth 1 https://github.com/tensorflow/models

In [None]:
%%bash
# Stop at the first failing command so that a failed `cd` or `protoc` does not
# let the remaining steps run in the wrong directory or on missing files.
set -e
sudo apt install -y protobuf-compiler
cd models/research/
# Generate the Python modules for the Object Detection API .proto files.
protoc object_detection/protos/*.proto --python_out=.
# Copy the TF2 setup.py next to the package so `pip install .` can use it.
cp object_detection/packages/tf2/setup.py .
python -m pip install .


Import the dependencies we will need later

In [None]:
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils
from object_detection.utils import ops as utils_ops

%matplotlib inline

### Load label map data

Label maps map index numbers to category names, so that when our convolutional network predicts `5`, we know that this corresponds to `airplane`.  Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be fine.

For simplicity, we load the label map from the same repository that we cloned the Object Detection API code from.

In [None]:
# Label map file shipped inside the cloned models repository.
PATH_TO_LABELS = './models/research/object_detection/data/mscoco_label_map.pbtxt'
# category_index is looked up by class id in later cells; each entry is a dict
# from which this notebook reads the 'id' and 'name' fields.
category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True)

In [None]:
# Display the whole mapping for inspection.
category_index

## Build a detection model and load pre-trained model weights

Here we will choose which Object Detection model we will use.
Select the architecture and it will be loaded automatically.
If you want to change the model to try other architectures later, just change the next cell and execute following ones.

**Tip:** if you want to read more details about the selected model, you can follow the link (model handle) and read additional documentation on TF Hub. After you select a model, we will print the handle to make it easier.

In [None]:
#@title Model Selection { display-mode: "form", run: "auto" }
model_display_name = 'SSD MobileNet V2 FPNLite 320x320' # @param ['CenterNet HourGlass104 512x512','CenterNet HourGlass104 Keypoints 512x512','CenterNet HourGlass104 1024x1024','CenterNet HourGlass104 Keypoints 1024x1024','CenterNet Resnet50 V1 FPN 512x512','CenterNet Resnet50 V1 FPN Keypoints 512x512','CenterNet Resnet101 V1 FPN 512x512','CenterNet Resnet50 V2 512x512','CenterNet Resnet50 V2 Keypoints 512x512','EfficientDet D0 512x512','EfficientDet D1 640x640','EfficientDet D2 768x768','EfficientDet D3 896x896','EfficientDet D4 1024x1024','EfficientDet D5 1280x1280','EfficientDet D6 1280x1280','EfficientDet D7 1536x1536','SSD MobileNet v2 320x320','SSD MobileNet V1 FPN 640x640','SSD MobileNet V2 FPNLite 320x320','SSD MobileNet V2 FPNLite 640x640','SSD ResNet50 V1 FPN 640x640 (RetinaNet50)','SSD ResNet50 V1 FPN 1024x1024 (RetinaNet50)','SSD ResNet101 V1 FPN 640x640 (RetinaNet101)','SSD ResNet101 V1 FPN 1024x1024 (RetinaNet101)','SSD ResNet152 V1 FPN 640x640 (RetinaNet152)','SSD ResNet152 V1 FPN 1024x1024 (RetinaNet152)','Faster R-CNN ResNet50 V1 640x640','Faster R-CNN ResNet50 V1 1024x1024','Faster R-CNN ResNet50 V1 800x1333','Faster R-CNN ResNet101 V1 640x640','Faster R-CNN ResNet101 V1 1024x1024','Faster R-CNN ResNet101 V1 800x1333','Faster R-CNN ResNet152 V1 640x640','Faster R-CNN ResNet152 V1 1024x1024','Faster R-CNN ResNet152 V1 800x1333','Faster R-CNN Inception ResNet V2 640x640','Faster R-CNN Inception ResNet V2 1024x1024','Mask R-CNN Inception ResNet V2 1024x1024']
# Resolve the chosen display name to its TF Hub handle and echo both, so the
# model documentation can be opened directly from the printed handle.
model_handle = ALL_MODELS[model_display_name]
print('Selected model:{}'.format(model_display_name))
print('Model Handle at TensorFlow Hub: ' + model_handle)

## Loading the selected model from TensorFlow Hub

Here we just need the model handle that was selected and use the Tensorflow Hub library to load it to memory.


In [None]:
# Load the model identified by model_handle with the TensorFlow Hub library.
# The returned object is called directly on image batches in the inference
# cells below.
print('loading model...')
hub_model = hub.load(model_handle)
print('model loaded!')

## Loading an image

Let's try the model on a simple image. To help with this, we provide a list of test images.

Here are some simple things to try out if you are curious:
* Try running inference on your own images, just upload them to colab and load the same way it's done in the cell below.
* Modify some of the input images and see if detection still works.  Some simple things to try out here include flipping the image horizontally, or converting to grayscale (note that we still expect the input image to have 3 channels).

**Be careful:** the model expects 3-channel images; when an image has an alpha channel, the alpha counts as a 4th channel.



In [None]:
#@title Image Selection (don't forget to execute the cell!) { display-mode: "form"}
selected_image = "twocars" #@param ["Beach", "Dogs", "Naxos Taverna", "Beatles", "Phones", "Birds", "veh1", "twov1", "twocars", "carbus"]
flip_image_horizontally = False #@param {type:"boolean"}
convert_image_to_grayscale = False #@param {type:"boolean"}

# Load the chosen picture as a (1, height, width, 3) uint8 batch.
image_path = IMAGES_FOR_TEST[selected_image]
image_np = load_image_into_numpy_array(image_path)

# Optional mirror: reverse the width axis of the single image in the batch.
if flip_image_horizontally:
  image_np[0] = image_np[0][:, ::-1].copy()

# Optional grayscale: every channel is replaced by the per-pixel channel mean,
# keeping three channels because the model still expects them.
if convert_image_to_grayscale:
  image_np[0] = np.repeat(
    image_np[0].mean(axis=2, keepdims=True), 3, axis=2).astype(np.uint8)

# Show the (possibly modified) input image.
plt.figure(figsize=(10, 10))
plt.imshow(image_np[0])
plt.show()

In [None]:
# running inference
# The model is called on the image batch; every returned value is converted to
# a numpy array so the cells below can index the results directly.
results = hub_model(image_np)
result = {key:value.numpy() for key,value in results.items()}
print(result.keys())

In [None]:
# Boolean mask marking the detections whose score reaches the threshold.
confidence_threshold = 0.4
selected_predictions = result['detection_scores'] >= confidence_threshold

In [None]:
selected_predictions

In [None]:
# Distinct mask values for batch item 0 and how often each occurs, i.e. how
# many detections were rejected / kept.
unqiue,count = np.unique(selected_predictions[0],return_counts=True)

In [None]:
print(unqiue,count)

In [None]:
# Class ids of the detections that passed the confidence threshold.
result['detection_classes'][selected_predictions]

In [None]:
# Label-map entry of the first kept detection. The two cells that follow do
# the same for the second and third kept detection.
# NOTE(review): those lookups fail with IndexError when fewer than two / three
# detections pass the threshold.
category_index[result['detection_classes'][selected_predictions][0]]

In [None]:
category_index[result['detection_classes'][selected_predictions][1]]

In [None]:
category_index[result['detection_classes'][selected_predictions][2]]

In [None]:
 # Keep the first kept detection's label-map entry to read its fields below.
 dic = category_index[result['detection_classes'][selected_predictions][0]]

In [None]:
dic['name']

In [None]:
# Raw class output, including detections below the threshold.
result['detection_classes']

In [None]:
# Same output for batch item 0 only.
result['detection_classes'][0]

In [None]:
# Box of the second detection of batch item 0.
result['detection_boxes'][0][1]

In [None]:
# Size of the leading (batch) dimension of the box output.
int(result['detection_boxes'].shape[0])

In [None]:
# Whether the first detection of batch item 0 passed the threshold.
selected_predictions[0][0]

In [None]:
# Class ids this notebook treats as vehicles (listed with their names in
# NoofVehicles).
lst =[2,3,4,5,6,7,8]
boxlst =[]
size = result['detection_classes'].shape[1]
# Collect the boxes of detections that both pass the threshold and belong to
# one of the vehicle classes. The boxes are taken from the raw model output,
# i.e. before the de-normalisation done in later cells.
for num in range(size) :
  #if result['detection_classes'][0][num] in lst :
  if selected_predictions[0][num] and result['detection_classes'][0][num] in lst:
    boxlst.append(result['detection_boxes'][0][num])   

In [None]:
boxlst

In [None]:
# Fourth coordinate of the first vehicle box.
# NOTE(review): raises IndexError when boxlst is empty.
boxlst[0][3]

In [None]:
# All boxes of batch item 0.
detection_boxes = result['detection_boxes'][0]

In [None]:
# Boxes of the detections that passed the confidence threshold.
selected_prediction_boxes = result['detection_boxes'][selected_predictions]

In [None]:
selected_prediction_boxes

In [None]:
  # image_np is laid out as (batch, height, width, channels).
  img_h, img_w = image_np.shape[1:3]

In [None]:
# De-normalise in place: columns 0 and 2 are scaled by the image height,
# columns 1 and 3 by the image width.
# NOTE(review): re-running this cell without re-running the cell that creates
# selected_prediction_boxes scales the values a second time.
for i in range(selected_prediction_boxes.shape[0]):
  selected_prediction_boxes[i,0] *= img_h # ymin * img_h
  selected_prediction_boxes[i,1] *= img_w # xmin * img_w
  selected_prediction_boxes[i,2] *= img_h # ymax * img_h
  selected_prediction_boxes[i,3] *= img_w # xmax * img_w

In [None]:
# Convert the scaled coordinates to integers.
selected_prediction_boxes= selected_prediction_boxes.astype(int)

In [None]:
selected_prediction_boxes

In [None]:
# First kept box, columns ordered (ymin, xmin, ymax, xmax).
box = selected_prediction_boxes[0] 

In [None]:
box

In [None]:
# Text anchor: left edge, 10 px above the bottom of the image.
org = (0,img_h-10)
font = cv2.FONT_HERSHEY_SIMPLEX
fontScale = 1
# Text colour is (0, 0, 255) and text thickness is 1 px; the rectangle below
# uses (0, 255, 0) with a 2 px line.
# NOTE(review): image_np comes from load_image_into_numpy_array, which
# documents RGB channel order, so (0, 0, 255) presumably renders blue here
# rather than red.
thickness = 1
color = (0, 0, 255)
# box is ordered (ymin, xmin, ymax, xmax); the corner points are passed as
# (x, y), hence the swapped indices.
cv2.rectangle(image_np[0], (box[1], box[0]), (box[3], box[2]), (0,255,0), 2)
#cv2.rectangle(img, (xmin,ymin), (xmax, ymax), (0,255,0), 2)
cv2.putText(image_np[0], 'Hello', org, font, fontScale, color, thickness, cv2.LINE_AA, False)
# NOTE(review): image_np is documented as RGB, so this BGR2RGB call presumably
# leaves img in BGR order; img is not displayed in this cell.
img = cv2.cvtColor(image_np[0], cv2.COLOR_BGR2RGB)

In [None]:
image_np[0].shape

In [None]:
# Show the image after the drawing calls above.
plt.imshow(image_np[0])

In [None]:
selected_prediction_boxes

In [None]:
    detection_boxes = result['detection_boxes'][0]
    selected_prediction_boxes = result['detection_boxes'][selected_predictions]
    #De-normalize box co-ordinates (multiply x-coordinates by image width and y-coords by image height)
    img_h, img_w = image_np.shape[1:3]

    for i in range(selected_prediction_boxes.shape[0]):
        
        selected_prediction_boxes[i,0] *= img_h #ymin * img_w
        selected_prediction_boxes[i,1] *= img_w #xmin * img_h
        selected_prediction_boxes[i,2] *= img_h #ymax * img_w
        selected_prediction_boxes[i,3] *= img_w #xmax * img_h
    
    #Make all co-ordinates as integer
    selected_prediction_boxes= selected_prediction_boxes.astype(int)

    lst =[2,3,4,5,6,7,8]
    boxlst =[]
    size = result['detection_classes'].shape[1]
    for num in range(size) :
      #if result['detection_classes'][0][num] in lst :
      if selected_predictions[0][num] and result['detection_classes'][0][num] in lst:
        boxlst.append(selected_prediction_boxes[num]) 

In [None]:
boxlst

In [None]:
      # NOTE(review): scratch fragment of the per-box drawing code. `i` is
      # whatever value the last de-normalisation loop left behind, and `text`
      # is not assigned in any cell above this one (its first assignment is
      # inside the video loop further down), so on a fresh kernel this cell
      # fails with NameError.
      box = selected_prediction_boxes[i]      

      org = (0,img_h-10)
      font = cv2.FONT_HERSHEY_SIMPLEX
      fontScale = 1
      # Text colour (0, 0, 255)
      # Text thickness of 1 px (the rectangle below uses 2 px)
      thickness = 1
      color = (0, 0, 255)
      frame = cv2.rectangle(img, (box[1], box[0]), (box[3], box[2]), (0,255,0), 2)
      frame = cv2.putText(img, text, org, font, fontScale, color, thickness, cv2.LINE_AA, False)

In [None]:
    # NOTE(review): xmin, ymin, xmax, ymax and label are not assigned anywhere
    # in the visible notebook; this cell fails with NameError unless they are
    # defined first.
    cv2.rectangle(img, (xmin,ymin), (xmax, ymax), (0,255,0), 2)
    #cv2.rectangle(img, (int(xmin),int(ymin)), (int(xmax), int(ymax)), (0,255,0), 2)
    #Add text
    cv2.putText(img,label,(int(xmin), int(ymin)-5),cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)

In [None]:
def NoofVehicles(arr, vehicle_class_ids=(2, 3, 4, 5, 6, 7, 8)):
  """Count how many detected class ids belong to a vehicle class.

  The default ids are the label-map entries this notebook treats as vehicles:
      2: bicycle, 3: car, 4: motorcycle, 5: airplane, 6: bus, 7: train,
      8: truck

  The class id itself is compared against the vehicle ids; the label map is
  keyed by that same id, so no lookup in category_index is required.

  Args:
    arr: iterable of detected class ids (ints or floats), for example
      result['detection_classes'][selected_predictions].
    vehicle_class_ids: iterable of class ids that count as vehicles.

  Returns:
    The number of items in arr whose class id is in vehicle_class_ids.
  """
  vehicle_ids = frozenset(int(class_id) for class_id in vehicle_class_ids)
  return sum(1 for item in arr if int(item) in vehicle_ids)

In [None]:
# Number of vehicles among the detections that passed the confidence threshold.
print(NoofVehicles(result['detection_classes'][selected_predictions]))

# Read video to count vehicles

In [None]:
# Input video to analyse.
videopath ='/content/models/research/object_detection/test_images/cars.mp4'

In [None]:
# Destination of the annotated video. The video cells only create and use a
# writer when this value is non-empty.
save_path = '/content/models/research/object_detection/test_images/vedioimages/output.webm'

In [None]:
import cv2

# Logic building

In [None]:
#Load video
#capture = cv2.VideoCapture(videopath)
#Get the first frame
hasFrame, frame = capture.read()

img_h = frame.shape[0]
img_w = frame.shape[1]

#if save_path given, initialize video writer
if save_path:
  _fourcc = cv2.VideoWriter_fourcc(*'VP90')
  _out = cv2.VideoWriter(save_path, _fourcc, 25, (img_w,img_h))

In [None]:
_fourcc

In [None]:
img = frame
frame = tf.reshape(frame, [1, img_h, img_w, 3])
results = hub_model(frame)

In [None]:
result = {key:value.numpy() for key,value in results.items()}

In [None]:
confidence_threshold = 0.4
selected_predictions = result['detection_scores'] >= confidence_threshold

In [None]:
result['detection_classes'][selected_predictions]

In [None]:
vehCount = NoofVehicles(result['detection_classes'][selected_predictions])
print(vehCount)

In [None]:
detection_boxes = result['detection_boxes'][0]
selected_prediction_boxes = result['detection_boxes'][selected_predictions]
# De-normalize box co-ordinates (multiply x-coordinates by image width and y-coords by image height)
img_h, img_w = img.shape[0:2]

for i in range(selected_prediction_boxes.shape[0]):
  selected_prediction_boxes[i,0] *= img_h  # ymin * img_h
  selected_prediction_boxes[i,1] *= img_w  # xmin * img_w
  selected_prediction_boxes[i,2] *= img_h  # ymax * img_h
  selected_prediction_boxes[i,3] *= img_w  # xmax * img_w

# Make all co-ordinates as integer
selected_prediction_boxes = selected_prediction_boxes.astype(int)

# Class ids treated as vehicles
lst = [2,3,4,5,6,7,8]
boxlst = []
size = result['detection_classes'].shape[1]
# selected_prediction_boxes only holds the rows that passed the threshold, so a
# position in the full detection list must be translated to a row of that
# filtered array: (number of kept detections up to and including num) - 1.
selected_rows = np.cumsum(selected_predictions[0]) - 1
for num in range(size):
  if selected_predictions[0][num] and result['detection_classes'][0][num] in lst:
    boxlst.append(selected_prediction_boxes[selected_rows[num]])

In [None]:
org = (0,img_h-10)
font = cv2.FONT_HERSHEY_SIMPLEX
fontScale = 0.2
      # Red color in BGR
      # Line thickness of 2 px
thickness = 1
color = (0, 0, 255)
#cv2.rectangle(img, (boxlst[0][1], boxlst[0][0]), (boxlst[0][3], boxlst[0][2]), (0,255,0), 2)
cv2.rectangle(img, (boxlst[0][1], boxlst[0][0]), (boxlst[0][3], boxlst[0][2]), (0,255,0), 2)
      #cv2.rectangle(img, (xmin,ymin), (xmax, ymax), (0,255,0), 2)
cv2.putText(img, 'Veh Count' + str(vehCount), org, font, fontScale, color, thickness, cv2.LINE_AA, False)
#img2 = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
plt.imshow(img)

In [None]:
_out.write(img)

In [None]:
capture.release()
#Close the output video file
if save_path:
  _out.release()

# Check No. of Frames

In [None]:
#Load video
capture = cv2.VideoCapture(videopath)
#Get the first frame
hasFrame, frame = capture.read()
counter1=0
img_h = frame.shape[0]
img_w = frame.shape[1]

#if save_path given, initialize video writer
if save_path:
  _fourcc = cv2.VideoWriter_fourcc(*'VP90')
  _out = cv2.VideoWriter(save_path, _fourcc, 60, (img_w,img_h))

while hasFrame :
  try :
    #frame =np.array(frame.getdata()).reshape((1, im_height, im_width, 3)).astype(np.uint8)
    counter1= counter1+1
    hasFrame, frame = capture.read()
  except Exception as e:
    print(e)
    hasFrame, frame = capture.read()

#cv2.destroyAllWindows()
capture.release()
#Close the output video file
if save_path:
  _out.release()

print("No of frames in Video-",counter1)

# Read video and count vehicles

1st File

In [None]:
# Input and output paths for the first video.
# NOTE: the "2nd File" cell below assigns the same two variables, so when both
# cells are run the values from that cell are the ones used.
videopath ='/content/models/research/object_detection/test_images/video2.mp4'
#save_path = '/content/models/research/object_detection/test_images/vedioimages/output2.webm'
save_path = '/content/models/research/object_detection/test_images/vedioimages/video2output.mp4'

2nd File 

In [None]:
# Input and output paths for the second video. These assignments replace the
# values set by the "1st File" cell above.
videopath ='/content/models/research/object_detection/test_images/cars.mp4'
#save_path = '/content/models/research/object_detection/test_images/vedioimages/output2.webm'
save_path = '/content/models/research/object_detection/test_images/vedioimages/carsoutput.mp4'

In [None]:
#Load video
capture = cv2.VideoCapture(videopath)
#Get the first frame
hasFrame, frame = capture.read()

img_h = frame.shape[0]
img_w = frame.shape[1]

#if save_path given, initialize video writer
if save_path:
  #_fourcc = cv2.VideoWriter_fourcc(*'VP90')
  _fourcc = cv2.VideoWriter_fourcc(*'MP4V')
  _out = cv2.VideoWriter(save_path, _fourcc, 25, (img_w,img_h))

while hasFrame :
  try :
    #frame =np.array(frame.getdata()).reshape((1, im_height, im_width, 3)).astype(np.uint8)
    img = frame
    frame = tf.reshape(frame, [1, img_h, img_w, 3])
    results = hub_model(frame)
    result = {key:value.numpy() for key,value in results.items()}
    confidence_threshold = 0.4
    selected_predictions = result['detection_scores'] >= confidence_threshold
    vehCount = NoofVehicles(result['detection_classes'][selected_predictions])
    #Write no of veicle count in the image at the left-bottom corner
    text = "Veh No" + str(vehCount)

    detection_boxes = result['detection_boxes'][0]
    selected_prediction_boxes = result['detection_boxes'][selected_predictions]
    #De-normalize box co-ordinates (multiply x-coordinates by image width and y-coords by image height)
    img_h, img_w = img.shape[0:2]
    
    for i in range(selected_prediction_boxes.shape[0]):
      selected_prediction_boxes[i,0] *= img_h #ymin * img_w
      selected_prediction_boxes[i,1] *= img_w #xmin * img_h
      selected_prediction_boxes[i,2] *= img_h #ymax * img_w
      selected_prediction_boxes[i,3] *= img_w #xmax * img_h
    
    #Make all co-ordinates as integer
    selected_prediction_boxes= selected_prediction_boxes.astype(int)

    lst =[2,3,4,5,6,7,8] # Classes for vehicles
    boxlst =[]
    size = result['detection_classes'].shape[1]
    for num in range(size) :
      if selected_predictions[0][num] and result['detection_classes'][0][num] in lst:
        boxlst.append(selected_prediction_boxes[num])

    if len(boxlst) > 0 :
      for box in boxlst :
        org = (0,img_h-10)
        font = cv2.FONT_HERSHEY_SIMPLEX
        fontScale = 1
        # Red color in BGR
        # Line thickness of 2 px
        thickness = 1
        color = (0, 0, 255)
        cv2.rectangle(img, (box[1], box[0]), (box[3], box[2]), (0,255,0), 2)
        #cv2.rectangle(img, (xmin,ymin), (xmax, ymax), (0,255,0), 2)
        cv2.putText(img, text, org, font, fontScale, color, thickness, cv2.LINE_AA, False)

      #frame = cv2.rectangle(img, (box[1], box[0]), (box[3], box[2]), (0,255,0), 2)
      #frame = cv2.putText(img, text, org, font, fontScale, color, thickness, cv2.LINE_AA, False)

      #Save the output file
    if save_path:
      #_out.write(frame)
      _out.write(img)
    #Read the next frame
    hasFrame, frame = capture.read()
  except Exception as e:
    print(e)
    hasFrame, frame = capture.read()

#cv2.destroyAllWindows()
capture.release()
#Close the output video file
if save_path:
  _out.release()

In [None]:
from IPython.display import HTML
from base64 import b64encode

In [None]:
video_file = open('/content/models/research/object_detection/test_images/vedioimages/output.webm','rb').read()
data_url = "data:video/webm;base64," + b64encode(video_file).decode()

In [None]:
HTML("""
<video controls>
      <source src="%s" type="video/webm">
</video>
""" % data_url)