<a href="https://colab.research.google.com/github/brytlao/Practical_computer_vision/blob/master/CHAPTER_07_Segmentation_and_Tracking.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Chapter 07: Segmentation and Tracking**

Discussion and examples of image segmentation (FCN) and object tracking (Deep SORT).

# **Libraries required**

In [1]:
# image processing: opencv and numpy
import cv2
import numpy as np
import math

# deep learning: keras and tensorflow
import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow.keras.layers as tfkl
print(tf.__version__)

# helper libraries
import matplotlib.pyplot as plt

# pretrained models
from tensorflow.keras.applications.vgg16 import VGG16

# tf issue: eager mode has to be switched on explicitly in TF 1.x (the recorded
# output of this cell shows 1.13.1). TF 2.x runs eagerly by default and no
# longer exposes tf.enable_eager_execution at the top level, so only call it
# when the attribute exists instead of crashing with AttributeError.
if hasattr(tf, "enable_eager_execution"):
  tf.enable_eager_execution()


For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

1.13.1


# **Segmentation**

**Challenges in segmentation**

*   noisy boundaries
*   cluttered scene

**Implementation of segmentation: FCN**

In [0]:
# create fcn32 model
def create_model_fcn32(num_class,input_w=256):
  """
  create FCN-32s model for segmentation

  A VGG16 backbone (ImageNet weights, no dense top) is followed by a small
  convolutional head and a single stride-32 transposed convolution; a softmax
  over the last (channel) axis turns the scores into per-pixel class
  probabilities.

  input:
    num_class: number of detection categories (at least 1)
    input_w: input width, using square image; must be a positive multiple
      of 32, otherwise the stride-32 upsampling cannot reproduce the input size
  returns created model, compiled with adam and sparse categorical
    cross-entropy
  raises ValueError when num_class or input_w is out of range
  """
  # validate before downloading weights / building any layers: the backbone
  # reduces the spatial size by 32 (rounding down) and the head multiplies it
  # by 32 again, so any other width gives an output smaller than the input
  if num_class < 1:
    raise ValueError("num_class must be at least 1, got {}".format(num_class))
  if input_w < 32 or input_w % 32 != 0:
    raise ValueError(
        "input_w must be a positive multiple of 32, got {}".format(input_w))

  # using VGG with pretrained weights
  vgg_model = VGG16(include_top=False,
                    weights='imagenet',
                    input_shape=(input_w,input_w,3))

  # network extension: convolutional stand-ins for the dense layers, a 1x1
  # classifier, then learned upsampling back to the input resolution
  last_model_shape = vgg_model.output_shape[1:]
  ext_model = tf.keras.Sequential([
      tfkl.Conv2D(4096,kernel_size=(7,7),use_bias=False,
            activation='relu',padding='same',
            input_shape=last_model_shape),
      tfkl.Dropout(0.5),
      tfkl.Conv2D(4096,kernel_size=(1,1),use_bias=False,
            activation='relu',padding='same'),
      tfkl.Dropout(0.5),
      # one score channel per class
      tfkl.Conv2D(num_class,kernel_size=(1,1),use_bias=False,
            padding='same'),
      # with padding='same' the output side is 32x the input side
      tfkl.Conv2DTranspose(num_class,
                           kernel_size=(64,64),
                           strides=(32,32),
                           use_bias=False,
                           padding='same'),
      tfkl.Activation(tf.nn.softmax)
  ])

  # combine models
  model = tf.keras.Sequential([vgg_model,ext_model])

  # compile model; the sparse loss expects integer class ids as labels
  model.compile(optimizer="adam",
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])

  # model summary (printed separately for the backbone and the head)
  vgg_model.summary()
  ext_model.summary()

  return model

In [3]:
# build the segmentation network with 21 output classes (default 256x256 input)
model = create_model_fcn32(num_class=21)

Instructions for updating:
Colocations handled automatically by placer.


W0508 14:44:04.767608 140677185722240 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/resource_variable_ops.py:642: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Colocations handled automatically by placer.


Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


W0508 14:44:07.521167 140677185722240 deprecation.py:506] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/layers/core.py:143: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 256, 256, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 256, 256, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 256, 256, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 128, 128, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 128, 128, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 128, 128, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 64, 64, 128)       0         
__________

# **Tracking**

**Challenges in tracking**

*   object occlusion
*   fast movement
*   change of shape
*   false positives

**Demo of tracking: Deep SORT**

based on: https://github.com/ZQPei/deep_sort_pytorch

In [0]:
# install dependencies
import os
from os.path import exists, join, basename

project_name = "deep_sort_pytorch"
if not exists(project_name):
  # clone and install
  !git clone -q --recursive https://github.com/ZQPei/deep_sort_pytorch.git
  
import sys
sys.path.append(project_name)
sys.path.append(join(project_name, 'YOLO3'))

import IPython
from IPython.display import clear_output

In [0]:
# download pretrained weights
# detector weights, later passed to YOLO3(...); fetched only when not already on disk
if not exists('yolov3.weights'):
  !wget -q https://pjreddie.com/media/files/yolov3.weights
    
# checkpoint later passed to DeepSort(...); hosted on Google Drive under file_id
if not exists('ckpt.t7'):
  file_id = '1_qwTWdzT9dWNudpusgKavj_4elGgbkUN'
  # IPython expands $file_id before the shell runs curl
  # NOTE(review): the downloads are not verified; curl writes whatever response
  # it receives to ckpt.t7, and the exists() guard would then skip a retry — confirm
  !curl -Lb ./cookie "https://drive.google.com/uc?export=download&id=$file_id" -o ckpt.t7

In [6]:
# initialize model
import cv2
import time

from YOLO3 import YOLO3
from deep_sort import DeepSort
from util import draw_bboxes

# detector: network config, weight file and class-name list, in that order
# (is_xywh=True presumably selects x/y/w/h box output — confirm in YOLO3)
yolo3 = YOLO3(
    "deep_sort_pytorch/YOLO3/cfg/yolo_v3.cfg",
    "yolov3.weights",
    "deep_sort_pytorch/YOLO3/cfg/coco.names",
    is_xywh=True,
)

# tracker, restored from the downloaded checkpoint
deepsort = DeepSort("ckpt.t7")

Loading weights from yolov3.weights... Done!
Loading weights from ckpt.t7... Done!


In [7]:
# download source video
# clip to fetch, and how much of it to keep for the demo
VIDEO_URL = 'https://motchallenge.net/movies/MOT16-06.mp4'
DURATION_S = 10  # process only the first 10 seconds

# trimmed copy used by the rest of the notebook; skip all work when it exists
video_file_name = 'video.mp4'
if not exists(video_file_name):
  # IPython expands the $NAME references before handing the line to the shell
  !wget -q $VIDEO_URL
  # wget saves under the last path component of the URL
  downloaded_file_name = basename(VIDEO_URL)
  # change duration and name of file
  # (-t limits the duration to DURATION_S seconds, -y overwrites an existing output)
  !ffmpeg -y -loglevel info -t $DURATION_S -i $downloaded_file_name $video_file_name

def show_video(file_name, width=640, height=480):
  """
  build an inline HTML5 player for a local video file
  input:
    file_name: path of the video file; its whole content is embedded in the
      returned markup as base64, so large files make the notebook large
    width: value of the <video> width attribute
    height: value of the <video> height attribute
  returns IPython.display.HTML object holding the <video> element
  """
  import base64
  from IPython.display import HTML
  # read inside a with-block so the file handle is closed deterministically
  with open(file_name, 'rb') as video_file:
    video_encoded = base64.b64encode(video_file.read())
  return HTML(data='''<video width="{0}" height="{1}" alt="test" controls>
                      <source src="data:video/mp4;base64,{2}" type="video/mp4" />
                      </video>'''.format(width, height, video_encoded.decode('ascii')))

# wipe the download / ffmpeg logs from the cell output, then preview the clip
clear_output()
video = show_video(file_name='video.mp4')
video

In [0]:
# track pedestrians in video
video_capture = cv2.VideoCapture()
if video_capture.open('video.mp4'):
  width, height = int(video_capture.get(cv2.CAP_PROP_FRAME_WIDTH)), int(video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
  fps = video_capture.get(cv2.CAP_PROP_FPS)
  !rm -f output.mp4 output.avi
  # can't write out mp4, so try to write into an AVI file
  video_writer = cv2.VideoWriter("output.avi", cv2.VideoWriter_fourcc(*'MJPG'), fps, (width, height))
  while video_capture.isOpened():
    ret, frame = video_capture.read()
    if not ret:
      break
      
    start = time.time()
    xmin, ymin, xmax, ymax = 0, 0, width, height
    im = frame[ymin:ymax, xmin:xmax, (2,1,0)]
    bbox_xywh, cls_conf, cls_ids = yolo3(im)
    if bbox_xywh is not None:
        mask = cls_ids==0
        bbox_xywh = bbox_xywh[mask]
        bbox_xywh[:,3] *= 1.2
        cls_conf = cls_conf[mask]
        outputs = deepsort.update(bbox_xywh, cls_conf, im)
        if len(outputs) > 0:
            bbox_xyxy = outputs[:,:4]
            identities = outputs[:,-1]
            frame = draw_bboxes(frame, bbox_xyxy, identities, offset=(xmin,ymin))

    end = time.time()
    print("time: {}s, fps: {}".format(end-start, 1/(end-start)))
            
    video_writer.write(frame)
  video_capture.release()
  video_writer.release()
  
  # convert AVI to MP4
  !ffmpeg -y -loglevel info -i output.avi output.mp4
else:
  print("can't open the given input video file!")

In [9]:
# play back the converted clip written by the tracking cell
show_video(file_name='output.mp4', width=640, height=480)