<h1>Splitting Video Section</h1>

You need to provide the following:
<ol>
    <li>BUCKET_NAME</li>
    <li>VIDEO_NAME</li>
</ol>

In [None]:
import boto3, os
s3 = boto3.client('s3')

#S3 Bucket directory and video name
BUCKET_NAME = "csm.calpolydxhub" #INPUT S3 BUCKET NAME
VIDEO_NAME = "scooter_Footage_pico.mp4"#INPUT VIDEO NAME
VIDEO_DIR = "video/"+VIDEO_NAME

In [None]:
#Download the video to this directory
video = s3.download_file(BUCKET_NAME, VIDEO_DIR, VIDEO_NAME)

In [None]:
#Make folder "images" in local jupyter notebook.
#exist_ok=True keeps the cell re-runnable when the folder is already there; any
#other OSError (e.g. a permission problem) is raised instead of being reduced
#to a printed message that lets the following cells fail silently.
os.makedirs("images", exist_ok=True)

<br/>
<h5>Split The Images From The Video</h5>

Split the video into <u>1 Image</u> per second here. <br/>
Images will be stored locally in the Jupyter environment.

In [None]:
import cv2
import math

# Open the downloaded video for frame extraction.
cap = cv2.VideoCapture(VIDEO_NAME)
# frameRate = cap.get(5) #alternative: read the frame rate from the video itself
frameRate = 30 #hard-coded frame rate (NOT read from the video); adjust if the footage differs
Frames_Per_Second = .5 #images to keep per second of video; the extraction loop keeps every floor(frameRate//Frames_Per_Second)-th frame
print("FrameRate: ",frameRate)

In [None]:
#Save every `frame_interval`-th frame as ./images/00001.jpg, ./images/00002.jpg, ...
frame_interval = math.floor(frameRate//Frames_Per_Second)
saved_images = 0
try:
    while(cap.isOpened()):
        frameId = cap.get(1) #index of the frame the next read() returns
        ret, frame = cap.read()
        if not ret:
            break
        if (frameId % frame_interval == 0):
            filename = "./images/" + '{0:05d}'.format(saved_images + 1) + ".jpg"
            #cv2.imwrite signals failure through its return value, not an exception
            if not cv2.imwrite(filename, frame):
                raise IOError("Could not write " + filename)
            saved_images += 1
finally:
    #release the capture even when extraction is interrupted
    cap.release()
#report the number of files actually written (the old counter started at 1,
#so it printed one more than were saved)
print(saved_images, " images")

<h5>Upload Images To S3</h5>

In [None]:
#Folder name to create and upload the images to in S3
s3_imageFolder = "images" #INPUT S3 FOLDER NAME

In [None]:
#Maximum quantity of images that each sub-folder can contain
images_per_folder = 1000

In [None]:
#The following function is used to upload directory to s3 via boto.
def uploadDirectory(path,bucketname,s3_imageFolder):
    """
    Upload every file found under a local directory to S3, grouped into numbered sub-folders.

    Files are visited directory by directory (os.walk order) and alphabetically inside
    each directory. A running counter across all files picks the sub-folder:
    counter // images_per_folder. Uses the notebook-level names `s3` and
    `images_per_folder`.

    :param path: local directory to walk.
    :param bucketname: destination S3 bucket.
    :param s3_imageFolder: top-level folder (key prefix) inside the bucket.
    """
    uploaded = 0
    for current_dir, _subdirs, filenames in os.walk(path):
        for filename in sorted(filenames):
            # sub-folder is chosen from the count *before* this file is added
            subfolder = str(uploaded // images_per_folder)
            uploaded += 1
            destination_key = '{}/{}/{}'.format(s3_imageFolder, subfolder, filename)
            s3.upload_file(os.path.join(current_dir, filename), bucketname, destination_key)

In [None]:
#Upload the images to s3
uploadDirectory("images",BUCKET_NAME,s3_imageFolder)

<h5>Clear Video And Images</h5>
<p>Remove downloaded video and images. <b>NOTE: </b>Run this before attempting to split a new video.</p>

In [None]:
import shutil
#Tolerate missing targets so the cell also works when there is nothing to clean
#up yet (first run, or a previous clean-up that stopped half way).
shutil.rmtree("images", ignore_errors=True)
if os.path.exists(VIDEO_NAME):
    os.remove(VIDEO_NAME)

<br/>
<h1>Install Dependencies</h1>

<p>Install the dependencies required to start using retinanet.</p>

<br/>
<h5>Cloning And Install retina-net</h5>

In [None]:
#install retina-net
!git clone https://github.com/fizyr/keras-retinanet.git

<br/>
<p>Installing Retina-net</p>
<p><b>NOTE:</b> You have to run this command every time you restart the server.</p>

In [None]:
!cd keras-retinanet && pip install . --user

<br/>
<h5>Install other dependencies</h5>

In [None]:
!pip install numpy scipy h5py
!pip install scikit-learn Pillow imutils
!pip install beautifulsoup4
!pip install tensorflow-gpu==1.14
# !pip install tensorflow==1.14
!pip install keras==2.3.1
!pip install opencv-contrib-python
!pip install tensorboard
!pip install wget
!pip install --user git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI

<

<br/>
<h1>Building The Dataset</h1>
<p>When building a new dataset, you need to run the "Clear Images And Annotations" step first, then "Setting Up Images And Annotations" and finally "Creating The train.csv And test.csv".</p>

You need to provide the following:
<ol>
    <li>BUCKET_NAME</li>
    <li>S3_IMAGESPATH</li>
    <li>S3_ANNOTATIONSPATH</li>
</ol>

In [None]:
BUCKET_NAME = "csm.calpolydxhub" #INPUT S3 BUCKET NAME
S3_IMAGESPATH = "images3/" #INPUT S3 PATH TO IMAGES USED FOR THIS TRAINING JOB
S3_ANNOTATIONSPATH = "output3/" #INPUT S3 PATH TO ANNOTATIONS USED FOR THIS TRAINING JOB

<br/>
<h5>Create the directory structure for this project and helper files</h5>

<p>Build the directories for the project</p>
<p><b>NOTE: </b>You don't need to run this step again when building a new dataset.</p>

In [None]:
import os
#Directory layout of the project. exist_ok=True keeps the cell re-runnable:
#os.mkdir raised FileExistsError on the first directory that was already present.
for directory in ("config", "dataset", "dataset/annotations", "dataset/images",
                  "dataset/predictions", "models", "snapshots", "tensorboard", "video"):
    os.makedirs(directory, exist_ok=True)

<p>Create the python file responsible for providing the paths to all the files</p>

In [None]:
#Source lines of the generated settings module, written verbatim.
config_lines = [
    "# import the necessary packages\n",
    "import os\n",
    "# initialize the base path for the logos dataset\n",
    "BASE_PATH = 'dataset'\n",
    "# build the path to the annotations and input images\n",
    "ANNOT_PATH = os.path.sep.join([BASE_PATH, 'annotations'])\n",
    "IMAGES_PATH = os.path.sep.join([BASE_PATH, 'images'])\n",
    "# degine the training/testing split\n",
    "TRAIN_TEST_SPLIT = 0.75\n",
    "#  build the path to the output training and test .csv files\n",
    "TRAIN_CSV = os.path.sep.join([BASE_PATH, 'train.csv'])\n",
    "TEST_CSV = os.path.sep.join([BASE_PATH, 'test.csv'])\n",
    "# build the path to the output classes CSV files\n",
    "CLASSES_CSV = os.path.sep.join([BASE_PATH, 'classes.csv'])\n",
    "# build the path to the output predictions dir\n",
    "OUTPUT_DIR = os.path.sep.join([BASE_PATH, 'predictions'])\n",
]

#`with` closes the file even if a write fails part-way through
with open("config/esri_retinanet_config.py","w") as file:
    file.writelines(config_lines)

#Empty __init__.py so the "config" directory can be imported as a package
with open("config/__init__.py","w"):
    pass

<br/>
<h5>Setting Up Images And Annotations</h5>

<p>Set up the function used to download contents of a directory from S3.</p>
<p>This function will be used to download the images used for training from S3.</p>

In [None]:
import boto3, os
import errno
s3 = boto3.client('s3')

def download_dir(path, target, bucket):
    """
    Downloads recursively the given S3 path to the target directory.

    Uses the notebook-level boto3 client `s3`. Keys ending in "/" are skipped,
    and the local sub-directories needed for nested keys are created on demand.

    :param path: The S3 directory to download.
    :param target: the local directory to download the files to.
    :param bucket: the name of the bucket to download from
    """

    # Handle missing / at end of prefix
    if not path.endswith('/'):
        path += '/'

    paginator = s3.get_paginator('list_objects_v2')
    for result in paginator.paginate(Bucket=bucket, Prefix=path):
        # A page has no 'Contents' entry when nothing matches the prefix
        for key in result.get('Contents', []):
            # Skip paths ending in /
            if key['Key'].endswith('/'):
                continue
            # Path of the object relative to the requested prefix
            rel_path = key['Key'][len(path):]
            local_file_path = os.path.join(target, rel_path)
            # Make sure directories exist (download_file does not create them)
            local_file_dir = os.path.dirname(local_file_path)
            if local_file_dir:
                os.makedirs(local_file_dir, exist_ok=True)
            s3.download_file(bucket, key['Key'], local_file_path)

In [None]:
#Download the images from S3
download_dir(S3_IMAGESPATH,"dataset/images/",BUCKET_NAME)

<p><b>NOTE: </b>After downloading the images, go into the folder containing those images (dataset/images/) and manually delete the .manifest file.</p>
<br/>

<p>From S3, the following step will get the manifest file containing the data from the labeling job that corresponds with the images downloaded in the previous step</p>

In [None]:
s3_resource = boto3.resource('s3')
s3_outputman= s3_resource.Object(BUCKET_NAME, S3_ANNOTATIONSPATH+"output.manifest")

<br/>
<p>We will need to convert the manifest file into one xml file for each image. This is required so that each image is paired with a file that contains its annotation info.</p>

<p>For the creation of the xml files to be accomplished, you will need to provide the name of the labeling job from ground truth.</p>

In [None]:
labeling_job_name = "SMC-labeling-intersection1-2"

In [None]:
#Parse the downloaded manifest: one JSON value per line of text
import json
from io import StringIO

manifest_raw = s3_outputman.get()['Body'].read().decode('utf-8')
manifest_list = manifest_raw.split("\n")

#Every element except the last is replaced by its parsed JSON value; the last
#element (whatever follows the final newline) is left as the raw string.
manifest_list[:-1] = [json.loads(record) for record in manifest_list[:-1]]

In [None]:
#Make the XML files in the following format

# <?xml version="1.0"?>
# <annotation>
#     <filename>00001.jpg</filename>
#     <source>
#         <annotation>ArcGIS Pro 2.1</annotation>
#     </source>
#     <size>
#         <width>1280</width>
#         <height>720</height>
#         <depth>3</depth>
#     </size>
#     <object>
#         <name>1</name>
#         <bndbox>
#             <xmin>46.67</xmin>
#             <ymin>41.29</ymin>
#             <xmax>57.79</xmax>
#             <ymax>52.40</ymax>
#         </bndbox>
#     </object>
# </annotation>

#The last element of manifest_list is skipped, as before: the parsing cell
#leaves it as an unparsed string.
for entry in manifest_list[:-1]:
    filename = entry['source-ref'].split('/')[-1]
    job_output = entry[labeling_job_name]
    image_size = job_output['image_size'][0]
    width = image_size['width']
    height = image_size['height']
    depth = image_size['depth']
    labels = job_output['annotations']

    #splitext copes with extensions of any length (".jpeg" as well as ".jpg");
    #slicing off the last three characters did not.
    xml_path = "dataset/annotations/" + os.path.splitext(filename)[0] + ".xml"

    #`with` closes the file even if an entry is missing an expected key
    with open(xml_path, "w") as xml_file:
        xml_file.write('<?xml version="1.0"?>\n')
        xml_file.write('<annotation>\n')
        xml_file.write('<filename>'+filename+'</filename>\n')
        xml_file.write('<source>\n')
        xml_file.write('<annotation>SMC-Scooter-Vision</annotation>\n')
        xml_file.write('</source>\n')
        xml_file.write('<size>\n')
        xml_file.write('<width>'+str(width)+'</width>\n')
        xml_file.write('<height>'+str(height)+'</height>\n')
        xml_file.write('<depth>'+str(depth)+'</depth>\n')
        xml_file.write('</size>\n')

        #one <object> per labelled box; left/top/width/height become min/max corners
        for label in labels:
            label_type = label['class_id']
            label_width = label['width']
            label_top = label['top']
            label_height = label['height']
            label_left = label['left']
            xml_file.write('<object>\n')
            xml_file.write('<name>'+str(label_type)+'</name>\n')
            xml_file.write('<bndbox>\n')
            xml_file.write('<xmin>'+str(label_left)+'</xmin>\n')
            xml_file.write('<ymin>'+str(label_top)+'</ymin>\n')
            xml_file.write('<xmax>'+str(label_left+label_width)+'</xmax>\n')
            xml_file.write('<ymax>'+str(label_top+label_height)+'</ymax>\n')
            xml_file.write('</bndbox>\n')
            xml_file.write('</object>\n')
        xml_file.write('</annotation>\n')

<br/>
<h5>Creating The train.csv And test.csv</h5>

In [None]:
# import the necessary packages
!pip install imutils
from config import config
from bs4 import BeautifulSoup
from imutils import paths
import argparse
import random
import os

In [None]:
annot_path = config.ANNOT_PATH
images_path = config.IMAGES_PATH
train_csv = config.TRAIN_CSV
test_csv = config.TEST_CSV
classes_csv = config.CLASSES_CSV
train_test_split = config.TRAIN_TEST_SPLIT

In [None]:
# list the files in the image folder, then cut the list into a training part
# (the first train_test_split share) and a testing part (the rest)
from os import listdir
from os.path import isfile, join
imagePaths = []
for f in listdir(images_path):
    if isfile(join(images_path, f)):
        imagePaths.append(f)
# random.shuffle(imagePaths)
i = int(len(imagePaths) * train_test_split)
trainImagePaths, testImagePaths = imagePaths[:i], imagePaths[i:]

# (name, image list, output CSV) for each split to be written
dataset = [("train", trainImagePaths, train_csv), ("test", testImagePaths, test_csv)]

# set of class labels, starts empty
CLASSES = set()


In [None]:
# loop over the datasets and write one CSV row per usable bounding box:
# <absolute image path>,<xMin>,<yMin>,<xMax>,<yMax>,<label>
for (dType, imagePaths, outputCSV) in dataset:
    # load the contents
    print ("[INFO] creating '{}' set...".format(dType))
    print ("[INFO] {} total images in '{}' set".format(len(imagePaths), dType))

    # `with` closes the CSV even if an annotation file is missing or malformed
    with open(outputCSV, "w") as csv_file:
        # loop over the image paths
        for imagePath in imagePaths:
            # build the corresponding annotation path: same base name, .xml extension
            fname = os.path.splitext(os.path.basename(imagePath))[0] + ".xml"
            annotPath = os.path.sep.join([annot_path, fname])

            # load the contents of the annotation file and build the soup
            with open(annotPath) as annot_file:
                contents = annot_file.read()
            soup = BeautifulSoup(contents, "html.parser")

            # extract the image dimensions
            w = int(soup.find("width").string)
            h = int(soup.find("height").string)

            # loop over all object elements
            for o in soup.find_all("object"):
                # extract the label and bounding box coordinates
                label = str(o.find("name").string)
                xMin = int(float(o.find("xmin").string))
                yMin = int(float(o.find("ymin").string))
                xMax = int(float(o.find("xmax").string))
                yMax = int(float(o.find("ymax").string))

                # truncate any bounding box coordinates that fall outside
                # the boundaries of the image
                xMin = max(0, xMin)
                yMin = max(0, yMin)
                xMax = min(w, xMax)
                yMax = min(h, yMax)

                # ignore empty or inverted boxes (annotation errors); the second,
                # mirrored check in the old code could never trigger
                if xMin >= xMax or yMin >= yMax:
                    continue

                # write the image path, bb coordinates, label to the output CSV;
                # the path is built from images_path, the folder the names were listed from
                row = [os.path.abspath(os.path.join(images_path, imagePath)),
                       str(xMin), str(yMin), str(xMax), str(yMax), label]
                csv_file.write("{}\n".format(",".join(row)))

                # update the set of unique class labels
                CLASSES.add(label)

In [None]:
# write the classes to file, one "<class name>,<index>" line per class
print("[INFO] writing classes...")
# sorted() pins the name -> index mapping. Enumerating the set directly can
# order the classes differently from one kernel session to the next (string
# hashing is randomised per process), silently changing which id each class gets.
rows = [",".join([c, str(i)]) for (i,c) in enumerate(sorted(CLASSES))]
with open(classes_csv, "w") as classes_file:
    classes_file.write("\n".join(rows))

<br/>
<h5>Clear Images And Annotations</h5>
<p><b>NOTE: </b>Run this before attempting to build a new dataset</p>

In [None]:
import shutil
#Every step tolerates a missing target, so the cell also works before the first
#dataset has been built (the CSV files do not exist yet at that point).
for directory in ("dataset/images", "dataset/annotations"):
    shutil.rmtree(directory, ignore_errors=True)
    os.makedirs(directory, exist_ok=True)
for csv_path in ("dataset/train.csv", "dataset/test.csv", "dataset/classes.csv"):
    if os.path.exists(csv_path):
        os.remove(csv_path)

<br/>
<h1>Training</h1>

<h5>Setting up for training</h5>

<p>Download a pre-trained model to use as backbone</p>
<p><b>NOTE:</b> Don't run this if you already have a base model. A base model exists in the github repository under models, download it then upload it to the model folder created in the previous steps</p>

In [None]:
import wget
url = 'https://github.com/fizyr/keras-retinanet/releases/download/0.5.1/resnet50_coco_best_v2.1.0.h5'
wget.download(url, 'models/resnet50_coco_best_v2.1.0.h5')

<p>Set number of steps and epochs</p>

In [None]:
rows = !wc -l dataset/train.csv
rows = int(rows[0].split()[0])
batch_size = 4;
steps = rows//batch_size
epochs = 40

<p>Begin training</p>
<p><b>NOTE: </b>Don't forget to use the previous model if doing more training.</p>

In [None]:
import os
# Weights file (inside models/) that the training command starts from.
# previousModel = "resnet50_coco_best_v2.1.0.h5"
previousModel = "model_12_20_2019.h5"
# Number of sub-directories directly under snapshots/, minus one. The training
# command uses this value as the snapshot and tensorboard folder name, and later
# cells read folder `trainingNumber - 1`.
# NOTE(review): with an empty snapshots/ this is -1, and the later "- 1" appears to
# point at the finished run only if this cell is re-run after training -- confirm.
trainingNumber = len(next(os.walk('snapshots'))[1])-1

In [None]:
!retinanet-train --weights models/$previousModel \
--batch-size $batch_size --steps $steps --epochs $epochs \
--snapshot-path snapshots/$trainingNumber --tensorboard-dir tensorboard/$trainingNumber \
csv dataset/train.csv dataset/classes.csv

<h5>Analyze the trained model with tensorboard</h5>

In [None]:
latest_tensorboard = trainingNumber - 1;
!tensorboard --logdir=tensorboard/$latest_tensorboard

<br/>
<h1>Convert Model To An Inference Model</h1>

<h5>Convert the model</h5>

In [None]:
import datetime

# name the converted model after today's date: model_<month>_<day>_<year>.h5
today = datetime.datetime.today()
new_modelName = 'model_{}_{}_{}.h5'.format(today.month, today.day, today.year)
print('model name: ',new_modelName)

In [None]:
#Pick the .h5 snapshot with the largest ctime in snapshots/<trainingNumber - 1>
import glob
latest_snapshotFolder = trainingNumber - 1;
list_of_snapshot = glob.glob('snapshots/'+str(latest_snapshotFolder)+'/*.h5') # every .h5 file in that folder
latest_snapshot = max(list_of_snapshot, key=os.path.getctime) # max() raises ValueError when no snapshot matched

#Convert that training snapshot into an inference model saved under models/
!retinanet-convert-model $latest_snapshot models/$new_modelName

<h5>Evaluate the model</h5>

In [None]:
modelName = 'model_2_17_2020.h5' #Name of the model to evaluate
!retinanet-evaluate csv dataset/train.csv dataset/classes.csv models/$modelName

<br/>
<h1>Inference</h1>

<p><b>NOTE: </b>If you have a problem with dependencies here, go back and run the "Install Retina-net" step again. Make sure to only install and not clone again.</p>

<h5>Load necessary modules</h5>

In [None]:
# show images inline
%matplotlib inline

# automatically reload modules when they have changed
%load_ext autoreload
%autoreload 2

# import keras
import keras

# import keras_retinanet
from keras_retinanet import models
from keras_retinanet.utils.image import read_image_bgr, preprocess_image, resize_image
from keras_retinanet.utils.visualization import draw_box, draw_caption
from keras_retinanet.utils.colors import label_color
from keras_retinanet.utils.gpu import setup_gpu

# import miscellaneous modules
import matplotlib.pyplot as plt
import cv2
import os
import numpy as np
import time

# use this to change which GPU to use
gpu = 0

# set the modified tf session as backend in keras
setup_gpu(gpu)

In [None]:
model_path = os.path.join('models', 'model_2_17_2020.h5')

# load retinanet model
model = models.load_model(model_path, backbone_name='resnet50')

labels_to_names = {0: 'sidewalk', 1: 'street'}

<h5>Run prediction on an image</h5>

<p><b>Green</b> labels represent scooters on the street and <b>Red</b> labels represent scooters on the sidewalk.</p>

In [None]:
def predict(imagePath, saveDest, displayImage, scoreThreshold=0.20):
    """
    Run the detection model on one image, save an annotated copy and return the boxes.

    Uses the notebook-level `model` and `labels_to_names`.

    :param imagePath: path of the image to analyse.
    :param saveDest: directory the annotated image is written to (same file name).
    :param displayImage: when truthy, also print timing and scores and show the image inline.
    :param scoreThreshold: detections scoring below this value are ignored.
    :return: list of (x, y, width, height) tuples, one per kept detection.
    """
    image = read_image_bgr(imagePath)
    imageName = os.path.basename(imagePath)
    boxLabeles = []

    # copy to draw on (converted to RGB for matplotlib)
    draw = image.copy()
    draw = cv2.cvtColor(draw, cv2.COLOR_BGR2RGB)

    # preprocess image for network
    image = preprocess_image(image)
    image, scale = resize_image(image)

    # process image
    start = time.time()
    boxes, scores, labels = model.predict_on_batch(np.expand_dims(image, axis=0))
    if(displayImage):
        print("processing time: ", time.time() - start)

    # correct for image scale
    boxes /= scale

    scooters_num = 0
    # visualize detections
    for box, score, label in zip(boxes[0], scores[0], labels[0]):
        # scores are sorted so we can break
        if score < scoreThreshold:
            break

        scooters_num +=1
        if(displayImage):
            print(labels_to_names[label],": ",score)

        #+5 changes the color
        #RED: SIDEWALK
        #GREEN: Street
        color = label_color(label+5)

        # Only the detected box is drawn; the fixed (1050,250,1120,400) rectangle
        # that used to be painted for every detection was a debugging aid.
        b = box.astype(int)
        draw_box(draw, b, color=color)
        # stored as (x, y, width, height)
        boxLabeles.append((b[0], b[1], b[2]-b[0], b[3]-b[1]))

        caption = "{} {:.3f}".format(labels_to_names[label], score)
        draw_caption(draw, b, caption)

    if(displayImage):
        plt.figure(figsize=(20, 20))
        plt.axis('off')
        plt.imshow(draw)
        plt.show()
    # `draw` is RGB; swapping the channels once more hands cv2.imwrite the BGR
    # order it expects
    cv2.imwrite(os.path.join(saveDest, imageName), cv2.cvtColor(draw, cv2.COLOR_RGB2BGR))
    print(scooters_num)

    return boxLabeles

In [None]:
# load image
# imageName = 'dataset/images/00132.jpg'
# imageName = 'dataset/images/00715'
imageName = '2.jpg'
predict(imageName, 'dataset/predictions', True)

<br/>
<h1>Sidewalk Detection</h1>

<p>Import dependencies</p>

In [None]:
!pip install fastai

In [None]:
from fastai.vision.models import *
from fastai.vision.learner import *
from fastai.vision import *
from fastai.vision import Image

<p>Test Dataset</p>

In [None]:
path=Path('camvid_tiny') #setup path
path_lbl = path/'labels'
path_img = path/'images'
fnames = get_image_files(path_img)
fnames[:3]

In [None]:
img_f = fnames[0]
img = open_image(img_f)
img.show(figsize=(5,5))

<p>Test Labels</p>

In [None]:
lbl_names = get_image_files(path_lbl)
lbl_names[:3]

In [None]:
get_y_fn = lambda x: path_lbl/f'{x.stem}_P{x.suffix}'
mask = open_mask(get_y_fn(img_f))
mask.show(figsize=(5,5), alpha=1)

In [None]:
src_size = np.array(mask.shape[1:])   #mask data
src_size,mask.data
codes = np.loadtxt(path/'codes.txt', dtype=str)

<p>Create Dataloader</p>

In [None]:
data = (SegmentationItemList.from_folder(path_img)
        .split_by_rand_pct()
        .label_from_func(get_y_fn, classes=codes)
        .transform(get_transforms(), tfm_y=True, size=256)
        .databunch(bs=2, path=path)
        .normalize(imagenet_stats))

In [None]:
name2id = {v:k for k,v in enumerate(codes)}
void_code = name2id['Void']

def acc_camvid(input, target):
    """Mean accuracy of the arg-max prediction over all positions whose target is not `void_code`."""
    labels = target.squeeze(1)
    keep = labels != void_code        # positions that count towards the score
    predicted = input.argmax(dim=1)   # most likely class along dimension 1
    hits = predicted[keep] == labels[keep]
    return hits.float().mean()

In [None]:
learn=unet_learner(data,models.resnet34, metrics=acc_camvid) #creating the architecture with imagenet weights
learn.data.single_ds.tfmargs['size'] = None

<h5>Inference</h5>

In [None]:
learn.load('stage-2')   #Load the trained model; here it is the model saved as "stage-2"
def infer(x):
    """Run the learner on the image at path `x`, show the result and return element [1][0] of the prediction ([ROW][COL])."""
    started_at = time.time()
    prediction = learn.predict(open_image(x))
    print(prediction[0])
    print(prediction[1][0][0][0])
    prediction[0].show()
    print("processing time: ", time.time() - started_at)
    return prediction[1][0]

In [None]:
table = infer('2.jpg')

<h5>Functions</h5>

In [None]:
def sidewalkOverlap(sidewalkTable, bbox, percentArea=.25, overlapThreshold=.20, sidewalkClass=19):
    """
    Decide whether the bottom part of a bounding box lies on the sidewalk.

    :param sidewalkTable: per-pixel class ids indexed as [ROW][COL].
    :param bbox: (x, y, width, height), e.g. (730, 170, 20, 40).
    :param percentArea: share of the box, measured from its bottom edge, that is inspected.
    :param overlapThreshold: share of the inspected area that must be sidewalk.
    :param sidewalkClass: class id in sidewalkTable that counts as sidewalk.
    :return: "sidewalk" or "street".
    """
    # The -1 offsets are kept from the original. Clamping at 0 stops a box that
    # touches the left/top edge from producing a negative index, which slicing
    # would interpret as "count from the end".
    startX = max(bbox[0] - 1, 0)
    startY = max(bbox[1] - 1, 0)
    endX = max(bbox[0] + bbox[2] - 1, startX)
    endY = max(bbox[1] + bbox[3] - 1, startY)

    #How many columns / rows the box covers
    colQty = endX - startX
    rowQty = endY - startY

    #Area of the portion used to calculate overlap
    area = colQty * rowQty * percentArea
    if area <= 0:
        # empty box (or percentArea of 0): nothing can overlap, and the division
        # below would fail
        return "street"

    #Shortened sidewalk table: only the rows covered by the box
    table = sidewalkTable[startY:endY]

    # Number of rows to inspect, counted from the bottom of the box. This must be
    # derived from the ROW count; the old code used the column count, so the
    # inspected strip depended on the width of the box instead of its height.
    rowLimit = rowQty * percentArea

    rowCounter = 0
    sidewalkCounter = 0
    for index in range(len(table)):
        rowCounter += 1
        row = table[-(index + 1)]   # walk upwards from the bottom row
        for col in row[startX:endX]:
            if col == sidewalkClass:
                sidewalkCounter += 1

        if rowCounter >= rowLimit:
            break

    percentOverlap = sidewalkCounter / area

    if percentOverlap >= overlapThreshold:
        return "sidewalk"
    return "street"
    

In [None]:
sidewalkOverlap(table,(1050,250,1120,400))

<br/>
<h1>Label A Video</h1>

<b>NOTE:</b> This section does not involve any tracking and counting of scooters. It's only here to test the detection model.

<p>Download the video you want labeled from s3.</p>

In [None]:
import boto3, os
s3 = boto3.client('s3')

BUCKET_NAME = "csm.calpolydxhub" #INPUT S3 BUCKET NAME
VIDEO_NAME = "scooter_Footage_pico.mp4"#INPUT VIDEO NAME
VIDEO_DIR = "video/filtered videos/"+VIDEO_NAME

video = s3.download_file(BUCKET_NAME, VIDEO_DIR, VIDEO_NAME)

<br/>
<p>Specify the name of the video to be labeled and what it should be named.</p>

In [None]:
video_path = 'video/scooter_Footage_pico2.mp4'
labeledVideo_path = 'video/labeledVideo2.mp4'

In [None]:
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise IOError("Could not open video: " + video_path)
fourcc = cv2.VideoWriter_fourcc(*'XVID')

frameRate = cap.get(5) #get frame rate of video
totalFrames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
#Take the frame size from the stream properties; reading a frame just to
#measure it consumed (and dropped) the first frame of the video.
size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
out = cv2.VideoWriter(labeledVideo_path,fourcc, frameRate, size)

frame_number = 0

try:
    while (cap.isOpened()) :
        ret,frame = cap.read()
        if not ret:
            break

        frame_number += 1

        #only every other frame is analysed and written
        if frame_number % 2 == 1:
            # preprocess image for network. VideoCapture delivers BGR frames, the
            # same channel order predict() feeds the model via read_image_bgr, so
            # `frame` goes in unconverted. The preprocessed array (not the raw
            # frame) must be the one that is resized and passed on.
            network_frame = preprocess_image(frame)
            network_frame, scale = resize_image(network_frame)

            boxes, scores, labels = model.predict_on_batch(np.expand_dims(network_frame, axis=0))

            # correct for image scale
            boxes /= scale

            # RGB copy to draw on, as in predict()
            new_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # visualize detections
            for box, score, label in zip(boxes[0], scores[0], labels[0]):
                # scores are sorted so we can break
                if score < 0.70:
                    break
                #+5 changes the color
                #RED: SIDEWALK
                #GREEN: Street
                color = label_color(label+5)

                b = box.astype(int)
                draw_box(new_frame, b, color=color)

                caption = "{} {:.3f}".format(labels_to_names[label], score)
                draw_caption(new_frame, b, caption)

            # back to BGR for the video writer
            out.write(cv2.cvtColor(new_frame, cv2.COLOR_RGB2BGR))
            print("\r", "{}/{} frames completed".format(frame_number, totalFrames), end="")
finally:
    # release the files even if the run is interrupted
    cap.release()
    out.release()
    cv2.destroyAllWindows()


<br/>
<h1>Count Scooters</h1>

<p>This section will utilize the ML model to count the number of scooters on/off sidewalk from a submitted video</p>

<br/>
<h1>Reduce Video Length</h1>

In [None]:
import boto3, os
s3 = boto3.client('s3')

#S3 Bucket directory and video name
BUCKET_NAME = "csm.calpolydxhub" #INPUT S3 BUCKET NAME
VIDEO_NAME = "broadway-3rd.mpg"#INPUT VIDEO NAME
VIDEO_DIR = "video/20190919/"+VIDEO_NAME

In [None]:
#Download the video to this directory
video = s3.download_file(BUCKET_NAME, VIDEO_DIR, VIDEO_NAME)

In [None]:
import math
cap = cv2.VideoCapture(VIDEO_NAME)

frameRate = cap.get(5) #get frame rate of video
Frames_Per_Second = .5
totalFrames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

PADDING = round(frameRate)* 2 #two seconds before and after the detection

#the detector runs on every sample_interval-th frame
sample_interval = math.floor(frameRate//Frames_Per_Second)

frame_number = 0
write_range = 0             #frames left before the detector is run again
write_threshold = PADDING
newFrames_list = [1]        #ascending frame numbers to keep (frame 1 = first frame)
try:
    while (cap.isOpened()):
        ret,frame = cap.read()
        if not ret:
            break

        frame_number += 1
        scooter_num = 0
        write_range -= 1
        if ( (frame_number % sample_interval == 0) and (write_range <= 0) ):
            # preprocess image for network. VideoCapture delivers BGR frames, the
            # same channel order predict() feeds the model via read_image_bgr, so
            # `frame` goes in unconverted. The preprocessed array (not the raw
            # frame) must be the one that is resized and passed on.
            network_frame = preprocess_image(frame)
            network_frame, scale = resize_image(network_frame)

            boxes, scores, labels = model.predict_on_batch(np.expand_dims(network_frame, axis=0))

            # correct for image scale
            boxes /= scale

            # count the detections that are confident enough
            for box, score, label in zip(boxes[0], scores[0], labels[0]):
                # scores are sorted so we can break
                if score < 0.50:
                    break
                scooter_num += 1
            if(scooter_num != 0):
                write_range = PADDING
                # keep the frames from PADDING before to PADDING after this one,
                # appending only numbers beyond the current end of the list
                for i in range(-1*write_threshold, write_threshold+1):
                    frame_num = frame_number + i
                    if(frame_num <= 0):
                        continue
                    if(newFrames_list[-1] < frame_num):
                        newFrames_list.append(frame_num)
        print("\r", "{}/{} frames completed".format(frame_number, totalFrames), end="")
finally:
    # release the capture even if the run is interrupted
    cap.release()
    cv2.destroyAllWindows()

In [None]:
print( "Video will be reduced to ",(len(newFrames_list)/totalFrames)*100, " of the original length")

In [None]:
cap = cv2.VideoCapture(VIDEO_NAME)
fourcc = cv2.VideoWriter_fourcc(*'XMPG') #refrence this doc: http://www.fourcc.org/codecs.php if using a different video format

frameRate = cap.get(5) #get frame rate of video
frameCount = len(newFrames_list)
#Take the frame size from the stream properties. Reading a frame to measure it
#consumed the first frame, which shifted frame_number by one relative to the
#numbering used when newFrames_list was built.
size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
out = cv2.VideoWriter("MLREDUCED"+VIDEO_NAME,fourcc, frameRate, size)

frame_number = 0     #number of the frame just read (first frame = 1)
current_frame = 0    #index into newFrames_list of the next frame to keep
try:
    #stop once every listed frame is written; indexing newFrames_list beyond
    #its end raised IndexError whenever the video continued past the last one
    while (cap.isOpened() and current_frame < frameCount):
        ret,frame = cap.read()
        if not ret:
            break
        frame_number += 1

        if frame_number == newFrames_list[current_frame]:
            current_frame +=1
            out.write(frame)
        print("\r", "{}/{} frames completed".format(current_frame, frameCount), end="")
finally:
    # release the files even if the run is interrupted
    cap.release()
    out.release()
    cv2.destroyAllWindows()

<br/>
<h1>TRACKING</h1>

<h5>Initialize helper functions and classes</h5>

In [None]:
import cv2
import sys
import imutils
from PIL import Image as PImage

scooters = []
sidewalkTable = None
counter_street = 0
counter_sidewalk = 0
class Tracker:
    """
    One tracked box: an OpenCV CSRT tracker plus its "street"/"sidewalk" label.

    Reads and updates the notebook-level names `scooters`, `sidewalkTable`,
    `counter_street` and `counter_sidewalk`.
    """
    # Class-level defaults; __init__ assigns Id, tracker, box, status, labelType
    # and correctionByDetector on each instance.
    Id = None
    tracker = None
    box=[]
    status=False
    labelType = None
    correctionByDetector = False
    attemptedToRemove = False
    
    def __init__(self, frame, bbox):
        """
        Start tracking `bbox` in `frame` and count it as a street or sidewalk scooter.

        :param frame: image the tracker is initialised on.
        :param bbox: box passed to the tracker and to sidewalkOverlap.
        """
        global counter_street
        global counter_sidewalk 
        # NOTE(review): Id is taken from counter_street only, so trackers created while
        # that counter is unchanged get the same Id -- confirm whether Ids must be unique.
        self.Id = counter_street
        self.tracker = cv2.TrackerCSRT_create()
        self.box = bbox;
        self.status = self.tracker.init(frame, bbox)
        # label comes from the current sidewalk table
        self.labelType = sidewalkOverlap(sidewalkTable, bbox)
        if(self.labelType == "street"):
            counter_street +=1
        else:
            counter_sidewalk +=1
        self.correctionByDetector = True
        
    def updateTracking(self, frame):
        """Advance the tracker by one frame; when the update fails, remove this tracker from `scooters`."""
        self.status, self.box = self.tracker.update(frame)
        self.correctionByDetector = False
        if not self.status:
            # the flag is set right before removeSelf(), so removal happens on this call
            self.attemptedToRemove = True
            self.removeSelf() 
            
    def reinitialze(self, frame, bbox):
        """
        Restart tracking from `bbox` with a fresh CSRT tracker and re-evaluate the label.

        Only a change away from "street" moves a count (street -1, sidewalk +1);
        labelType itself is always set to the new label.
        """
        global counter_street
        global counter_sidewalk
        self.correctionByDetector = True
        self.tracker = cv2.TrackerCSRT_create()
        self.box = bbox;
        self.status = self.tracker.init(frame, bbox)
        newLabel = sidewalkOverlap(sidewalkTable, bbox)   
        #If scooter switches from street to sidewalk, change its count to sidewalk
        if(self.labelType != newLabel):
            if(self.labelType == "street"):
                counter_street -=1
                counter_sidewalk +=1
        self.labelType = newLabel
            
    def removeSelf(self):
        """Remove this tracker from `scooters` if attemptedToRemove is already set; otherwise only set the flag."""
        if (self.attemptedToRemove):
            scooters.remove(self)#remove current instance from objects list
        else:
            self.attemptedToRemove = True
            
    def writeFrame(self, frame):
        """Draw the current box on `frame` when status is truthy: (255,0,0) for 'street', (255,255,0) otherwise."""
        if self.status:
            # box is (x, y, width, height): p1 is the top-left, p2 the bottom-right corner
            p1 = (int(self.box[0]), int(self.box[1]))
            p2 = (int(self.box[0] + self.box[2]), int(self.box[1] + self.box[3]))
            if(self.labelType == 'street'):
                cv2.rectangle(frame, p1, p2, (255,0,0), 2, 1)
            else:
                cv2.rectangle(frame, p1, p2, (255,255,0), 2, 1)


In [None]:
HIGH_CONFIDENCE_THRESHOLD=.95
LOW_CONFIDENCE_THRESHOLD=.2

def inferSidewalk(frame):
    """Run the segmentation learner on a BGR frame and return element [1][0] of its prediction ([ROW][COL])."""
    rgb_pixels = cv2.cvtColor(frame,cv2.COLOR_BGR2RGB).astype('uint8')
    # array -> PIL image -> float tensor, divided by 255 in place before wrapping
    image_tensor = pil2tensor(PImage.fromarray(rgb_pixels, 'RGB'),np.float32)
    prediction = learn.predict(Image(image_tensor.div_(255)))
    return prediction[1][0]
    
def detector(frame):
    """
    Run the detection model on a frame and sort the detections by confidence.

    Detections below LOW_CONFIDENCE_THRESHOLD are dropped. Detections at or above
    HIGH_CONFIDENCE_THRESHOLD are "high confidence". The remaining ones are kept
    unless overlap() with a high-confidence box is >= .8. Boxes are also drawn on
    `frame` itself.

    :param frame: image to analyse; presumably a BGR frame straight from
                  cv2.VideoCapture (inferSidewalk treats it as BGR) -- confirm
                  against the caller.
    :return: dict with 'high_confidence' (list of (bbox, name, score), bbox being
             (x, y, width, height)) and 'low_confidence' (the high-confidence
             entries followed by the kept low-confidence ones).
    """
    highConfidenceBoxes=[]
    lowConfidenceBoxes=[]
    # preprocess image for network. The frame is fed in its own (BGR) channel
    # order, matching what predict() gives the model via read_image_bgr, and the
    # preprocessed array (not the raw frame) is the one that is resized and used.
    network_frame = preprocess_image(frame)
    network_frame, scale = resize_image(network_frame)
    boxes, scores, labels = model.predict_on_batch(np.expand_dims(network_frame, axis=0))

    # correct for image scale
    boxes /= scale

    for box, score, label in zip(boxes[0], scores[0], labels[0]):
        # scores are sorted so we can break
        if score<LOW_CONFIDENCE_THRESHOLD:
            break
        bx = box.astype(int)
        bbox = (bx[0], bx[1], bx[2]-bx[0], bx[3]-bx[1])
        tup= (bbox,labels_to_names[label],score)

        if score>=HIGH_CONFIDENCE_THRESHOLD:
            highConfidenceBoxes.append(tup)
            draw_box(frame, bx, color=label_color(label+5))
        elif len(highConfidenceBoxes)==0:
            # nothing to compare against yet: keep the box (it is not drawn)
            lowConfidenceBoxes.append(tup)
        elif not any(overlap(HCbox[0], bbox, False) >= .8 for HCbox in highConfidenceBoxes):
            #Filter overlaped boxes: only boxes clear of every high-confidence box are kept
            draw_box(frame, bx, color=label_color(label+14))
            lowConfidenceBoxes.append(tup)

    detectorHash = {
        'high_confidence': highConfidenceBoxes,
        'low_confidence': highConfidenceBoxes+lowConfidenceBoxes,
    }
    return detectorHash

def overlap(i, j, isTracker):
    """Measure how much of box `i` is covered by box `j`.

    Boxes are indexable as (x, y, width, height). When `isTracker` is true,
    `j` is an object whose `.box` attribute holds the box; otherwise `j` is the
    box itself.

    Returns:
        -1   if either box has a non-positive width or height,
        0.0  if the boxes do not intersect,
        1    if the intersection area equals the area of box `j`
             (box `j` lies entirely inside box `i`),
        otherwise intersection_area / area_of_box_i.

    Areas use the inclusive "+1" pixel convention on both sides, so boxes that
    merely touch along an edge still produce a small positive value.
    """
    # First box as corner coordinates.
    a_left, a_top = i[0], i[1]
    a_right, a_bottom = a_left + i[2], a_top + i[3]

    # Second box, unwrapped from the tracker when needed.
    other = j.box if isTracker else j
    b_left, b_top = other[0], other[1]
    b_right, b_bottom = b_left + other[2], b_top + other[3]

    # Reject degenerate boxes on any axis.
    extents = (
        (a_left, a_right),
        (a_top, a_bottom),
        (b_left, b_right),
        (b_top, b_bottom),
    )
    if any(low >= high for low, high in extents):
        return -1

    # Corners of the intersection rectangle.
    x_left = max(a_left, b_left)
    y_top = max(a_top, b_top)
    x_right = min(a_right, b_right)
    y_bottom = min(a_bottom, b_bottom)

    if x_right < x_left or y_bottom < y_top:
        return 0.0

    def _area(left, top, right, bottom):
        # Inclusive-pixel area of an axis-aligned rectangle.
        return (right - left + 1) * (bottom - top + 1)

    shared_area = _area(x_left, y_top, x_right, y_bottom)
    first_area = _area(a_left, a_top, a_right, a_bottom)
    second_area = _area(b_left, b_top, b_right, b_bottom)

    # The second box is completely contained in the first one.
    if shared_area == second_area:
        return 1

    return shared_area / first_area

def filterbox(frame,detectorHash):
    """Reconcile fresh detector output with the global `scooters` tracker list.

    Args:
        frame: the current video frame, passed on to Tracker construction and
            re-initialisation.
        detectorHash: dict with 'high_confidence' and 'low_confidence' lists of
            (bbox, label_name, score) tuples.

    Behaviour:
        * With no existing trackers, one Tracker is created per
          high-confidence detection and nothing else happens.
        * Otherwise every detection in the 'low_confidence' list is matched
          against the trackers whose `correctionByDetector` flag is not set;
          the first one overlapping by more than 0.5 is re-initialised on the
          detection. An unmatched detection starts a new Tracker only when its
          score is at least HIGH_CONFIDENCE_THRESHOLD.
        * Finally, every tracker whose `correctionByDetector` flag is still
          unset is asked to remove itself.
    """
    highConfDetectorBoxes = detectorHash['high_confidence']
    lowConfDetectorBoxes = detectorHash['low_confidence']

    if len(scooters) == 0:
        for i in highConfDetectorBoxes:
            scooters.append( Tracker( frame, i[0]) )
        return

    for i in lowConfDetectorBoxes:
        referenced = False #has this box been matched to an existing tracker

        for j in scooters:
            # Skip trackers whose flag is already set (presumably set by
            # reinitialze()/construction -- TODO confirm in Tracker).
            if j.correctionByDetector:
                continue
            if overlap(i[0], j, True) > 0.5:
                j.reinitialze(frame, i[0])
                referenced = True
                break

        if not referenced and i[2] >= HIGH_CONFIDENCE_THRESHOLD:
            scooters.append( Tracker( frame, i[0] ) )

    # Remove trackers that the detector did not confirm. Iterate over a
    # snapshot: if removeSelf() takes the tracker out of `scooters`
    # (TODO confirm in Tracker), mutating the list while iterating it directly
    # would silently skip the following tracker.
    for j in list(scooters):
        if not j.correctionByDetector:
            j.removeSelf()
                
                    

<h5>Run program</h5>

In [None]:
#Insert video name below
VIDEO_ADDRESS = 'video/scooter_Footage_pico.mp4'
# VIDEO_ADDRESS= 'broadway-3rd.mpg'

#Insert output video name below
OUTPUTVIDEO_ADDRESS = 'video/intersection3_cut_TRACKED.mp4'

video = cv2.VideoCapture(VIDEO_ADDRESS)

# Exit if video not opened.
if not video.isOpened():
    print ("Could not open video")
    sys.exit()

frameRate = video.get(cv2.CAP_PROP_FPS) #get frame rate of video

# Read one frame up front only to learn the frame size for the writer. This
# advances the capture, so later reads continue from the following frame.
ret, frame = video.read()
if not ret:
    # Without this check a failed read leaves `frame` as None and the shape
    # lookup below fails with an unhelpful AttributeError.
    print ("Could not read the first frame of the video")
    video.release()
    sys.exit()

height, width, layers = frame.shape
size = (width,height)
totalFramesInVideo = int(video.get(cv2.CAP_PROP_FRAME_COUNT))

#Initialize output video
# NOTE(review): 'XVID' is normally paired with .avi containers; confirm the
# OpenCV backend in use accepts it for an .mp4 output file.
fourcc = cv2.VideoWriter_fourcc(*'XVID')
output_video = cv2.VideoWriter(OUTPUTVIDEO_ADDRESS,fourcc, frameRate, size)


In [None]:
#The number of frames to run the detector after
detectorRoutine = 30
#The number of frames to re-run the sidewalk inference after
sidewalkDetectionRoutine = 5000

# try/finally guarantees the capture and, more importantly, the output writer
# are released even when a frame raises or the kernel is interrupted;
# otherwise the partially written output video is never finalised.
try:
    while video.isOpened():
        frameId = video.get(cv2.CAP_PROP_POS_FRAMES) #current frame number
        print("\r", "{}/{} frames completed".format(frameId, totalFramesInVideo), end="")

        # Read a new frame
        ok, frame = video.read()
        if not ok:
            break

        timer = cv2.getTickCount()

        # get updated location of objects in subsequent frames
        for scooter in scooters:
            scooter.updateTracking(frame)

        # The routines key off (frameId - 1): one frame was already read from
        # this capture before the loop, so frameId is 1 on the first pass and
        # both routines fire immediately.

        #run sidewalk detector on routine
        if (frameId - 1) % sidewalkDetectionRoutine == 0:
            sidewalkTable = inferSidewalk(frame)

        #run the object detector on routine
        if (frameId - 1) % detectorRoutine == 0:
            bboxes = detector(frame) #get new bounding boxes and label types as tuples from detector model
            filterbox(frame, bboxes) #reconcile the detections with the existing trackers

        # Guard against a zero tick delta on frames where almost no work ran.
        fps = cv2.getTickFrequency() / max(cv2.getTickCount() - timer, 1)

        for scooter in scooters:
            scooter.writeFrame(frame)

        # Overlay the running counters and the processing rate on the frame
        cv2.putText(frame,"Street Count: " + str(int(counter_street)), (100,20), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0,0,0),2)
        cv2.putText(frame,"Sidewalk Count: " + str(int(counter_sidewalk)), (100,50), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0,0,0),2)
        cv2.putText(frame, "FPS : " + str(int(fps)), (100,80), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (255,255,255), 2)

        output_video.write(frame)
finally:
    video.release()
    output_video.release()
    cv2.destroyAllWindows()