### Import Necessary Packages

In [1]:
import numpy as np
import argparse
import os

import cv2
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
from PIL import Image
import tensorflow as tf
from matplotlib.animation import FuncAnimation
from tensorflow import keras
from tensorflow.keras.applications.inception_v3 import (
    decode_predictions,
    preprocess_input,
)
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image

### Write All Frames From Video

In [1]:
def write_frames(folder, input_video, frame_jump=10):
    """Save every ``frame_jump``-th frame of a video as a JPEG inside ``folder``.

        The video is read from ``os.path.join(folder, input_video)`` and the
        selected frames are written to the same folder as
        ``<video name without extension>_<frame index>.jpg``.
        ``folder`` must already exist.

        Parameters
        ----------
        folder : str
            existing folder that holds the video and receives the frames
        input_video : str
            file name of the input video inside ``folder``
        frame_jump : int, optional
            keep one frame out of every ``frame_jump`` frames (default 10)

        Returns
        -------
        None

        Raises
        ------
        ValueError
            if ``frame_jump`` is smaller than 1

    """
    if frame_jump < 1:
        raise ValueError("frame_jump must be a positive integer")

    # splitext strips only the final extension, so names without a dot or with
    # several dots keep their full stem (str.find(".") mangled both cases).
    outfile_prefix = os.path.splitext(input_video)[0] + "_"
    vs = cv2.VideoCapture(os.path.join(folder, input_video))
    try:
        i = 0
        while True:
            # read the next frame from the file
            grabbed, frame = vs.read()
            # if the frame was not grabbed, then we have reached the end
            if not grabbed:
                break
            if i % frame_jump == 0:
                outfile_name = outfile_prefix + str(i) + ".jpg"
                cv2.imwrite(os.path.join(folder, outfile_name), frame)
            i += 1
    finally:
        # always hand the file handle back, even if reading or writing fails
        vs.release()

In [5]:
path = 'C:\\Users\\user\\chair_model\\vids'
# write_frames saves its JPEGs into this same folder, so restrict the loop to
# video files; otherwise re-running the cell would feed the extracted frames
# back in as if they were videos.
VIDEO_EXTENSIONS = ('.mp4', '.avi', '.mov', '.mkv')
for video_file in sorted(os.listdir(path)):
    if video_file.lower().endswith(VIDEO_EXTENSIONS):
        write_frames(path, video_file)

In [None]:
# Extract frames from a single video that lives in the Desktop videos folder.
write_frames(
    folder='C:\\Users\\user\\Desktop\\videos',
    input_video='empty_chair.mp4',
)

### Take in video and create output video with annotated predictions

In [3]:
# Default folder that create_output_video writes annotated videos to.
# The number of predictions passed in must match the number of frames in the video.
OUTPUT_VIDEO_DIR = 'C:\\Users\\user\\chair_model'

def create_output_video(
    predictions,
    input_vid_location,
    out_location=OUTPUT_VIDEO_DIR,
    frame_jump=10,
    out_name=None,
):
    """Write a copy of a video with the prediction text drawn on every frame.

    Parameters
    ----------
    predictions : list
        one prediction per frame of the video; its length must equal the
        frame count reported by the video container
    input_vid_location : str
        path to the saved input video to annotate
    out_location : str
        folder to write the annotated video to
    frame_jump : int
        refresh the displayed prediction on every nth frame; 1 refreshes it on
        every frame. Frames in between keep showing the last refreshed value.
    out_name : str
        file name for the output video; defaults to ``"annotated_"`` plus the
        input file name

    Returns
    -------
    output_video_location : str or None
        where the annotated video was written, or None when nothing was
        written (prediction/frame count mismatch, or no frame could be read)

    """
    print("processing video :", input_vid_location)

    if out_name:
        outputFileName = out_name
    else:
        outputFileName = "annotated_" + os.path.basename(input_vid_location)
    output_video_location = os.path.join(out_location, outputFileName)

    vs = cv2.VideoCapture(input_vid_location)
    writer = None
    try:
        fps = vs.get(cv2.CAP_PROP_FPS)
        length = int(vs.get(cv2.CAP_PROP_FRAME_COUNT))
        if length != len(predictions):
            print("frames and video length do not match. num frames = ", length)
            return None

        pred_count = 0
        text = "bed score prediction"
        # loop over frames from the video file stream
        while True:
            (grabbed, frame) = vs.read()
            # if the frame was not grabbed, we have reached the end of the stream
            if not grabbed:
                break
            output = frame.copy()

            # The container's frame count is only metadata; guard the index so a
            # stream that yields extra frames keeps the last label instead of
            # raising IndexError.
            if (pred_count % frame_jump) == 0 and pred_count < len(predictions):
                text = f"bed score: {predictions[pred_count]}"
            pred_count += 1
            cv2.putText(output, text, (35, 50), cv2.FONT_HERSHEY_DUPLEX, 2, (0, 255, 0), 5)

            # the writer is created lazily because it needs the frame size
            if writer is None:
                (H, W) = frame.shape[:2]
                fourcc = cv2.VideoWriter_fourcc(*"MP4V")
                writer = cv2.VideoWriter(output_video_location, fourcc, fps, (W, H), True)

            # write the output frame to disk
            writer.write(output)

        print("[INFO] cleaning up...")
    finally:
        # release the file pointers on every exit path
        if writer is not None:
            writer.release()
        vs.release()

    # no writer means no frame was ever read, so no file exists to point at
    if writer is None:
        return None
    return output_video_location

### Make Images Square

### YOLO Crop Images

In [3]:
# Crop test images to only contain chair

CONFIDENCE = 0.5
SCORE_THRESHOLD = 0.5
IOU_THRESHOLD = 0.4

config_path = 'C:\\Users\\user\\chair_model\\yolov3.cfg'
weights_path = 'C:\\Users\\user\\chair_model\\yolov3.weights'
class_path = 'C:\\Users\\user\\chair_model\\coco.names'

net = cv2.dnn.readNet(weights_path, config_path)

classes = []
with open(class_path, "r") as f:
    classes = [line.strip() for line in f]
layer_names = net.getLayerNames()
# getUnconnectedOutLayers() returns an Nx1 array on older OpenCV builds and a
# flat array on newer ones; flattening makes the lookup work with both.
output_layers = [
    layer_names[i - 1] for i in np.array(net.getUnconnectedOutLayers()).flatten()
]
colors = np.random.uniform(0, 255, size=(len(classes), 3))

n = 0
test_image_path = 'E:\\video_unique\\empty'

# Scaling factor used to make bounding box slightly larger to encompass entire object
scaling_factor = 1.5

for filename in os.listdir(test_image_path)[:10]:
    stem = os.path.splitext(filename)[0]
    # crops are written into this same folder; skip them so a re-run does not
    # produce crops of crops
    if stem.endswith('_crop'):
        continue
    outfile_name = stem + '_crop' + '.jpg'

    img = cv2.imread(os.path.join(test_image_path, filename))
    if img is None:
        # cv2.imread returns None for files it cannot decode
        continue
    height, width = img.shape[:2]

    blob = cv2.dnn.blobFromImage(img, 1/255.0, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)

    class_ids = []
    confidences = []
    boxes = []

    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > CONFIDENCE:
                # Object detected
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width * scaling_factor)
                h = int(detection[3] * height * scaling_factor)
                # Rectangle coordinates
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    indexes = cv2.dnn.NMSBoxes(boxes, confidences, SCORE_THRESHOLD, IOU_THRESHOLD)
    # NMSBoxes may return an empty tuple, an Nx1 array or a flat array
    kept = set(np.array(indexes).flatten().tolist())

    for i in range(len(boxes)):
        if i not in kept:
            continue
        if str(classes[class_ids[i]]) != 'chair':
            continue

        x, y, w, h = boxes[i]
        # The enlarged box can extend past the image. A negative start index
        # would make NumPy slice from the far edge (or return nothing), so
        # clamp the box to the image bounds before cropping.
        x0, y0 = max(x, 0), max(y, 0)
        x1, y1 = min(x + w, width), min(y + h, height)
        if x1 <= x0 or y1 <= y0:
            continue
        crop_img = img[y0:y1, x0:x1]
        cv2.imwrite(os.path.join(test_image_path, outfile_name), crop_img)
                
            

# cv2.imshow("Image", img)
# cv2.waitKey(0)
# cv2.destroyAllWindows()

In [2]:
# Load the YOLOv3 network, the class names and the detection thresholds

CONFIDENCE = 0.5
SCORE_THRESHOLD = 0.5
IOU_THRESHOLD = 0.4

config_path = 'C:\\Users\\user\\chair_model\\yolov3.cfg'
weights_path = 'C:\\Users\\user\\chair_model\\yolov3.weights'
class_path = 'C:\\Users\\user\\chair_model\\coco.names'

net = cv2.dnn.readNet(weights_path, config_path)

classes = []
with open(class_path, "r") as f:
    classes = [line.strip() for line in f]

layer_names = net.getLayerNames()
# getUnconnectedOutLayers() returns an Nx1 array on older OpenCV builds and a
# flat array on newer ones; flattening makes the lookup work with both.
output_layers = [
    layer_names[i - 1] for i in np.array(net.getUnconnectedOutLayers()).flatten()
]
# one random BGR colour per class, used when drawing detections
colors = np.random.uniform(0, 255, size=(len(classes), 3))

In [32]:
# Draw the detections that survive non-maximum suppression on the first image
# of the folder and show the result in an OpenCV window.
test_image_path = 'E:\\video_unique\\empty'

# Scaling factor used to make bounding box slightly larger to encompass entire object
scaling_factor = 1.5
font = cv2.FONT_HERSHEY_PLAIN

# Reset so the display step below can never show an image left over from
# another cell when the folder is empty or the file cannot be decoded.
img = None

for filename in os.listdir(test_image_path)[:1]:
    img = cv2.imread(os.path.join(test_image_path, filename))
    if img is None:
        # cv2.imread returns None for files it cannot decode
        continue
    height, width = img.shape[:2]

    blob = cv2.dnn.blobFromImage(img, 1/255.0, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)

    class_ids, confidences, boxes = [], [], []

    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > CONFIDENCE:
                # Object detected
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width * scaling_factor)
                h = int(detection[3] * height * scaling_factor)
                # Rectangle coordinates
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    indexes = cv2.dnn.NMSBoxes(boxes, confidences, SCORE_THRESHOLD, IOU_THRESHOLD)
    # NMSBoxes may return an empty tuple, an Nx1 array or a flat array
    kept = set(np.array(indexes).flatten().tolist())

    for i in range(len(boxes)):
        if i not in kept:
            continue
        x, y, w, h = boxes[i]
        label = str(classes[class_ids[i]])
        # colors holds one row per class, so index it by class id; indexing by
        # box position picks an unrelated colour and overruns the table once
        # there are more boxes than classes.
        color = tuple(float(c) for c in colors[class_ids[i]])
        cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
        cv2.putText(img, label, (x, y + 30), font, 3, color, 3)

if img is not None:
    cv2.imshow("Image", img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
else:
    print("no readable image found in", test_image_path)

### FFMPEG Identify Unique Frames

In [4]:
# Use the ffmpeg command-line tool to pull candidate "unique" frames out of a video sample.
# The select filter keeps only frames whose picture type is I (intra-coded key frames).
# NOTE(review): per the ffmpeg docs, -q:v 2 requests high JPEG quality and -vsync 0 passes
# frames through without duplicating or dropping any - confirm against the installed version.
# Output files are numbered with a zero-padded 3-digit counter (%03d).

!ffmpeg -i E:\\vids\\in_pillow.mp4 -q:v 2 -vf select="eq(pict_type\,PICT_TYPE_I)" \
        -vsync 0 E:\\presort_images\\in_pillow_frame%03d.jpg  

ffmpeg version 4.2.2 Copyright (c) 2000-2019 the FFmpeg developers
  built with gcc 9.2.1 (GCC) 20200122
  configuration: --enable-gpl --enable-version3 --enable-sdl2 --enable-fontconfig --enable-gnutls --enable-iconv --enable-libass --enable-libdav1d --enable-libbluray --enable-libfreetype --enable-libmp3lame --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libopus --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libtheora --enable-libtwolame --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libzimg --enable-lzma --enable-zlib --enable-gmp --enable-libvidstab --enable-libvorbis --enable-libvo-amrwbenc --enable-libmysofa --enable-libspeex --enable-libxvid --enable-libaom --enable-libmfx --enable-amf --enable-ffnvcodec --enable-cuvid --enable-d3d11va --enable-nvenc --enable-nvdec --enable-dxva2 --enable-avisynth --enable-libopenmpt
  libavutil      56. 31.100 / 56. 31.100
  li