In [1]:
import sys
!{sys.executable} -m pip install opencv-python

Collecting opencv-python
[?25l  Downloading https://files.pythonhosted.org/packages/d0/f0/cfe88d262c67825b20d396c778beca21829da061717c7aaa8b421ae5132e/opencv_python-4.2.0.34-cp37-cp37m-manylinux1_x86_64.whl (28.2MB)
[K     |████████████████████████████████| 28.2MB 23.8MB/s eta 0:00:01   |██████████                      | 8.9MB 5.3MB/s eta 0:00:04
Installing collected packages: opencv-python
Successfully installed opencv-python-4.2.0.34


In [29]:
import os
import shutil
import numpy as np
import cv2
from sklearn.model_selection import train_test_split
from keras.preprocessing import image
from keras.layers import LSTM
from keras.models import Sequential
from keras.layers.convolutional import Conv2D
from keras.layers.core import Dropout
from keras.layers.core import Dense
from keras.layers.core import Flatten

In [40]:
def check_create_dirs(outer_dir, inner_dir):
    if not os.path.exists(outer_dir):
        os.makedirs(outer_dir)
    if not os.path.exists(outer_dir + "/" + inner_dir):
        os.makedirs(outer_dir + "/" + inner_dir)

def generate_filepath(writefile_prefix, directory, frame_number):
    return directory + "/" + writefile_prefix[:-4] + "/" + writefile_prefix[:-4] + "_%d.png" % frame_number

def clear_extracted(directory_1="./contains_human_extracted", directory_2="./human_less_extracted"):
    for directory in [directory_1, directory_2]:
        if os.path.exists(directory):
            shutil.rmtree(directory)

def extract_frames(readfile, writefile_prefix, directory, num_frames_to_save=15, resize=True, x_dim=50, y_dim=50):
    video = cv2.VideoCapture(readfile)
    num_frames = video.get(cv2.CAP_PROP_FRAME_COUNT) # Get number of frames in video
    if num_frames < num_frames_to_save: # Reject if not enough frames
      print("File \"" + readfile + "\" has only " + str(num_frames) + " frames. Discarding.")
      return
    factor = num_frames//num_frames_to_save
    success, image = video.read()
    frame_number = 0
    num_frames_saved = 0
    check_create_dirs(directory, writefile_prefix[:-4])
    while success:
      if (frame_number%factor) == 0:
        # Resize and write image
        if resize:
          image = cv2.resize(image, (x_dim, y_dim))
        cv2.imwrite(generate_filepath(writefile_prefix, directory, frame_number), image)
        num_frames_saved += 1
        # Check if we have saved enough frames
        if num_frames_saved == num_frames_to_save:
          return
      # Get next frame
      success, image = video.read()
      frame_number += 1
    return

def extract_frames_all(read_directory_name, write_directory_name, num_frames_to_save, x_dim, y_dim):
  for filename in os.listdir(read_directory_name):
    extract_frames(read_directory_name + "/" + filename, filename, write_directory_name, num_frames_to_save, True, x_dim, y_dim)

def read_training_data(base_directory_name, training_set, labels, x_dim, y_dim, contains_humans, num_sets_to_read):
  num_sets_read = 0
  for directory in os.listdir(base_directory_name):
    frames_of_video = []
    for filename in os.listdir(base_directory_name + "/" + directory):
      train_image = image.load_img(base_directory_name + "/" + directory + "/" + filename, target_size=(x_dim, y_dim, 3))
      train_image = image.img_to_array(train_image)
      train_image = train_image/255
      train_image = train_image.flatten()
      frames_of_video.append(train_image)
    if contains_humans:
      labels.append([1,0])
    else:
      labels.append([0,1])
    training_set.append(frames_of_video)
    num_sets_read += 1
    if num_sets_to_read:
      if num_sets_to_read == num_sets_read:
        print(str(num_sets_read) + " videos have been read.")
        return
  print(str(num_sets_read) + " videos have been read.")
  return

def get_data_and_labels(x_dim, y_dim, num_sets_to_read, directory_1="contains_human_extracted", directory_2="human_less_extracted"):
  training_set = []
  labels = []
  read_training_data(directory_1, training_set, labels, x_dim, y_dim, True, num_sets_to_read)
  read_training_data(directory_2, training_set, labels, x_dim, y_dim, False, num_sets_to_read)
  full_data_set = np.array(training_set)
  # This is where we split into training and testing sets.
  (x_train, x_test, y_train, y_test) = train_test_split(full_data_set, labels, test_size=0.25, stratify=labels, random_state=42)
  x_train = np.array(x_train)
  y_train = np.array(y_train)
  x_test = np.array(x_test)
  y_test = np.array(y_test)
  return (x_train, x_test, y_train, y_test)

def get_model(num_frames_to_save, x_dim, y_dim): # Change for better model.
  model = Sequential()
  model.add(LSTM(100, input_shape=(num_frames_to_save, x_dim*y_dim*3)))
  model.add(Dropout(0.5))
  model.add(Dense(100, activation='relu'))
  model.add(Dense(2, activation='softmax'))
  model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
  return model

In [41]:
x_dim = 50
y_dim = 50
num_frames = 10 # How many frames per video should we extract?
num_videos = 5 # How many videos should be use? None = use all available videos.

In [42]:
os.chdir("/notebooks/ALL_NOTEBOOKS")

In [43]:
# May not want to run if already extracted.
clear_extracted()
print("Garbage from previous rounds successfully removed.")
extract_frames_all("contains_human", "contains_human_extracted", num_frames, x_dim, y_dim)
extract_frames_all("human_less", "human_less_extracted", num_frames, x_dim, y_dim)
print("Video frames successfully extracted.")

Garbage from previous rounds successfully removed.
File "contains_human/FARM1-44218-2020_04_06__10_51_58.mkv" has only -1.3835058055282163e+17 frames. Discarding.
File "contains_human/FARM1-44221-2020_04_06__11_16_31.mkv" has only -1.3835058055282163e+17 frames. Discarding.
File "contains_human/FARM1-44236-2020_04_06__11_41_08.mkv" has only -1.3835058055282163e+17 frames. Discarding.
File "contains_human/FARM1-44208-2020_04_06__10_11_58.mkv" has only -1.3835058055282163e+17 frames. Discarding.
File "contains_human/FARM1-44232-2020_04_06__11_36_27.mkv" has only -1.3835058055282163e+17 frames. Discarding.
File "contains_human/FARM1-45850-2020_04_10__11_16_41.mkv" has only -1.3835058055282163e+17 frames. Discarding.
File "contains_human/FARM1-44206-2020_04_06__10_06_31.mkv" has only -1.3835058055282163e+17 frames. Discarding.
File "contains_human/FARM1-45859-2020_04_10__11_46_19.mkv" has only -1.3835058055282163e+17 frames. Discarding.
File "contains_human/FARM1-45842-2020_04_10__10_51_31

In [44]:
# Reading in the frames and splitting into test and training sets.
(x_train, x_test, y_train, y_test) = get_data_and_labels(x_dim, y_dim, num_videos)
print("Train and test data read.")

5 videos have been read.
5 videos have been read.
Train and test data read.


In [45]:
# Retrieve model.
lstm_model = get_model(num_frames, x_dim, y_dim)
print("Model generated.")

Model generated.


In [46]:
# Training and accuracy.
lstm_model.fit(x_train, y_train, epochs=5, batch_size=2, validation_data=(x_test, y_test))
print("Training complete.")

Train on 7 samples, validate on 3 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training complete.
