In [1]:
# Install the third-party packages imported by the next cell:
# pytube (for `pytube.YouTube`) and kora (for `kora.drive.upload_public`).
# NOTE(review): no versions are pinned, so a re-run may pull different releases.
!pip install pytube
!pip install -U kora

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [10]:
import numpy as np
import cv2
from tqdm.auto import tqdm
from kora.drive import upload_public
from IPython.core.display import display, HTML
from pytube import YouTube
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
from collections import deque

def display_video(path):
    """Upload a video file and embed a player for it in the cell output.

    ``path`` is handed to ``upload_public``; the URL it returns is printed and
    then used as the ``src`` of an HTML ``<video>`` element that is displayed.
    """
    public_url = upload_public(path)
    print(public_url)
    player_markup = f'<video width=400 controls><source src="{public_url}" type="video/mp4"></video>'
    display(HTML(player_markup))

def download_youtube(url, target_path, quality = "360p"):
    """Download a YouTube video as an mp4 file.

    Args:
        url: Address of the YouTube video.
        target_path: File name the downloaded stream is saved under.
        quality: Resolution label of the stream to fetch (default ``"360p"``).

    Raises:
        ValueError: If no mp4 stream with the requested resolution exists.
    """
    mp4_streams = YouTube(url).streams.filter(file_extension='mp4')
    stream = mp4_streams.get_by_resolution(quality)
    if stream is None:
        # get_by_resolution yields None when nothing matches; fail with a clear
        # message instead of an AttributeError on the download call below.
        raise ValueError(f"No mp4 stream with resolution {quality!r} available for {url}")
    stream.download(filename = target_path)

def recrop_transform(img):
    """Crop a frame to a centred square inside the bounding box of its non-black pixels.

    The frame is converted to grayscale (``cv2.COLOR_BGR2GRAY``) and the bounding
    box of all non-zero grayscale pixels is computed. The longer side of that box
    is then trimmed evenly from both ends so the result is square.

    Args:
        img: Colour frame indexed as ``[row, column, channel]``.

    Returns:
        A numpy array holding the square crop, with all channels kept.

    Raises:
        ValueError: If the grayscale frame contains no non-zero pixel.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    rows, cols = np.nonzero(gray)
    if rows.size == 0:
        raise ValueError("recrop_transform: frame has no non-black pixels to crop around")

    # Use exclusive end indices (+1) so the last non-black row/column is kept
    # when slicing; the previous code sliced up to the inclusive maximum and
    # dropped one row and one column.
    top, bottom = rows.min(), rows.max() + 1
    left, right = cols.min(), cols.max() + 1
    height = bottom - top
    width = right - left

    if height < width:
        # Box is wider than tall: trim the columns symmetrically.
        left += (width - height) // 2
        right = left + height
    else:
        # Box is taller than (or as tall as) wide: trim the rows symmetrically.
        top += (height - width) // 2
        bottom = top + width

    return np.array(img)[top:bottom, left:right, :]

def recrop_video(ori_path, target_path, fps=24):
    """Write a square-cropped copy of a video.

    Every frame of ``ori_path`` is passed through ``recrop_transform`` and written
    to ``target_path`` with the ``mp4v`` codec. The output frame size is taken
    from the crop of the first frame.

    Args:
        ori_path: Path of the source video.
        target_path: Path of the video file to create.
        fps: Frame rate stored in the output file. Defaults to 24, the value
            that used to be hard-coded.

    Raises:
        IOError: If no frame can be read from ``ori_path``.
    """
    CODEC = cv2.VideoWriter_fourcc(*'mp4v')
    old_cap = cv2.VideoCapture(ori_path)
    new_recrop_cap = None
    try:
        ok, frame = old_cap.read()
        if not ok:
            raise IOError(f"Could not read any frame from {ori_path}")

        # VideoWriter expects (width, height); numpy shapes are (rows, cols).
        out_h, out_w = recrop_transform(frame).shape[:2]
        new_recrop_cap = cv2.VideoWriter(target_path, CODEC, fps, (out_w, out_h))

        old_cap_frame_count = int(old_cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # Rewind once, then read sequentially instead of seeking before every frame.
        old_cap.set(cv2.CAP_PROP_POS_FRAMES, 0)

        for _ in tqdm(range(old_cap_frame_count), desc="Resizing ..."):
            ok, frame = old_cap.read()
            if not ok:
                # The reported frame count can exceed the readable frames.
                break
            cropped = recrop_transform(frame)
            if cropped.shape[:2] != (out_h, out_w):
                # Each frame is cropped by its own bounding box, so its size may
                # differ from the first frame's; the writer needs a constant size.
                cropped = cv2.resize(cropped, (out_w, out_h))
            new_recrop_cap.write(cropped)
    finally:
        old_cap.release()
        if new_recrop_cap is not None:
            new_recrop_cap.release()

def generate_pred_video(ori_path,resized_path,target_path, model, pred_dict, frame_window_size, analysis_ratio = 1, rolling_average = 50, fps = 24):
    """Overlay the model's rolling-average prediction on a video and save the result.

    Frames are read in order from ``ori_path``. For every frame index divisible by
    ``analysis_ratio`` the frame with the same index is read from ``resized_path``,
    resized to 64x64, scaled by 1/255 and pushed into a sliding window of
    ``frame_window_size`` frames. Once the window is full it is passed to
    ``model.predict`` as a batch of one, and ``res[0][0]`` is appended to a buffer
    holding the last ``rolling_average`` scores. The mean of that buffer, rounded
    to an int, is looked up in ``pred_dict`` and drawn on the original frame as
    ``"<label>: <mean>"`` (green when the label is ``"Normal"``, red otherwise).
    Annotated frames are written to ``target_path`` as they are produced.

    Args:
        ori_path: Path of the original video (frames that get annotated).
        resized_path: Path of the cropped video used as model input.
        target_path: Path of the annotated video to create.
        model: Object exposing ``predict``; only ``res[0][0]`` of its result is used.
        pred_dict: Maps the rounded mean score to a label string.
        frame_window_size: Number of consecutive frames fed to the model at once.
        analysis_ratio: Analyse only frames whose index is a multiple of this.
        rolling_average: Number of most recent scores averaged for the label.
        fps: Frame rate stored in the output file. Defaults to 24, the value
            that used to be hard-coded.
    """
    frame_queue = deque(maxlen=frame_window_size)
    rolling_average_deque = deque(maxlen=rolling_average)
    last_predict = ""
    last_label = ""

    CODEC = cv2.VideoWriter_fourcc(*'mp4v')
    FONT = cv2.FONT_HERSHEY_SIMPLEX
    GREEN, RED = (0, 255, 0), (0, 0, 255)

    old_cap = cv2.VideoCapture(ori_path)
    cropped_cap = cv2.VideoCapture(resized_path)
    new_result_cap = None

    try:
        frame_count = int(old_cap.get(cv2.CAP_PROP_FRAME_COUNT))

        for i in tqdm(range(frame_count), desc="Predicting ..."):
            # The original video is consumed sequentially; no per-frame seek needed.
            ok, old_frame = old_cap.read()
            if not ok:
                # The reported frame count can exceed the readable frames.
                break

            if i % analysis_ratio == 0:
                # Seek so the cropped frame index matches the original frame index
                # even when analysis_ratio skips frames.
                cropped_cap.set(cv2.CAP_PROP_POS_FRAMES, int(i))
                ok, cropped_frame = cropped_cap.read()
                if ok:
                    cropped_frame = cv2.resize(cropped_frame, (64,64))
                    cropped_frame = cropped_frame.astype(np.float32)
                    cropped_frame = 1/255. * cropped_frame
                    frame_queue.append(cropped_frame)

                    if len(frame_queue) == frame_window_size:
                        np_frames = np.array([list(frame_queue)])
                        res = model.predict(np_frames)
                        # Only a single score, res[0][0], is used per window.
                        rolling_average_deque.append(round(res[0][0],5))
                        mean_score = sum(rolling_average_deque)/len(rolling_average_deque)
                        last_label = pred_dict[round(mean_score)]
                        last_predict = last_label + ": " + str(mean_score)

            # Colour is chosen from the label alone: the overlay text also contains
            # the score, so comparing the whole text with "Normal" never matched.
            text_color = GREEN if last_label == "Normal" else RED
            old_frame = cv2.putText(old_frame, last_predict, (40,60), FONT,
                           1.5, text_color, 3, cv2.LINE_AA)

            if new_result_cap is None:
                # Open the writer lazily so its size comes from a real frame;
                # VideoWriter expects (width, height).
                out_h, out_w = old_frame.shape[:2]
                new_result_cap = cv2.VideoWriter(target_path, CODEC, fps, (out_w, out_h))
            # Write immediately instead of buffering every frame in memory.
            new_result_cap.write(old_frame)
    finally:
        old_cap.release()
        cropped_cap.release()
        if new_result_cap is not None:
            new_result_cap.release()

In [18]:
# Settings handed to generate_pred_video by the cells that follow.
ANALYSIS_RATIO = 1      # passed as `analysis_ratio`
FRAME_COUNT = 12        # passed as `frame_window_size`
ROLLING_AVERAGE = 50    # passed as `rolling_average`

# "Crime" sample: source URL plus the local file names derived from it.
CRIME_URL = "https://www.youtube.com/watch?v=L9a2xv3PqBw"
CRIME_ORI_PATH = "sample_crime.mp4"
CRIME_RESIZED_PATH = "resized_" + CRIME_ORI_PATH
CRIME_TARGET_PATH = "result_" + CRIME_ORI_PATH

# "Normal" sample.
# NOTE(review): NORMAL_URL is identical to CRIME_URL - looks like a copy-paste
# placeholder; confirm the intended video.
NORMAL_URL = "https://www.youtube.com/watch?v=L9a2xv3PqBw"
NORMAL_ORI_PATH = "sample_normal.mp4"
NORMAL_RESIZED_PATH = "resized_" + NORMAL_ORI_PATH
NORMAL_TARGET_PATH = "result_" + NORMAL_ORI_PATH

# Label looked up from the rounded rolling-average score.
PRED_DICT = {0: "Crime", 1: "Normal"}

In [4]:
# Load the trained model: mount Google Drive, copy the .h5 file to local
# storage, then load it with Keras and print its layer summary.
from google.colab import drive
drive.mount('/content/drive')
import shutil
# Copy the weights file from Drive to /content.
shutil.copy("/content/drive/MyDrive/DeepLearningProject_CrimeDetection/crime_det.h5","/content/crime_det.h5")
from tensorflow.keras.models import load_model
# NOTE(review): the relative path only resolves to the copy made above when the
# working directory is /content - confirm.
model = load_model("./crime_det.h5")
model.summary()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Model: "model_11"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_12 (InputLayer)          [(None, 12, 64, 64,  0           []                               
                                 3)]                                                              
                                                                                                  
 time_spatial_distributed_featu  (None, 12, 62, 62,   224        ['input_12[0][0]']               
 re_extractor_conv2d_1 (TimeDis  8)                                                               
 tributed)                                                                                        
                                                             

In [5]:
# Download the sample video from CRIME_URL to CRIME_ORI_PATH, then upload it
# and show it in an embedded player.
download_youtube(CRIME_URL, CRIME_ORI_PATH)
display_video(CRIME_ORI_PATH)

https://drive.google.com/uc?id=1ygcoyohNQ1ib-T9bkfbQdFl_bViy9p-T


In [6]:
# Crop every frame of the downloaded video with recrop_video, writing the result
# to CRIME_RESIZED_PATH, then show the cropped video.
recrop_video(CRIME_ORI_PATH, CRIME_RESIZED_PATH)
display_video(CRIME_RESIZED_PATH)

Resizing ...:   0%|          | 0/4177 [00:00<?, ?it/s]

https://drive.google.com/uc?id=1Lh07UZ06m8kyP9lEh30G7m6WAUbZsEXg


In [11]:
# Run the model over the cropped video and write the annotated copy of the
# original video to CRIME_TARGET_PATH.
generate_pred_video(CRIME_ORI_PATH,CRIME_RESIZED_PATH,CRIME_TARGET_PATH, model, PRED_DICT, FRAME_COUNT, ANALYSIS_RATIO, ROLLING_AVERAGE)
# NOTE(review): this displays only the path string, not the video; confirm
# whether display_video(CRIME_TARGET_PATH) was intended.
display(CRIME_TARGET_PATH)

Predicting ...:   0%|          | 0/4177 [00:00<?, ?it/s]

  0%|          | 0/4177 [00:00<?, ?it/s]

'result_sample_crime.mp4'

In [12]:
# Time the model: build every sliding window of FRAME_COUNT consecutive frames
# from the cropped video, then measure one batched model.predict call over all
# of them.
from timeit import default_timer as timer

cropped_cap = cv2.VideoCapture(CRIME_RESIZED_PATH)
frame_count = int(cropped_cap.get(cv2.CAP_PROP_FRAME_COUNT))
frame_batch = []
frame_queue = deque(maxlen=FRAME_COUNT)
try:
  for i in tqdm(range(frame_count), desc="Generating ..."):
    # Frames are consumed in order, so no explicit seek is needed before each read.
    ok, cropped_frame = cropped_cap.read()
    if not ok:
      # The reported frame count can exceed the readable frames.
      break
    # Same preprocessing as generate_pred_video: 64x64, float32, scaled by 1/255.
    cropped_frame = cv2.resize(cropped_frame, (64,64))
    cropped_frame = cropped_frame.astype(np.float32)
    cropped_frame = 1/255. * cropped_frame
    frame_queue.append(cropped_frame)
    if len(frame_queue) == FRAME_COUNT:
      frame_batch.append(list(frame_queue))
finally:
  # Release the capture even if preprocessing fails part-way.
  cropped_cap.release()
frame_batch = np.array(frame_batch)

start_timer = timer()
model.predict(frame_batch)
end_timer = timer()

sec_time = end_timer - start_timer
print(f"Model execution of {frame_count} frames took {sec_time} seconds, {int(frame_count/sec_time)}FPS")

Generating ...:   0%|          | 0/4177 [00:00<?, ?it/s]

Model execution of 4177 frames took 4.131938493000007 seconds, 1010FPS


In [13]:
# Load the MobileNet-based model: copy its .h5 file from the mounted Drive to
# local storage, load it with Keras and print its layer summary.
# Relies on `shutil` and the Drive mount from the earlier model-loading cell.
shutil.copy("/content/drive/MyDrive/DeepLearningProject_CrimeDetection/mobilenet_crime_det.h5","/content/mobilenet_crime_det.h5")
from tensorflow.keras.models import load_model
mobilenet_model = load_model("./mobilenet_crime_det.h5")
mobilenet_model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, 12, 64, 64, 3)]   0         
                                                                 
 mobilenet_time_distributed_  (None, 12, 2, 2, 576)    441000    
 mobilenetsmallv3_minim (Tim                                     
 eDistributed)                                                   
                                                                 
 mobilenet_time_distributed_  (None, 12, 2304)         0         
 spatial_feature_extractor_f                                     
 latten (TimeDistributed)                                        
                                                                 
 mobilenet_temporal_feature_  (None, 12, 16)           148544    
 extractor_lstm_1 (LSTM)                                         
                                                           

In [14]:
# Time the MobileNet model: build every sliding window of FRAME_COUNT
# consecutive frames from the cropped video, then measure one batched
# mobilenet_model.predict call over all of them.
from timeit import default_timer as timer

cropped_cap = cv2.VideoCapture(CRIME_RESIZED_PATH)
frame_count = int(cropped_cap.get(cv2.CAP_PROP_FRAME_COUNT))
frame_batch = []
frame_queue = deque(maxlen=FRAME_COUNT)
try:
  for i in tqdm(range(frame_count), desc="Generating ..."):
    # Frames are consumed in order, so no explicit seek is needed before each read.
    ok, cropped_frame = cropped_cap.read()
    if not ok:
      # The reported frame count can exceed the readable frames.
      break
    # Preprocessing: 64x64, float32, scaled by 1/255.
    cropped_frame = cv2.resize(cropped_frame, (64,64))
    cropped_frame = cropped_frame.astype(np.float32)
    cropped_frame = 1/255. * cropped_frame
    frame_queue.append(cropped_frame)
    if len(frame_queue) == FRAME_COUNT:
      frame_batch.append(list(frame_queue))
finally:
  # Release the capture even if preprocessing fails part-way.
  cropped_cap.release()
frame_batch = np.array(frame_batch)

start_timer = timer()
mobilenet_model.predict(frame_batch)
end_timer = timer()

sec_time = end_timer - start_timer
print(f"Mobilenet Model execution of {frame_count} frames took {sec_time} seconds, {int(frame_count/sec_time)}FPS")

Generating ...:   0%|          | 0/4177 [00:00<?, ?it/s]

Mobilenet Model execution of 4177 frames took 5.032680078999874 seconds, 829FPS


In [15]:
def _youtube_video_id(url):
    """Return the video id of a YouTube URL, used as the local file-name stem."""
    from urllib.parse import parse_qs, urlparse

    parsed = urlparse(url)
    ids = parse_qs(parsed.query).get("v")
    if ids:
        # Standard watch URL: take the `v` parameter and ignore any others.
        return ids[0]
    # Short links carry the id as the last path segment.
    tail = parsed.path.rstrip("/").rsplit("/", 1)[-1]
    # Fall back to the previous behaviour when nothing else matches.
    return tail or url.split("=")[-1]


def get_pred_video_url(url, model, pred_dict, frame_count, analysis_ratio, rolling_average):
    """Run the whole pipeline on a YouTube URL: download, crop, predict, display.

    Local file names are derived from the video id: ``<id>.mp4`` for the download,
    ``resized_<id>.mp4`` for the cropped video and ``result_<id>.mp4`` for the
    annotated output. The downloaded and cropped videos are shown with
    ``display_video``; for the result only its path is displayed.

    Args:
        url: YouTube URL of the video to analyse.
        model: Model forwarded to ``generate_pred_video``.
        pred_dict: Label mapping forwarded to ``generate_pred_video``.
        frame_count: Forwarded as the frame window size.
        analysis_ratio: Forwarded to ``generate_pred_video``.
        rolling_average: Forwarded to ``generate_pred_video``.
    """
    video_id = _youtube_video_id(url)
    video_format = ".mp4"
    ori_path = video_id + video_format
    resized_path = "resized_" + ori_path
    target_path = "result_" + ori_path

    download_youtube(url, ori_path)
    display_video(ori_path)
    recrop_video(ori_path, resized_path)
    display_video(resized_path)
    generate_pred_video(ori_path,resized_path,target_path, model, pred_dict, frame_count, analysis_ratio, rolling_average)
    display(target_path)

In [16]:
# Run the full download / crop / predict pipeline on another YouTube video using `model`.
get_pred_video_url("https://www.youtube.com/watch?v=SBYxlJxgcVk", model, PRED_DICT, FRAME_COUNT, ANALYSIS_RATIO, ROLLING_AVERAGE)

https://drive.google.com/uc?id=1-3kdpHmaS1IcPBekcCLHi4WgyLq8-RSN


Resizing ...:   0%|          | 0/2687 [00:00<?, ?it/s]

https://drive.google.com/uc?id=1HT_TGnRQi_7Eemvi_LFZQfZCTZOxEtZQ


Predicting ...:   0%|          | 0/2687 [00:00<?, ?it/s]

  0%|          | 0/2687 [00:00<?, ?it/s]

'result_SBYxlJxgcVk.mp4'

In [17]:
# Run the full download / crop / predict pipeline on a further YouTube video using `model`.
get_pred_video_url("https://www.youtube.com/watch?v=z9Xio7rbaRg", model, PRED_DICT, FRAME_COUNT, ANALYSIS_RATIO, ROLLING_AVERAGE)

https://drive.google.com/uc?id=1lJvJudFHlmwtIzRCcUE4Yrb_5eE9M4gR


Resizing ...:   0%|          | 0/3035 [00:00<?, ?it/s]

https://drive.google.com/uc?id=1MxpbwXi-2ut1uxoSVme7sVFkFI7ZnQ_v


Predicting ...:   0%|          | 0/3035 [00:00<?, ?it/s]

  0%|          | 0/3035 [00:00<?, ?it/s]

'result_z9Xio7rbaRg.mp4'