## Importing Modules

In [None]:
# Fetch the project repository; later cells read its requirements file and test videos.
!git clone https://github.com/engares/MoViNets-for-Violence-Detection-in-Live-Video-Streaming.git

Cloning into 'MoViNets-for-Violence-Detection-in-Live-Video-Streaming'...
remote: Enumerating objects: 398, done.[K
remote: Counting objects: 100% (172/172), done.[K
remote: Compressing objects: 100% (85/85), done.[K
remote: Total 398 (delta 93), reused 162 (delta 86), pack-reused 226[K
Receiving objects: 100% (398/398), 57.37 MiB | 12.79 MiB/s, done.
Resolving deltas: 100% (126/126), done.


In [None]:
!sudo apt-get install pkg-config libhdf5-dev

In [None]:
!pip install -r ./MoViNets-for-Violence-Detection-in-Live-Video-Streaming/requeriments_tflite.txt

[31mERROR: Could not open requirements file: [Errno 2] No such file or directory: './MoViNets-for-Violence-Detection-in-Live-Video-Streaming/requeriments_arm64.txt'[0m[31m
[0m

In [13]:
import random
import os
import cv2
import numpy as np

# Some modules to display an animation using imageio.
import imageio

import tflite_runtime.interpreter as tflite
import tensorflow as tf


## Building the model

In [14]:
# Load the TFLite model from disk and obtain its signature runner.
tflite_filename = './model.tflite'

interpreter = tf.lite.Interpreter(model_path=tflite_filename)
runner = interpreter.get_signature_runner()

# One zero-filled tensor per signature input, shaped and typed from the
# runner's own input details.
init_states = dict(
    (name, tf.zeros(details['shape'], dtype=details['dtype']))
    for name, details in runner.get_input_details().items()
)
# The 'image' input is passed explicitly on every runner call, so it is
# removed from the carried state.
del init_states['image']


### Streaming Inference

In [15]:
def to_gif(images,path = './animation.gif' ):
  """
    Save a sequence of frames as an animated GIF.

    Args:
      images: Array of frames; values are multiplied by 255 and clipped to
        [0, 255], so anything outside [0, 1] saturates.
      path: Destination file path for the GIF.

    Return:
      The result of `embed.embed_file(path)` when an `embed` helper is
      available in the notebook namespace, otherwise `path` itself.
  """
  # Scale to the full 8-bit range (was 244, which darkened every frame and
  # disagreed with video_to_gif_tensor).
  converted_images = np.clip(images * 255, 0, 255).astype(np.uint8)
  imageio.mimsave(path, converted_images, fps=5)

  # `embed` is not imported anywhere in this notebook; look it up at call time
  # so the function still works (returning the path) when it is absent.
  embed_helper = globals().get('embed')
  if embed_helper is None:
    return path
  return embed_helper.embed_file(path)

def frames_from_video_file(video_path, n_frames, output_size = (224,224), frame_step = 15):
  """
    Sample evenly spaced frames from a video file.

    A random start position is chosen so that the whole sampled span fits in
    the video when it is long enough; otherwise sampling starts at frame 0 and
    frames that cannot be read are replaced by all-zero frames.

    Args:
      video_path: File path to the video.
      n_frames: Number of frames to be created per video file.
      output_size: Pixel size of the output frame image.
      frame_step: Number of source frames advanced between two sampled frames.

    Return:
      An NumPy array of frames in the shape of (n_frames, height, width, channels),
      with the channel order reversed relative to what OpenCV returned.

    Raises:
      ValueError: If not even the first frame can be read from `video_path`.
  """
  result = []
  src = cv2.VideoCapture(str(video_path))

  try:
    # OpenCV reports the frame count as a float; random.randint needs ints.
    video_length = int(src.get(cv2.CAP_PROP_FRAME_COUNT))

    need_length = 1 + (n_frames - 1) * frame_step

    if need_length > video_length:
      start = 0
    else:
      max_start = video_length - need_length
      # randint is inclusive on both ends, so max_start itself is the last
      # start position from which every sampled frame still exists.
      start = random.randint(0, max_start)

    src.set(cv2.CAP_PROP_POS_FRAMES, start)
    # ret is a boolean indicating whether read was successful, frame is the image itself
    ret, frame = src.read()
    if not ret:
      raise ValueError(f"Could not read any frame from video: {video_path}")
    result.append(format_frames(frame, output_size))

    for _ in range(n_frames - 1):
      # Advance frame_step frames; only the last one read is kept.
      for _ in range(frame_step):
        ret, frame = src.read()
      if ret:
        result.append(format_frames(frame, output_size))
      else:
        # Past the end of the video: pad with a blank frame.
        result.append(np.zeros_like(result[0]))
  finally:
    # Always free the capture handle, even when reading fails.
    src.release()

  # Reverse the channel axis (OpenCV decodes as BGR).
  return np.array(result)[..., [2, 1, 0]]

def format_frames(frame, output_size):
  """
    Convert a video frame to float32 and fit it to a fixed size with padding.

    Args:
      frame: Image to convert and resize.
      output_size: Target size, unpacked as the positional size arguments of
        `tf.image.resize_with_pad`.

    Return:
      The float32 frame, resized and padded to `output_size`.
  """
  as_float = tf.image.convert_image_dtype(frame, tf.float32)
  return tf.image.resize_with_pad(as_float, *output_size)

def video_to_gif_tensor(video_path, image_size=(224, 224), fps=12):
    """
    Sample frames from a video, write them as a GIF next to the video, and
    read that GIF back as a normalized TensorFlow tensor.

    Args:
      video_path: String path to the input video file.
      image_size: Size the decoded GIF frames are resized to.
      fps: Used both as the number of frames sampled from the video and as the
        frame rate of the written GIF.

    Returns:
      A float32 TensorFlow tensor of the decoded GIF frames, divided by 255.
    """
    # The GIF shares the video's path, with the extension swapped for .gif.
    stem, _extension = os.path.splitext(video_path)
    gif_path = stem + '.gif'

    # Sample `fps` frames, then scale them to 8-bit for the GIF writer.
    sampled = frames_from_video_file(video_path, n_frames=fps)
    as_uint8 = np.clip(sampled * 255, 0, 255).astype(np.uint8)
    imageio.mimsave(gif_path, as_uint8, fps=fps)

    # Read the file back, decode it, and bring it to the requested size.
    decoded = tf.io.decode_gif(tf.io.read_file(gif_path))
    resized = tf.image.resize(decoded, image_size)

    # Normalize to [0, 1].
    return tf.cast(resized, tf.float32) / 255.0

# Label names, indexed by the model's output position.
CLASSES = ['Fight', 'No_Fight']

def get_top_k(probs, k=2, label_map=CLASSES):
  """Return the k most probable (label, probability) pairs, best first.

  Args:
    probs: Per-class probabilities for one video.
    k: How many of the highest-scoring classes to return.
    label_map: Label names, indexed by class position.

  Return:
    A tuple of (label, probability) pairs in descending probability order.
  """
  ranked = tf.argsort(probs, axis=-1, direction='DESCENDING')
  best = ranked[:k]
  # Gathered labels come back as bytes, so decode them into str.
  names = [raw.decode('utf8') for raw in tf.gather(label_map, best, axis=-1).numpy()]
  scores = tf.gather(probs, best, axis=-1).numpy()
  return tuple(zip(names, scores))


In [17]:
# Point this at the video clip you want to classify.
video = video_to_gif_tensor(
    './MoViNets-for-Violence-Detection-in-Live-Video-Streaming/test_videos/tf_X7GtOGyE_0.avi',
    image_size=(172, 172),
)

# Add a leading batch axis, then cut along axis 1 into one piece per frame.
clips = tf.split(video[tf.newaxis], video.shape[0], axis=1)

# Stream the video through the model one frame at a time, feeding the states
# returned by each call into the next one.
states = init_states
for clip in clips:
  # Each clip has shape [1, 1, 172, 172, 3].
  outputs = runner(image=clip, **states)
  logits = outputs.pop('logits')[0]
  # Everything left after removing the logits is the updated state.
  states = outputs

# Only the logits from the final frame are turned into probabilities.
probs = tf.nn.softmax(logits)
top_k = get_top_k(probs)
print()
for label, prob in top_k:
  print(label, prob)




Fight 0.5970546
No_Fight 0.40294543
