In [None]:
# Install pinned dependencies into the kernel's environment (%pip targets the running kernel).
# Both packages are pinned to the same 2.15.0 release; tf-models-official is presumably what
# provides the `official.projects.movinet` modules imported in the next cell -- TODO confirm.
%pip install tensorflow==2.15.0
%pip install tf-models-official==2.15.0

In [4]:
import tensorflow as tf
import tensorflow_hub as hub
from keras import layers, models
import tensorflow.keras as keras
import os
from official.projects.movinet.modeling import movinet
from official.projects.movinet.modeling import movinet_model
# from tf.keras.optimizers import Adam
# from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
import datetime
import cv2
import os
import numpy as np
import h5py

# Define constants

In [None]:
# Project root, relative to the notebook's working directory.
WSL_PATH = './DL2/project/movinet'
# Sanity check: show the kernel's working directory and whether the project root resolves from it.
# (os.getcwd is the correct call; the os module has no `cwd` attribute.)
print(os.getcwd())
print(os.path.isdir(WSL_PATH))


# File names of the input recording, the annotated output video and the trained checkpoint.
video_name     = 'recorded_video.mp4'
inf_video_name = 'inference_video.mp4'
model_name     = 'trained_checkpoints-a4-10epoch-30numofframes8batchsize20240325-151003'

# Full paths derived from the project root.
model_path     = os.path.join(WSL_PATH, 'models', model_name)
video_path     = os.path.join(WSL_PATH, 'videos', video_name)
inf_video_path = os.path.join(WSL_PATH, 'videos', inf_video_name)
# Label list; predicted class indices are used to index into it, so its order matters.
class_names    = ['bird', 'boar', 'dog', 'dragon', 'horse', 'monkey', 'ox', 'rabbit', 'rat', 'sheep', 'snake', 'tiger']

In [5]:
# Function to preprocess video frames
def preprocess_frame(frame, size=(224,224)):
    """Resize, rescale and batch-wrap a single video frame.

    Args:
        frame (np.ndarray): Image array, e.g. as returned by ``cv2.VideoCapture.read``.
        size (tuple): Target size handed to ``cv2.resize`` (OpenCV's ``dsize`` order
            is ``(width, height)``). Defaults to 224x224.

    Returns:
        np.ndarray: ``float32`` copy of the resized frame divided by 255, with a new
        leading axis of length 1 (values fall in [0, 1] for 8-bit input).
    """
    # Resize frame to the requested size (224x224 by default)
    resized = cv2.resize(frame, size)
    # Scale pixel values by 1/255. Cast to float32 first: NumPy's default true division
    # would produce float64, doubling the memory of every stacked batch.
    scaled = resized.astype(np.float32) / 255.0
    # NOTE(review): frames decoded by OpenCV are in BGR channel order and are passed on
    # unchanged here -- confirm this matches the channel order used during training.
    # Add a leading axis of length 1 in front of the image dimensions
    return np.expand_dims(scaled, axis=0)

def process_batch(batch, model):
    """Stack ``batch`` into one NumPy array and return ``model``'s output for it.

    Args:
        batch (list): Sequence of equally-shaped arrays (or nested lists).
        model (callable): Invoked once with the stacked array.

    Returns:
        Whatever ``model`` returns for the stacked input.
    """
    stacked = np.array(batch)  # list -> single ndarray with a new leading axis
    return model(stacked)

def predict_video(video_path, model, size=(224,224)):
  """
  Predict class names for the frames of a video, in fixed-size batches.

  Frames are read with OpenCV, preprocessed with ``preprocess_frame`` and collected
  into batches of 30. Every full batch is stacked and passed to ``model``; the
  arg-max over axis 1 of the result is mapped through the module-level
  ``class_names`` list. Trailing frames that do not fill a whole batch are
  intentionally ignored, so the result can be shorter than the video.

  Args:
    video_path (str): Path to the video file.
    model (callable): Called with the stacked batch; must return a 2-D score
      array/tensor whose second axis indexes ``class_names``.
    size (tuple): Frame size forwarded to ``preprocess_frame``.

  Returns:
    np.ndarray: Flat array of predicted class names, in frame order, covering
    only the frames that belonged to a full batch (empty if no batch was filled).
  """
  print(os.path.isfile(video_path))
  print(video_path)

  frames_per_batch = 30
  frame_count = 0      # frames successfully decoded
  batch_count = 0      # full batches sent through the model
  frames_batch = []
  predictions_results = []

  cap = cv2.VideoCapture(video_path)
  try:
    while cap.isOpened():
      ret, frame = cap.read()
      if not ret:
        break
      # Count only frames that were actually decoded (the failed final read is not a frame).
      frame_count += 1

      # Honour the caller's `size` instead of silently falling back to the default.
      frames_batch.append(preprocess_frame(frame, size))

      # If the batch is full, process it; a trailing partial batch is ignored.
      if len(frames_batch) == frames_per_batch:
        predictions = process_batch(frames_batch, model)
        batch_count += 1

        # Get class name of top predicted class for every row of the batch
        predicted_indices = tf.argmax(predictions, axis=1)
        predictions_results.append([class_names[i] for i in predicted_indices.numpy()])
        frames_batch = []  # Reset the batch
  finally:
    # Always free the capture handle, even if preprocessing or inference raises.
    cap.release()

  predictions_results = np.array(predictions_results).flatten()

  print(f"Processed {batch_count} batches.")
  print(f"Total frames {frame_count}")

  return predictions_results

In [6]:
# Load Model
# model = load_weights(model_path)
def build_classifier(batch_size, num_frames, resolution, backbone, num_classes):
    """Create a ``MovinetClassifier`` around ``backbone`` and build it.

    Args:
        batch_size: First entry of the input shape used for building.
        num_frames: Second entry of the input shape used for building.
        resolution: Used for both spatial entries of the input shape.
        backbone: Backbone model handed to ``MovinetClassifier``.
        num_classes: Number of output classes of the classifier.

    Returns:
        The classifier after ``build`` has been called on it.
    """
    input_shape = [batch_size, num_frames, resolution, resolution, 3]
    classifier = movinet_model.MovinetClassifier(
        backbone=backbone,
        num_classes=num_classes,
    )
    classifier.build(input_shape)
    return classifier

# Input shape used to build the classifier: [batch_size, num_frames, resolution, resolution, 3].
# NOTE(review): resolution is 320 here while preprocess_frame resizes to 224x224 by default --
# presumably the model accepts other spatial sizes at call time; confirm against training.
num_frames = 30
resolution = 320
batch_size = 8
# MoViNet variant; matches the 'a4' in the checkpoint name.
model_id = 'a4'
backbone = movinet.Movinet(model_id=model_id)
# Freeze the backbone's weights (inference only in this notebook).
backbone.trainable = False
# 12 output classes -- the same count as the entries in class_names.
model = build_classifier(batch_size, num_frames, resolution, backbone, 12)
# Restore the trained weights; as the cell's last expression, the returned
# load-status object is what the notebook displays as output.
weights_path = model_path
model.load_weights(weights_path)

2024-03-25 15:41:54.285864: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-03-25 15:41:54.441980: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-03-25 15:41:54.442017: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-03-25 15:41:54.446519: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-03-25 15:41:54.446579: I external/local_xla/xla/stream_executor

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x7f3a9c1f0160>

In [None]:
# Perform prediction on the recorded video; predictions_results is the flat array of
# predicted class names returned by predict_video (default 224x224 frame size).
predictions_results = predict_video(video_path, model)

In [None]:
def save_video_with_class_names(video_path, class_names, output_path=None, fps=None):
    """
    Write a copy of a video with a class-name label drawn on each frame.

    Frame ``i`` of the input is labelled with ``class_names[i]``. Frames beyond the
    end of ``class_names`` are not written, so the output stops with the labels.

    Args:
        video_path (str): Path of the input video.
        class_names (sequence): Class name for each frame, in frame order.
        output_path (str, optional): Destination file. Defaults to the module-level
            ``inf_video_path``.
        fps (float, optional): Output frame rate. Defaults to the frame rate reported
            by the input video, falling back to 20.0 when none is reported.

    Returns:
        None. Prints an error message and returns early if the input or output
        video cannot be opened.
    """
    if output_path is None:
        output_path = inf_video_path

    cap = cv2.VideoCapture(video_path)
    out = None
    try:
        if not cap.isOpened():
            print("Error: Could not open input video for reading.")
            return

        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        if fps is None:
            # Keep the source's playback speed; some containers report 0 (or NaN),
            # in which case fall back to the previous fixed default.
            source_fps = cap.get(cv2.CAP_PROP_FPS)
            fps = source_fps if source_fps and source_fps > 0 else 20.0

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        if not out.isOpened():
            print("Error: Could not open output video for writing.")
            return

        frame_count = 0
        # Stop as soon as the labels run out: unlabelled frames are never written.
        while cap.isOpened() and frame_count < len(class_names):
            ret, frame = cap.read()
            if not ret:
                break

            # The decoded frame is not reused, so draw directly on it.
            cv2.rectangle(frame, (10, 10), (200, 40), (255, 255, 255), -1)  # Rectangle background
            cv2.putText(frame, str(class_names[frame_count]), (20, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2)  # Class name text
            out.write(frame)
            frame_count += 1
    finally:
        # Release both handles on every exit path, including the early returns above.
        if out is not None:
            out.release()
        cap.release()

# Combine the source video and the predicted class names into the annotated output video
save_video_with_class_names(video_path, predictions_results)