In [1]:
# Import necessary libraries
import cv2
import numpy as np
from tensorflow.keras.models import load_model


2024-06-20 11:05:33.622622: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Restore the trained UNet from its saved .h5 file. `model` is consumed by the
# inference loop further down the notebook.
model_path = 'unet_model_final.h5'
model = load_model(filepath=model_path)


2024-06-20 11:05:34.855640: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:268] failed call to cuInit: CUDA_ERROR_UNKNOWN: unknown error
2024-06-20 11:05:34.855670: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:168] retrieving CUDA diagnostic information for host: harsh
2024-06-20 11:05:34.855678: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:175] hostname: harsh
2024-06-20 11:05:34.855801: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:199] libcuda reported version is: 555.42.2
2024-06-20 11:05:34.855822: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:203] kernel reported version is: 555.42.2
2024-06-20 11:05:34.855828: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:309] kernel version seems to match DSO: 555.42.2


In [3]:
# Function to preprocess a video frame for inference
def preprocess_frame(frame, target_size=(256, 256)):
    """Prepare a single video frame for UNet inference.

    Args:
        frame: Image array, e.g. as returned by ``cv2.VideoCapture.read``.
        target_size: ``(width, height)`` tuple handed to ``cv2.resize``.
            Defaults to ``(256, 256)``, the size that was previously
            hard-coded; it must match the model's expected input size.

    Returns:
        The resized frame divided by 255.0, with a leading batch axis of
        length 1 added so it can be passed directly to ``model.predict``.
    """
    # NOTE(review): OpenCV decodes frames in BGR channel order; this assumes the
    # model was trained on images in the same order -- confirm against training.
    resized = cv2.resize(frame, target_size)
    scaled = resized / 255.0  # Scale by 1/255 (maps 8-bit pixel values into [0, 1])
    return np.expand_dims(scaled, axis=0)  # Add batch dimension

# Function to postprocess the predicted mask to match the frame size
def postprocess_mask(predicted_mask, original_size, threshold=0.5):
    """Resize a predicted mask to the frame size and binarise it.

    Args:
        predicted_mask: 2-D array of per-pixel scores produced by the model
            (presumably sigmoid probabilities -- confirm against the model's
            output layer).
        original_size: ``(width, height)`` of the target frame, in the order
            ``cv2.resize`` expects for its ``dsize`` argument.
        threshold: Scores strictly greater than this value become 1, all
            others 0. Defaults to 0.5, the value that was previously
            hard-coded.

    Returns:
        ``np.uint8`` array of the requested size containing only 0 and 1.
    """
    # Resize first, then threshold, so interpolated edge values are decided by
    # the same cut-off as every other pixel.
    predicted_mask_resized = cv2.resize(predicted_mask, original_size)
    return (predicted_mask_resized > threshold).astype(np.uint8)

# Function to overlay the predicted mask on the frame
def overlay_mask(frame, mask, overlay_color=(0, 255, 0)):
    """Return a copy of ``frame`` with the masked pixels painted a solid colour.

    Args:
        frame: Image array; it is copied, never modified in place.
        mask: Array used to index ``frame``; positions whose value equals 1
            are painted.
        overlay_color: Value assigned to every selected pixel. Defaults to
            ``(0, 255, 0)``.

    Returns:
        The painted copy of ``frame``.
    """
    selected = (mask == 1)
    painted = frame.copy()
    painted[selected] = overlay_color
    return painted


In [4]:
# Define paths
video_path = 'video_test/challenge.mp4'
output_video_path = 'output_video_with_overlay.mp4'  # Path to save the output video
overlay_color = (0, 255, 0)  # Overlay color (green)

# Open the input video and fail fast if it cannot be read; otherwise the loop
# below would run zero times and still report that a video was saved.
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise IOError(f"Could not open input video: {video_path}")

out = None
try:
    # Mirror the input's frame rate and geometry in the output video.
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Define the codec for the output video
    out = cv2.VideoWriter(output_video_path, fourcc, cap.get(cv2.CAP_PROP_FPS),
                          (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))))
    if not out.isOpened():
        raise IOError(f"Could not open output video for writing: {output_video_path}")

    # Frame count as reported by the container. Kept for reference only: it is
    # metadata and may be 0 or inaccurate, so it is not used to bound the loop.
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    # Process frames until the decoder reports that no more are available.
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Preprocess the frame
        preprocessed_frame = preprocess_frame(frame)

        # Run inference. verbose=0 suppresses the per-call progress bar, which
        # would otherwise be printed once for every frame of the video.
        prediction = model.predict(preprocessed_frame, verbose=0)
        # First (only) batch item, first channel of the model output.
        predicted_mask = prediction[0, ..., 0]

        # Postprocess the predicted mask; cv2 expects the size as (width, height)
        mask = postprocess_mask(predicted_mask, (frame.shape[1], frame.shape[0]))

        # Overlay the mask on the frame
        overlayed_frame = overlay_mask(frame, mask, overlay_color)

        # Write the overlayed frame to the output video
        out.write(overlayed_frame)
finally:
    # Release the video objects even if inference or writing raised, so the
    # input handle is freed and the output container is finalised.
    cap.release()
    if out is not None:
        out.release()

print(f"Output video saved to: {output_video_path}")


Output video saved to: output_video_with_overlay.mp4
