In [None]:
from pathlib import Path

import cv2
import numpy as np
from PIL import Image, ImageEnhance
from ultralytics import YOLO
import supervision as sv


In [None]:
def transform(image, sharpness=6, contrast=6, size=(1024, 1024)):
    '''
    Sharpen an image, boost its contrast, and resize it.

    The steps are applied in this fixed order: sharpness enhancement,
    then contrast enhancement on the sharpened image, then resize.
    Called with only ``image``, the result is the same as the previous
    hard-coded version (factor 6 for both enhancements, 1024x1024 output).

    Args:
        image: PIL image to enhance.
        sharpness: Factor passed to ``ImageEnhance.Sharpness(...).enhance``.
        contrast: Factor passed to ``ImageEnhance.Contrast(...).enhance``.
        size: ``(width, height)`` passed to ``Image.resize``. The resize
            ignores the input aspect ratio.

    Returns:
        The enhanced, resized image returned by ``Image.resize``.
    '''
    sharpened = ImageEnhance.Sharpness(image).enhance(sharpness)
    # Contrast is adjusted on the already-sharpened image, not on the input.
    contrasted = ImageEnhance.Contrast(sharpened).enhance(contrast)
    return contrasted.resize(tuple(size))


In [None]:
# Enhance every frame of the input video with transform() and write the
# result to a new 1024x1024 mp4 file.
input_video_path = 'videos/video.mp4'
output_video_path = 'videos/video_3.mp4'

cap = cv2.VideoCapture(input_video_path)
if not cap.isOpened():
    cap.release()
    # Raise rather than call exit(): exit() would tear down the
    # interpreter/kernel instead of just failing this cell.
    raise OSError(f"Error: Couldn't open video file: {input_video_path}")

out = None
try:
    # Get the video properties
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)

    # Define the codec and create a VideoWriter object to save the output video
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # 'mp4v' codec for mp4 files
    # The writer's frame size must match what transform() produces (1024x1024).
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (1024, 1024))
    if not out.isOpened():
        # Without this check a codec/path problem would go unnoticed and the
        # cell would still report success.
        raise OSError(
            f"Error: Couldn't open output video file for writing: {output_video_path}"
        )

    # Process each frame of the video
    while True:
        ret, frame = cap.read()
        if not ret:
            break  # End of video (or read failure)

        # OpenCV frames are BGR; convert to RGB before handing them to PIL
        frame_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        # Apply the transformation (sharpness + contrast + resize)
        transformed_frame = transform(frame_pil)

        # Convert the transformed image back to OpenCV's BGR layout
        transformed_frame_cv = cv2.cvtColor(np.array(transformed_frame), cv2.COLOR_RGB2BGR)

        # Write the transformed frame to the output video
        out.write(transformed_frame_cv)
finally:
    # Always release the capture and writer, even if a frame fails mid-way,
    # so the file handles are not leaked.
    cap.release()
    if out is not None:
        out.release()

print(f"Video saved as {output_video_path}")


In [None]:
# Run the trained YOLO model over the enhanced video and save one annotated
# image per frame.

# Load YOLO model (using the trained model file)
model = YOLO("model_tiny_mambayolo/best.pt")

# Read metadata of the enhanced video
video_info = sv.VideoInfo.from_video_path(output_video_path)

# Set to True to display each annotated frame; off by default because a video
# produces one display call per frame.
show_results = False

# Annotated frames are written here, one file per frame.
results_dir = Path("results")
results_dir.mkdir(parents=True, exist_ok=True)

# Run inference on the video using YOLO, stream the results
results = model(output_video_path, stream=True)

# Process results and extract useful information
for frame_idx, result in enumerate(results):
    boxes = result.boxes  # Boxes object for bounding box outputs
    masks = result.masks  # Masks object for segmentation outputs
    keypoints = result.keypoints  # Keypoints object for pose outputs
    probs = result.probs  # Probabilities for classification outputs
    obb = result.obb  # Oriented bounding boxes object for OBB outputs

    # Optional: display the annotated frame
    if show_results:
        result.show()

    # Save each frame under its own indexed name; a fixed filename would be
    # overwritten on every iteration, leaving only the last frame on disk.
    result.save(filename=str(results_dir / f"result_{frame_idx:05d}.jpg"))
