In [2]:
import numpy as np
import cv2
import matplotlib.pyplot as plt
from tf_raft.model import RAFT
import os
import re

In [3]:
def natural_key(string):
    """Sort strings containing numbers in a natural order."""
    return [int(text) if text.isdigit() else text.lower() for text in re.split(r'(\d+)', string)]

In [2]:
def flow_to_rgb(flow, max_flow=None):
    """Convert flow to RGB image for visualization."""
    h, w = flow.shape[:2]
    flow_map = np.zeros((h, w, 3), dtype=np.float32)
    
    # Compute the magnitude and angle of the flow
    magnitude, angle = cv2.cartToPolar(flow[..., 0], flow[..., 1])
    
    # Print flow statistics for debugging
    print("Flow magnitude stats - min: {:.4f}, max: {:.4f}, mean: {:.4f}".format(
        np.min(magnitude), np.max(magnitude), np.mean(magnitude)
    ))
    
    # Normalize the magnitude
    epsilon = 1e-5
    if max_flow is None:
        max_flow = max(np.max(magnitude), epsilon)
    magnitude = np.clip(magnitude / max_flow, 0, 1)
    
    # Angle is in radians, convert to degrees
    angle = angle * 180 / np.pi / 2
    
    # Build the HSV image
    flow_map[..., 0] = angle
    flow_map[..., 1] = 1
    flow_map[..., 2] = magnitude
    
    # Convert HSV to RGB
    flow_map = cv2.cvtColor(flow_map, cv2.COLOR_HSV2BGR)
    
    return flow_map

In [3]:
# Load and preprocess your images
def load_image(path):
    image = cv2.imread(path)
    return image

def preprocess_image(image, target_size=(448, 512)):
    image = cv2.resize(image, target_size)
    # Convert the first frame to grayscale
    # image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    image = image.astype(np.float32)
    image = image[np.newaxis, ...]  # Add batch dimension
    return image

In [4]:
def load_raft_model(checkpoint_path, iters=12, iters_pred=12):
    # Initialize the RAFT model
    raft = RAFT(iters=iters, iters_pred=iters_pred)

    try:
        # Load the pre-trained weights without restoring the optimizer state
        raft.load_weights(checkpoint_path).expect_partial()
        print("Checkpoint loaded successfully.")
        return raft
    except Exception as e:
        print("Error loading checkpoint:", e)
        return None, None


In [5]:
# Initialize the RAFT model and perform inference
def compute_raft_flow(raft_model, x1, x2):

    # Perform inference
    try:
        # Assuming x1 and x2 have shape (channel, height, width)
        # x1 = np.transpose(x1, (1, 2, 0))  # Convert to (height, width, channel)
        # x2 = np.transpose(x2, (1, 2, 0))  # Convert to (height, width, channel)
        # x_combined = np.stack([x1, x2], axis=0)
        
        # print(x_combined.shape)
        flow_predictions = raft_model([x1, x2], training=False)
        # Get the final flow prediction
        flow = flow_predictions[-1].numpy().squeeze()
    except Exception as e:
        print("Here?")
        print("Error during inference:", e)
        return None, None

    # Convert flow to RGB for visualization
    flow_rgb = flow_to_rgb(flow)

    return flow_rgb, flow

In [6]:
# Function to process optical flow and extract bounding boxes
def process_optical_flow(flow):
    if flow is None:
        print("Flow is None. Unable to process optical flow.")
        return None  # or handle appropriately
    magnitude, angle = cv2.cartToPolar(flow[..., 0], flow[..., 1])
    threshold = 1.0  # Adjust threshold as needed
    significant_motion = magnitude > threshold
    
    contours, _ = cv2.findContours(significant_motion.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    bboxes = [cv2.boundingRect(contour) for contour in contours]
    
    return bboxes


In [44]:
def read_image():
    # Paths to your images and checkpoint
    image1_path = 'images/image1.png'
    image2_path = 'images/image2.png'
    # checkpoint_path = 'checkpoints_flyingChairs/model'
    checkpoint_path = 'checkpoints_mpiSintel/model'
    
    raft_model = load_raft_model(checkpoint_path)
    
    # Load the images
    image1 = load_image(image1_path)
    image1 = preprocess_image(image1)
    image2 = load_image(image2_path)
    image2 = preprocess_image(image2)
    
    
    # Run RAFT and get the flow visualization
    flow_rgb, flow = compute_raft_flow(raft_model, image1, image2)
    
    if flow_rgb is not None:
        # Display the result using Matplotlib
        plt.figure(figsize=(10, 5))

        plt.subplot(1, 2, 1)
        plt.imshow(flow_rgb)
        plt.title('Optical Flow Visualization')
        plt.axis('off')

        plt.subplot(1, 2, 2)
        plt.hist(flow.flatten(), bins=50)
        plt.title('Flow Value Distribution')

        plt.tight_layout()
        plt.show()

        # Save the result
        # plt.imsave('sintel_results/optical_flow_result_mpiSintel_iter=30.png', flow_rgb)
        # plt.imsave('flying_chair_results/optical_flow_result_flyingChair_iter=12.png', flow_rgb)
        print("Optical flow visualization saved as 'optical_flow_result.png'.")
    else:
        print("Flow visualization could not be generated.")

In [45]:
def read_video():
    # Load the video
    cap = cv2.VideoCapture('video.mp4')

    # Read the first frame
    ret, frame1 = cap.read()
    if not ret:
        print("Failed to read the video")
        exit()
        
    prvs = preprocess_image(frame1)
    
    checkpoint_path = 'checkpoints_mpiSintel/model'
    # Initialize RAFT model
    raft_model = load_raft_model(checkpoint_path)
    i = 1
    while(cap.isOpened()):
        print("cap number: ", i)
        ret, frame2 = cap.read()
        if not ret:
            break
            
        nxt = preprocess_image(frame2)
        
        # Compute optical flow
        flow_rgb, flow = compute_raft_flow(raft_model, prvs, nxt)

        # Process optical flow
        bboxes = process_optical_flow(flow)

        # Update previous frame
        prvs = nxt

        # Display results
        for bbox in bboxes:
            x, y, w, h = bbox
            cv2.rectangle(frame2, (x, y), (x+w, y+h), (0, 255, 0), 2)

        if flow_rgb is not None:
            # Display the result using Matplotlib
            plt.figure(figsize=(10, 5))

            plt.subplot(1, 2, 1)
            plt.imshow(flow_rgb)
            plt.title('Optical Flow Visualization')
            plt.axis('off')

            plt.subplot(1, 2, 2)
            plt.hist(flow.flatten(), bins=50)
            plt.title('Flow Value Distribution')

            plt.tight_layout()
            plt.show()

            # Save the result
            # plt.imsave('sintel_results/optical_flow_result_mpiSintel_iter=30.png', flow_rgb)
            # plt.imsave('flying_chair_results/optical_flow_result_flyingChair_iter=12.png', flow_rgb)
            # print("Optical flow visualization saved as 'optical_flow_result.png'.")
        else:
            print("Flow visualization could not be generated.")
        i += 1

    cap.release()

In [25]:
def read_seq_images():
    # Paths to the images and checkpoint
    images_path = 'cranehall_images/cranehall'
    # checkpoint_path = 'checkpoints_flyingChairs/model'
    checkpoint_path = 'checkpoints_mpiSintel/model'
    
    raft_model = load_raft_model(checkpoint_path)
    
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')
    filenames = sorted([f for f in os.listdir(images_path) if f.lower().endswith(image_extensions)], key=natural_key)
    
    prev_frame = None
    i = 0
    # Load the images
    for filename in filenames:
        print("optical flow number: ", i)
        image_path = os.path.join(images_path, filename)
        image = load_image(image_path)
        frame = preprocess_image(image)
    
        if prev_frame is not None:
            # Convert frames to grayscale or preprocess as needed
            # frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            # prev_frame_gray = cv2.cvtColor(previous_frame, cv2.COLOR_BGR2GRAY)

            # Compute optical flow between consecutive frames
            flow_rgb, flow = compute_raft_flow(raft_model, prev_frame, frame)

            if flow_rgb is not None:
                # Display the result using Matplotlib
                plt.figure(figsize=(10, 5))

                plt.subplot(1, 2, 1)
                plt.imshow(flow_rgb)
                plt.title('Optical Flow Visualization')
                plt.axis('off')

                plt.subplot(1, 2, 2)
                plt.hist(flow.flatten(), bins=50)
                plt.title('Flow Value Distribution')

                plt.tight_layout()
                # plt.show()

                # Save the result
                result_name = "trolley0_MPISintel_iter_20_results/optical_flow_" + str(i) + ".png"
                plt.imsave(result_name, flow_rgb)
                # plt.imsave('flying_chair_results/optical_flow_result_flyingChair_iter=12.png', flow_rgb)
                # print("Optical flow visualization saved as 'optical_flow_result.png'.")
            else:
                print("Flow visualization could not be generated.")
        i += 1
        prev_frame = frame


In [None]:
read_seq_images()

In [4]:
from ultralytics import YOLO

# Load a model
model = YOLO("yolov8n.pt")  # load a pretrained model (recommended for training)

images_path = 'cranehall_images'
image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')
filenames = sorted([f for f in os.listdir(images_path) if f.lower().endswith(image_extensions)], key=natural_key)

prev_frame = None
i = 0

# Use the model
for filename in filenames:
    print("Image number: ", i)
    image_path = os.path.join(images_path, filename)
    # image = load_image(image_path)
    # frame = preprocess_image(image)

    results = model(source=image_path, save=True, conf=0.4)  # predict on an image
#path = model.export(format="onnx")  # export the model to ONNX format

Image number:  0


  return torch._C._cuda_getDeviceCount() > 0



image 1/1 /cnvrg/cranehall_images/trolley0_cam_trolley_right_2024-07-02T15_05_01_624Z.png: 512x640 (no detections), 194.1ms
Speed: 2.7ms preprocess, 194.1ms inference, 0.8ms postprocess per image at shape (1, 3, 512, 640)
Results saved to [1mruns/detect/predict2[0m
Image number:  0

image 1/1 /cnvrg/cranehall_images/trolley0_cam_trolley_right_2024-07-02T15_05_01_824Z.png: 512x640 (no detections), 172.2ms
Speed: 2.5ms preprocess, 172.2ms inference, 0.6ms postprocess per image at shape (1, 3, 512, 640)
Results saved to [1mruns/detect/predict2[0m
Image number:  0

image 1/1 /cnvrg/cranehall_images/trolley0_cam_trolley_right_2024-07-02T15_05_02_124Z.png: 512x640 (no detections), 177.2ms
Speed: 2.6ms preprocess, 177.2ms inference, 0.6ms postprocess per image at shape (1, 3, 512, 640)
Results saved to [1mruns/detect/predict2[0m
Image number:  0

image 1/1 /cnvrg/cranehall_images/trolley0_cam_trolley_right_2024-07-02T15_05_02_424Z.png: 512x640 (no detections), 167.0ms
Speed: 2.5ms prep