In [1]:
from pathlib import Path

from dynamic_fusion.utils.dataset import CocoTestDataset

# --- Dataset configuration ---------------------------------------------------
# Scale values; the spatial scale is passed to the dataset below as a
# (spatial_scale, spatial_scale) pair.
spatial_scale = 1
temporal_scale = 1

# Empty list, not filled in by this cell.
metrics_temporal_scale = []

# Location of the data, relative to the notebook's working directory.
dataset_path = Path('..') / 'data' / 'interim' / 'coco' / 'train' / '2subbins'

dataset = CocoTestDataset(
    dataset_path,
    (spatial_scale,) * 2,
    threshold=1.35,
)

# Last expression of the cell: display the metadata of sample 0.
dataset.get_metadata(0)

  from .autonotebook import tqdm as notebook_tqdm


(1.0948929518178372, (820.0602249998783, 14829.065615176862))

In [4]:
from matplotlib import pyplot as plt
import numpy as np

from dynamic_fusion.utils.network import to_numpy
from dynamic_fusion.utils.visualization import create_red_blue_cmap, img_to_colormap
import cv2

import einops

import numpy as np
import torch

from dynamic_fusion.utils.video import get_video

# --- Configuration -----------------------------------------------------------
# Selects which group of tensors is unpacked from each dataset sample below and
# whether the rendered image is resized to the event-frame size.
downscaled = False
FPS = 30
N_aps_frames = 10 * FPS  # number of frames requested from get_video per sample

# Divisor used in the "events per second" estimate. The original note next to
# the writer said "In total, should be 20 seconds".
# NOTE(review): N_aps_frames / FPS gives 10 s of playback, not 20 -- confirm
# which duration the rate estimate is meant to use.
SEQUENCE_SECONDS = 20

DIR = '6dof'
if downscaled:
    DIR += '_downscaled'

(Path('dynamic_fusion') / DIR).mkdir(parents=True, exist_ok=True)

# --- Loop-invariant settings (hoisted out of the per-sample / per-frame loops) -
# Use CUDA when present; fall back to CPU so the cell still runs without a GPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
Ts_normalized = np.linspace(0, 1, N_aps_frames)
red_blue_cmap = create_red_blue_cmap(501)

font = cv2.FONT_HERSHEY_SIMPLEX
font_scale = 0.4 if not downscaled else 0.2
font_color = (255, 255, 255)  # White color
line_type = 1
# cv2.putText anchors each string at its bottom-left corner.
position = (10, 10)
position_1 = (10, 30)
position_2 = (10, 50)
position_3 = (10, 70)

# --- Render one video per dataset sample --------------------------------------
for I in range(0, 7):
    # Each sample unpacks to ten items; which slots hold the event polarity
    # sums and the event counts depends on `downscaled`.
    if not downscaled:
        epss,_,_,counts,_,_,_,_,preprocessed_image, transforms = dataset[I]
    else:
        _,_,_,_,epss,_,_,counts,preprocessed_image, transforms = dataset[I]

    exponentiation_multiplier, illuminance_range = dataset.get_metadata(I)
    epss = to_numpy(epss)
    counts = to_numpy(counts)
    video = get_video(preprocessed_image, transforms, Ts_normalized, downscaled, not downscaled, device)
    video = to_numpy(video)

    # epss = einops.reduce(epss, "(C 2) B X Y -> C B X Y", 'sum')
    # counts = einops.reduce(counts, "(C 2) B X Y -> C B X Y", 'sum')

    # Output frame = image stacked on top of the event visualisation, so the
    # writer needs twice the event-frame height. VideoWriter takes (width, height).
    size = tuple(epss[0].shape[-2:])
    out = cv2.VideoWriter(f"dynamic_fusion/{DIR}/{downscaled=}_{I}.mp4", cv2.VideoWriter_fourcc(*"mp4v"), FPS, (size[1], size[0]*2))
    if not out.isOpened():
        # Without this check a codec/path problem silently yields an empty file.
        raise RuntimeError(f"Could not open video writer for sample {I}")

    try:
        for i, image in enumerate(video):
            # Map the video frame index onto the (possibly shorter) event-frame axis.
            i_event_frame = (epss.shape[0] * i) // N_aps_frames
            eps, count = epss[i_event_frame], counts[i_event_frame]

            colored_event_polarity_sum = img_to_colormap(eps.sum(axis=0), red_blue_cmap)

            # Mean over pixels of the counts summed over the first axis.
            evr_frame = count.sum(axis=0).mean()
            # NOTE(review): this scales by the number of *video* frames, while
            # evr_frame is measured per *event* frame -- confirm this is intended
            # when epss.shape[0] != len(video).
            evr = evr_frame / SEQUENCE_SECONDS * len(video)

            image_processed = cv2.cvtColor((image * 255).astype(np.uint8), cv2.COLOR_GRAY2BGR)
            if downscaled:
                # Bring the image to the event-frame size so the two can be stacked.
                image_processed = cv2.resize(image_processed, (colored_event_polarity_sum.shape[1], colored_event_polarity_sum.shape[0]))

            frame_processed = (colored_event_polarity_sum * 255).astype(np.uint8)
            frame_processed = np.concatenate((image_processed, frame_processed), axis=0)

            overlay_lines = (
                (f"Events per second per pixel={evr:.2f}", position),
                (f"Events per frame per pixel={evr_frame:.2f}", position_1),
                (f"Illuminance range={illuminance_range[0]:.2f}-{illuminance_range[1]:.2f}", position_2),
                (f"Exponentiation multiplier={exponentiation_multiplier:.2f}", position_3),
            )
            for text, origin in overlay_lines:
                cv2.putText(frame_processed, text, origin, font, font_scale, font_color, line_type)

            out.write(frame_processed)
    finally:
        # Always finalise the file, even if rendering a frame raised.
        out.release()


In [5]:
import cv2
import numpy as np
import os

# Directory containing videos
video_dir = f'dynamic_fusion/{DIR}/'
# Output video file path
output_video_path = f'dynamic_fusion/{DIR}/video.mp4'

# Collect the input clips. The combined output lives in the same directory, so
# it must be excluded: otherwise a re-run would open the previous result as an
# input while overwriting it. Sorting makes the left-to-right panel order
# deterministic (os.listdir returns entries in arbitrary order).
output_name = os.path.basename(output_video_path)
video_files = sorted(
    f for f in os.listdir(video_dir)
    if f.endswith('.mp4') and f != output_name
)
video_paths = [os.path.join(video_dir, f) for f in video_files]
if not video_paths:
    raise FileNotFoundError(f'No input .mp4 files found in {video_dir}')

# Open video files
caps = [cv2.VideoCapture(vp) for vp in video_paths]
out = None
try:
    unopened = [vp for vp, cap in zip(video_paths, caps) if not cap.isOpened()]
    if unopened:
        raise IOError(f'Could not open input videos: {unopened}')

    # Determine the width, height, and FPS from the first video (assuming all are the same)
    width, height = int(caps[0].get(cv2.CAP_PROP_FRAME_WIDTH)), int(caps[0].get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = caps[0].get(cv2.CAP_PROP_FPS)

    # Find the length of the longest video
    max_length = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) for cap in caps)

    # Define the codec and create VideoWriter object
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # or 'XVID'
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (width * len(caps), height))
    if not out.isOpened():
        raise IOError(f'Could not open {output_video_path} for writing')

    # Read frames from each video, stack horizontally, and write to the output video
    for frame_index in range(max_length):
        frames = []
        for cap in caps:
            ret, frame = cap.read()
            if not ret:
                # This video has no more frames: pad its panel with a black frame
                frame = np.zeros((height, width, 3), np.uint8)
            frames.append(frame)

        # Stack frames horizontally
        stacked_frame = np.hstack(frames)

        # Write the stacked frame to the output video
        out.write(stacked_frame)
finally:
    # Release everything when done, including when an error interrupted the loop
    for cap in caps:
        cap.release()
    if out is not None:
        out.release()

print(f'Output video has been saved to {output_video_path}')

Output video has been saved to dynamic_fusion/6dof/video.mp4
