In [None]:
import cv2
import numpy as np
import os
from tqdm import tqdm

def extract_protein_name(file_path):
    """
    Build the protein label from a video file name.

    The label is made of the first two underscore-separated tokens of the
    file's base name. Anything after the first '-' in the second token
    (e.g. "-RT") is dropped.

    Raises:
        ValueError: If the base name contains fewer than two tokens.
    """
    base_name = os.path.basename(file_path)
    parts = base_name.split('_')
    if len(parts) >= 2:
        first, second = parts[0], parts[1]
        # partition() keeps only the text before the first '-'.
        return "_".join((first, second.partition('-')[0]))
    raise ValueError(f"Filename {base_name} does not have enough tokens to extract protein name.")

def synchronize_videos(videos_dict, time_multiplier, grid_shape, channel, fps_output=30.0, output_file_path="combined_video.avi"):
    """
    Synchronizes multiple video files on a common time axis and writes them as one grid video.

    The output time step is the smallest effective frame duration among the videos
    multiplied by time_multiplier. The output spans the longest video
    (frame count * frame duration), rounded up to a whole number of time steps so that
    no movie is cut short. A video that ends before the others keeps showing its last
    frame, and grid cells without a video are filled with black.

    Parameters:
        videos_dict (dict): Keys are video file paths, values are effective frame durations
                            (in seconds) for each video.
        time_multiplier (float): Multiplier to the base time step (min of frame durations).
                                 For maximum resolution, use 1.
        grid_shape (tuple): (rows, columns) specifying the grid layout for the output video.
        channel (str): Label inserted into the output file name
                       ("<proteins>_<channel>_synced.avi").
        fps_output (float, optional): Output video playback frames per second. Defaults to 30.0.
        output_file_path (str, optional): A file path whose directory is used for saving the
                                          output video. The final file name is built from the
                                          protein names and the channel.

    Returns:
        None. The output video is saved to a file in the specified directory.
        Also prints the total playback duration (in seconds) of the output grid video.

    Raises:
        ValueError: If videos_dict is empty, if time_multiplier, fps_output or any frame
                    duration is not positive, if the grid has fewer cells than there are
                    videos, or if a video cannot be opened or its first frame cannot be read.
        OSError: If the output video cannot be opened for writing.
    """
    file_paths = list(videos_dict.keys())
    frame_durations = list(videos_dict.values())

    # Validate the cheap arguments before any file is opened.
    n_videos = len(file_paths)
    if n_videos == 0:
        raise ValueError("videos_dict must contain at least one video.")
    if time_multiplier <= 0:
        raise ValueError("time_multiplier must be positive.")
    if fps_output <= 0:
        raise ValueError("fps_output must be positive.")
    if any(fd <= 0 for fd in frame_durations):
        raise ValueError("All frame durations must be positive.")
    n_rows, n_cols = grid_shape[0], grid_shape[1]
    grid_cells = n_rows * n_cols
    if grid_cells < n_videos:
        raise ValueError("Grid shape is too small for the number of videos provided.")

    # Build the output file name from the protein names; only the directory part of
    # output_file_path is used.
    protein_names = [extract_protein_name(fp) for fp in file_paths]
    proteins_combined = "-".join(protein_names)
    out_dir = os.path.dirname(output_file_path)
    final_output_file = os.path.join(out_dir, f"{proteins_combined}_{channel}_synced.avi")

    caps = []
    out = None
    try:
        for fp in file_paths:
            cap = cv2.VideoCapture(fp)
            caps.append(cap)  # register first so the finally-clause releases it
            if not cap.isOpened():
                raise ValueError(f"Could not open video: {fp}")

        # Total time of each video = frame count * effective frame duration.
        num_frames_list = [int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) for cap in caps]
        total_times = [n * fd for n, fd in zip(num_frames_list, frame_durations)]
        # NEVER CUT SHORT: the longest video defines the synchronized duration.
        total_time_sync = max(total_times)

        base_time_step = min(frame_durations)
        time_step = base_time_step * time_multiplier

        # Round up so the tail of the longest video is still sampled; flooring would
        # drop it and would produce zero frames when time_step exceeds the total time.
        num_output_frames = int(np.ceil(total_time_sync / time_step))

        playback_duration_sec = num_output_frames / fps_output
        print(f"Movie will be {playback_duration_sec:.2f} seconds long.")

        # Read the first frame of each video to learn its dimensions.
        dimensions = []
        for cap in caps:
            cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
            ret, frame = cap.read()
            if not ret:
                raise ValueError("Failed to read the first frame from one or more videos.")
            dimensions.append(frame.shape[:2])

        # Every cell uses the largest height and width found among the videos.
        cell_height = max(h for h, w in dimensions)
        cell_width = max(w for h, w in dimensions)
        output_frame_height = n_rows * cell_height
        output_frame_width = n_cols * cell_width

        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter(final_output_file, fourcc, fps_output, (output_frame_width, output_frame_height))
        if not out.isOpened():
            raise OSError(f"Could not open output video for writing: {final_output_file}")

        black = np.zeros((cell_height, cell_width, 3), dtype=np.uint8)
        padding = [black] * (grid_cells - n_videos)  # unused grid cells
        # (source frame index, resized frame) last shown per video, so an unchanged
        # index does not trigger another seek + decode.
        last_shown = [(None, None)] * n_videos

        for i in tqdm(range(num_output_frames), desc="Processing frames", unit="frame"):
            current_time = i * time_step
            frames = []
            for idx, (cap, fd) in enumerate(zip(caps, frame_durations)):
                # Hold the last frame once a video has ended; never index below frame 0
                # (the frame count can be reported as 0).
                frame_idx = max(0, min(int(current_time / fd), num_frames_list[idx] - 1))
                shown_idx, cell = last_shown[idx]
                if shown_idx != frame_idx:
                    cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
                    ret, frame = cap.read()
                    if not ret:
                        # Unreadable frame: show a black frame of the video's own size.
                        frame = np.zeros((dimensions[idx][0], dimensions[idx][1], 3), dtype=np.uint8)
                    cell = cv2.resize(frame, (cell_width, cell_height))
                    last_shown[idx] = (frame_idx, cell)
                frames.append(cell)

            # Lay the cells out row by row, then stack the rows.
            cells = frames + padding
            grid_rows = [np.hstack(cells[r * n_cols:(r + 1) * n_cols]) for r in range(n_rows)]
            out.write(np.vstack(grid_rows))
    finally:
        # Release every handle even when an error interrupts processing.
        for cap in caps:
            cap.release()
        if out is not None:
            out.release()

    print(f"Output saved as: {final_output_file}")

# Example usage: combine several movies into one synchronized grid video.
# Only the directory part of this path is used for the output (the trailing slash keeps
# os.path.dirname from stripping the last folder). The later frame-directory cells reuse
# this variable, so this assignment has to run before them.
output_file_path = "../../../../Thomson Lab Dropbox/David Larios/activedrops/main/all/movies/"


# Keys: video file paths. Values: effective frame duration of that video in seconds.
# NOTE(review): the first key is a glob pattern for PNG frames, not a video file.
# synchronize_videos hands every key to cv2.VideoCapture and derives the protein name from
# the base name, which here is "heatmap_frame_*.png" and yields "heatmap_frame" instead of
# "BBB_Rep1". Confirm that cv2.VideoCapture can open this pattern, or point this entry at
# the BBB .avi file instead.
videos = {
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/BBB_Rep1_heatmaps_cy5/heatmap_frame_*.png": 240,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/BBO_Rep1_cy5_60fps_1008frames.avi": 160,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/BOB_Rep1_cy5_60fps_743frames.avi": 150,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/BOO_Rep1_cy5_60fps_743frames.avi": 150,
    # Disabled entries (would need a grid with more cells than the (1, 4) grid below):
    # "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/OBB_Rep1_cy5_60fps_481frames.avi": 300,
    # "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/OBO_Rep1_cy5_60fps_481frames.avi": 300,
    # "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/OOB_Rep1_cy5_60fps_507frames.avi": 32,
    # "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/OOO_Rep1_cy5_60fps_675frames.avi": 18,

}


# For maximum temporal resolution, use a multiplier of 1.
# Increase the multiplier to reduce the number of output frames.
# With the four active entries the base step is 150 s, so 50 gives one output frame
# per 7500 s of experiment time.
time_multiplier = 50
# One row of four cells, one per active entry in `videos`.
grid_shape = (1, 4)

synchronize_videos(videos, time_multiplier, grid_shape, channel="cy5", fps_output=30, output_file_path=output_file_path)


In [None]:
import cv2
import numpy as np
import os
import glob
from tqdm import tqdm

def extract_protein_name(file_path):
    """
    Build the protein label from the directory that contains the frames.

    The label is made of the first two underscore-separated tokens of the
    name of the file's parent directory. Anything after the first '-' in the
    second token (e.g. "-RT") is dropped.

    Raises:
        ValueError: If the directory name contains fewer than two tokens.
    """
    # The parent directory, not the file itself, carries the protein name.
    parent_dir = os.path.dirname(file_path)
    dir_name = os.path.basename(parent_dir)
    parts = dir_name.split('_')
    if len(parts) >= 2:
        first, second = parts[0], parts[1]
        return "_".join((first, second.partition('-')[0]))
    raise ValueError(f"Directory name {dir_name} does not have enough tokens to extract protein name.")

def synchronize_frames_from_directories(frames_dict, time_multiplier, grid_shape, channel, fps_output=30.0, output_file_path="combined_video.avi"):
    """
    Synchronizes multiple image-frame sequences on a common time axis and writes one grid video.

    Each glob pattern is expanded and its files are ordered by the integer that ends the
    file name (e.g. "heatmap_frame_55.png" -> 55). The output time step is the smallest
    frame interval multiplied by time_multiplier. The output spans the longest sequence
    (frame count * interval), rounded up to a whole number of time steps. A sequence that
    ends before the others keeps showing its last frame, and grid cells without a sequence
    are filled with black. Every image is resized to the size of the first frame of the
    first sequence.

    Parameters:
        frames_dict (dict): Keys are frame directory paths with wildcards
                           (e.g., "path/to/frames/*.png"), values are frame intervals in seconds.
        time_multiplier (float): Multiplier to the base time step (min of frame intervals).
        grid_shape (tuple): (rows, columns) specifying the grid layout.
        channel (str): Label inserted into the output file name
                       ("<proteins>_<channel>_synced.avi").
        fps_output (float): Output video playback frames per second.
        output_file_path (str): A path whose directory is used for saving the output video.

    Returns:
        None. The video is written to disk and progress information is printed.

    Raises:
        ValueError: If frames_dict is empty, if time_multiplier, fps_output or any frame
                    interval is not positive, if the grid has fewer cells than there are
                    sequences, if a pattern matches no file, or if the first frame cannot
                    be read.
        OSError: If the output video cannot be opened for writing.
    """
    frame_patterns = list(frames_dict.keys())
    frame_intervals = list(frames_dict.values())

    # Validate the cheap arguments before touching the file system.
    n_videos = len(frame_patterns)
    if n_videos == 0:
        raise ValueError("frames_dict must contain at least one frame pattern.")
    if time_multiplier <= 0:
        raise ValueError("time_multiplier must be positive.")
    if fps_output <= 0:
        raise ValueError("fps_output must be positive.")
    if any(interval <= 0 for interval in frame_intervals):
        raise ValueError("All frame intervals must be positive.")
    n_rows, n_cols = grid_shape[0], grid_shape[1]
    grid_cells = n_rows * n_cols
    if grid_cells < n_videos:
        raise ValueError("Grid shape is too small for the number of videos provided.")

    def frame_sort_key(filepath):
        # Order by the integer that ends the file stem ("heatmap_frame_55.png" -> 55),
        # not by the name string. The name breaks ties, so files without a number
        # still sort deterministically instead of in glob order.
        filename = os.path.basename(filepath)
        stem = os.path.splitext(filename)[0]
        digits = stem[len(stem.rstrip("0123456789")):]
        return (int(digits) if digits else -1, filename)

    # Expand every pattern into its sorted list of frame files.
    frame_files_per_video = []
    total_frames_list = []
    for pattern in frame_patterns:
        frame_files = sorted(glob.glob(pattern), key=frame_sort_key)
        if not frame_files:
            raise ValueError(f"No PNG files found matching pattern: {pattern}")

        frame_files_per_video.append(frame_files)
        total_frames_list.append(len(frame_files))

        # Print both ends of the sorted list so the ordering can be checked by eye.
        print(f"Pattern: {pattern}")
        print(f"  Total frames: {len(frame_files)}")
        print(f"  First 5 frames: {[os.path.basename(f) for f in frame_files[:5]]}")
        print(f"  Last 5 frames: {[os.path.basename(f) for f in frame_files[-5:]]}")
        print()

    # Build the output file name; only the directory part of output_file_path is used.
    protein_names = [extract_protein_name(fp) for fp in frame_patterns]
    proteins_combined = "-".join(protein_names)
    out_dir = os.path.dirname(output_file_path)
    final_output_file = os.path.join(out_dir, f"{proteins_combined}_{channel}_synced.avi")

    # The longest sequence defines the synchronized duration.
    total_time_sync = max(n * interval for n, interval in zip(total_frames_list, frame_intervals))
    base_time_step = min(frame_intervals)
    time_step = base_time_step * time_multiplier

    # Round up so the tail of the longest sequence is still sampled; flooring would
    # drop it and would produce zero frames when time_step exceeds the total time.
    num_output_frames = int(np.ceil(total_time_sync / time_step))

    playback_duration_sec = num_output_frames / fps_output
    print(f"Movie will be {playback_duration_sec:.2f} seconds long.")
    print(f"Base time step: {base_time_step}s, Final time step: {time_step}s")
    print(f"Total sync time: {total_time_sync/3600:.2f} hours")

    # The first frame of the first sequence fixes the cell size for the whole grid.
    first_frame = cv2.imread(frame_files_per_video[0][0])
    if first_frame is None:
        raise ValueError(f"Could not read first frame from {frame_files_per_video[0][0]}")
    cell_height, cell_width = first_frame.shape[:2]

    output_frame_height = n_rows * cell_height
    output_frame_width = n_cols * cell_width

    print(f"Individual frame dimensions: {cell_width}x{cell_height}")
    print(f"Output grid dimensions: {output_frame_width}x{output_frame_height}")

    black = np.zeros((cell_height, cell_width, 3), dtype=np.uint8)
    padding = [black] * (grid_cells - n_videos)  # unused grid cells
    # (frame index, resized image) last shown per sequence, so an unchanged index
    # (e.g. a sequence that has already ended) is not read from disk again.
    last_shown = [(None, None)] * n_videos

    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out = cv2.VideoWriter(final_output_file, fourcc, fps_output, (output_frame_width, output_frame_height))
    try:
        if not out.isOpened():
            raise OSError(f"Could not open output video for writing: {final_output_file}")

        for i in tqdm(range(num_output_frames), desc="Processing frames", unit="frame"):
            current_time = i * time_step
            frames = []

            for idx, (frame_files, frame_interval) in enumerate(zip(frame_files_per_video, frame_intervals)):
                # Nearest frame for this time, held at the last frame once the sequence ends.
                frame_idx = min(int(round(current_time / frame_interval)), total_frames_list[idx] - 1)

                shown_idx, cell = last_shown[idx]
                if shown_idx != frame_idx:
                    image = cv2.imread(frame_files[frame_idx])
                    # An unreadable file is shown as a black cell.
                    cell = black if image is None else cv2.resize(image, (cell_width, cell_height))
                    last_shown[idx] = (frame_idx, cell)
                frames.append(cell)

            # Lay the cells out row by row, then stack the rows.
            cells = frames + padding
            grid_rows = [np.hstack(cells[r * n_cols:(r + 1) * n_cols]) for r in range(n_rows)]
            out.write(np.vstack(grid_rows))
    finally:
        # Release the writer even when an error interrupts processing.
        out.release()

    print(f"Output saved as: {final_output_file}")


# Example usage with PNG frame directories.
# Keys: glob patterns matching the frames of one movie. Values: frame interval in seconds.
# NOTE: `output_file_path` is not defined in this cell; it comes from the earlier
# synchronize_videos example cell, which has to be run first.
frames_data = {
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/BBB_Rep1_heatmaps_cy5/heatmap_frame_*.png": 240,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/BBO_Rep1_heatmaps_cy5/heatmap_frame_*.png": 160,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/BOB_Rep1_heatmaps_cy5/heatmap_frame_*.png": 150,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/BOO_Rep1_heatmaps_cy5/heatmap_frame_*.png": 150,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/OBB_Rep1_heatmaps_cy5/heatmap_frame_*.png": 300,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/OBO_Rep1_heatmaps_cy5/heatmap_frame_*.png": 300,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/OOB_Rep1_heatmaps_cy5/heatmap_frame_*.png": 32,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/OOO_Rep1_heatmaps_cy5/heatmap_frame_*.png": 18,
}

# The output time step is time_multiplier times the smallest interval above (18 s).
# A multiplier of 1 gives the maximum temporal resolution; larger values reduce the
# number of output frames.
time_multiplier = 32  # 32 * 18 s = 576 s of experiment time per output frame
# Two rows of four cells, one per entry in `frames_data`.
grid_shape = (2, 4)

synchronize_frames_from_directories(frames_data, time_multiplier, grid_shape, 
                                   channel="cy5", fps_output=30, output_file_path=output_file_path)

In [None]:
import cv2
import numpy as np
import os
import glob
from tqdm import tqdm

def extract_protein_name(file_path):
    """
    Build the protein label from the directory that contains the frames.

    The label is made of the first two underscore-separated tokens of the
    name of the file's parent directory. Anything after the first '-' in the
    second token (e.g. "-RT") is dropped.

    Raises:
        ValueError: If the directory name contains fewer than two tokens.
    """
    # The parent directory, not the file itself, carries the protein name.
    parent_dir = os.path.dirname(file_path)
    dir_name = os.path.basename(parent_dir)
    parts = dir_name.split('_')
    if len(parts) >= 2:
        first, second = parts[0], parts[1]
        return "_".join((first, second.partition('-')[0]))
    raise ValueError(f"Directory name {dir_name} does not have enough tokens to extract protein name.")

def synchronize_frames_from_directories(frames_dict, time_multiplier, grid_shape, channel, fps_output=30.0, output_file_path="combined_video.avi"):
    """
    Synchronizes multiple image-frame sequences on a common time axis and writes one grid video.

    Each glob pattern is expanded and its files are ordered by the integer that ends the
    file name (e.g. "heatmap_frame_55.png" -> 55). The output time step is the smallest
    frame interval multiplied by time_multiplier. The output spans the longest sequence
    (frame count * interval), rounded up to a whole number of time steps. A sequence that
    ends before the others keeps showing its last frame, and grid cells without a sequence
    are filled with black. Every image is resized to the size of the first frame of the
    first sequence.

    Parameters:
        frames_dict (dict): Keys are frame directory paths with wildcards
                           (e.g., "path/to/frames/*.png"), values are frame intervals in seconds.
        time_multiplier (float): Multiplier to the base time step (min of frame intervals).
        grid_shape (tuple): (rows, columns) specifying the grid layout.
        channel (str): Label inserted into the output file name
                       ("<proteins>_<channel>_synced.avi").
        fps_output (float): Output video playback frames per second.
        output_file_path (str): A path whose directory is used for saving the output video.

    Returns:
        None. The video is written to disk and progress information is printed.

    Raises:
        ValueError: If frames_dict is empty, if time_multiplier, fps_output or any frame
                    interval is not positive, if the grid has fewer cells than there are
                    sequences, if a pattern matches no file, or if the first frame cannot
                    be read.
        OSError: If the output video cannot be opened for writing.
    """
    frame_patterns = list(frames_dict.keys())
    frame_intervals = list(frames_dict.values())

    # Validate the cheap arguments before touching the file system.
    n_videos = len(frame_patterns)
    if n_videos == 0:
        raise ValueError("frames_dict must contain at least one frame pattern.")
    if time_multiplier <= 0:
        raise ValueError("time_multiplier must be positive.")
    if fps_output <= 0:
        raise ValueError("fps_output must be positive.")
    if any(interval <= 0 for interval in frame_intervals):
        raise ValueError("All frame intervals must be positive.")
    n_rows, n_cols = grid_shape[0], grid_shape[1]
    grid_cells = n_rows * n_cols
    if grid_cells < n_videos:
        raise ValueError("Grid shape is too small for the number of videos provided.")

    def frame_sort_key(filepath):
        # Order by the integer that ends the file stem ("heatmap_frame_55.png" -> 55),
        # not by the name string. The name breaks ties, so files without a number
        # still sort deterministically instead of in glob order.
        filename = os.path.basename(filepath)
        stem = os.path.splitext(filename)[0]
        digits = stem[len(stem.rstrip("0123456789")):]
        return (int(digits) if digits else -1, filename)

    # Expand every pattern into its sorted list of frame files.
    frame_files_per_video = []
    total_frames_list = []
    for pattern in frame_patterns:
        frame_files = sorted(glob.glob(pattern), key=frame_sort_key)
        if not frame_files:
            raise ValueError(f"No PNG files found matching pattern: {pattern}")

        frame_files_per_video.append(frame_files)
        total_frames_list.append(len(frame_files))

        # Print both ends of the sorted list so the ordering can be checked by eye.
        print(f"Pattern: {pattern}")
        print(f"  Total frames: {len(frame_files)}")
        print(f"  First 5 frames: {[os.path.basename(f) for f in frame_files[:5]]}")
        print(f"  Last 5 frames: {[os.path.basename(f) for f in frame_files[-5:]]}")
        print()

    # Build the output file name; only the directory part of output_file_path is used.
    protein_names = [extract_protein_name(fp) for fp in frame_patterns]
    proteins_combined = "-".join(protein_names)
    out_dir = os.path.dirname(output_file_path)
    final_output_file = os.path.join(out_dir, f"{proteins_combined}_{channel}_synced.avi")

    # The longest sequence defines the synchronized duration.
    total_time_sync = max(n * interval for n, interval in zip(total_frames_list, frame_intervals))
    base_time_step = min(frame_intervals)
    time_step = base_time_step * time_multiplier

    # Round up so the tail of the longest sequence is still sampled; flooring would
    # drop it and would produce zero frames when time_step exceeds the total time.
    num_output_frames = int(np.ceil(total_time_sync / time_step))

    playback_duration_sec = num_output_frames / fps_output
    print(f"Movie will be {playback_duration_sec:.2f} seconds long.")
    print(f"Base time step: {base_time_step}s, Final time step: {time_step}s")
    print(f"Total sync time: {total_time_sync/3600:.2f} hours")

    # The first frame of the first sequence fixes the cell size for the whole grid.
    first_frame = cv2.imread(frame_files_per_video[0][0])
    if first_frame is None:
        raise ValueError(f"Could not read first frame from {frame_files_per_video[0][0]}")
    cell_height, cell_width = first_frame.shape[:2]

    output_frame_height = n_rows * cell_height
    output_frame_width = n_cols * cell_width

    print(f"Individual frame dimensions: {cell_width}x{cell_height}")
    print(f"Output grid dimensions: {output_frame_width}x{output_frame_height}")

    black = np.zeros((cell_height, cell_width, 3), dtype=np.uint8)
    padding = [black] * (grid_cells - n_videos)  # unused grid cells
    # (frame index, resized image) last shown per sequence, so an unchanged index
    # (e.g. a sequence that has already ended) is not read from disk again.
    last_shown = [(None, None)] * n_videos

    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out = cv2.VideoWriter(final_output_file, fourcc, fps_output, (output_frame_width, output_frame_height))
    try:
        if not out.isOpened():
            raise OSError(f"Could not open output video for writing: {final_output_file}")

        for i in tqdm(range(num_output_frames), desc="Processing frames", unit="frame"):
            current_time = i * time_step
            frames = []

            for idx, (frame_files, frame_interval) in enumerate(zip(frame_files_per_video, frame_intervals)):
                # Nearest frame for this time, held at the last frame once the sequence ends.
                frame_idx = min(int(round(current_time / frame_interval)), total_frames_list[idx] - 1)

                shown_idx, cell = last_shown[idx]
                if shown_idx != frame_idx:
                    image = cv2.imread(frame_files[frame_idx])
                    # An unreadable file is shown as a black cell.
                    cell = black if image is None else cv2.resize(image, (cell_width, cell_height))
                    last_shown[idx] = (frame_idx, cell)
                frames.append(cell)

            # Lay the cells out row by row, then stack the rows.
            cells = frames + padding
            grid_rows = [np.hstack(cells[r * n_cols:(r + 1) * n_cols]) for r in range(n_rows)]
            out.write(np.vstack(grid_rows))
    finally:
        # Release the writer even when an error interrupts processing.
        out.release()

    print(f"Output saved as: {final_output_file}")


# Example usage with PNG frame directories.
# Keys: glob patterns matching the frames of one movie. Values: frame interval in seconds.
# NOTE: `output_file_path` is not defined in this cell; it comes from the earlier
# synchronize_videos example cell, which has to be run first.
frames_data = {
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/BBB_Rep1_heatmaps_cy5/heatmap_frame_*.png": 240,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/BBO_Rep1_heatmaps_cy5/heatmap_frame_*.png": 160,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/BOB_Rep1_heatmaps_cy5/heatmap_frame_*.png": 150,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/BOO_Rep1_heatmaps_cy5/heatmap_frame_*.png": 150,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/OBB_Rep1_heatmaps_cy5/heatmap_frame_*.png": 300,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/OBO_Rep1_heatmaps_cy5/heatmap_frame_*.png": 300,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/OOB_Rep1_heatmaps_cy5/heatmap_frame_*.png": 32,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/OOO_Rep1_heatmaps_cy5/heatmap_frame_*.png": 18,
}

# The output time step is time_multiplier times the smallest interval above (18 s).
# A multiplier of 1 gives the maximum temporal resolution; larger values reduce the
# number of output frames.
time_multiplier = 32  # 32 * 18 s = 576 s of experiment time per output frame
# Two rows of four cells, one per entry in `frames_data`.
grid_shape = (2, 4)

synchronize_frames_from_directories(frames_data, time_multiplier, grid_shape, 
                                   channel="cy5", fps_output=30, output_file_path=output_file_path)

In [None]:
import cv2
import numpy as np
import os
import glob
from tqdm import tqdm

def extract_protein_name(file_path):
    """
    Build the protein label from the directory that contains the frames.

    The label is made of the first two underscore-separated tokens of the
    name of the file's parent directory. Anything after the first '-' in the
    second token (e.g. "-RT") is dropped.

    Raises:
        ValueError: If the directory name contains fewer than two tokens.
    """
    # The parent directory, not the file itself, carries the protein name.
    parent_dir = os.path.dirname(file_path)
    dir_name = os.path.basename(parent_dir)
    parts = dir_name.split('_')
    if len(parts) >= 2:
        first, second = parts[0], parts[1]
        return "_".join((first, second.partition('-')[0]))
    raise ValueError(f"Directory name {dir_name} does not have enough tokens to extract protein name.")

def synchronize_frames_from_directories(frames_dict, time_multiplier, grid_shape, channel, fps_output=30.0, output_file_path="combined_video.avi"):
    """
    Synchronizes multiple image-frame sequences on a common time axis and writes one grid video.

    Each glob pattern is expanded and its files are ordered by the integer that ends the
    file name (e.g. "heatmap_frame_55.png" -> 55). The output time step is the smallest
    frame interval multiplied by time_multiplier. The output spans the longest sequence
    (frame count * interval), rounded up to a whole number of time steps. A sequence that
    ends before the others keeps showing its last frame, and grid cells without a sequence
    are filled with black. Every image is resized to the size of the first frame of the
    first sequence.

    Parameters:
        frames_dict (dict): Keys are frame directory paths with wildcards
                           (e.g., "path/to/frames/*.png"), values are frame intervals in seconds.
        time_multiplier (float): Multiplier to the base time step (min of frame intervals).
        grid_shape (tuple): (rows, columns) specifying the grid layout.
        channel (str): Label inserted into the output file name
                       ("<proteins>_<channel>_synced.avi").
        fps_output (float): Output video playback frames per second.
        output_file_path (str): A path whose directory is used for saving the output video.

    Returns:
        None. The video is written to disk and progress information is printed.

    Raises:
        ValueError: If frames_dict is empty, if time_multiplier, fps_output or any frame
                    interval is not positive, if the grid has fewer cells than there are
                    sequences, if a pattern matches no file, or if the first frame cannot
                    be read.
        OSError: If the output video cannot be opened for writing.
    """
    frame_patterns = list(frames_dict.keys())
    frame_intervals = list(frames_dict.values())

    # Validate the cheap arguments before touching the file system.
    n_videos = len(frame_patterns)
    if n_videos == 0:
        raise ValueError("frames_dict must contain at least one frame pattern.")
    if time_multiplier <= 0:
        raise ValueError("time_multiplier must be positive.")
    if fps_output <= 0:
        raise ValueError("fps_output must be positive.")
    if any(interval <= 0 for interval in frame_intervals):
        raise ValueError("All frame intervals must be positive.")
    n_rows, n_cols = grid_shape[0], grid_shape[1]
    grid_cells = n_rows * n_cols
    if grid_cells < n_videos:
        raise ValueError("Grid shape is too small for the number of videos provided.")

    def frame_sort_key(filepath):
        # Order by the integer that ends the file stem ("heatmap_frame_55.png" -> 55),
        # not by the name string. The name breaks ties, so files without a number
        # still sort deterministically instead of in glob order.
        filename = os.path.basename(filepath)
        stem = os.path.splitext(filename)[0]
        digits = stem[len(stem.rstrip("0123456789")):]
        return (int(digits) if digits else -1, filename)

    # Expand every pattern into its sorted list of frame files.
    frame_files_per_video = []
    total_frames_list = []
    for pattern in frame_patterns:
        frame_files = sorted(glob.glob(pattern), key=frame_sort_key)
        if not frame_files:
            raise ValueError(f"No PNG files found matching pattern: {pattern}")

        frame_files_per_video.append(frame_files)
        total_frames_list.append(len(frame_files))

        # Print both ends of the sorted list so the ordering can be checked by eye.
        print(f"Pattern: {pattern}")
        print(f"  Total frames: {len(frame_files)}")
        print(f"  First 5 frames: {[os.path.basename(f) for f in frame_files[:5]]}")
        print(f"  Last 5 frames: {[os.path.basename(f) for f in frame_files[-5:]]}")
        print()

    # Build the output file name; only the directory part of output_file_path is used.
    protein_names = [extract_protein_name(fp) for fp in frame_patterns]
    proteins_combined = "-".join(protein_names)
    out_dir = os.path.dirname(output_file_path)
    final_output_file = os.path.join(out_dir, f"{proteins_combined}_{channel}_synced.avi")

    # The longest sequence defines the synchronized duration.
    total_time_sync = max(n * interval for n, interval in zip(total_frames_list, frame_intervals))
    base_time_step = min(frame_intervals)
    time_step = base_time_step * time_multiplier

    # Round up so the tail of the longest sequence is still sampled; flooring would
    # drop it and would produce zero frames when time_step exceeds the total time.
    num_output_frames = int(np.ceil(total_time_sync / time_step))

    playback_duration_sec = num_output_frames / fps_output
    print(f"Movie will be {playback_duration_sec:.2f} seconds long.")
    print(f"Base time step: {base_time_step}s, Final time step: {time_step}s")
    print(f"Total sync time: {total_time_sync/3600:.2f} hours")

    # The first frame of the first sequence fixes the cell size for the whole grid.
    first_frame = cv2.imread(frame_files_per_video[0][0])
    if first_frame is None:
        raise ValueError(f"Could not read first frame from {frame_files_per_video[0][0]}")
    cell_height, cell_width = first_frame.shape[:2]

    output_frame_height = n_rows * cell_height
    output_frame_width = n_cols * cell_width

    print(f"Individual frame dimensions: {cell_width}x{cell_height}")
    print(f"Output grid dimensions: {output_frame_width}x{output_frame_height}")

    black = np.zeros((cell_height, cell_width, 3), dtype=np.uint8)
    padding = [black] * (grid_cells - n_videos)  # unused grid cells
    # (frame index, resized image) last shown per sequence, so an unchanged index
    # (e.g. a sequence that has already ended) is not read from disk again.
    last_shown = [(None, None)] * n_videos

    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out = cv2.VideoWriter(final_output_file, fourcc, fps_output, (output_frame_width, output_frame_height))
    try:
        if not out.isOpened():
            raise OSError(f"Could not open output video for writing: {final_output_file}")

        for i in tqdm(range(num_output_frames), desc="Processing frames", unit="frame"):
            current_time = i * time_step
            frames = []

            for idx, (frame_files, frame_interval) in enumerate(zip(frame_files_per_video, frame_intervals)):
                # Nearest frame for this time, held at the last frame once the sequence ends.
                frame_idx = min(int(round(current_time / frame_interval)), total_frames_list[idx] - 1)

                shown_idx, cell = last_shown[idx]
                if shown_idx != frame_idx:
                    image = cv2.imread(frame_files[frame_idx])
                    # An unreadable file is shown as a black cell.
                    cell = black if image is None else cv2.resize(image, (cell_width, cell_height))
                    last_shown[idx] = (frame_idx, cell)
                frames.append(cell)

            # Lay the cells out row by row, then stack the rows.
            cells = frames + padding
            grid_rows = [np.hstack(cells[r * n_cols:(r + 1) * n_cols]) for r in range(n_rows)]
            out.write(np.vstack(grid_rows))
    finally:
        # Release the writer even when an error interrupts processing.
        out.release()

    print(f"Output saved as: {final_output_file}")


# Example usage with PNG frame directories.
# Keys: glob patterns matching the frames of one movie. Values: frame interval in seconds.
# NOTE: `output_file_path` is not defined in this cell; it comes from the earlier
# synchronize_videos example cell, which has to be run first.
frames_data = {
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/BBB_Rep1_heatmaps_cy5/heatmap_frame_*.png": 240,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/BBO_Rep1_heatmaps_cy5/heatmap_frame_*.png": 160,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/BOB_Rep1_heatmaps_cy5/heatmap_frame_*.png": 150,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/BOO_Rep1_heatmaps_cy5/heatmap_frame_*.png": 150,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/OBB_Rep1_heatmaps_cy5/heatmap_frame_*.png": 300,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/OBO_Rep1_heatmaps_cy5/heatmap_frame_*.png": 300,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/OOB_Rep1_heatmaps_cy5/heatmap_frame_*.png": 32,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/OOO_Rep1_heatmaps_cy5/heatmap_frame_*.png": 18,
}

# The output time step is time_multiplier times the smallest interval above (18 s).
# A multiplier of 1 gives the maximum temporal resolution; larger values reduce the
# number of output frames.
time_multiplier = 32  # 32 * 18 s = 576 s of experiment time per output frame
# Two rows of four cells, one per entry in `frames_data`.
grid_shape = (2, 4)

synchronize_frames_from_directories(frames_data, time_multiplier, grid_shape, 
                                   channel="cy5", fps_output=30, output_file_path=output_file_path)

In [None]:
import cv2
import numpy as np
import os
import glob
from tqdm import tqdm

def extract_protein_name(file_path):
    """
    Builds a short label from the directory that contains ``file_path``.

    The directory name is split on underscores; the label is the first token
    joined to the second token, with anything after the first "-" in the
    second token (e.g. "-RT") dropped.

    Raises:
        ValueError: If the directory name has fewer than two underscore-separated tokens.
    """
    # Only the immediate parent directory matters; the file name itself is ignored.
    parent_dir = os.path.basename(os.path.dirname(file_path))
    parts = parent_dir.split('_')
    if len(parts) < 2:
        raise ValueError(f"Directory name {parent_dir} does not have enough tokens to extract protein name.")
    leading = parts[0]
    trailing, _, _ = parts[1].partition('-')
    return f"{leading}_{trailing}"

def synchronize_frames_from_directories(frames_dict, time_multiplier, grid_shape, channel, fps_output=30.0, output_file_path="combined_video.avi"):
    """
    Synchronizes multiple PNG frame sequences on a common time axis and writes
    them as a grid into a single MJPG-encoded AVI.

    The output time step is ``min(frame intervals) * time_multiplier``. For every
    output frame each sequence contributes the source frame nearest to the current
    time; a sequence that has run out of frames keeps showing its last frame.
    All cells are resized to the size of the first frame of the first sequence,
    unreadable frames and unused grid cells are rendered black.

    Parameters:
        frames_dict (dict): Keys are glob patterns selecting the frames of one sequence
                           (e.g., "path/to/frames/heatmap_frame_*.png"), values are frame
                           intervals in seconds. Dict order gives row-major grid placement.
        time_multiplier (float): Multiplier to the base time step (smallest frame interval).
        grid_shape (tuple): (rows, columns) specifying the grid layout.
        channel (str): Channel label; only used in the generated output file name.
        fps_output (float): Output video playback frames per second.
        output_file_path (str): Path whose directory part is used for saving the output video;
                                the file name itself is generated from the sequence names.

    Raises:
        ValueError: If the grid has fewer cells than sequences, no sequences are given,
                    a pattern matches no files, the time step is not positive, or the
                    first frame cannot be read.
        OSError: If the output video cannot be opened for writing.
    """
    name_prefix, name_suffix = "heatmap_frame_", ".png"

    def frame_number(filepath):
        # Numeric sort key: "heatmap_frame_55.png" -> 55. Names that do not follow
        # the scheme get key 0 and therefore sort to the front.
        filename = os.path.basename(filepath)
        if filename.startswith(name_prefix) and filename.endswith(name_suffix):
            try:
                return int(filename[len(name_prefix):-len(name_suffix)])
            except ValueError:
                return 0
        return 0

    # Convert frames_dict keys and values to lists
    frame_patterns = list(frames_dict.keys())
    frame_intervals = list(frames_dict.values())

    n_videos = len(frame_patterns)
    n_rows, n_cols = grid_shape[0], grid_shape[1]
    grid_cells = n_rows * n_cols
    if grid_cells < n_videos:
        raise ValueError("Grid shape is too small for the number of videos provided.")
    if n_videos == 0:
        raise ValueError("frames_dict is empty; nothing to synchronize.")

    # Collect the frame files of every sequence, sorted by frame number
    # (a plain string sort would put frame 10 before frame 2).
    frame_files_per_video = []
    for pattern in frame_patterns:
        frame_files = sorted(glob.glob(pattern), key=frame_number)
        if not frame_files:
            raise ValueError(f"No PNG files found matching pattern: {pattern}")
        frame_files_per_video.append(frame_files)

        # Print first few frames to verify sorting
        print(f"Pattern: {pattern}")
        print(f"  Total frames: {len(frame_files)}")
        print(f"  First 5 frames: {[os.path.basename(f) for f in frame_files[:5]]}")
        print(f"  Last 5 frames: {[os.path.basename(f) for f in frame_files[-5:]]}")
        print()

    total_frames_list = [len(files) for files in frame_files_per_video]

    # Build the final output file name from the sequence names.
    proteins_combined = "-".join(extract_protein_name(fp) for fp in frame_patterns)
    out_dir = os.path.dirname(output_file_path)
    final_output_file = os.path.join(out_dir, f"{proteins_combined}_{channel}_synced.avi")

    # The longest sequence defines the duration of the output.
    total_times = [frames * interval for frames, interval in zip(total_frames_list, frame_intervals)]
    total_time_sync = max(total_times)

    # Compute time step.
    base_time_step = min(frame_intervals)
    time_step = base_time_step * time_multiplier
    if time_step <= 0:
        raise ValueError(
            f"Time step must be positive, got {time_step} "
            f"(base time step {base_time_step}, multiplier {time_multiplier})."
        )

    # Determine number of output frames.
    num_output_frames = int(total_time_sync / time_step)

    # Calculate playback duration.
    playback_duration_sec = num_output_frames / fps_output
    print(f"Movie will be {playback_duration_sec:.2f} seconds long.")
    print(f"Base time step: {base_time_step}s, Final time step: {time_step}s")
    print(f"Total sync time: {total_time_sync/3600:.2f} hours")

    # Get dimensions from first frame of first directory.
    first_frame = cv2.imread(frame_files_per_video[0][0])
    if first_frame is None:
        raise ValueError(f"Could not read first frame from {frame_files_per_video[0][0]}")
    cell_height, cell_width = first_frame.shape[:2]

    # Calculate output dimensions.
    output_frame_height = n_rows * cell_height
    output_frame_width = n_cols * cell_width

    print(f"Individual frame dimensions: {cell_width}x{cell_height}")
    print(f"Output grid dimensions: {output_frame_width}x{output_frame_height}")

    # Create video writer and fail loudly if it cannot be opened; otherwise every
    # write below would be dropped silently.
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out = cv2.VideoWriter(final_output_file, fourcc, fps_output, (output_frame_width, output_frame_height))
    if not out.isOpened():
        out.release()
        raise OSError(f"Could not open video writer for {final_output_file}")

    # One shared black cell; it is only ever read (hstack copies), never modified.
    black = np.zeros((cell_height, cell_width, 3), dtype=np.uint8)

    # Remember the last decoded frame per sequence so a frame that is shown for
    # several consecutive output frames (e.g. after a sequence ended) is read once.
    cached_idx = [None] * n_videos
    cached_frame = [None] * n_videos

    try:
        for i in tqdm(range(num_output_frames), desc="Processing frames", unit="frame"):
            current_time = i * time_step
            frames = []

            for idx, (frame_files, frame_interval) in enumerate(zip(frame_files_per_video, frame_intervals)):
                # Nearest source frame for this time, clamped to the last available frame.
                frame_idx = min(int(round(current_time / frame_interval)), total_frames_list[idx] - 1)

                if frame_idx != cached_idx[idx]:
                    frame = cv2.imread(frame_files[frame_idx])
                    if frame is None:
                        # Unreadable frame: show a black cell instead of aborting.
                        frame = black
                    else:
                        frame = cv2.resize(frame, (cell_width, cell_height))
                    cached_idx[idx] = frame_idx
                    cached_frame[idx] = frame

                frames.append(cached_frame[idx])

            # Fill remaining grid cells with black frames.
            frames.extend([black] * (grid_cells - len(frames)))

            # Combine frames into grid (row-major).
            grid_rows = [np.hstack(frames[r * n_cols:(r + 1) * n_cols]) for r in range(n_rows)]
            out.write(np.vstack(grid_rows))
    finally:
        # Always finalize the file, even if processing fails part-way.
        out.release()

    print(f"Output saved as: {final_output_file}")


# Example usage: one PNG heatmap sequence per condition, each paired with its
# frame interval in seconds. Dict order is the order the sequences are passed on.
frames_data = {
    (
        "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/"
        f"original_tiffs/output_data/movies/{condition}_Rep1_heatmaps_cy5/heatmap_frame_*.png"
    ): interval_s
    for condition, interval_s in (
        ("BBB", 240),
        ("BBO", 160),
        ("BOB", 150),
        ("BOO", 150),
        ("OBB", 300),
        ("OBO", 300),
        ("OOB", 32),
        ("OOO", 18),
    )
}

# The multiplier scales the smallest frame interval (18 s here) to get the
# output time step; 1 would be the finest resolution, larger values give fewer frames.
time_multiplier = 32
grid_shape = (2, 4)

synchronize_frames_from_directories(
    frames_data,
    time_multiplier,
    grid_shape,
    channel="cy5",
    fps_output=30,
    output_file_path=output_file_path,
)

In [None]:
import cv2
import numpy as np
import os
import glob
from tqdm import tqdm

def extract_protein_name(file_path):
    """
    Builds a short label from the directory that contains ``file_path``.

    The directory name is split on underscores; the label is the first token
    joined to the second token, with anything after the first "-" in the
    second token (e.g. "-RT") dropped.

    Raises:
        ValueError: If the directory name has fewer than two underscore-separated tokens.
    """
    # Only the immediate parent directory matters; the file name itself is ignored.
    parent_dir = os.path.basename(os.path.dirname(file_path))
    parts = parent_dir.split('_')
    if len(parts) < 2:
        raise ValueError(f"Directory name {parent_dir} does not have enough tokens to extract protein name.")
    leading = parts[0]
    trailing, _, _ = parts[1].partition('-')
    return f"{leading}_{trailing}"

def synchronize_frames_from_directories(frames_dict, time_multiplier, grid_shape, channel, fps_output=30.0, output_file_path="combined_video.avi"):
    """
    Synchronizes multiple PNG frame sequences on a common time axis and writes
    them as a grid into a single MJPG-encoded AVI.

    The output time step is ``min(frame intervals) * time_multiplier``. For every
    output frame each sequence contributes the source frame nearest to the current
    time; a sequence that has run out of frames keeps showing its last frame.
    All cells are resized to the size of the first frame of the first sequence,
    unreadable frames and unused grid cells are rendered black.

    Parameters:
        frames_dict (dict): Keys are glob patterns selecting the frames of one sequence
                           (e.g., "path/to/frames/heatmap_frame_*.png"), values are frame
                           intervals in seconds. Dict order gives row-major grid placement.
        time_multiplier (float): Multiplier to the base time step (smallest frame interval).
        grid_shape (tuple): (rows, columns) specifying the grid layout.
        channel (str): Channel label; only used in the generated output file name.
        fps_output (float): Output video playback frames per second.
        output_file_path (str): Path whose directory part is used for saving the output video;
                                the file name itself is generated from the sequence names.

    Raises:
        ValueError: If the grid has fewer cells than sequences, no sequences are given,
                    a pattern matches no files, the time step is not positive, or the
                    first frame cannot be read.
        OSError: If the output video cannot be opened for writing.
    """
    name_prefix, name_suffix = "heatmap_frame_", ".png"

    def frame_number(filepath):
        # Numeric sort key: "heatmap_frame_55.png" -> 55. Names that do not follow
        # the scheme get key 0 and therefore sort to the front.
        filename = os.path.basename(filepath)
        if filename.startswith(name_prefix) and filename.endswith(name_suffix):
            try:
                return int(filename[len(name_prefix):-len(name_suffix)])
            except ValueError:
                return 0
        return 0

    # Convert frames_dict keys and values to lists
    frame_patterns = list(frames_dict.keys())
    frame_intervals = list(frames_dict.values())

    n_videos = len(frame_patterns)
    n_rows, n_cols = grid_shape[0], grid_shape[1]
    grid_cells = n_rows * n_cols
    if grid_cells < n_videos:
        raise ValueError("Grid shape is too small for the number of videos provided.")
    if n_videos == 0:
        raise ValueError("frames_dict is empty; nothing to synchronize.")

    # Collect the frame files of every sequence, sorted by frame number
    # (a plain string sort would put frame 10 before frame 2).
    frame_files_per_video = []
    for pattern in frame_patterns:
        frame_files = sorted(glob.glob(pattern), key=frame_number)
        if not frame_files:
            raise ValueError(f"No PNG files found matching pattern: {pattern}")
        frame_files_per_video.append(frame_files)

        # Print first few frames to verify sorting
        print(f"Pattern: {pattern}")
        print(f"  Total frames: {len(frame_files)}")
        print(f"  First 5 frames: {[os.path.basename(f) for f in frame_files[:5]]}")
        print(f"  Last 5 frames: {[os.path.basename(f) for f in frame_files[-5:]]}")
        print()

    total_frames_list = [len(files) for files in frame_files_per_video]

    # Build the final output file name from the sequence names.
    proteins_combined = "-".join(extract_protein_name(fp) for fp in frame_patterns)
    out_dir = os.path.dirname(output_file_path)
    final_output_file = os.path.join(out_dir, f"{proteins_combined}_{channel}_synced.avi")

    # The longest sequence defines the duration of the output.
    total_times = [frames * interval for frames, interval in zip(total_frames_list, frame_intervals)]
    total_time_sync = max(total_times)

    # Compute time step.
    base_time_step = min(frame_intervals)
    time_step = base_time_step * time_multiplier
    if time_step <= 0:
        raise ValueError(
            f"Time step must be positive, got {time_step} "
            f"(base time step {base_time_step}, multiplier {time_multiplier})."
        )

    # Determine number of output frames.
    num_output_frames = int(total_time_sync / time_step)

    # Calculate playback duration.
    playback_duration_sec = num_output_frames / fps_output
    print(f"Movie will be {playback_duration_sec:.2f} seconds long.")
    print(f"Base time step: {base_time_step}s, Final time step: {time_step}s")
    print(f"Total sync time: {total_time_sync/3600:.2f} hours")

    # Get dimensions from first frame of first directory.
    first_frame = cv2.imread(frame_files_per_video[0][0])
    if first_frame is None:
        raise ValueError(f"Could not read first frame from {frame_files_per_video[0][0]}")
    cell_height, cell_width = first_frame.shape[:2]

    # Calculate output dimensions.
    output_frame_height = n_rows * cell_height
    output_frame_width = n_cols * cell_width

    print(f"Individual frame dimensions: {cell_width}x{cell_height}")
    print(f"Output grid dimensions: {output_frame_width}x{output_frame_height}")

    # Create video writer and fail loudly if it cannot be opened; otherwise every
    # write below would be dropped silently.
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out = cv2.VideoWriter(final_output_file, fourcc, fps_output, (output_frame_width, output_frame_height))
    if not out.isOpened():
        out.release()
        raise OSError(f"Could not open video writer for {final_output_file}")

    # One shared black cell; it is only ever read (hstack copies), never modified.
    black = np.zeros((cell_height, cell_width, 3), dtype=np.uint8)

    # Remember the last decoded frame per sequence so a frame that is shown for
    # several consecutive output frames (e.g. after a sequence ended) is read once.
    cached_idx = [None] * n_videos
    cached_frame = [None] * n_videos

    try:
        for i in tqdm(range(num_output_frames), desc="Processing frames", unit="frame"):
            current_time = i * time_step
            frames = []

            for idx, (frame_files, frame_interval) in enumerate(zip(frame_files_per_video, frame_intervals)):
                # Nearest source frame for this time, clamped to the last available frame.
                frame_idx = min(int(round(current_time / frame_interval)), total_frames_list[idx] - 1)

                if frame_idx != cached_idx[idx]:
                    frame = cv2.imread(frame_files[frame_idx])
                    if frame is None:
                        # Unreadable frame: show a black cell instead of aborting.
                        frame = black
                    else:
                        frame = cv2.resize(frame, (cell_width, cell_height))
                    cached_idx[idx] = frame_idx
                    cached_frame[idx] = frame

                frames.append(cached_frame[idx])

            # Fill remaining grid cells with black frames.
            frames.extend([black] * (grid_cells - len(frames)))

            # Combine frames into grid (row-major).
            grid_rows = [np.hstack(frames[r * n_cols:(r + 1) * n_cols]) for r in range(n_rows)]
            out.write(np.vstack(grid_rows))
    finally:
        # Always finalize the file, even if processing fails part-way.
        out.release()

    print(f"Output saved as: {final_output_file}")


# Example usage: one PNG heatmap sequence per condition, each paired with its
# frame interval in seconds. Dict order is the order the sequences are passed on.
frames_data = {
    (
        "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/"
        f"original_tiffs/output_data/movies/{condition}_Rep1_heatmaps_cy5/heatmap_frame_*.png"
    ): interval_s
    for condition, interval_s in (
        ("BBB", 240),
        ("BBO", 160),
        ("BOB", 150),
        ("BOO", 150),
        ("OBB", 300),
        ("OBO", 300),
        ("OOB", 32),
        ("OOO", 18),
    )
}

# The multiplier scales the smallest frame interval (18 s here) to get the
# output time step; 1 would be the finest resolution, larger values give fewer frames.
time_multiplier = 32
grid_shape = (2, 4)

synchronize_frames_from_directories(
    frames_data,
    time_multiplier,
    grid_shape,
    channel="cy5",
    fps_output=30,
    output_file_path=output_file_path,
)

In [None]:


# Three fig. 2 movies to tile side by side; each value is the frame duration
# (in seconds) handed to synchronize_videos for that movie.
videos = {
    f"../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/paper-v2/fig2-assets/{relative_path}": interval_s
    for relative_path, interval_s in (
        ("k401/K401_cy5-4x-240sint.avi", 240),
        ("kif3/Kif3_cy5-3x-9sint.avi", 9),
        ("ThTr/ThTr_cy5-4x-240sint.avi", 240),
    )
}

# A multiplier of 1 keeps the finest time step; larger values produce fewer output frames.
time_multiplier = 24
grid_shape = (1, 3)

synchronize_videos(
    videos,
    time_multiplier,
    grid_shape,
    channel="cy5",
    fps_output=60,
    output_file_path=output_file_path,
)

In [None]:
# Concentration labels as they appear in the movie file names ("2p5" = 2.5).
# Not referenced anywhere in this cell; the paths below are hard-coded.
concentrations = ["160", "80", "40", "20", "10", "5", "2p5", "1p25"]

# Movies to combine, keyed by file path. Each value is handed to
# synchronize_videos as that movie's effective frame duration in seconds.
videos = {
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/main/101324-k401-titration-rt/2p5TMB-1ulDNA_/output_data/movies/K401_160nM-RT_Rep1_cy5_120fps_2616frames.avi": 60,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/main/111024-B-F-titrations-RT/2p5TMB-1ulDNA_/output_data_/movies/B_160nM_Rep1_cy5_60fps_961frames.avi": 150,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/main/111624-C-E-G-RT/2p5ulTMB-0p5MT-1ulDNA_/output_data_/movies/C_160nM_Rep1_cy5_60fps_743frames.avi": 150,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/main/110324-D_titration-RT/2p5TMB-1ulDNA_1/output_data_/movies/D_2p5nM_Rep1_cy5_24fps_338frames.avi": 64,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/main/111624-C-E-G-RT/2p5ulTMB-0p5MT-1ulDNA_/output_data_/movies/E_160nM_Rep1_cy5_60fps_743frames.avi": 150,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/main/111024-B-F-titrations-RT/2p5TMB-1ulDNA_/output_data_/movies/F_160nM_Rep1_cy5_60fps_961frames.avi": 150,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/main/111624-C-E-G-RT/2p5ulTMB-0p5MT-1ulDNA_/output_data_/movies/G_160nM_Rep1_cy5_60fps_743frames.avi": 150,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/main/100624-kif3-titration-RT/2p5ulTMB-1ulDNAXnM_/output_data_/movies/Kif3_5nM_2-RT_Rep1_cy5_120fps_1800frames.avi": 8,

}

# For maximum temporal resolution, use a multiplier of 1.
# Increase the multiplier to reduce the number of output frames.
time_multiplier = 64
# Eight movies fill the 2 x 4 grid exactly.
grid_shape = (2, 4)

synchronize_videos(videos, time_multiplier, grid_shape, channel="cy5", fps_output=30, output_file_path=output_file_path)

In [None]:
# Concentration labels as they appear in the movie file names ("2p5" = 2.5).
# Not referenced anywhere in this cell; the paths below are hard-coded.
concentrations = ["160", "80", "40", "20", "10", "5", "2p5", "1p25"]

# Movies to combine, keyed by file path. Each value is handed to
# synchronize_videos as that movie's effective frame duration in seconds.
videos = {
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/main/101324-k401-titration-rt/2p5TMB-1ulDNA_/output_data/movies/K401_160nM-RT_Rep1_cy5_120fps_2616frames.avi": 60,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/main/091024-sustainedMotors-RT/2p5TMB-1ulDNA100nM_/output_data/movies/Kif5-RT_Rep1_cy5_120fps_2922frames.avi": 45,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/main/030225-AdPa-titrations/2p5ulTMB_1ulDNA_/output_data/movies/AdPa_160nM_Rep1_cy5_120fps_1082frames.avi": 120, 
    # 27*4 = 108 s per frame; the factors are kept as written.
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/main/020124-ThTr-titrations-RT/2p5ulTMB-1ulDNA_/output_data_/movies/ThTr_160nM-RT_Rep1_cy5_60fps_1307frames.avi": 27*4, 
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/main/021025-BleSto-AcSu2-titrations/2p5ulTMB-1ulDNA_1/output_data_/movies/BleSto_80nM_Rep1_cy5_60fps_921frames.avi": 100,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/main/021025-BleSto-AcSu2-titrations/2p5ulTMB-1ulDNA_1/output_data_/movies/AcSu2_5nM_Rep1_cy5_60fps_921frames.avi": 100,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/main/021625-HeAl-titrations-RT/2p5ulTMB-1ulDNA_/output_data/movies/HeAl_5nM-RT_Rep1_cy5_60fps_1000frames.avi": 24,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/main/030225-NaGr-titrations/2p5ulTMB_1ulDNA_2/output_data/movies/NaGr_5nM_Rep1_cy5_120fps_2000frames.avi": 12,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/main/021025-DiPu-titrations/2p5ulTMB-1ulDNA_1/output_data_/movies/DiPu_5nM-RT_Rep1_cy5_60fps_880frames.avi": 60,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/main/092224-TiLa_NaGr-RT/2p5ulTMB-1ulDNA100nM_/output_data/movies/TiLa-RT_Rep1_cy5_120fps_1800frames.avi": 8,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/main/100624-kif3-titration-RT/2p5ulTMB-1ulDNAXnM_/output_data_/movies/Kif3_5nM_2-RT_Rep1_cy5_120fps_1800frames.avi": 8,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/main/111624-C-E-G-RT/2p5ulTMB-0p5MT-1ulDNA_/output_data_/movies/negative_Rep1_cy5_60fps_743frames.avi": 150,

}

# For maximum temporal resolution, use a multiplier of 1.
# Increase the multiplier to reduce the number of output frames.
time_multiplier = 16
# Twelve movies fill the 3 x 4 grid exactly.
grid_shape = (3, 4)

synchronize_videos(videos, time_multiplier, grid_shape, channel="cy5", fps_output=60, output_file_path=output_file_path)

In [None]:
# Concentration labels as they appear in the movie file names ("2p5" = 2.5).
concentrations = ["160", "80", "40", "20", "10", "5", "2p5", "1p25"]


# One combined movie per concentration: the label is substituted into each
# f-string path below.
for i in concentrations:

    # Keys are movie paths; values are the frame durations (seconds) passed to
    # synchronize_videos.
    videos = {
        f"../../../../Thomson Lab Dropbox/David Larios/activedrops/main/100624-kif3-titration-RT/2p5ulTMB-1ulDNAXnM_/output_data_/movies/Kif3_{i}nM_1-RT_Rep1_cy5_120fps_1800frames.avi": 6,
        # NOTE(review): this entry lives under "activedrops/ubuntu/" while the others
        # use "activedrops/main/" - confirm the path is intended.
        f"../../../../Thomson Lab Dropbox/David Larios/activedrops/ubuntu/110324-D_titration-RT/2p5TMB-1ulDNA_1/output_data_/movies/D_{i}nM_Rep1_cy5_24fps_338frames.avi": 64,
        f"../../../../Thomson Lab Dropbox/David Larios/activedrops/main/030225-NaGr-titrations/2p5ulTMB_1ulDNA_2/output_data/movies/NaGr_{i}nM_Rep1_cy5_120fps_2000frames.avi": 12,
        # NOTE(review): plain string, not an f-string - HeAl stays at 80nM for every
        # concentration in the loop. Confirm this is deliberate.
        "../../../../Thomson Lab Dropbox/David Larios/activedrops/main/021625-HeAl-titrations-RT/2p5ulTMB-1ulDNA_/output_data/movies/HeAl_80nM-RT_Rep1_cy5_60fps_1000frames.avi": 24,
        f"../../../../Thomson Lab Dropbox/David Larios/activedrops/main/021025-BleSto-AcSu2-titrations/2p5ulTMB-1ulDNA_1/output_data_/movies/AcSu2_{i}nM_Rep1_cy5_60fps_921frames.avi": 100,
        f"../../../../Thomson Lab Dropbox/David Larios/activedrops/main/021025-DiPu-titrations/2p5ulTMB-1ulDNA_1/output_data_/movies/DiPu_{i}nM-RT_Rep1_cy5_60fps_880frames.avi": 60,
    }

    # For maximum temporal resolution, use a multiplier of 1.
    # Increase the multiplier to reduce the number of output frames.
    time_multiplier = 16
    # Six movies fill the 2 x 3 grid exactly.
    grid_shape = (2, 3)

    synchronize_videos(videos, time_multiplier, grid_shape, channel="cy5", fps_output=60, output_file_path=output_file_path)

In [None]:
# Concentration labels as they appear in the movie file names ("2p5" = 2.5).
concentrations = ["160", "80", "40", "20", "10", "5", "2p5", "1p25"]


# One combined movie per concentration: the label is substituted into each
# f-string path below.
for i in concentrations:

    # Keys are movie paths; values are the frame durations (seconds) passed to
    # synchronize_videos.
    videos = {
        # NOTE(review): this K401 path uses "output_data_" whereas the other cells
        # reference the same experiment under "output_data" - confirm which exists.
        f"../../../../Thomson Lab Dropbox/David Larios/activedrops/main/101324-k401-titration-rt/2p5TMB-1ulDNA_/output_data_/movies/K401_{i}nM-RT_Rep1_cy5_120fps_2616frames.avi": 60,
        f"../../../../Thomson Lab Dropbox/David Larios/activedrops/main/111624-C-E-G-RT/2p5ulTMB-0p5MT-1ulDNA_/output_data_/movies/C_{i}nM_Rep1_cy5_60fps_743frames.avi": 150,
        f"../../../../Thomson Lab Dropbox/David Larios/activedrops/main/111624-C-E-G-RT/2p5ulTMB-0p5MT-1ulDNA_/output_data_/movies/G_{i}nM_Rep1_cy5_60fps_743frames.avi": 150,
        # 27*4 = 108 s per frame; the factors are kept as written.
        f"../../../../Thomson Lab Dropbox/David Larios/activedrops/main/020124-ThTr-titrations-RT/2p5ulTMB-1ulDNA_/output_data_/movies/ThTr_{i}nM-RT_Rep1_cy5_60fps_1307frames.avi": 27*4,

        f"../../../../Thomson Lab Dropbox/David Larios/activedrops/main/030225-AdPa-titrations/2p5ulTMB_1ulDNA_/output_data/movies/AdPa_{i}nM_Rep1_cy5_120fps_1082frames.avi": 120,

    }

    # For maximum temporal resolution, use a multiplier of 1.
    # Increase the multiplier to reduce the number of output frames.
    time_multiplier = 4
    # Five movies in a 2 x 3 grid: one cell has no movie assigned.
    grid_shape = (2, 3)

    synchronize_videos(videos, time_multiplier, grid_shape, channel="cy5", fps_output=60, output_file_path=output_file_path)

In [None]:
# Concentration labels as they appear in the movie file names ("2p5" = 2.5).
concentrations = ["160", "80", "40", "20", "10", "5", "2p5", "1p25"]


# One combined movie per concentration: the label is substituted into each
# f-string path below.
for i in concentrations:

    # Keys are movie paths; values are the frame durations (seconds) passed to
    # synchronize_videos.
    videos = {
        # The literal used to be split across two lines, which is a syntax error for
        # a single-quoted string; it is now one line.
        # NOTE(review): AcSu2 is pinned to its 80nM movie for every iteration, and its
        # interval is 60 here but 100 in the other cells - confirm both are intended.
        "../../../../Thomson Lab Dropbox/David Larios/activedrops/main/021025-BleSto-AcSu2-titrations/2p5ulTMB-1ulDNA_1/output_data_/movies/AcSu2_80nM_Rep1_cy5_60fps_921frames.avi": 60,
        f"../../../../Thomson Lab Dropbox/David Larios/activedrops/main/111624-C-E-G-RT/2p5ulTMB-0p5MT-1ulDNA_/output_data_/movies/C_{i}nM_Rep1_cy5_60fps_743frames.avi": 150,
        f"../../../../Thomson Lab Dropbox/David Larios/activedrops/main/111624-C-E-G-RT/2p5ulTMB-0p5MT-1ulDNA_/output_data_/movies/G_{i}nM_Rep1_cy5_60fps_743frames.avi": 150,
        # 27*4 = 108 s per frame; the factors are kept as written.
        f"../../../../Thomson Lab Dropbox/David Larios/activedrops/main/020124-ThTr-titrations-RT/2p5ulTMB-1ulDNA_/output_data_/movies/ThTr_{i}nM-RT_Rep1_cy5_60fps_1307frames.avi": 27*4,
    }

    # For maximum temporal resolution, use a multiplier of 1.
    # Increase the multiplier to reduce the number of output frames.
    time_multiplier = 4
    # Four movies fill the 1 x 4 grid exactly.
    grid_shape = (1, 4)

    synchronize_videos(videos, time_multiplier, grid_shape, channel="cy5", fps_output=60, output_file_path=output_file_path)

In [None]:
import cv2
import numpy as np
import os
import glob
from tqdm import tqdm

def extract_protein_name(file_path):
    """
    Builds a short label from the directory that contains ``file_path``.

    The directory name is split on underscores; the label is the first token
    joined to the second token, with anything after the first "-" in the
    second token (e.g. "-RT") dropped.

    Raises:
        ValueError: If the directory name has fewer than two underscore-separated tokens.
    """
    # Only the immediate parent directory matters; the file name itself is ignored.
    parent_dir = os.path.basename(os.path.dirname(file_path))
    parts = parent_dir.split('_')
    if len(parts) < 2:
        raise ValueError(f"Directory name {parent_dir} does not have enough tokens to extract protein name.")
    leading = parts[0]
    trailing, _, _ = parts[1].partition('-')
    return f"{leading}_{trailing}"

def synchronize_frames_from_directories(frames_dict, time_multiplier, grid_shape, channel, fps_output=30.0, output_file_path="combined_video.avi"):
    """
    Synchronizes multiple PNG frame sequences on a common time axis and writes
    them as a grid into a single MJPG-encoded AVI.

    The output time step is ``min(frame intervals) * time_multiplier``. For every
    output frame each sequence contributes the source frame nearest to the current
    time; a sequence that has run out of frames keeps showing its last frame.
    All cells are resized to the size of the first frame of the first sequence,
    unreadable frames and unused grid cells are rendered black.

    Parameters:
        frames_dict (dict): Keys are glob patterns selecting the frames of one sequence
                           (e.g., "path/to/frames/heatmap_frame_*.png"), values are frame
                           intervals in seconds. Dict order gives row-major grid placement.
        time_multiplier (float): Multiplier to the base time step (smallest frame interval).
        grid_shape (tuple): (rows, columns) specifying the grid layout.
        channel (str): Channel label; only used in the generated output file name.
        fps_output (float): Output video playback frames per second.
        output_file_path (str): Path whose directory part is used for saving the output video;
                                the file name itself is generated from the sequence names.

    Raises:
        ValueError: If the grid has fewer cells than sequences, no sequences are given,
                    a pattern matches no files, the time step is not positive, or the
                    first frame cannot be read.
        OSError: If the output video cannot be opened for writing.
    """
    name_prefix, name_suffix = "heatmap_frame_", ".png"

    def frame_number(filepath):
        # Numeric sort key: "heatmap_frame_55.png" -> 55. Names that do not follow
        # the scheme get key 0 and therefore sort to the front.
        filename = os.path.basename(filepath)
        if filename.startswith(name_prefix) and filename.endswith(name_suffix):
            try:
                return int(filename[len(name_prefix):-len(name_suffix)])
            except ValueError:
                return 0
        return 0

    # Convert frames_dict keys and values to lists
    frame_patterns = list(frames_dict.keys())
    frame_intervals = list(frames_dict.values())

    n_videos = len(frame_patterns)
    n_rows, n_cols = grid_shape[0], grid_shape[1]
    grid_cells = n_rows * n_cols
    if grid_cells < n_videos:
        raise ValueError("Grid shape is too small for the number of videos provided.")
    if n_videos == 0:
        raise ValueError("frames_dict is empty; nothing to synchronize.")

    # Collect the frame files of every sequence, sorted by frame number
    # (a plain string sort would put frame 10 before frame 2).
    frame_files_per_video = []
    for pattern in frame_patterns:
        frame_files = sorted(glob.glob(pattern), key=frame_number)
        if not frame_files:
            raise ValueError(f"No PNG files found matching pattern: {pattern}")
        frame_files_per_video.append(frame_files)

        # Print first few frames to verify sorting
        print(f"Pattern: {pattern}")
        print(f"  Total frames: {len(frame_files)}")
        print(f"  First 5 frames: {[os.path.basename(f) for f in frame_files[:5]]}")
        print(f"  Last 5 frames: {[os.path.basename(f) for f in frame_files[-5:]]}")
        print()

    total_frames_list = [len(files) for files in frame_files_per_video]

    # Build the final output file name from the sequence names.
    proteins_combined = "-".join(extract_protein_name(fp) for fp in frame_patterns)
    out_dir = os.path.dirname(output_file_path)
    final_output_file = os.path.join(out_dir, f"{proteins_combined}_{channel}_synced.avi")

    # The longest sequence defines the duration of the output.
    total_times = [frames * interval for frames, interval in zip(total_frames_list, frame_intervals)]
    total_time_sync = max(total_times)

    # Compute time step.
    base_time_step = min(frame_intervals)
    time_step = base_time_step * time_multiplier
    if time_step <= 0:
        raise ValueError(
            f"Time step must be positive, got {time_step} "
            f"(base time step {base_time_step}, multiplier {time_multiplier})."
        )

    # Determine number of output frames.
    num_output_frames = int(total_time_sync / time_step)

    # Calculate playback duration.
    playback_duration_sec = num_output_frames / fps_output
    print(f"Movie will be {playback_duration_sec:.2f} seconds long.")
    print(f"Base time step: {base_time_step}s, Final time step: {time_step}s")
    print(f"Total sync time: {total_time_sync/3600:.2f} hours")

    # Get dimensions from first frame of first directory.
    first_frame = cv2.imread(frame_files_per_video[0][0])
    if first_frame is None:
        raise ValueError(f"Could not read first frame from {frame_files_per_video[0][0]}")
    cell_height, cell_width = first_frame.shape[:2]

    # Calculate output dimensions.
    output_frame_height = n_rows * cell_height
    output_frame_width = n_cols * cell_width

    print(f"Individual frame dimensions: {cell_width}x{cell_height}")
    print(f"Output grid dimensions: {output_frame_width}x{output_frame_height}")

    # Create video writer and fail loudly if it cannot be opened; otherwise every
    # write below would be dropped silently.
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out = cv2.VideoWriter(final_output_file, fourcc, fps_output, (output_frame_width, output_frame_height))
    if not out.isOpened():
        out.release()
        raise OSError(f"Could not open video writer for {final_output_file}")

    # One shared black cell; it is only ever read (hstack copies), never modified.
    black = np.zeros((cell_height, cell_width, 3), dtype=np.uint8)

    # Remember the last decoded frame per sequence so a frame that is shown for
    # several consecutive output frames (e.g. after a sequence ended) is read once.
    cached_idx = [None] * n_videos
    cached_frame = [None] * n_videos

    try:
        for i in tqdm(range(num_output_frames), desc="Processing frames", unit="frame"):
            current_time = i * time_step
            frames = []

            for idx, (frame_files, frame_interval) in enumerate(zip(frame_files_per_video, frame_intervals)):
                # Nearest source frame for this time, clamped to the last available frame.
                frame_idx = min(int(round(current_time / frame_interval)), total_frames_list[idx] - 1)

                if frame_idx != cached_idx[idx]:
                    frame = cv2.imread(frame_files[frame_idx])
                    if frame is None:
                        # Unreadable frame: show a black cell instead of aborting.
                        frame = black
                    else:
                        frame = cv2.resize(frame, (cell_width, cell_height))
                    cached_idx[idx] = frame_idx
                    cached_frame[idx] = frame

                frames.append(cached_frame[idx])

            # Fill remaining grid cells with black frames.
            frames.extend([black] * (grid_cells - len(frames)))

            # Combine frames into grid (row-major).
            grid_rows = [np.hstack(frames[r * n_cols:(r + 1) * n_cols]) for r in range(n_rows)]
            out.write(np.vstack(grid_rows))
    finally:
        # Always finalize the file, even if processing fails part-way.
        out.release()

    print(f"Output saved as: {final_output_file}")


# Example usage: one PNG heatmap sequence per condition, each paired with its
# frame interval in seconds. Dict order is the order the sequences are passed on.
frames_data = {
    (
        "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/"
        f"original_tiffs/output_data/movies/{condition}_Rep1_heatmaps_cy5/heatmap_frame_*.png"
    ): interval_s
    for condition, interval_s in (
        ("BBB", 240),
        ("BBO", 160),
        ("BOB", 150),
        ("BOO", 150),
        ("OBB", 300),
        ("OBO", 300),
        ("OOB", 32),
        ("OOO", 18),
    )
}

# The multiplier scales the smallest frame interval (18 s here) to get the
# output time step; 1 would be the finest resolution, larger values give fewer frames.
time_multiplier = 32
grid_shape = (2, 4)

synchronize_frames_from_directories(
    frames_data,
    time_multiplier,
    grid_shape,
    channel="cy5",
    fps_output=30,
    output_file_path=output_file_path,
)

In [None]:
import cv2
import numpy as np
import os
import glob
from tqdm import tqdm

def extract_protein_name(file_path):
    """
    Returns the protein label encoded in the name of the directory that contains file_path.

    The label is "<token1>_<token2>", built from the first two underscore-separated
    tokens of that directory name. Anything from the first '-' onward in the second
    token (e.g. "-RT") is dropped.

    Raises:
        ValueError: If the directory name has fewer than two underscore-separated tokens.
    """
    dir_name = os.path.basename(os.path.dirname(file_path))
    parts = dir_name.split('_')
    if len(parts) >= 2:
        head, second = parts[0], parts[1]
        # Keep only what precedes the first dash of the second token.
        return head + "_" + second.partition('-')[0]
    raise ValueError(f"Directory name {dir_name} does not have enough tokens to extract protein name.")

def synchronize_frames_from_directories(frames_dict, time_multiplier, grid_shape, channel, fps_output=30.0, output_file_path="combined_video.avi"):
    """
    Synchronizes multiple PNG frame sequences on a common time axis and writes
    them as one grid movie (MJPG-encoded .avi).

    The output time step is min(frame intervals) * time_multiplier. The movie spans
    the longest sequence (frame count * interval); a sequence that ends earlier keeps
    showing its last frame. Every frame is resized to the dimensions of the first
    frame of the first sequence, unreadable frames are replaced by black frames, and
    unused grid cells are black.

    Parameters:
        frames_dict (dict): Keys are glob patterns selecting the frames of one sequence
                           (e.g., "path/to/frames/*.png"), values are frame intervals in seconds.
                           Files named "heatmap_frame_<N>.png" are ordered by N; any other
                           file name sorts with key 0.
        time_multiplier (float): Multiplier to the base time step (must be > 0).
        grid_shape (tuple): (rows, columns) specifying the grid layout.
        channel (str): Label inserted into the output file name.
        fps_output (float): Output video playback frames per second (must be > 0).
        output_file_path (str): Path whose directory part (os.path.dirname) receives the
                                output video; the file name itself is generated from the
                                protein names and the channel. End the path with a
                                separator to name a directory directly.

    Raises:
        ValueError: If frames_dict is empty, a numeric argument is not positive, the grid
                    is too small, a pattern matches no files, or the first frame is unreadable.
        IOError: If the video writer cannot be opened for the output file.
    """
    if not frames_dict:
        raise ValueError("frames_dict is empty; at least one frame pattern is required.")

    frame_patterns = list(frames_dict.keys())
    frame_intervals = list(frames_dict.values())

    # Reject values that would otherwise cause a division by zero or an empty movie.
    if time_multiplier <= 0 or fps_output <= 0 or any(interval <= 0 for interval in frame_intervals):
        raise ValueError("time_multiplier, fps_output and all frame intervals must be positive.")

    n_videos = len(frame_patterns)
    n_rows, n_cols = grid_shape[0], grid_shape[1]
    grid_cells = n_rows * n_cols
    if grid_cells < n_videos:
        raise ValueError("Grid shape is too small for the number of videos provided.")

    name_prefix, name_suffix = "heatmap_frame_", ".png"

    def extract_frame_number(filepath):
        # "heatmap_frame_55.png" -> 55; anything else sorts with key 0.
        filename = os.path.basename(filepath)
        if filename.startswith(name_prefix) and filename.endswith(name_suffix):
            try:
                return int(filename[len(name_prefix):-len(name_suffix)])
            except ValueError:
                return 0
        return 0

    # Collect the frame files of every sequence, ordered by frame number
    # (a plain string sort would put frame 10 before frame 2).
    frame_files_per_video = []
    for pattern in frame_patterns:
        frame_files = sorted(glob.glob(pattern), key=extract_frame_number)
        if not frame_files:
            raise ValueError(f"No PNG files found matching pattern: {pattern}")
        frame_files_per_video.append(frame_files)

        # Print first few frames to verify sorting
        print(f"Pattern: {pattern}")
        print(f"  Total frames: {len(frame_files)}")
        print(f"  First 5 frames: {[os.path.basename(f) for f in frame_files[:5]]}")
        print(f"  Last 5 frames: {[os.path.basename(f) for f in frame_files[-5:]]}")
        print()
    total_frames_list = [len(files) for files in frame_files_per_video]

    # Extract protein names for output naming.
    protein_names = [extract_protein_name(fp) for fp in frame_patterns]
    proteins_combined = "-".join(protein_names)

    # Build the final output file name.
    out_dir = os.path.dirname(output_file_path)
    final_output_file = os.path.join(out_dir, f"{proteins_combined}_{channel}_synced.avi")

    # The movie covers the longest sequence so that no sequence is cut short.
    total_times = [frames * interval for frames, interval in zip(total_frames_list, frame_intervals)]
    total_time_sync = max(total_times)

    # Compute time step.
    base_time_step = min(frame_intervals)
    time_step = base_time_step * time_multiplier

    # Determine number of output frames.
    num_output_frames = int(total_time_sync / time_step)

    # Calculate playback duration.
    playback_duration_sec = num_output_frames / fps_output
    print(f"Movie will be {playback_duration_sec:.2f} seconds long.")
    print(f"Base time step: {base_time_step}s, Final time step: {time_step}s")
    print(f"Total sync time: {total_time_sync/3600:.2f} hours")

    # Get dimensions from first frame of first directory.
    first_frame = cv2.imread(frame_files_per_video[0][0])
    if first_frame is None:
        raise ValueError(f"Could not read first frame from {frame_files_per_video[0][0]}")

    cell_height, cell_width = first_frame.shape[:2]

    # Calculate output dimensions.
    output_frame_height = n_rows * cell_height
    output_frame_width = n_cols * cell_width

    print(f"Individual frame dimensions: {cell_width}x{cell_height}")
    print(f"Output grid dimensions: {output_frame_width}x{output_frame_height}")

    # Create video writer and fail loudly if it could not be opened; otherwise
    # every write() below would be silently dropped.
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out = cv2.VideoWriter(final_output_file, fourcc, fps_output, (output_frame_width, output_frame_height))
    if not out.isOpened():
        out.release()
        raise IOError(f"Could not open video writer for {final_output_file}")

    # Loop invariants: one shared black frame for padding and unreadable frames.
    black = np.zeros((cell_height, cell_width, 3), dtype=np.uint8)
    padding = [black] * (grid_cells - n_videos)

    # Per-sequence cache of the last decoded frame: consecutive output frames often
    # map to the same source frame, so it is only re-read when the index changes.
    cached_index = [None] * n_videos
    cached_frame = [None] * n_videos

    try:
        for i in tqdm(range(num_output_frames), desc="Processing frames", unit="frame"):
            current_time = i * time_step

            for idx, (frame_files, frame_interval) in enumerate(zip(frame_files_per_video, frame_intervals)):
                # Nearest source frame for this time, clamped to the last available frame.
                frame_idx = min(int(round(current_time / frame_interval)), total_frames_list[idx] - 1)
                if frame_idx == cached_index[idx]:
                    continue

                frame = cv2.imread(frame_files[frame_idx])
                if frame is None:
                    # If the frame cannot be read, show a black frame instead.
                    frame = black
                cached_frame[idx] = cv2.resize(frame, (cell_width, cell_height))
                cached_index[idx] = frame_idx

            # Lay the cells out row by row; hstack/vstack copy, so cached frames stay intact.
            cells = cached_frame + padding
            grid_rows = [np.hstack(cells[r * n_cols:(r + 1) * n_cols]) for r in range(n_rows)]
            out.write(np.vstack(grid_rows))
    finally:
        # Always finalize the file, even if processing is interrupted.
        out.release()

    print(f"Output saved as: {final_output_file}")


# Example usage: each key is a glob pattern selecting the PNG heatmap frames of one
# condition; each value is that condition's frame interval in seconds.
frames_data = {
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/BBB_Rep1_heatmaps_cy5/heatmap_frame_*.png": 240,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/BBO_Rep1_heatmaps_cy5/heatmap_frame_*.png": 160,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/BOB_Rep1_heatmaps_cy5/heatmap_frame_*.png": 150,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/BOO_Rep1_heatmaps_cy5/heatmap_frame_*.png": 150,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/OBB_Rep1_heatmaps_cy5/heatmap_frame_*.png": 300,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/OBO_Rep1_heatmaps_cy5/heatmap_frame_*.png": 300,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/OOB_Rep1_heatmaps_cy5/heatmap_frame_*.png": 32,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/OOO_Rep1_heatmaps_cy5/heatmap_frame_*.png": 18,
}

# The output time step is the smallest frame interval (18 s here) times time_multiplier,
# so 32 is a coarse setting; 1 would give the finest temporal resolution.
time_multiplier = 32
grid_shape = (2, 4)  # (rows, columns): 8 cells for the 8 patterns above

# NOTE(review): output_file_path is not assigned in this cell — presumably set by an
# earlier cell; confirm before running this cell on its own.
synchronize_frames_from_directories(frames_data, time_multiplier, grid_shape, 
                                   channel="cy5", fps_output=30, output_file_path=output_file_path)

In [4]:
import cv2
import numpy as np
import os
import glob
from tqdm import tqdm

def extract_protein_name(file_path):
    """
    Derives the protein name from the directory that holds the given file.

    Only the parent directory's name is inspected: it is split on underscores and
    the first two tokens are joined back with an underscore. The second token is
    truncated at its first '-' so suffixes such as "-RT" are removed.

    Raises:
        ValueError: When the directory name contains fewer than two tokens.
    """
    parent_path = os.path.dirname(file_path)
    dir_name = os.path.basename(parent_path)

    tokens = dir_name.split('_')
    if len(tokens) < 2:
        raise ValueError(f"Directory name {dir_name} does not have enough tokens to extract protein name.")

    first, second, *_ = tokens
    trimmed_second = second.split('-')[0]
    return "_".join((first, trimmed_second))

def synchronize_frames_from_directories(frames_dict, time_multiplier, grid_shape, channel, fps_output=30.0, output_file_path="combined_video.avi"):
    """
    Synchronizes multiple PNG frame sequences on a common time axis and writes
    them as one grid movie (MJPG-encoded .avi).

    The output time step is min(frame intervals) * time_multiplier. The movie spans
    the longest sequence (frame count * interval); a sequence that ends earlier keeps
    showing its last frame. Every frame is resized to the dimensions of the first
    frame of the first sequence, unreadable frames are replaced by black frames, and
    unused grid cells are black.

    Parameters:
        frames_dict (dict): Keys are glob patterns selecting the frames of one sequence
                           (e.g., "path/to/frames/*.png"), values are frame intervals in seconds.
                           Files named "heatmap_frame_<N>.png" are ordered by N; any other
                           file name sorts with key 0.
        time_multiplier (float): Multiplier to the base time step (must be > 0).
        grid_shape (tuple): (rows, columns) specifying the grid layout.
        channel (str): Label inserted into the output file name.
        fps_output (float): Output video playback frames per second (must be > 0).
        output_file_path (str): Path whose directory part (os.path.dirname) receives the
                                output video; the file name itself is generated from the
                                protein names, channel, time_multiplier and fps_output.
                                End the path with a separator to name a directory directly.

    Raises:
        ValueError: If frames_dict is empty, a numeric argument is not positive, the grid
                    is too small, a pattern matches no files, or the first frame is unreadable.
        IOError: If the video writer cannot be opened for the output file.
    """
    if not frames_dict:
        raise ValueError("frames_dict is empty; at least one frame pattern is required.")

    frame_patterns = list(frames_dict.keys())
    frame_intervals = list(frames_dict.values())

    # Reject values that would otherwise cause a division by zero or an empty movie.
    if time_multiplier <= 0 or fps_output <= 0 or any(interval <= 0 for interval in frame_intervals):
        raise ValueError("time_multiplier, fps_output and all frame intervals must be positive.")

    n_videos = len(frame_patterns)
    n_rows, n_cols = grid_shape[0], grid_shape[1]
    grid_cells = n_rows * n_cols
    if grid_cells < n_videos:
        raise ValueError("Grid shape is too small for the number of videos provided.")

    name_prefix, name_suffix = "heatmap_frame_", ".png"

    def extract_frame_number(filepath):
        # "heatmap_frame_55.png" -> 55; anything else sorts with key 0.
        filename = os.path.basename(filepath)
        if filename.startswith(name_prefix) and filename.endswith(name_suffix):
            try:
                return int(filename[len(name_prefix):-len(name_suffix)])
            except ValueError:
                return 0
        return 0

    # Collect the frame files of every sequence, ordered by frame number
    # (a plain string sort would put frame 10 before frame 2).
    frame_files_per_video = []
    for pattern in frame_patterns:
        frame_files = sorted(glob.glob(pattern), key=extract_frame_number)
        if not frame_files:
            raise ValueError(f"No PNG files found matching pattern: {pattern}")
        frame_files_per_video.append(frame_files)
    total_frames_list = [len(files) for files in frame_files_per_video]

    # Extract protein names for output naming.
    protein_names = [extract_protein_name(fp) for fp in frame_patterns]
    proteins_combined = "-".join(protein_names)

    # Build the final output file name, including time_multiplier and fps_output.
    out_dir = os.path.dirname(output_file_path)
    # Format time_multiplier and fps_output for filename (avoid decimal point if possible)
    tm_str = f"{int(time_multiplier)}" if int(time_multiplier) == time_multiplier else f"{time_multiplier}"
    fps_str = f"{int(fps_output)}" if int(fps_output) == fps_output else f"{fps_output}"
    final_output_file = os.path.join(
        out_dir,
        f"{proteins_combined}_{channel}_synced_tm{tm_str}_fps{fps_str}.avi"
    )

    # The movie covers the longest sequence so that no sequence is cut short.
    total_times = [frames * interval for frames, interval in zip(total_frames_list, frame_intervals)]
    total_time_sync = max(total_times)

    # Compute time step.
    base_time_step = min(frame_intervals)
    time_step = base_time_step * time_multiplier

    # Determine number of output frames.
    num_output_frames = int(total_time_sync / time_step)

    # Calculate playback duration.
    playback_duration_sec = num_output_frames / fps_output
    print(f"Movie duration: {playback_duration_sec:.2f} seconds")

    # Get dimensions from first frame of first directory.
    first_frame = cv2.imread(frame_files_per_video[0][0])
    if first_frame is None:
        raise ValueError(f"Could not read first frame from {frame_files_per_video[0][0]}")

    cell_height, cell_width = first_frame.shape[:2]

    # Calculate output dimensions.
    output_frame_height = n_rows * cell_height
    output_frame_width = n_cols * cell_width

    # Create video writer and fail loudly if it could not be opened; otherwise
    # every write() below would be silently dropped.
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out = cv2.VideoWriter(final_output_file, fourcc, fps_output, (output_frame_width, output_frame_height))
    if not out.isOpened():
        out.release()
        raise IOError(f"Could not open video writer for {final_output_file}")

    # Loop invariants: one shared black frame for padding and unreadable frames.
    black = np.zeros((cell_height, cell_width, 3), dtype=np.uint8)
    padding = [black] * (grid_cells - n_videos)

    # Per-sequence cache of the last decoded frame: consecutive output frames often
    # map to the same source frame, so it is only re-read when the index changes.
    cached_index = [None] * n_videos
    cached_frame = [None] * n_videos

    try:
        for i in tqdm(range(num_output_frames), desc="Processing frames", unit="frame"):
            current_time = i * time_step

            for idx, (frame_files, frame_interval) in enumerate(zip(frame_files_per_video, frame_intervals)):
                # Nearest source frame for this time, clamped to the last available frame.
                frame_idx = min(int(round(current_time / frame_interval)), total_frames_list[idx] - 1)
                if frame_idx == cached_index[idx]:
                    continue

                frame = cv2.imread(frame_files[frame_idx])
                if frame is None:
                    # If the frame cannot be read, show a black frame instead.
                    frame = black
                cached_frame[idx] = cv2.resize(frame, (cell_width, cell_height))
                cached_index[idx] = frame_idx

            # Lay the cells out row by row; hstack/vstack copy, so cached frames stay intact.
            cells = cached_frame + padding
            grid_rows = [np.hstack(cells[r * n_cols:(r + 1) * n_cols]) for r in range(n_rows)]
            out.write(np.vstack(grid_rows))
    finally:
        # Always finalize the file, even if processing is interrupted.
        out.release()

    print(f"Output saved to: {final_output_file}")



# Example usage: each key is a glob pattern selecting the PNG heatmap frames of one
# condition; each value is that condition's frame interval in seconds.
# Commented-out entries are conditions excluded from this particular run.
frames_data = {
    # "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/BBB_Rep1_heatmaps_cy5/heatmap_frame_*.png": 240,
    # "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/BBO_Rep1_heatmaps_cy5/heatmap_frame_*.png": 160,
    # "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/BOB_Rep1_heatmaps_cy5/heatmap_frame_*.png": 150,
    # "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/BOO_Rep1_heatmaps_cy5/heatmap_frame_*.png": 150,
    # "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/OBB_Rep1_heatmaps_cy5/heatmap_frame_*.png": 300,
    # "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/OBO_Rep1_heatmaps_cy5/heatmap_frame_*.png": 300,
    # NOTE(review): the intervals below are written as products (16*2, 9*2); presumably a
    # base interval times a subsampling factor — confirm against the acquisition settings.
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/OOB_Rep1_heatmaps_cy5/heatmap_frame_*.png": 16*2,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/paper/figures/fig4-assets/original_tiffs/output_data/movies/OOO_Rep1_heatmaps_cy5/heatmap_frame_*.png": 9*2,
}

# The output time step is the smallest frame interval times time_multiplier,
# so 1 is the finest temporal resolution available.
time_multiplier = 1
grid_shape = (1, 2)  # (rows, columns): one row holding the two active patterns

# NOTE(review): output_file_path is not assigned in this cell — presumably set by an
# earlier cell; confirm before running this cell on its own.
synchronize_frames_from_directories(frames_data, time_multiplier, grid_shape, 
                                   channel="cy5", fps_output=90, output_file_path=output_file_path)

Movie duration: 10.01 seconds


Processing frames: 100%|██████████| 901/901 [04:10<00:00,  3.60frame/s]

Output saved to: ../../../../Thomson Lab Dropbox/David Larios/activedrops/main/all/movies/OOB_Rep1-OOO_Rep1_cy5_synced_tm1_fps90.avi





In [None]:
z  # NOTE(review): stray expression; raises NameError unless `z` is defined by an earlier cell — likely an accidental keystroke, confirm and remove

In [5]:
import cv2
import numpy as np
import os
import glob
from tqdm import tqdm

def extract_protein_name(file_path):
    """
    Extracts the protein name from the directory component of a frame path.

    The name of the directory containing the file is split on '_'. The result is
    the first token, an underscore, and the second token cut at its first '-'
    (which strips extras such as "-RT").

    Raises:
        ValueError: If that directory name does not contain at least two tokens.
    """
    containing_dir = os.path.dirname(file_path)
    dir_name = os.path.basename(containing_dir)

    # Only the first two tokens matter, so a bounded split is enough.
    tokens = dir_name.split('_', 2)
    enough_tokens = len(tokens) >= 2
    if not enough_tokens:
        raise ValueError(f"Directory name {dir_name} does not have enough tokens to extract protein name.")

    leading = tokens[0]
    following, _, _ = tokens[1].partition('-')
    return f"{leading}_{following}"

def synchronize_frames_from_directories(frames_dict, time_multiplier, grid_shape, channel, fps_output=30.0, output_file_path="combined_video.avi"):
    """
    Synchronizes multiple PNG frame sequences on a common time axis and writes
    them as one grid movie (MJPG-encoded .avi).

    The output time step is min(frame intervals) * time_multiplier. The movie spans
    the longest sequence (frame count * interval); a sequence that ends earlier keeps
    showing its last frame. Every frame is resized to the dimensions of the first
    frame of the first sequence, unreadable frames are replaced by black frames, and
    unused grid cells are black.

    Parameters:
        frames_dict (dict): Keys are glob patterns selecting the frames of one sequence
                           (e.g., "path/to/frames/*.png"), values are frame intervals in seconds.
                           Files named "heatmap_frame_<N>.png" are ordered by N; any other
                           file name sorts with key 0.
        time_multiplier (float): Multiplier to the base time step (must be > 0).
        grid_shape (tuple): (rows, columns) specifying the grid layout.
        channel (str): Label inserted into the output file name.
        fps_output (float): Output video playback frames per second (must be > 0).
        output_file_path (str): Path whose directory part (os.path.dirname) receives the
                                output video; the file name itself is generated from the
                                protein names, channel, time_multiplier and fps_output.
                                End the path with a separator to name a directory directly.

    Raises:
        ValueError: If frames_dict is empty, a numeric argument is not positive, the grid
                    is too small, a pattern matches no files, or the first frame is unreadable.
        IOError: If the video writer cannot be opened for the output file.
    """
    if not frames_dict:
        raise ValueError("frames_dict is empty; at least one frame pattern is required.")

    frame_patterns = list(frames_dict.keys())
    frame_intervals = list(frames_dict.values())

    # Reject values that would otherwise cause a division by zero or an empty movie.
    if time_multiplier <= 0 or fps_output <= 0 or any(interval <= 0 for interval in frame_intervals):
        raise ValueError("time_multiplier, fps_output and all frame intervals must be positive.")

    n_videos = len(frame_patterns)
    n_rows, n_cols = grid_shape[0], grid_shape[1]
    grid_cells = n_rows * n_cols
    if grid_cells < n_videos:
        raise ValueError("Grid shape is too small for the number of videos provided.")

    name_prefix, name_suffix = "heatmap_frame_", ".png"

    def extract_frame_number(filepath):
        # "heatmap_frame_55.png" -> 55; anything else sorts with key 0.
        filename = os.path.basename(filepath)
        if filename.startswith(name_prefix) and filename.endswith(name_suffix):
            try:
                return int(filename[len(name_prefix):-len(name_suffix)])
            except ValueError:
                return 0
        return 0

    # Collect the frame files of every sequence, ordered by frame number
    # (a plain string sort would put frame 10 before frame 2).
    frame_files_per_video = []
    for pattern in frame_patterns:
        frame_files = sorted(glob.glob(pattern), key=extract_frame_number)
        if not frame_files:
            raise ValueError(f"No PNG files found matching pattern: {pattern}")
        frame_files_per_video.append(frame_files)
    total_frames_list = [len(files) for files in frame_files_per_video]

    # Extract protein names for output naming.
    protein_names = [extract_protein_name(fp) for fp in frame_patterns]
    proteins_combined = "-".join(protein_names)

    # Build the final output file name, including time_multiplier and fps_output.
    out_dir = os.path.dirname(output_file_path)
    # Format time_multiplier and fps_output for filename (avoid decimal point if possible)
    tm_str = f"{int(time_multiplier)}" if int(time_multiplier) == time_multiplier else f"{time_multiplier}"
    fps_str = f"{int(fps_output)}" if int(fps_output) == fps_output else f"{fps_output}"
    final_output_file = os.path.join(
        out_dir,
        f"{proteins_combined}_{channel}_synced_tm{tm_str}_fps{fps_str}.avi"
    )

    # The movie covers the longest sequence so that no sequence is cut short.
    total_times = [frames * interval for frames, interval in zip(total_frames_list, frame_intervals)]
    total_time_sync = max(total_times)

    # Compute time step.
    base_time_step = min(frame_intervals)
    time_step = base_time_step * time_multiplier

    # Determine number of output frames.
    num_output_frames = int(total_time_sync / time_step)

    # Calculate playback duration.
    playback_duration_sec = num_output_frames / fps_output
    print(f"Movie duration: {playback_duration_sec:.2f} seconds")

    # Get dimensions from first frame of first directory.
    first_frame = cv2.imread(frame_files_per_video[0][0])
    if first_frame is None:
        raise ValueError(f"Could not read first frame from {frame_files_per_video[0][0]}")

    cell_height, cell_width = first_frame.shape[:2]

    # Calculate output dimensions.
    output_frame_height = n_rows * cell_height
    output_frame_width = n_cols * cell_width

    # Create video writer and fail loudly if it could not be opened; otherwise
    # every write() below would be silently dropped.
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out = cv2.VideoWriter(final_output_file, fourcc, fps_output, (output_frame_width, output_frame_height))
    if not out.isOpened():
        out.release()
        raise IOError(f"Could not open video writer for {final_output_file}")

    # Loop invariants: one shared black frame for padding and unreadable frames.
    black = np.zeros((cell_height, cell_width, 3), dtype=np.uint8)
    padding = [black] * (grid_cells - n_videos)

    # Per-sequence cache of the last decoded frame: consecutive output frames often
    # map to the same source frame, so it is only re-read when the index changes.
    cached_index = [None] * n_videos
    cached_frame = [None] * n_videos

    try:
        for i in tqdm(range(num_output_frames), desc="Processing frames", unit="frame"):
            current_time = i * time_step

            for idx, (frame_files, frame_interval) in enumerate(zip(frame_files_per_video, frame_intervals)):
                # Nearest source frame for this time, clamped to the last available frame.
                frame_idx = min(int(round(current_time / frame_interval)), total_frames_list[idx] - 1)
                if frame_idx == cached_index[idx]:
                    continue

                frame = cv2.imread(frame_files[frame_idx])
                if frame is None:
                    # If the frame cannot be read, show a black frame instead.
                    frame = black
                cached_frame[idx] = cv2.resize(frame, (cell_width, cell_height))
                cached_index[idx] = frame_idx

            # Lay the cells out row by row; hstack/vstack copy, so cached frames stay intact.
            cells = cached_frame + padding
            grid_rows = [np.hstack(cells[r * n_cols:(r + 1) * n_cols]) for r in range(n_rows)]
            out.write(np.vstack(grid_rows))
    finally:
        # Always finalize the file, even if processing is interrupted.
        out.release()

    print(f"Output saved to: {final_output_file}")



# Example usage: each key is a glob pattern selecting the PNG heatmap frames of one
# condition; each value is that condition's frame interval in seconds.
frames_data = {

    "../../../../Thomson Lab Dropbox/David Larios/activedrops/microscope/091325-kbio_txtl-strep_noStrep/5ulTMBorEM-2ulkbio1uMdilutedinStreporNA_/output_data/movies/K401-BIO + Strep_Rep1_heatmaps_cy5/heatmap_frame_*.png": 60,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/microscope/091325-kbio_txtl-strep_noStrep/5ulTMBorEM-2ulkbio1uMdilutedinStreporNA_/output_data/movies/K401-BIO + H2O_Rep1_heatmaps_cy5/heatmap_frame_*.png": 60,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/main/101324-k401-titration-rt/2p5TMB-1ulDNA_/output_data/movies/K401_DNA_Rep1_heatmaps_cy5/heatmap_frame_*.png": 60,
    "../../../../Thomson Lab Dropbox/David Larios/activedrops/main/101324-k401-titration-rt/2p5TMB-1ulDNA_/output_data/movies/negative_Rep1_heatmaps_cy5/heatmap_frame_*.png": 60,


}


# Only the directory part of this path is used (os.path.dirname); the trailing slash
# makes that the "movies" folder itself. The file name is generated by the function.
output_file_path = "../../../../Thomson Lab Dropbox/David Larios/activedrops/main/all/movies/"

# The output time step is the smallest frame interval (60 s here) times time_multiplier,
# so 10 samples one output frame every 600 s; 1 would give the finest temporal resolution.
time_multiplier = 10
grid_shape = (1, 4)  # (rows, columns): one row holding the four patterns above

synchronize_frames_from_directories(frames_data, time_multiplier, grid_shape, 
                                   channel="cy5", fps_output=30, output_file_path=output_file_path)

Movie duration: 10.67 seconds


Processing frames: 100%|██████████| 320/320 [02:56<00:00,  1.81frame/s]

Output saved to: ../../../../Thomson Lab Dropbox/David Larios/activedrops/main/all/movies/K401-BIO + Strep_Rep1-K401-BIO + H2O_Rep1-K401_DNA-negative_Rep1_cy5_synced_tm10_fps30.avi



