In [1]:
# File management
import glob
import os
import shutil
import csv

# Data processing
import numpy as np
import pandas as pd

# Plotting
import matplotlib.pyplot as plt
from skimage import io
from scipy.integrate import solve_ivp
from scipy.optimize import curve_fit, minimize
from scipy.ndimage import gaussian_filter1d
from scipy.stats import norm
from PIL import Image, ImageEnhance, ImageOps

# Utilities
import multiprocessing as mp
from multiprocessing import Pool, cpu_count
mp.set_start_method('fork', force=True)
from ipywidgets import interact, FloatSlider, Layout, interactive
import random
from tqdm import tqdm
import itertools
import cv2
from natsort import natsorted
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import seaborn as sns

from scipy.ndimage import gaussian_filter1d  # Import for Gaussian smoothing

# Set up logging
import logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")


File management: consolidating raw acquisition folders and organizing them by condition

In [2]:

def consolidate_images(base_dir):
    """
    Merge all experimental folders found in ``base_dir`` into a single folder.

    Every sub-directory of ``base_dir`` is treated as one experimental folder.
    The consolidated folder is ``base_dir/<common prefix of the folder names>``
    and receives one sub-folder per position folder found in the first
    experimental folder. Files are moved (not copied) and renamed:

    * files whose name contains 'cy5', 'gfp', 'dapi' or 'brightfield'
      (case-insensitive) become ``img_<counter>_<part2>_<part3><ext>``, where the
      counter is kept per channel and per position and the parts come from
      splitting the original stem on '_';
    * ``metadata.txt`` becomes ``metadata_<counter>.txt`` (per position);
    * ``display_and_comments.txt`` of each experimental folder becomes
      ``display_and_comments_<counter>.txt`` in the consolidated folder.

    Source folders that end up empty are removed. If the consolidated folder
    already exists with more than one sub-folder, the function assumes the work
    has been done and returns without touching anything.

    Args:
        base_dir (str): Directory containing the experimental folders.
    """
    # Every sub-directory of base_dir is one experimental folder.
    folders = [f for f in glob.glob(os.path.join(base_dir, '*/')) if os.path.isdir(f)]
    if not folders:
        print(f"No experimental folders found in '{base_dir}'. Nothing to consolidate.")
        return

    # Natural order: '..._2' must be processed before '..._10', otherwise the
    # running frame counters below would interleave acquisitions out of order.
    folders.sort(key=lambda f: _natural_sort_key(os.path.basename(os.path.normpath(f))))

    # Derive the new directory name from the common prefix
    common_prefix = os.path.commonprefix([os.path.basename(os.path.normpath(f)) for f in folders])
    new_dir = os.path.join(base_dir, common_prefix)

    # Check if the new directory already exists and has more than one subfolder
    if os.path.exists(new_dir) and len([f for f in os.listdir(new_dir) if os.path.isdir(os.path.join(new_dir, f))]) > 1:
        print(f"Consolidation appears to be already done. Directory '{new_dir}' already exists with subfolders.")
        return

    os.makedirs(new_dir, exist_ok=True)

    # The first experimental folder defines which position folders exist.
    first_folder_path = folders[0]
    pos_folders = sorted(
        (d for d in os.listdir(first_folder_path) if os.path.isdir(os.path.join(first_folder_path, d))),
        key=_natural_sort_key,
    )

    for pos in pos_folders:
        os.makedirs(os.path.join(new_dir, pos), exist_ok=True)

    # Channel keywords are tested in this order; the first one found in the
    # lower-cased file name decides which counter is used.
    channel_keywords = ('cy5', 'gfp', 'dapi', 'brightfield')
    image_counters = {channel: {pos: 0 for pos in pos_folders} for channel in channel_keywords}
    metadata_counter = {pos: 0 for pos in pos_folders}
    comments_counter = 0

    for folder in folders:
        for pos in pos_folders:
            current_pos_folder_path = os.path.join(folder, pos)

            if not os.path.exists(current_pos_folder_path):
                print(f"Warning: {current_pos_folder_path} does not exist.")
                continue

            for image in sorted(os.listdir(current_pos_folder_path)):
                old_image_path = os.path.join(current_pos_folder_path, image)
                image_lower = image.lower()
                channel = next((c for c in channel_keywords if c in image_lower), None)

                if channel is not None:
                    prefix, ext = os.path.splitext(image)
                    parts = prefix.split('_')
                    if len(parts) < 4:
                        # Cannot build the new name; leave the file in place
                        # instead of aborting a half-finished consolidation.
                        print(f"Warning: unexpected file name '{old_image_path}'. Skipping.")
                        continue
                    new_image_name = f'img_{image_counters[channel][pos]:09d}_{parts[2]}_{parts[3]}{ext}'
                    image_counters[channel][pos] += 1
                elif image == 'metadata.txt':
                    # Rename metadata.txt so files from different folders do not overwrite each other
                    new_image_name = f'metadata_{metadata_counter[pos]:03d}.txt'
                    metadata_counter[pos] += 1
                else:
                    continue

                new_image_path = os.path.join(new_dir, pos, new_image_name)

                try:
                    shutil.move(old_image_path, new_image_path)
                except Exception as e:
                    print(f"Error moving {old_image_path} to {new_image_path}: {e}")

            # If the Pos folder is empty after moving images, delete it
            if not os.listdir(current_pos_folder_path):
                try:
                    os.rmdir(current_pos_folder_path)
                    print(f"Deleted empty folder: {current_pos_folder_path}")
                except Exception as e:
                    print(f"Error deleting folder {current_pos_folder_path}: {e}")

        # One comments file per experimental folder: handled once per folder,
        # independently of whether any position folder was present.
        comments_file_path = os.path.join(folder, 'display_and_comments.txt')
        if os.path.exists(comments_file_path):
            new_comments_path = os.path.join(new_dir, f'display_and_comments_{comments_counter:03d}.txt')
            comments_counter += 1
            try:
                shutil.move(comments_file_path, new_comments_path)
            except Exception as e:
                print(f"Error moving {comments_file_path} to {new_comments_path}: {e}")

        # If the experimental folder is now empty, delete it
        if not os.listdir(folder):
            try:
                os.rmdir(folder)
                print(f"Deleted empty folder: {folder}")
            except Exception as e:
                print(f"Error deleting folder {folder}: {e}")

    # Report how many files ended up in each consolidated position folder
    print("\nChecking final consolidated folders:")
    for pos in pos_folders:
        pos_folder_path = os.path.join(new_dir, pos)
        if os.path.exists(pos_folder_path):
            count = len([f for f in os.listdir(pos_folder_path) if os.path.isfile(os.path.join(pos_folder_path, f))])
        else:
            count = 0
        print(f"Images in final {pos_folder_path}: {count}")

    print("Renaming, moving, and cleanup completed.")


def _natural_sort_key(name):
    """Sort key that compares runs of digits numerically ('x_2' < 'x_10')."""
    import re  # local import: only this helper needs it

    # re.split with a capturing group always alternates text, digits, text, ...
    # so keys of any two names are comparable position by position.
    return [int(token) if token.isdigit() else token for token in re.split(r'(\d+)', name)]



def organize_conditions(data_path, conditions_dict):
    """
    Organizes PosX folders into condition folders as specified by conditions_dict.

    For every condition a folder ``data_path/<condition>`` is created (if
    missing) and each listed PosX folder is moved from ``data_path`` into it.
    Missing source folders are reported and skipped. A PosX folder whose
    destination already exists is also skipped, because moving a directory onto
    an existing directory would nest it (``<condition>/PosX/PosX``).

    Args:
        data_path (str): Path to the data directory.
        conditions_dict (dict): Dictionary where keys are condition names and values are
            either a single PosX folder name (str) or a list of PosX folder names.
    """
    for condition, pos_folders in conditions_dict.items():
        # Create condition folder if it doesn't exist
        condition_path = os.path.join(data_path, condition)
        os.makedirs(condition_path, exist_ok=True)

        # Ensure pos_folders is a list, even if only one PosX is provided
        if isinstance(pos_folders, str):
            pos_folders = [pos_folders]

        for pos_folder in pos_folders:
            src_path = os.path.join(data_path, pos_folder)
            dest_path = os.path.join(condition_path, pos_folder)

            if not os.path.exists(src_path):
                print(f"Warning: {src_path} does not exist. Skipping.")
                continue

            if os.path.exists(dest_path):
                # shutil.move would place src_path *inside* the existing directory.
                print(f"Warning: {dest_path} already exists. Skipping {src_path}.")
                continue

            shutil.move(src_path, dest_path)

def reorgTiffsToOriginal(data_path, conditions, subconditions):
    """
    Renames subconditions as RepX and moves the raw data to the "original" folder.
    
    Args:
        data_path (str): Path to the data directory.
        conditions (list): List of conditions.
        subconditions (list): List of subconditions.
    """
    for condition in conditions:
        # Get the actual subconditions in the directory
        actual_subconditions = [name for name in os.listdir(os.path.join(data_path, condition)) if os.path.isdir(os.path.join(data_path, condition, name))]
        
        # Ensure subconditions list matches the number of actual subconditions
        actual_subconditions.sort()
        matched_subconditions = subconditions[:len(actual_subconditions)]
        
        # Rename the actual subconditions to match the subconditions in your list
        for i, actual_subcondition in enumerate(actual_subconditions):
            os.rename(os.path.join(data_path, condition, actual_subcondition), os.path.join(data_path, condition, matched_subconditions[i]))
        
        for subcondition in matched_subconditions:
            # Construct the path to the subcondition directory
            subcondition_path = os.path.join(data_path, condition, subcondition)
            
            # Create the path for the "original" directory within the subcondition directory
            original_dir_path = os.path.join(subcondition_path, "original")
            
            # Always create the "original" directory
            os.makedirs(original_dir_path, exist_ok=True)
            
            # Iterate over all files in the subcondition directory
            for filename in os.listdir(subcondition_path):
                # Check if the file is a .tif file
                if filename.endswith(".tif"):
                    # Construct the full path to the file
                    file_path = os.path.join(subcondition_path, filename)
                    
                    # Construct the path to move the file to
                    destination_path = os.path.join(original_dir_path, filename)
                    
                    # Move the file to the "original" directory
                    shutil.move(file_path, destination_path)
            print(f"Moved .tif files from {subcondition_path} to {original_dir_path}")

def prepare_conditions(data_path):
    """
    Lists the condition folders and generates 'RepX' subcondition names.

    Nothing is renamed on disk here; the function only inspects the directory
    tree and builds the name lists.

    Args:
        data_path (str): Path to the data directory.

    Returns:
        conditions (list): Naturally sorted names of the sub-directories of
            data_path, excluding 'output_data'.
        subconditions (list): ['Rep1', ..., 'RepN'], where N is the largest number
            of sub-directories found in any condition (empty if there are no
            conditions).
    """
    # List conditions while ignoring 'output_data'
    conditions = natsorted([
        f for f in os.listdir(data_path)
        if os.path.isdir(os.path.join(data_path, f)) and f != 'output_data'
    ])

    # Number of sub-directories inside each condition folder
    subfolder_counts = [
        sum(
            os.path.isdir(os.path.join(data_path, condition, f))
            for f in os.listdir(os.path.join(data_path, condition))
        )
        for condition in conditions
    ]

    # max() of an empty sequence raises; an empty data directory yields no subconditions
    max_num_subconditions = max(subfolder_counts) if subfolder_counts else 0

    # 1-based replicate names
    subconditions = [f'Rep{i+1}' for i in range(max_num_subconditions)]

    return conditions, subconditions




def reorgTiffs_Split_dapi(data_path, conditions, subconditions, file_interval=None):
    """
    Copies 'DAPI' images from each 'original' folder into a sibling
    'dapi-<interval>x' folder, taking every ``file_interval``-th file.

    A file counts as a DAPI image when its name ends in '.tif' and contains
    'dapi' (both case-insensitive). Files are selected from the name-sorted
    list. A subcondition is skipped when all files that would be copied are
    already present in the destination folder.

    Args:
        data_path (str): Path to the data directory.
        conditions (list): List of conditions.
        subconditions (list): List of subconditions.
        file_interval (int or None): Option to copy every nth file. If None, every
            file is copied (treated as 1, so the folder is named 'dapi-1x').

    Raises:
        ValueError: If file_interval is smaller than 1.
    """
    # Resolve the default once, before any path is built, so every subcondition
    # gets the same destination folder name.
    if file_interval is None:
        file_interval = 1
    if file_interval < 1:
        raise ValueError(f"file_interval must be a positive integer or None, got {file_interval}")

    for condition in conditions:
        for subcondition in subconditions:
            # Construct the path to the 'original' directory within the subcondition
            original_dir_path = os.path.join(data_path, condition, subcondition, "original")

            if not os.path.exists(original_dir_path):
                print(f"Error: The original directory {original_dir_path} does not exist.")
                continue

            # Create the directory for the DAPI channel
            dapi_dir = os.path.join(data_path, condition, subcondition, f"dapi-{file_interval}x")
            os.makedirs(dapi_dir, exist_ok=True)

            # All DAPI .tif files, then every file_interval-th of them
            dapi_files = [f for f in sorted(os.listdir(original_dir_path))
                          if f.lower().endswith(".tif") and "dapi" in f.lower()]
            selected_files = dapi_files[::file_interval]

            # If all expected files are already copied, skip this subcondition
            already_copied_files = set(os.listdir(dapi_dir))
            if all(filename in already_copied_files for filename in selected_files):
                print(f"Skipping {subcondition} as the expected output is already present.")
                continue

            for filename in selected_files:
                shutil.copy(os.path.join(original_dir_path, filename), os.path.join(dapi_dir, filename))

            print(f"Copied every {file_interval}th 'DAPI' file from {original_dir_path} into {dapi_dir}.")

In [3]:
# Example usage
# base_dir is the folder that holds the individual experimental folders.
# NOTE: consolidate_images MOVES files and deletes source folders that become
# empty, so the raw layout is not preserved after this cell runs.
base_dir = '../../../../../../mnt/c/Users/Admin/Thomson Lab Dropbox/David Larios/activedrops/main/083024-ABCDEFGH-RT/'
consolidate_images(base_dir)

Consolidation appears to be already done. Directory '../../../../../../mnt/c/Users/Admin/Thomson Lab Dropbox/David Larios/activedrops/main/083024-ABCDEFGH-RT/3ulTMB-0p5ulDNA_all50nM_' already exists with subfolders.


In [4]:
# Example usage
# data_path is the consolidated folder produced by consolidate_images
# (base_dir joined with the common prefix of the experimental folder names).
data_path = "../../../../../../mnt/c/Users/Admin/Thomson Lab Dropbox/David Larios/activedrops/main/083024-ABCDEFGH-RT/3ulTMB-0p5ulDNA_all50nM_/"

# Calibration images, sorted by file name. In a glob pattern '***' matches the
# same names as a single '*'.
# NOTE(review): fluorescence_heatmap pairs these paths, in this order, with the
# concentrations [0, 2, 5, 10, 20, 40, 80, 160, 320]; that only works if the
# lexicographic file-name order equals ascending concentration (e.g. zero-padded
# names) - confirm.
calibration_curve_paths = sorted(glob.glob("../../../../../../mnt/c/Users/Admin/Thomson Lab Dropbox/David Larios/activedrops/main/calibration_curve/***ugml.tif"))


# Condition name -> PosX folder(s) belonging to it. A single folder may be given
# as a plain string; organize_conditions wraps it in a list.
conditions_dict = {
    "A": "Pos0", 
    "B": "Pos1",
    "C": "Pos2",
    "D": "Pos3",
    "E": "Pos4", 
    "F": "Pos5",
    "G": "Pos6",
    "H": "Pos7"
}

# Organize PosX folders into condition folders
organize_conditions(data_path, conditions_dict)

# Now run the existing functions to reorganize the tiffs and rename the folders
# (conditions are discovered from the directory tree, so folders not listed in
# conditions_dict also show up here)
conditions, subconditions = prepare_conditions(data_path)
time_interval_list = [45] * len(conditions)  # time intervals in seconds between frames for each condition

print("Conditions:", conditions)
print("Subconditions:", subconditions)

Conditions: ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'negative']
Subconditions: ['Rep1']


In [5]:
# Rename each condition's sub-folders to the RepX names and move their .tif files
# into an "original" sub-folder (uses the variables defined in the previous cell).
reorgTiffsToOriginal(data_path, conditions, subconditions)


Moved .tif files from ../../../../../../mnt/c/Users/Admin/Thomson Lab Dropbox/David Larios/activedrops/main/083024-ABCDEFGH-RT/3ulTMB-0p5ulDNA_all50nM_/A/Rep1 to ../../../../../../mnt/c/Users/Admin/Thomson Lab Dropbox/David Larios/activedrops/main/083024-ABCDEFGH-RT/3ulTMB-0p5ulDNA_all50nM_/A/Rep1/original
Moved .tif files from ../../../../../../mnt/c/Users/Admin/Thomson Lab Dropbox/David Larios/activedrops/main/083024-ABCDEFGH-RT/3ulTMB-0p5ulDNA_all50nM_/B/Rep1 to ../../../../../../mnt/c/Users/Admin/Thomson Lab Dropbox/David Larios/activedrops/main/083024-ABCDEFGH-RT/3ulTMB-0p5ulDNA_all50nM_/B/Rep1/original
Moved .tif files from ../../../../../../mnt/c/Users/Admin/Thomson Lab Dropbox/David Larios/activedrops/main/083024-ABCDEFGH-RT/3ulTMB-0p5ulDNA_all50nM_/C/Rep1 to ../../../../../../mnt/c/Users/Admin/Thomson Lab Dropbox/David Larios/activedrops/main/083024-ABCDEFGH-RT/3ulTMB-0p5ulDNA_all50nM_/C/Rep1/original
Moved .tif files from ../../../../../../mnt/c/Users/Admin/Thomson Lab Dropbo

Fluorescence analysis and video creation: per-frame heatmaps and movies

In [6]:


def ensure_output_dir(output_dir):
    """
    Create output_dir, including missing parent directories, if it does not exist.

    Uses exist_ok=True instead of a separate existence check, so the call is
    also safe when another process creates the directory at the same time.

    Args:
        output_dir (str): Directory that must exist after the call.
    """
    os.makedirs(output_dir, exist_ok=True)

def calculate_mean_intensity(path):
    """Read the image stored at ``path`` and return the mean of its pixel values."""
    pixels = io.imread(path)
    return pixels.mean()

def calculate_protein_concentration(mean_intensity, intercept, slope):
    """
    Invert a linear calibration ``intensity = slope * concentration + intercept``.

    Note the parameter order: intercept comes before slope.

    Args:
        mean_intensity: Measured intensity (scalar or array).
        intercept: Intercept of the calibration line.
        slope: Slope of the calibration line.

    Returns:
        Concentration in the units of the calibration standards (ng/ul according
        to the naming used in this notebook). No conversion to nM happens here.
    """
    return (mean_intensity - intercept) / slope

def calculate_protein_concentration_nM(conc_ng_ul, mw_kda):
    """
    Convert a mass concentration in ng/ul to a molar concentration in nM.

    Args:
        conc_ng_ul: Concentration in ng/ul.
        mw_kda: Molecular weight in kDa.

    Returns:
        Concentration in nM.
    """
    grams_per_litre = conc_ng_ul * 1e-3   # 1 ng/ul == 1e-3 g/L
    grams_per_mole = mw_kda * 1e3         # 1 kDa == 1e3 g/mol
    return grams_per_litre / grams_per_mole * 1e9  # mol/L -> nM

def calculate_number_of_protein_molecules(protein_mass, mw_kda):
    """
    Estimate the number of protein molecules from a mass and a molecular weight.

    Args:
        protein_mass: Protein mass. NOTE(review): the factor 6e14 looks like
            Avogadro's number (~6.022e23) scaled by 1e-9, which would make this a
            mass in ng - confirm.
        mw_kda: Molecular weight in kDa.

    Returns:
        protein_mass * 6e14 / (mw_kda * 1e3).
    """
    molar_mass = mw_kda * 1e3
    scaled_mass = protein_mass * 6e14
    return scaled_mass / molar_mass

def convert_time_units(time_values_s):
    """
    Express time values given in seconds also in minutes and hours.

    Args:
        time_values_s: Time in seconds (scalar or array supporting division).

    Returns:
        tuple: (seconds as passed in, minutes, hours).
    """
    return time_values_s, time_values_s / 60, time_values_s / 3600

def process_image(args):
    """
    Render one image as a grayscale heatmap PNG (worker used with a process pool).

    Args:
        args (tuple): ``(image_file, output_directory_path, channel, slope,
            intercept, vmax, time_interval, i, show_scalebar, min_frame,
            skip_frames, condition, subcondition)``.

            * ``slope`` / ``intercept``: linear calibration coefficients, used
              for every channel except 'cy5'.
            * ``i``: frame number used in the output file name and, relative to
              ``min_frame``, for the time stamp in the title.
            * ``time_interval``: seconds between consecutive acquired frames.

    The figure is written to ``output_directory_path/heatmap_frame_<i>.png``.
    """
    (image_file, output_directory_path, channel, slope, intercept, vmax,
     time_interval, i, show_scalebar, min_frame, skip_frames,
     condition, subcondition) = args

    # Read the image into a numpy array
    intensity_matrix = io.imread(image_file)

    if channel == "cy5":
        # cy5 is only rescaled by a fixed factor of 1000 (no calibration curve)
        matrix_to_plot = intensity_matrix / 1000
        label = 'Normalized Fluorescence Intensity'
    else:
        # calculate_protein_concentration takes (mean_intensity, intercept, slope);
        # pass the coefficients by keyword so they cannot be swapped.
        matrix_to_plot = calculate_protein_concentration(intensity_matrix, intercept=intercept, slope=slope)
        # ng/ul -> nM for a molecular weight of 27 kDa:
        # c * 1e-3 / (27 * 1e3) * 1e9 == c / 27000 * 1e6
        matrix_to_plot = matrix_to_plot / 27000 * 1E6
        label = 'Protein concentration (nM)'

    # Time elapsed since min_frame, accounting for skipped frames
    elapsed_s = (i - min_frame) * time_interval * skip_frames

    # Plot the heatmap
    fig, ax = plt.subplots(figsize=(12, 12))
    im = ax.imshow(matrix_to_plot, cmap='gray', interpolation='nearest', vmin=0, vmax=vmax)

    if show_scalebar:
        fig.colorbar(im, ax=ax, label=label)
    ax.set_title(f"Time (min): {elapsed_s / 60:.2f} \nTime (h): {elapsed_s / 3600:.2f} \n{condition} - {subcondition} - {channel}", fontsize=20)
    ax.set_xlabel('x [µm]')
    ax.set_ylabel('y [µm]')
    ax.grid(True, color='#d3d3d3', linewidth=0.5, alpha=0.5)

    # Save the heatmap and free the figure (many figures are created per run)
    heatmap_filename = f"heatmap_frame_{i}.png"
    heatmap_path = os.path.join(output_directory_path, heatmap_filename)
    fig.savefig(heatmap_path, bbox_inches='tight', pad_inches=0.1, dpi=200)
    plt.close(fig)

def fluorescence_heatmap(data_path, conditions, subconditions, channel, time_interval_list, vmax, min_frame, max_frame=None, skip_frames=1, calibration_curve_paths=None, show_scalebar=True, batch_size=100):
    """
    Reads each image as a matrix, creates, and saves a heatmap representing the normalized pixel-wise fluorescence intensity.

    Heatmaps are written to
    ``data_path/output_data/movies/<condition>_<subcondition>_heatmaps_<channel>``;
    an existing folder of that name is deleted first.

    Args:
    - data_path (str): Base directory where the images are stored.
    - conditions (list): List of conditions defining subdirectories within the data path.
    - subconditions (list): List of subconditions defining further subdirectories.
    - channel (str): Channel specifying the fluorescence ('cy5' or 'gfp').
    - time_interval_list (list): List of time intervals in seconds between frames for each condition.
    - vmax (float): Maximum value for color scale in the heatmap.
    - min_frame (int): Minimum frame number to start processing from.
    - max_frame (int): Maximum frame number to stop processing at.
    - skip_frames (int): Interval to skip frames (default is 1, meaning process every frame).
    - calibration_curve_paths (list): List of file paths for the calibration curve images.
      Required (nine images) for every channel except 'cy5'.
    - show_scalebar (bool): Whether to show the color scale bar in the heatmap.
    - batch_size (int): Number of images to process in each batch to avoid memory overload.

    Raises:
    - ValueError: If a calibration is needed and the number of calibration
      images does not match the number of sample concentrations.
    """
    output_data_dir = os.path.join(data_path, "output_data", "movies")
    ensure_output_dir(output_data_dir)

    # The calibration does not depend on condition/subcondition: fit it once,
    # and fail before any existing output folder is deleted.
    slope, intercept = None, None
    if channel != "cy5":
        slope, intercept = _fit_calibration_curve(calibration_curve_paths)

    for idx, condition in enumerate(conditions):
        time_interval = time_interval_list[idx]

        for subcondition in subconditions:
            input_directory_path = os.path.join(data_path, condition, subcondition, "original")
            output_directory_path = os.path.join(output_data_dir, f"{condition}_{subcondition}_heatmaps_{channel}")

            # Create the output directory if it doesn't exist, or clear it if it does
            if os.path.exists(output_directory_path):
                shutil.rmtree(output_directory_path)
            os.makedirs(output_directory_path, exist_ok=True)

            # Channel images, restricted to the requested frame range and stride
            image_files = sorted(glob.glob(os.path.join(input_directory_path, f"*{channel}*.tif")))[min_frame:max_frame:skip_frames]

            # Progress bar for the entire subcondition
            with tqdm(total=len(image_files), desc=f"Processing {condition} - {subcondition}", leave=True, dynamic_ncols=True) as pbar:
                # Process images in batches to avoid memory overload
                for batch_start in range(0, len(image_files), batch_size):
                    batch_files = image_files[batch_start:batch_start + batch_size]

                    # Frame numbers continue across batches and start at min_frame
                    args = [(image_file, output_directory_path, channel, slope, intercept, vmax, time_interval, i, show_scalebar, min_frame, skip_frames, condition, subcondition)
                            for i, image_file in enumerate(batch_files, start=batch_start + min_frame)]

                    # A fresh pool per batch, as before
                    with mp.Pool(mp.cpu_count()) as pool:
                        for _ in pool.imap(process_image, args):
                            pbar.update(1)


def _fit_calibration_curve(calibration_curve_paths):
    """Fit a line (slope, intercept) of mean calibration-image intensity versus sample concentration."""
    sample_concentration_values = [0, 2, 5, 10, 20, 40, 80, 160, 320]

    # len(None) would raise TypeError while building the message below
    num_images = 0 if calibration_curve_paths is None else len(calibration_curve_paths)
    if num_images != len(sample_concentration_values):
        raise ValueError(f"Mismatch in lengths: {num_images} calibration images, {len(sample_concentration_values)} sample concentrations")

    mean_intensity_calibration = [calculate_mean_intensity(path) for path in calibration_curve_paths]
    slope, intercept = np.polyfit(sample_concentration_values, mean_intensity_calibration, 1)
    return slope, intercept

def prepare_conditions(data_path):
    """
    Lists the condition folders and generates 'RepX' subcondition names.

    NOTE: this cell redefines prepare_conditions and therefore shadows the
    definition in the file-management cell. It is kept in agreement with that
    earlier definition: the number of replicates is the maximum found in any
    condition, not the count of the first condition only. Nothing is renamed on
    disk here.

    Args:
        data_path (str): Path to the data directory.

    Returns:
        conditions (list): Naturally sorted names of the sub-directories of
            data_path, excluding 'output_data'.
        subconditions (list): ['Rep1', ..., 'RepN'], where N is the largest number
            of sub-directories found in any condition (empty if there are no
            conditions).
    """
    # List conditions while ignoring 'output_data'
    conditions = natsorted([
        f for f in os.listdir(data_path)
        if os.path.isdir(os.path.join(data_path, f)) and f != 'output_data'
    ])

    # Number of sub-directories inside each condition folder
    subfolder_counts = [
        sum(
            os.path.isdir(os.path.join(data_path, condition, f))
            for f in os.listdir(os.path.join(data_path, condition))
        )
        for condition in conditions
    ]

    # Guard the empty case instead of indexing conditions[0]
    num_subconditions = max(subfolder_counts) if subfolder_counts else 0

    # 1-based replicate names
    subconditions = [f'Rep{i+1}' for i in range(num_subconditions)]

    return conditions, subconditions

def process_video_creation(args):
    """
    Write a list of image files into one MJPG-encoded video file.

    Args:
        args (tuple): ``(image_files, out_path, frame_rate)`` where
            ``image_files`` is the ordered list of frame image paths,
            ``out_path`` the video file to write and ``frame_rate`` the frames
            per second.

    The frame size is taken from the first image (all images are assumed to have
    the same size). Nothing is written when ``image_files`` is empty; frames
    that cannot be read are reported and skipped.

    Raises:
        ValueError: If the first image cannot be read.
    """
    image_files, out_path, frame_rate = args

    if not image_files:
        print(f"No images provided for {out_path}. Skipping video creation.")
        return

    # cv2.imread returns None (it does not raise) when a file cannot be read
    first_image = cv2.imread(image_files[0])
    if first_image is None:
        raise ValueError(f"Could not read image: {image_files[0]}")
    video_resolution = (first_image.shape[1], first_image.shape[0])  # Width x Height

    # Define the codec and create VideoWriter object
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out = cv2.VideoWriter(out_path, fourcc, frame_rate, video_resolution)

    try:
        for file in image_files:
            img = cv2.imread(file)
            if img is None:
                print(f"Warning: could not read {file}. Skipping frame.")
                continue
            out.write(img)  # Write the image as a frame in the video
    finally:
        # Always finalize the file, even if a frame fails
        out.release()

def create_movies(data_path, conditions, subconditions, channel, frame_rate=30, max_frame=None, skip_frames=1, batch_size=100):
    """
    Creates video files from heatmaps stored in the specified directory.

    For each condition/subcondition the PNG files in
    ``data_path/output_data/movies/<condition>_<subcondition>_heatmaps_<channel>``
    are written, in natural file-name order, to
    ``data_path/output_data/movies/<condition>_<subcondition>_<channel>.avi``.

    Args:
    - data_path (str): Base path where the heatmaps are stored.
    - conditions (list): List of conditions defining subdirectories within the data path.
    - subconditions (list): List of subconditions defining further subdirectories.
    - channel (str): The specific channel being processed ('cy5' or 'gfp').
    - frame_rate (int): Frame rate for the output video. Defaults to 30.
    - max_frame (int, optional): Maximum number of frames to be included in the video. If None, all frames are included.
    - skip_frames (int): Interval to skip frames (default is 1, meaning process every frame).
    - batch_size (int): Kept for backward compatibility. Frames are read and
      written one at a time, so batching has no effect on memory use.
    """
    output_data_dir = os.path.join(data_path, "output_data", "movies")
    ensure_output_dir(output_data_dir)

    for condition in conditions:
        for subcondition in subconditions:
            images_dir = os.path.join(output_data_dir, f"{condition}_{subcondition}_heatmaps_{channel}")
            video_filename = f"{condition}_{subcondition}_{channel}.avi"
            out_path = os.path.join(output_data_dir, video_filename)

            # Get all .png files in the folder (stride first, then cap the count)
            image_files = natsorted(glob.glob(os.path.join(images_dir, "*.png")))[::skip_frames]
            if max_frame is not None:
                image_files = image_files[:max_frame]

            if len(image_files) == 0:
                print(f"No images found for {condition} - {subcondition} in {channel}.")
                continue

            # Calculate the video duration
            video_duration = len(image_files) / frame_rate
            print(f"Creating video for {condition} - {subcondition} with duration: {video_duration:.2f} seconds.")

            # Frame size comes from the first image (all images assumed equal in size).
            # cv2.imread returns None instead of raising when a file cannot be read.
            first_image = cv2.imread(image_files[0])
            if first_image is None:
                print(f"Warning: could not read {image_files[0]}. Skipping video for {condition} - {subcondition}.")
                continue
            video_resolution = (first_image.shape[1], first_image.shape[0])  # Width x Height

            # Define the codec and create the VideoWriter object
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            out = cv2.VideoWriter(out_path, fourcc, frame_rate, video_resolution)

            try:
                with tqdm(total=len(image_files), desc=f"Creating video for {condition} - {subcondition}", leave=True, dynamic_ncols=True) as pbar:
                    for image_file in image_files:
                        img = cv2.imread(image_file)
                        if img is None:
                            print(f"Warning: could not read {image_file}. Skipping frame.")
                        else:
                            out.write(img)  # Write the image as a frame in the video
                        pbar.update(1)
            finally:
                # Release the video writer even if a frame fails
                out.release()

def process_frame(args):
    """
    Build one combined frame: a grid with the heatmap of every
    condition/subcondition at ``frame_index``, saved as a PNG.

    Args:
        args (tuple): ``(frame_index, temp_img_dir, conditions, subconditions,
            channel, grid_rows, grid_cols, data_path)``.

    Layout: with more than one subcondition, subconditions are rows and
    conditions are columns; with a single subcondition the panels fill the grid
    row by row. When a series has fewer frames than ``frame_index`` its last
    frame is shown; a series without any heatmap leaves its panel blank.

    The result is written to ``temp_img_dir/combined_frame_<frame_index>.png``.
    """
    frame_index, temp_img_dir, conditions, subconditions, channel, grid_rows, grid_cols, data_path = args

    # squeeze=False always yields a 2D array of axes, whatever the grid shape
    fig, axes = plt.subplots(grid_rows, grid_cols, figsize=(grid_cols * 6, grid_rows * 6), squeeze=False)
    fig.subplots_adjust(hspace=0.1, wspace=0.1)  # Adjust spacing

    multiple_subconditions = len(subconditions) > 1
    plot_index = 0

    # Loop through each condition and subcondition
    for col_idx, condition in enumerate(conditions):
        for row_idx, subcondition in enumerate(subconditions):
            if multiple_subconditions:
                ax = axes[row_idx, col_idx]
            else:
                ax = axes[plot_index // grid_cols, plot_index % grid_cols]
            # Count the slot even when it stays blank so later panels keep their place
            plot_index += 1

            images_dir = os.path.join(data_path, "output_data", "movies", f"{condition}_{subcondition}_heatmaps_{channel}")
            image_files = natsorted(glob.glob(os.path.join(images_dir, "*.png")))

            if not image_files:
                # Nothing rendered for this series: leave the panel empty
                continue

            # Use the requested frame, or the last available one if the series is shorter
            image_path = image_files[frame_index] if frame_index < len(image_files) else image_files[-1]
            img = io.imread(image_path)

            ax.imshow(img, cmap='gray', vmin=0, vmax=img.max())

    # No panel shows axes, whether it is used, blank or unused
    for ax in axes.flat:
        ax.axis('off')

    # Save the combined frame
    combined_image_path = os.path.join(temp_img_dir, f"combined_frame_{frame_index:04d}.png")
    fig.savefig(combined_image_path, bbox_inches='tight', pad_inches=0)
    plt.close(fig)

def create_combined_heatmap_movie_custom_grid(data_path, conditions, subconditions, channel, grid_rows=None, grid_cols=None, frame_rate=30, batch_size=50):
    """
    Combines heatmaps from different conditions and subconditions into a single video.
    Allows specifying the number of grid rows and columns or uses an adaptive layout based on subconditions.

    The combined frames are rendered in parallel into
    '<data_path>/output_data/movies/combined_frames' and then encoded to
    '<data_path>/output_data/movies/combined_heatmap_movie_<channel>.avi' (MJPG).

    Args:
    - data_path (str): Base path where the heatmaps are stored.
    - conditions (list): List of conditions defining subdirectories within the data path.
    - subconditions (list): List of subconditions defining further subdirectories.
    - channel (str): The specific channel being processed ('cy5' or 'gfp').
    - grid_rows (int, optional): Number of rows in the grid. If None, calculated adaptively.
    - grid_cols (int, optional): Number of columns in the grid. If None, calculated adaptively.
      Both grid_rows and grid_cols must be given for a custom grid; if either is None
      both are recomputed.
    - frame_rate (int): Frame rate for the output video. Defaults to 30.
    - batch_size (int): Number of frames to process in each batch to avoid memory overload.

    Raises:
    - ValueError: If the requested grid has fewer cells than there are panels.
    """
    total_plots = len(conditions) * len(subconditions)
    if total_plots == 0:
        print("No frames to process. Check if the directories exist and contain images.")
        return

    # Determine grid dimensions if not provided
    if grid_rows is None or grid_cols is None:
        if len(subconditions) == 1:
            # Start from a near-square grid and shrink it while all panels still fit.
            grid_cols = int(np.ceil(np.sqrt(total_plots)))
            grid_rows = int(np.ceil(total_plots / grid_cols))
            while grid_cols * grid_rows >= total_plots:
                if (grid_cols - 1) * grid_rows >= total_plots:
                    grid_cols -= 1
                elif grid_cols * (grid_rows - 1) >= total_plots:
                    grid_rows -= 1
                else:
                    break
        else:
            grid_rows = len(subconditions)
            grid_cols = len(conditions)

    if grid_rows * grid_cols < total_plots:
        raise ValueError(
            f"A {grid_rows}x{grid_cols} grid cannot hold {total_plots} panels; "
            "increase grid_rows/grid_cols or leave them as None."
        )

    # Temporary combined frames live in 'combined_frames' inside the 'movies' directory.
    output_data_dir = os.path.join(data_path, "output_data", "movies")
    combined_frames_dir = os.path.join(output_data_dir, "combined_frames")
    os.makedirs(combined_frames_dir, exist_ok=True)

    # The movie is as long as the longest heatmap sequence.
    heatmap_dirs = [
        os.path.join(output_data_dir, f"{condition}_{subcondition}_heatmaps_{channel}")
        for condition in conditions
        for subcondition in subconditions
    ]
    max_num_frames = max(len(glob.glob(os.path.join(heatmap_dir, "*.png"))) for heatmap_dir in heatmap_dirs)

    if max_num_frames == 0:
        print("No frames to process. Check if the directories exist and contain images.")
        return

    # The frame folder is shared between runs and channels: drop leftovers so frames
    # from an earlier (possibly longer) run cannot end up in this movie.
    for stale_frame in glob.glob(os.path.join(combined_frames_dir, "combined_frame_*.png")):
        os.remove(stale_frame)

    # Calculate and print the video duration
    video_duration = max_num_frames / frame_rate
    print(f"Creating video with duration: {video_duration:.2f} seconds.")

    # One pool serves every batch; batching only bounds how many tasks are queued at once.
    with tqdm(total=max_num_frames, desc="Creating combined frames", leave=True, dynamic_ncols=True) as pbar, \
            mp.Pool(mp.cpu_count()) as pool:
        for batch_start in range(0, max_num_frames, batch_size):
            batch_end = min(batch_start + batch_size, max_num_frames)
            args_list = [
                (
                    frame_index, combined_frames_dir, conditions, subconditions, channel,
                    grid_rows, grid_cols, data_path
                )
                for frame_index in range(batch_start, batch_end)
            ]
            for _ in pool.imap(process_frame, args_list):
                pbar.update(1)

    # Compile the images into a video using OpenCV
    combined_image_files = natsorted(glob.glob(os.path.join(combined_frames_dir, "combined_frame_*.png")))
    if not combined_image_files:
        print(f"No combined frames were produced in {combined_frames_dir}.")
        return

    # The first frame fixes the video resolution.
    first_image = cv2.imread(combined_image_files[0])
    if first_image is None:
        raise ValueError(f"Could not read combined frame: {combined_image_files[0]}")
    height, width = first_image.shape[:2]
    video_resolution = (width, height)

    # Define the codec and create a VideoWriter object
    output_filename = f"combined_heatmap_movie_{channel}.avi"
    output_file = os.path.join(output_data_dir, output_filename)

    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out = cv2.VideoWriter(output_file, fourcc, frame_rate, video_resolution)
    try:
        for image_file in combined_image_files:
            img = cv2.imread(image_file)
            if img is None:
                print(f"Skipping unreadable frame: {image_file}")
                continue
            # 'tight' bounding boxes can vary between frames; normalise each frame
            # to the resolution the writer was opened with.
            if (img.shape[1], img.shape[0]) != video_resolution:
                img = cv2.resize(img, video_resolution)
            out.write(img)  # Write the image as a frame in the video
    finally:
        out.release()

    print(f"Combined video saved to {output_file}")

def delete_temporary_image_directories(data_path, conditions, subconditions):
    """
    Deletes all the temporary directories containing the images used for creating movies, for all channels.

    Only directories are removed: a regular file whose name happens to match the
    '<condition>_<subcondition>_heatmaps_*' pattern is left untouched.

    Args:
    - data_path (str): Base path where the temporary images are stored.
    - conditions (list): List of conditions defining subdirectories within the data path.
    - subconditions (list): List of subconditions defining further subdirectories.
    """
    # Define the output directory
    output_data_dir = os.path.join(data_path, "output_data", "movies")

    for condition in conditions:
        for subcondition in subconditions:
            # The trailing wildcard covers every channel suffix.
            pattern = os.path.join(output_data_dir, f"{condition}_{subcondition}_heatmaps_*")
            for temp_dir in glob.glob(pattern):
                # shutil.rmtree raises on plain files, so skip anything that is not a directory.
                if os.path.isdir(temp_dir):
                    shutil.rmtree(temp_dir)
                    print(f"Deleted temporary directory: {temp_dir}")

    # Delete the 'combined_frames' directory used in combined movie creation
    combined_frames_dir = os.path.join(output_data_dir, "combined_frames")
    if os.path.isdir(combined_frames_dir):
        shutil.rmtree(combined_frames_dir)
        print(f"Deleted temporary images directory: {combined_frames_dir}")

def delete_produced_output_all_channels(output_base_dir, conditions, subconditions):
    """
    Deletes all the produced output including temporary directories and generated files for all channels.

    Everything this notebook writes lives under '<output_base_dir>/output_data', so
    removing that directory removes the heatmap folders, the combined frames and the
    combined movies in one step. The earlier per-condition cleanup that followed the
    removal could never find anything, because it searched inside the directory that
    had just been deleted.

    Args:
    - output_base_dir (str): The base directory where output files are stored.
    - conditions (list): List of conditions defining subdirectories within the output base directory.
      Unused; kept so existing calls keep working.
    - subconditions (list): List of subconditions defining further subdirectories.
      Unused; kept so existing calls keep working.
    """
    output_data_dir = os.path.join(output_base_dir, "output_data")

    # Nothing to do when the output directory was never created or is already gone.
    if os.path.exists(output_data_dir):
        shutil.rmtree(output_data_dir)
        print(f"Deleted main output directory: {output_data_dir}")


In [None]:
# Run fluorescence_heatmap on the cy5 channel for every condition/subcondition,
# with vmax=24 and the scale bar switched off.
# NOTE(review): presumably this produces the per-frame PNGs under
# output_data/movies/<condition>_<subcondition>_heatmaps_cy5 that the movie cells read — confirm.
fluorescence_heatmap(
    data_path, 
    conditions, 
    subconditions, 
    channel='cy5', 
    time_interval_list=time_interval_list, 
    vmax=24, 
    skip_frames=1, 
    calibration_curve_paths=calibration_curve_paths, 
    show_scalebar=False,
    )

In [None]:
# Call create_movies for the cy5 channel (frame_rate=120, every frame kept).
# data_path, conditions and subconditions must already be defined in the kernel.
create_movies(
    data_path, 
    conditions, 
    subconditions, 
    channel='cy5', 
    frame_rate=120,
    skip_frames=1
    )


In [None]:
# Tile the cy5 heatmaps of all conditions/subconditions into a 2x4 grid and encode
# them as output_data/movies/combined_heatmap_movie_cy5.avi at 120 fps.
create_combined_heatmap_movie_custom_grid(
    data_path, 
    conditions, 
    subconditions, 
    channel='cy5', 
    grid_rows=2, 
    grid_cols=4, 
    frame_rate=120,
    batch_size=50
    )


In [None]:
# Call the function
fluorescence_heatmap(
    data_path, 
    conditions, 
    subconditions, 
    channel='GFP', 
    time_interval_list=time_interval_list, 
    vmax=500, 
    skip_frames=1, 
    calibration_curve_paths=calibration_curve_paths, 
    show_scalebar=True,
    )

In [None]:
# Call create_movies for the GFP channel (frame_rate=120, every frame kept).
# data_path, conditions and subconditions must already be defined in the kernel.
create_movies(
    data_path, 
    conditions, 
    subconditions, 
    channel='GFP', 
    frame_rate=120,
    skip_frames=1
    )


In [None]:
# Tile the GFP heatmaps of all conditions/subconditions into a 2x4 grid and encode
# them as output_data/movies/combined_heatmap_movie_GFP.avi at 120 fps
# (batch_size is left at its default of 50).
create_combined_heatmap_movie_custom_grid(
    data_path, 
    conditions, 
    subconditions, 
    channel='GFP', 
    grid_rows=2, 
    grid_cols=4, 
    frame_rate=120,
    )


In [None]:
# Clean-up: remove the per-condition '<condition>_<subcondition>_heatmaps_*' entries and
# the 'combined_frames' folder under <data_path>/output_data/movies.
delete_temporary_image_directories(data_path, conditions, subconditions)

In [None]:
# WARNING: destructive. This removes the whole <data_path>/output_data directory and
# everything stored inside it. Run only when the outputs are no longer needed.
delete_produced_output_all_channels(data_path, conditions, subconditions)


fluorescence quantification

In [None]:

def ensure_output_dir(output_dir):
    """
    Create output_dir (including missing parents) if it does not exist yet.

    Uses exist_ok=True rather than a separate existence check, so two processes
    creating the same directory at the same time cannot make the call fail.

    Args:
    - output_dir (str): Directory path to create.
    """
    os.makedirs(output_dir, exist_ok=True)

def calculate_mean_intensity(path):
    """Read the image at `path` and return the mean of all its pixel values."""
    pixels = io.imread(path)
    return pixels.mean()

def subtract_negative_intensity(paths, negative_paths, skip_frames=1):
    """
    Subtract the mean intensity of the negative control images from the raw data.

    Frame i of the sample is paired with frame i of the negative control after the
    same frame skipping has been applied to both lists.

    Args:
    - paths (list): Sample image paths, in frame order.
    - negative_paths (list): Negative-control image paths, in frame order.
    - skip_frames (int): Keep every skip_frames-th frame of both lists.

    Returns:
    - numpy.ndarray: Sample mean intensity minus negative-control mean intensity, per frame.

    Raises:
    - ValueError: If the two lists end up with different frame counts (a single frame
      on either side is still broadcast against the other, as before).
    """
    # Apply frame skipping
    paths = paths[::skip_frames]
    negative_paths = negative_paths[::skip_frames]

    # Fail before reading any image instead of after all of them, and say why.
    n_samples, n_negatives = len(paths), len(negative_paths)
    if n_samples != n_negatives and 1 not in (n_samples, n_negatives):
        raise ValueError(
            f"Cannot subtract negative control: {n_samples} sample frames vs "
            f"{n_negatives} negative-control frames after frame skipping."
        )

    # One pool serves both image sets; only the sample pass shows a progress bar.
    with mp.Pool(mp.cpu_count()) as pool:
        sample_intensities = list(tqdm(pool.imap(calculate_mean_intensity, paths), total=len(paths), desc="Calculating sample intensities"))
        negative_intensities = pool.map(calculate_mean_intensity, negative_paths)

    # Subtract negative control intensities from sample intensities
    return np.array(sample_intensities) - np.array(negative_intensities)

def calculate_protein_concentration(mean_intensity, intercept, slope):
    """Invert the linear calibration (intensity = slope * concentration + intercept) to get ng/ul."""
    intensity_above_intercept = mean_intensity - intercept
    return intensity_above_intercept / slope

def calculate_protein_concentration_nM(conc_ng_ul, mw_kda):
    """Convert a protein concentration from ng/ul to nM, given the molecular weight in kDa."""
    grams_per_litre = conc_ng_ul * 1e-3   # 1 ng/ul equals 1e-3 g/l
    grams_per_mole = mw_kda * 1e3         # 1 kDa equals 1e3 g/mol
    molar = grams_per_litre / grams_per_mole
    return molar * 1e9                    # mol/l -> nmol/l

def calculate_number_of_protein_molecules(protein_mass, mw_kda):
    """
    Convert a protein mass into a number of molecules for a molecular weight given in kDa.

    NOTE(review): the factor 6e14 looks like Avogadro's number (rounded to 6e23) scaled
    by 1e-9 for a mass in ng — confirm the intended mass unit.
    """
    scaled_mass = protein_mass * 6e14
    grams_per_mole = mw_kda * 1e3
    return scaled_mass / grams_per_mole

def convert_time_units(time_values_s):
    """Return the input seconds unchanged, together with the same times in minutes and in hours."""
    seconds_per_minute = 60
    seconds_per_hour = 3600
    return (
        time_values_s,
        time_values_s / seconds_per_minute,
        time_values_s / seconds_per_hour,
    )

def add_derivative_column(df, time_column, value_column, new_column_name):
    """
    Calculate the derivative of a value column with respect to time and add it as a new column.

    The derivative is the backward finite difference between consecutive rows,
    smoothed with a Gaussian filter (sigma = 2 samples). The first row has no
    predecessor and is set to NaN. Smoothing is applied only to the defined
    differences: passing the leading NaN through the filter would spread it over
    every row within the kernel radius and blank out the start of the curve.

    Args:
    - df (pandas.DataFrame): Table to extend; modified in place.
    - time_column (str): Name of the time column.
    - value_column (str): Name of the column to differentiate.
    - new_column_name (str): Name of the column that receives the smoothed derivative.

    Returns:
    - pandas.DataFrame: The same `df` object, with the new column added or overwritten.
    """
    times = df[time_column].to_numpy(dtype=float)
    values = df[value_column].to_numpy(dtype=float)

    rate = np.full(len(df), np.nan)
    if len(df) > 1:
        raw_rate = np.diff(values) / np.diff(times)
        # Apply Gaussian smoothing with sigma = 2
        rate[1:] = gaussian_filter1d(raw_rate, sigma=2, mode='nearest')

    df[new_column_name] = rate
    return df

def calculate_translation_rate(df, time_column, protein_molecules_column, protein_length, ribosome_count, droplet_volume_ul):
    """
    Add a 'Translation Rate aa_s' column to `df` (modified in place) and return it.

    The value is (molecules / ribosome_count) divided by
    (time * protein_length * droplet_volume_ul * 1e-6). Rows whose time is 0 produce
    inf or NaN.

    NOTE(review): an amino-acid rate would normally multiply by the protein length and
    not depend on the droplet volume — confirm this formula is the intended one.
    """
    molecules_per_ribosome = df[protein_molecules_column] / ribosome_count
    denominator = df[time_column] * protein_length * droplet_volume_ul * 1e-6
    df['Translation Rate aa_s'] = molecules_per_ribosome / denominator
    return df

def quantify_tiffiles(data_path, conditions, subconditions, calibration_curve_paths, mw_kda_list, droplet_volume_list, time_interval_s_list, protein_lengths_list, ribosome_count=10**11, skip_frames=1, subtract_negative=True, negative_condition='negative'):
    """
    Process images to calculate protein concentration and generate plots, with an option to skip frames.

    For every condition/subcondition the GFP images under
    '<data_path>/<condition>/<subcondition>/original' are reduced to a mean intensity
    per frame, converted to concentrations through a linear calibration fit, and
    extended with molecule counts, their rate of change and a translation rate.
    Two CSV files are written to '<data_path>/output_data' and plot_results is called.

    Args:
    - data_path (str): Base path of the experiment.
    - conditions (list): Condition folder names.
    - subconditions (list): Subcondition folder names.
    - calibration_curve_paths (list): Calibration images; after sorting they must line
      up with the concentrations 0, 2, 5, 10, 20, 40, 80, 160, 320.
    - mw_kda_list, droplet_volume_list, time_interval_s_list, protein_lengths_list (list):
      Per-condition parameters, indexed like `conditions`.
    - ribosome_count (int): Passed to calculate_translation_rate.
    - skip_frames (int): Keep every skip_frames-th frame.
    - subtract_negative (bool): Subtract the negative-control intensity when available.
    - negative_condition (str): Folder name of the negative control.

    Returns:
    - tuple: (combined_csv_path, mean_csv_path).

    Raises:
    - ValueError: If no images were found for any condition/subcondition.
    """
    all_data = []

    # Sort the calibration curve paths
    calibration_curve_paths = sorted(calibration_curve_paths)

    # Calibration curve data and fit
    sample_concentration_values = [0, 2, 5, 10, 20, 40, 80, 160, 320]
    with mp.Pool(mp.cpu_count()) as pool:
        mean_intensity_calibration = pool.map(calculate_mean_intensity, calibration_curve_paths)
    slope, intercept = np.polyfit(sample_concentration_values, mean_intensity_calibration, 1)

    for idx, condition in enumerate(conditions):
        mw_kda = mw_kda_list[idx]
        droplet_volume = droplet_volume_list[idx]
        time_interval_s = time_interval_s_list[idx]
        protein_length = protein_lengths_list[idx]

        for subcondition in subconditions:
            pattern = os.path.join(data_path, condition, subcondition, "original", "*GFP*.tif")
            paths = sorted(glob.glob(pattern))

            if not paths:
                print(f"No image files found for condition {condition}, subcondition {subcondition}.")
                continue

            # Find the negative control paths
            negative_pattern = os.path.join(data_path, negative_condition, subcondition, "original", "*GFP*.tif")
            negative_paths = sorted(glob.glob(negative_pattern))

            if subtract_negative and negative_paths:
                mean_intensity_list = subtract_negative_intensity(paths, negative_paths, skip_frames)
            else:
                if subtract_negative:
                    print(f"No negative control images found for subcondition {subcondition}; using raw intensities.")
                # Frame skipping must happen on this path too: the time axis below
                # assumes consecutive samples are skip_frames frames apart.
                paths = paths[::skip_frames]
                with mp.Pool(mp.cpu_count()) as pool:
                    mean_intensity_list = list(tqdm(pool.imap(calculate_mean_intensity, paths), total=len(paths), desc=f"Calculating intensities for {condition} - {subcondition}"))

            mean_intensity = np.asarray(mean_intensity_list, dtype=float)
            protein_concentration = calculate_protein_concentration(mean_intensity, intercept, slope)
            protein_concentration_nM = calculate_protein_concentration_nM(protein_concentration, mw_kda)

            # Shift every series so that its minimum is zero.
            mean_intensity = mean_intensity - mean_intensity.min()
            protein_concentration = protein_concentration - protein_concentration.min()
            protein_concentration_nM = protein_concentration_nM - protein_concentration_nM.min()

            time_values_s = np.arange(len(mean_intensity)) * time_interval_s * skip_frames
            time_values_s, time_values_min, time_values_h = convert_time_units(time_values_s)

            protein_mass = protein_concentration * droplet_volume
            df = pd.DataFrame({
                "Condition": condition,
                "Subcondition": subcondition,
                "Time_s": time_values_s,
                "Time_min": time_values_min,
                "Time_h": time_values_h,
                "Mean Intensity": mean_intensity,
                "Protein Concentration_ng_ul": protein_concentration,
                "Protein Concentration_nM": protein_concentration_nM,
            })

            df["Number of Protein Molecules"] = calculate_number_of_protein_molecules(protein_mass, mw_kda)

            # Add the derivative (rate of change) of protein molecules over time
            df = add_derivative_column(df, "Time_s", "Number of Protein Molecules", "Rate of Change of Protein Molecules per Second")

            # Calculate the translation rate in amino acids per second for individual proteins
            df = calculate_translation_rate(df, "Time_s", "Number of Protein Molecules", protein_length, ribosome_count, droplet_volume)

            all_data.append(df)

    if not all_data:
        raise ValueError(f"No GFP images found under {data_path} for the given conditions and subconditions.")

    combined_df = pd.concat(all_data, ignore_index=True)

    mean_df = combined_df.groupby(["Condition", "Time_s", "Time_min", "Time_h"]).mean(numeric_only=True).reset_index()

    # Recompute the derived columns on the averaged curves one condition at a time:
    # a derivative taken over the whole table would difference the last frame of one
    # condition against the first frame of the next, and each condition has its own
    # protein length and droplet volume.
    condition_parameters = {
        condition: (protein_lengths_list[idx], droplet_volume_list[idx])
        for idx, condition in enumerate(conditions)
    }
    mean_parts = []
    for condition, condition_mean in mean_df.groupby("Condition", sort=True):
        condition_mean = condition_mean.copy()
        condition_length, condition_volume = condition_parameters[condition]
        condition_mean = add_derivative_column(condition_mean, "Time_s", "Number of Protein Molecules", "Rate of Change of Protein Molecules per Second")
        condition_mean = calculate_translation_rate(condition_mean, "Time_s", "Number of Protein Molecules", condition_length, ribosome_count, condition_volume)
        mean_parts.append(condition_mean)
    mean_df = pd.concat(mean_parts, ignore_index=True)

    output_dir = os.path.join(data_path, "output_data")
    ensure_output_dir(output_dir)

    combined_csv_path = os.path.join(output_dir, "combined_experiment.csv")
    combined_df.to_csv(combined_csv_path, index=False)

    mean_csv_path = os.path.join(output_dir, "mean_experiment.csv")
    mean_df.to_csv(mean_csv_path, index=False)

    plot_results(combined_df, mean_df, output_dir, sample_concentration_values, mean_intensity_calibration, slope, intercept, subtract_negative=subtract_negative, negative_condition=negative_condition)

    return combined_csv_path, mean_csv_path



def _save_metric_plot(labelled_groups, time_unit, metric, xlabel, ylabel, title, output_path, dpi, log_scale=False):
    """Draw one metric-versus-time line plot (one line per labelled group) and save it to output_path."""
    plt.figure(dpi=dpi)
    for label, group in labelled_groups:
        plt.plot(group[time_unit], group[metric], label=label)

    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(title)
    if log_scale:
        plt.yscale('log')
    if metric == 'Translation Rate aa_s':
        # The translation rate is always shown in a fixed window.
        if log_scale:
            plt.ylim(1e-3, 1)
        else:
            plt.ylim(0, 1)
    plt.legend()
    if log_scale:
        plt.grid(True, which="both", ls="--")
    else:
        plt.grid(True)
    plt.savefig(output_path, dpi=dpi)
    plt.close()


def plot_results(df, mean_df, output_dir, sample_concentration_values, mean_intensity_calibration, slope, intercept, subtract_negative=False, negative_condition='negative'):
    """
    Generate and save plots for the experimental data.

    Writes the calibration curve to '<output_dir>/calibration' and, for every numeric
    metric and every 'Time_*' column, a linear and a log-scale line plot of
    - each condition/subcondition ('combined_plots', 'combined_plots_log'), and
    - each condition mean ('mean_plots', 'mean_plots_log').

    Args:
    - df (pandas.DataFrame): Per-subcondition data with 'Condition' and 'Subcondition' columns.
    - mean_df (pandas.DataFrame): Per-condition averaged data with a 'Condition' column.
    - output_dir (str): Directory that receives the plot sub-folders.
    - sample_concentration_values (list): Calibration concentrations (x values).
    - mean_intensity_calibration (list): Calibration mean intensities (y values).
    - slope, intercept (float): Linear calibration fit.
    - subtract_negative (bool): If True, the negative condition is left out of the plots.
    - negative_condition (str): Name of the negative-control condition.

    Side effects:
    - Sets the global matplotlib rcParams 'figure.figsize' and 'image.cmap'.
    """
    calibration_dir = os.path.join(output_dir, "calibration")
    combined_plots_dir = os.path.join(output_dir, "combined_plots")
    mean_plots_dir = os.path.join(output_dir, "mean_plots")
    combined_log_plots_dir = os.path.join(output_dir, "combined_plots_log")
    mean_log_plots_dir = os.path.join(output_dir, "mean_plots_log")
    for plot_dir in (calibration_dir, combined_plots_dir, mean_plots_dir, combined_log_plots_dir, mean_log_plots_dir):
        ensure_output_dir(plot_dir)

    plt.rcParams["figure.figsize"] = [12, 8]
    plt.rcParams["image.cmap"] = "viridis"
    dpi_setting = 200

    # Plot calibration curve
    plt.figure(dpi=dpi_setting)
    plt.scatter(sample_concentration_values, mean_intensity_calibration, label="Data Points")
    plt.plot(sample_concentration_values, np.polyval([slope, intercept], sample_concentration_values), color='r', label=f"Fit: y = {slope:.2f}x + {intercept:.2f}")
    plt.xlabel("Protein Concentration ng_ul")
    plt.ylabel("Mean Intensity")
    plt.title("Calibration Curve")
    plt.legend()
    plt.grid(True)
    plt.savefig(os.path.join(calibration_dir, "calibration_curve.png"), dpi=dpi_setting)
    plt.close()

    # Time axes are the 'Time_*' columns; metrics are all remaining numeric columns.
    time_units = [(col, col.replace('_', ' ').title()) for col in df.columns if col.startswith("Time_")]
    time_columns = {name for name, _ in time_units}
    numeric_cols = df.select_dtypes(include=['number']).columns
    metrics = [(col, col.replace('_', ' ').title()) for col in numeric_cols if col not in time_columns]

    def keep(condition):
        # The negative control is hidden once it has been subtracted from the samples.
        return not (subtract_negative and condition == negative_condition)

    # The groupings do not depend on the metric or time unit, so build them once.
    combined_groups = [
        (f"{condition} - {subcondition}", group)
        for (condition, subcondition), group in df.groupby(["Condition", "Subcondition"])
        if keep(condition)
    ]
    mean_groups = [
        (f"{condition}", group)
        for condition, group in mean_df.groupby("Condition")
        if keep(condition)
    ]

    # Plot combined data for each metric
    for metric, ylabel in metrics:
        for time_unit, xlabel in time_units:
            _save_metric_plot(
                combined_groups, time_unit, metric, xlabel, ylabel,
                f"Combined {ylabel} over {xlabel} for All Conditions",
                os.path.join(combined_plots_dir, f"combined_{metric}_plot_{time_unit}.png"),
                dpi_setting,
            )
            _save_metric_plot(
                combined_groups, time_unit, metric, xlabel, ylabel,
                f"Combined {ylabel} over {xlabel} for All Conditions (Log Scale)",
                os.path.join(combined_log_plots_dir, f"combined_{metric}_plot_{time_unit}_log.png"),
                dpi_setting, log_scale=True,
            )

    # Plot mean data for each metric
    for metric, ylabel in metrics:
        for time_unit, xlabel in time_units:
            _save_metric_plot(
                mean_groups, time_unit, metric, xlabel, ylabel,
                f"Mean {ylabel} over {xlabel} for All Conditions",
                os.path.join(mean_plots_dir, f"mean_{metric}_plot_{time_unit}.png"),
                dpi_setting,
            )
            _save_metric_plot(
                mean_groups, time_unit, metric, xlabel, ylabel,
                f"Mean {ylabel} over {xlabel} for All Conditions (Log Scale)",
                os.path.join(mean_log_plots_dir, f"mean_{metric}_plot_{time_unit}_log.png"),
                dpi_setting, log_scale=True,
            )


In [None]:
# Example usage
# Per-condition parameters for quantify_tiffiles; the same value is used for every condition.
mw_kda_list = [100] * len(conditions)  # molecular weight in kDa
droplet_volume_list = [2] * len(conditions)  # presumably in ul (passed on as droplet_volume_ul) — confirm
protein_lengths_list = [500] * len(conditions)  # Assuming 500 amino acids per protein

# ribosome_count=10**10 overrides the function default of 10**11.
# time_interval_list and calibration_curve_paths must already be defined in the kernel.
quantify_tiffiles(
    data_path, 
    conditions, 
    subconditions, 
    calibration_curve_paths, 
    mw_kda_list, 
    droplet_volume_list, 
    time_interval_list, 
    protein_lengths_list,
    ribosome_count=10**10,
    skip_frames=1,
    subtract_negative=True
)

PIV

In [12]:
# Convert a single image (helper function for multiprocessing)
def process_single_image(file_name, output_dir, brightness_factor, contrast_factor, num_digits, i):
    """
    Convert one image to an 8-bit grayscale 2048x2048 LZW-compressed TIFF.

    The image is resized with Lanczos resampling, then brightness and contrast are
    scaled (a factor of 1 leaves the image unchanged), and the result is written to
    '<output_dir>/converted_image_<index>.tif', where the index is i + 1 zero-padded
    to num_digits digits.

    Args:
    - file_name (str): Path of the source image.
    - output_dir (str): Directory that receives the converted TIFF.
    - brightness_factor (float): Factor passed to ImageEnhance.Brightness.
    - contrast_factor (float): Factor passed to ImageEnhance.Contrast.
    - num_digits (int): Width of the zero-padded index in the output name.
    - i (int): Zero-based position of the image in the sequence.
    """
    # The context manager closes the source file as soon as the pixels are copied
    # by convert(), so long batches do not accumulate open file handles.
    with Image.open(file_name) as source_image:
        image = source_image.convert("L")
    image_resized = image.resize((2048, 2048), Image.LANCZOS)

    enhancer = ImageEnhance.Brightness(image_resized)
    image_brightened = enhancer.enhance(brightness_factor)
    enhancer = ImageEnhance.Contrast(image_brightened)
    image_contrasted = enhancer.enhance(contrast_factor)

    padded_index = str(i + 1).zfill(num_digits)
    base_file_name = f'converted_image_{padded_index}.tif'
    processed_image_path = os.path.join(output_dir, base_file_name)
    image_contrasted.save(processed_image_path, format='TIFF', compression='tiff_lzw')


# Convert PIVlab images to the right size using multiprocessing
def convert_images(data_path, conditions, subconditions, max_frame, brightness_factor=1, contrast_factor=1, skip_frames=1):
    """
    Convert the PIV movie frames of every condition/subcondition in parallel.

    Reads '*.jpg' frames from '<data_path>/<condition>/<subcondition>/piv_movie' and
    writes the converted TIFFs to '.../piv_movie_converted' via process_single_image.

    Args:
    - data_path (str): Base path of the experiment.
    - conditions (list): Condition folder names.
    - subconditions (list): Subcondition folder names.
    - max_frame (int or None): If truthy, only the first max_frame frames are considered.
    - brightness_factor (float): Brightness factor forwarded to process_single_image.
    - contrast_factor (float): Contrast factor forwarded to process_single_image.
    - skip_frames (int): Keep every skip_frames-th frame (applied after max_frame).
    """
    for condition in tqdm(conditions, desc="Conditions", leave=False):
        for subcondition in tqdm(subconditions, desc="Subconditions", leave=False):
            input_dir = os.path.join(data_path, condition, subcondition, "piv_movie")
            output_dir = os.path.join(data_path, condition, subcondition, "piv_movie_converted")

            os.makedirs(output_dir, exist_ok=True)

            input_files = natsorted(glob.glob(os.path.join(input_dir, '*.jpg')))

            if max_frame:
                input_files = input_files[:max_frame]

            # Apply frame skipping
            input_files = input_files[::skip_frames]

            if not input_files:
                # Nothing to convert: do not start a worker pool for an empty job.
                print(f"No .jpg frames found in {input_dir}. Skipping.")
                continue

            existing_outputs = glob.glob(os.path.join(output_dir, '*.tif'))
            if len(input_files) <= len(existing_outputs):
                # Informational only: the frames are converted again and overwritten.
                print(f"Conversion might already be completed or partial for {output_dir}. Continuing...")

            num_digits = len(str(len(input_files)))
            tasks = [
                (file_name, output_dir, brightness_factor, contrast_factor, num_digits, i)
                for i, file_name in enumerate(input_files)
            ]

            # Use all available cores. Results are collected one by one so the progress
            # bar advances while images are being converted; get() re-raises any
            # exception raised inside a worker.
            with Pool(cpu_count()) as pool, tqdm(total=len(tasks), desc="Converting Images", leave=False) as pbar:
                pending = [pool.apply_async(process_single_image, task) for task in tasks]
                for job in pending:
                    job.get()
                    pbar.update(1)


# Helper function to plot autocorrelation
def plot_autocorrelation_values(data_path, condition, subcondition, frame_id, lambda_tau, results, fitted_values, intervector_distance_microns):
    """
    Save a plot of the autocorrelation curve of one frame together with its fitted decay.

    The figure is written to
    '<data_path>/<condition>/<subcondition>/autocorrelation_plots/autocorrelation_frame_<frame_id>.jpg'.
    The x axis is the lag index multiplied by intervector_distance_microns * 1E6, and
    lambda_tau is drawn as a vertical line at the x position it is given as.
    """
    plot_dir = os.path.join(data_path, condition, subcondition, "autocorrelation_plots")
    os.makedirs(plot_dir, exist_ok=True)

    lag_positions = np.arange(len(results)) * intervector_distance_microns * 1E6

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(lag_positions, results, label='Autocorrelation Values', marker='o', linestyle='-', markersize=5)
    ax.plot(lag_positions, fitted_values, label='Fitted Exponential Decay', linestyle='--', color='red')
    ax.axvline(x=lambda_tau, color='green', linestyle='-.', label=f'Correlation Length = {lambda_tau:.2f} µm')

    ax.set_xlabel('Scaled Lag (µm)')
    ax.set_ylabel('Autocorrelation')
    ax.set_title(f'Autocorrelation Function and Fitted Exponential Decay (Frame {frame_id})')
    ax.legend()
    ax.grid(True, which='both', linestyle='--', linewidth=0.5)

    fig.tight_layout()

    target_file = os.path.join(plot_dir, f'autocorrelation_frame_{frame_id}.jpg')
    fig.savefig(target_file, dpi=200, format='jpg')
    plt.close(fig)


# Helper function to calculate correlation length
def correlation_length(data_frame):
    """
    Estimate the spatial correlation length of the velocity-magnitude field of one PIV frame.

    The field is put on its (y, x) grid, mean-centred, and autocorrelated through the
    FFT. The autocorrelation is sampled along the grid diagonal, normalised to 1 at
    lag 0 and fitted with A * exp(-lag / B) + C. The correlation length is the lag at
    which the fitted curve drops to 0.3, converted with the grid spacing in y.

    Args:
    - data_frame (pandas.DataFrame): One frame with columns 'x [m]', 'y [m]' and
      'velocity magnitude [m/s]'.

    Returns:
    - lambda_tau (float): Correlation length in the units of 'y [m]', or NaN when the
      fit fails or its parameters do not allow the 0.3 crossing to be computed.
    - results (numpy.ndarray): Normalised autocorrelation per diagonal lag.
    - fitted_values (numpy.ndarray): Fitted decay at the same lags (NaN if the fit failed).
    - intervector_distance_microns (float): Grid spacing along y. Despite the name it
      is in the units of 'y [m]'; callers scale it to microns themselves.

    NOTE(review): lags are taken along the diagonal but scaled by the y spacing only —
    confirm that no sqrt(2) factor is intended.
    """
    # Reshaping the data frame to a 2D grid and centering it
    grid = data_frame.pivot(index='y [m]', columns='x [m]', values="velocity magnitude [m/s]")
    v = grid.to_numpy(dtype=float)
    v = v - np.mean(v)

    # FFT to find the power spectrum and compute the autocorrelation
    fft_v = np.fft.fft2(v)
    autocorr = np.real(np.fft.ifft2(fft_v * np.conj(fft_v)))

    # Extract the autocorrelation along the diagonal, averaging the +r and -r lags.
    r_values = min(v.shape) // 2
    lags = np.arange(r_values)
    results = np.array([(autocorr[r, r] + autocorr[-r, -r]) / 2 for r in range(r_values)], dtype=float)

    # Normalise so the curve starts at 1. A constant field has zero autocorrelation and
    # yields NaN here; that is handled below instead of being allowed to crash the fit.
    with np.errstate(divide='ignore', invalid='ignore'):
        results = results / np.max(autocorr)
        if r_values > 0:
            results = results / results[0]

    # Exponential decay fitting to extract the correlation length
    def exponential_decay(x, A, B, C):
        return A * np.exp(-x / B) + C

    params = [np.nan, np.nan, np.nan]  # NaN marks a fit that failed or was skipped
    # The fit needs at least as many points as parameters and only finite data.
    if r_values >= 3 and np.all(np.isfinite(results)):
        try:
            params, _ = curve_fit(exponential_decay, lags, results, p0=(1, 10, 0), maxfev=5000)
        except (RuntimeError, ValueError):
            # RuntimeError: no convergence; ValueError: invalid data for the optimiser.
            params = [np.nan, np.nan, np.nan]

    A, B, C = params
    fitted_values = exponential_decay(lags, *params)

    # n grid rows span (n - 1) intervals between y.min() and y.max().
    n_rows = v.shape[0]
    y_span = data_frame["y [m]"].max() - data_frame["y [m]"].min()
    intervector_distance_microns = y_span / (n_rows - 1) if n_rows > 1 else np.nan

    # Solve A * exp(-x / B) + C = 0.3 for x; this needs a positive logarithm argument.
    lambda_tau = np.nan
    if B > 0 and A != 0:
        ratio = (0.3 - C) / A
        if ratio > 0:
            lambda_tau = -B * np.log(ratio) * intervector_distance_microns

    return lambda_tau, results, fitted_values, intervector_distance_microns


# Load PIV data from PIVlab into dataframes
def load_piv_data(data_path, condition, subcondition, min_frame=0, max_frame=None, skip_frames=1):
    """
    Read the PIVlab text exports of one condition/subcondition into a list of DataFrames.

    Files matching '<data_path>/<condition>/<subcondition>/piv_data/PIVlab_****.txt' are
    sorted by name and sliced with [min_frame:max_frame:skip_frames]. Each file is read
    with its first two lines skipped, missing values are replaced by 0, and four
    columns are renamed to the names used by the rest of the notebook.
    """
    column_renames = {
        "magnitude [m/s]": "velocity magnitude [m/s]",
        "simple shear [1/s]": "shear [1/s]",
        "simple strain [1/s]": "strain [1/s]",
        "Vector type [-]": "data type [-]"
    }

    file_pattern = os.path.join(data_path, condition, subcondition, "piv_data", "PIVlab_****.txt")
    selected_files = sorted(glob.glob(file_pattern))[min_frame:max_frame:skip_frames]

    frames = []
    progress = tqdm(selected_files, desc=f"Loading PIV data for {condition} {subcondition}", leave=False)
    for piv_file in progress:
        frame = pd.read_csv(piv_file, skiprows=2)
        frame = frame.fillna(0)
        frames.append(frame.rename(columns=column_renames))

    return frames

# Generate dataframes from PIV data with time intervals applied
def generate_dataframes_from_piv_data(data_path, condition, subcondition, min_frame=0, max_frame=None, skip_frames=1, plot_autocorrelation=True, time_interval=1):
    """
    Build and save the per-frame mean table and the per-frame feature matrices of one subcondition.

    Both tables are written to ``<data_path>/<condition>/<subcondition>/dataframes_PIV``
    as ``mean_values.csv`` and ``features_matrices.csv``.

    Args:
        data_path (str): Path to the data directory.
        condition (str): Condition folder name.
        subcondition (str): Subcondition folder name.
        min_frame (int): Forwarded to load_piv_data.
        max_frame (int | None): Forwarded to load_piv_data.
        skip_frames (int): Forwarded to load_piv_data.
        plot_autocorrelation (bool): If True, plot_autocorrelation_values is called for every frame.
        time_interval (float): Multiplied with the frame index to obtain "time (s)"
            and with the cumulative velocity to obtain "distance [m]".

    Returns:
        tuple: ``(mean_data_frame, pivot_data_frame)``.
            mean_data_frame has one row per loaded frame; every column except
            "frame" and the three time columns carries a "_mean" suffix.
            pivot_data_frame has one row per loaded frame, one column per feature
            (each cell is the 2-D array produced by the pivot) and a final "frame" column.

    Note:
        The column ORDER of mean_data_frame matters: the plotting functions in this
        notebook slice the saved CSV by position (``iloc[:, 1:-3]``, ``columns[5:-3]``).
    """
    output_directory_dfs = os.path.join(data_path, condition, subcondition, "dataframes_PIV")
    os.makedirs(output_directory_dfs, exist_ok=True)

    # Load PIV data
    data_frames = load_piv_data(data_path, condition, subcondition, min_frame, max_frame, skip_frames)

    # Calculating mean values with valid vectors only
    mean_values = []
    for frame_id, data_frame in enumerate(tqdm(data_frames, desc=f"Generating dataframes for {condition} {subcondition}", leave=False)):
        lambda_tau, results, fitted_values, intervector_distance_microns = correlation_length(data_frame)
        if plot_autocorrelation:
            # lambda_tau is scaled by 1E6 for the plot only (m -> um, going by the column names)
            plot_autocorrelation_values(data_path, condition, subcondition, frame_id, lambda_tau * 1E6, results, fitted_values, intervector_distance_microns)
        # This writes into the frame stored in data_frames, so every loaded frame
        # gains "correlation length [m]" as a new column (same scalar in every row).
        data_frame["correlation length [m]"] = lambda_tau
        # Only rows whose "data type [-]" equals 1 enter the mean; the frames kept in
        # data_frames are not filtered.
        data_frame = data_frame[data_frame["data type [-]"] == 1]
        mean_values.append(data_frame.mean(axis=0))

    # Creating mean DataFrame
    mean_data_frame = pd.DataFrame(mean_values)
    # The positional index (0, 1, 2, ...) becomes the "frame" column
    mean_data_frame.reset_index(drop=False, inplace=True)
    mean_data_frame.rename(columns={'index': 'frame'}, inplace=True)

    # Subtract the minimum row value for each column from the entire column for velocity magnitude
    mean_data_frame["velocity magnitude [m/s]"] = mean_data_frame["velocity magnitude [m/s]"] - mean_data_frame["velocity magnitude [m/s]"].min()
    
    # add a column with total distance travelled
    # (computed from the offset-corrected velocity above)
    mean_data_frame["distance [m]"] = mean_data_frame["velocity magnitude [m/s]"].cumsum() * time_interval
    mean_data_frame["distance [m]"] = mean_data_frame["distance [m]"] - mean_data_frame["distance [m]"].min()

    # Calculate power and add to DataFrame
    volume = 2E-9  # 2 µl expressed in m^3
    viscosity = 1E-3  # 1 mPa*s expressed in Pa*s
    # power = volume * viscosity * (velocity / correlation length)^2; NaN wherever the
    # correlation length is NaN
    mean_data_frame["power [W]"] = volume * viscosity * (mean_data_frame["velocity magnitude [m/s]"]/mean_data_frame["correlation length [m]"])**2

    # Scale time appropriately using the provided time_interval
    mean_data_frame["time (s)"] = mean_data_frame["frame"] * time_interval
    mean_data_frame["time (min)"] = mean_data_frame["time (s)"] / 60
    mean_data_frame["time (h)"] = mean_data_frame["time (min)"] / 60

    # Creating pivot matrices for each feature
    # All columns except the last one, which is the "correlation length [m]" column
    # appended in the loop above. Raises IndexError if no PIV file was loaded.
    features = data_frames[0].columns[:-1]
    pivot_matrices = {feature: [] for feature in features}

    for data_frame in data_frames:
        # One 2-D array per feature: rows follow 'y [m]', columns follow 'x [m]'
        temporary_dictionary = {feature: data_frame.pivot(index='y [m]', columns='x [m]', values=feature).values for feature in features}
        for feature in features:
            pivot_matrices[feature].append(temporary_dictionary[feature])

    pivot_data_frame = pd.DataFrame(pivot_matrices)

    # Adjusting column names in mean_data_frame
    mean_data_frame.columns = [f"{column}_mean" if column not in ["frame", "time (s)", "time (min)", "time (h)"] else column for column in mean_data_frame.columns]
    
    # Adding time column to pivot_data_frame
    # (it holds the frame index; generate_heatmaps_from_dataframes reads it as the last column)
    pivot_data_frame["frame"] = mean_data_frame["frame"].values
    
    # Save DataFrames to CSV
    mean_df_output_path = os.path.join(output_directory_dfs, "mean_values.csv")
    mean_data_frame.to_csv(mean_df_output_path, index=False)

    # NOTE(review): every cell of pivot_data_frame is a 2-D array, so this CSV stores
    # the arrays' text representation - confirm the file is not meant to be read back.
    pivot_df_output_path = os.path.join(output_directory_dfs, "features_matrices.csv")
    pivot_data_frame.to_csv(pivot_df_output_path, index=False)

    return mean_data_frame, pivot_data_frame



# Plot the PIVlab output as heatmaps
def generate_heatmaps_from_dataframes(df, data_path, condition, subcondition, feature_limits, time_interval=3, field_of_view_um=2762):
    """
    Overlay every PIV feature matrix on its converted image and save one JPG per frame.

    Images are read from ``<data_path>/<condition>/<subcondition>/piv_movie_converted``
    and the heatmaps are written to
    ``<data_path>/<condition>/<subcondition>/heatmaps_PIV/<first word of feature>/``.

    Args:
        df (pd.DataFrame): One row per frame. The columns named in ``feature_limits``
            hold the 2-D matrix to draw; the last column is read as the frame number.
        data_path (str): Path to the data directory.
        condition (str): Condition folder name.
        subcondition (str): Subcondition folder name.
        feature_limits (dict): Maps feature column name -> ``(vmin, vmax)`` colour limits.
        time_interval (float): Seconds per frame, used only for the time shown in the title.
        field_of_view_um (float): Side length of the square extent both images are
            drawn on. Defaults to 2762, the value that used to be hard-coded.

    Raises:
        IndexError: If there are fewer converted images than rows in ``df``.
    """
    # The converted images do not change while plotting, so list them once instead
    # of globbing and sorting the directory again for every frame of every feature.
    image_files_pattern = f"{data_path}/{condition}/{subcondition}/piv_movie_converted/converted_image_****.tif"
    image_files = sorted(glob.glob(image_files_pattern))

    half_width = field_of_view_um / 2
    extent = [-half_width, half_width, -half_width, half_width]

    for feature, limits in feature_limits.items():
        vmin, vmax = limits
        feature_tag = feature.split()[0]
        output_directory_heatmaps = os.path.join(data_path, condition, subcondition, "heatmaps_PIV", feature_tag)
        feature_column = df.columns.get_loc(feature)

        for j in tqdm(range(len(df)), desc=f"Generating heatmaps for {condition} {subcondition} {feature}", leave=False):
            vals = df.iloc[j, feature_column]
            frame = df.iloc[j, -1]

            plt.figure(figsize=(10, 6))
            try:
                # imshow copies the pixel data, so the file can be closed right away
                with Image.open(image_files[j]) as image:
                    plt.imshow(image, cmap=None, extent=extent) # piv image
                im = plt.imshow(vals, cmap='inferno', origin='upper', alpha=0.7, extent=extent, vmin=vmin, vmax=vmax) # heatmap
                plt.xlabel('x [um]')
                plt.ylabel('y [um]')
                cbar = plt.colorbar(im)
                cbar.set_label(feature)
                plt.title(f"PIV - {feature}  ||  time: {int(frame * time_interval/60)} min -- {int(frame * time_interval/3600)} hours")

                os.makedirs(output_directory_heatmaps, exist_ok=True)
                plt.savefig(os.path.join(output_directory_heatmaps, f"{feature_tag}_heatmap_{j}.jpg"), format='jpg', dpi=200)
            finally:
                # Close the figure even if plotting or saving failed, so figures
                # do not pile up in memory over a long run.
                plt.close()


def create_movies_PIV(data_path, condition, subcondition, frame_rate, feature_limits=None, max_frame=None):
    """
    Assemble the saved heatmap JPGs of each feature into an MJPG-encoded ``.avi`` movie.

    For every key of ``feature_limits`` the images
    ``heatmaps_PIV/<first word>/<first word>_heatmap_*.jpg`` are read in natural
    order and written to ``heatmaps_PIV/<first word>.avi``.

    Args:
        data_path (str): Path to the data directory.
        condition (str): Condition folder name.
        subcondition (str): Subcondition folder name.
        frame_rate (float): Frames per second of the movie.
        feature_limits (dict | None): Only the keys (feature names) are used.
            None is treated as "no features", so nothing is written.
        max_frame (int | None): Use at most this many images (None = all).
    """
    if feature_limits is None:
        # The default used to crash on `.keys()`; treat it as an empty selection.
        feature_limits = {}

    plots_dir = f"{data_path}/{condition}/{subcondition}/heatmaps_PIV/"
    for feature in feature_limits:
        feature_name_for_file = feature.split()[0]
        heatmap_pattern = os.path.join(data_path, condition, subcondition, "heatmaps_PIV", feature_name_for_file, f"{feature_name_for_file}_heatmap_****.jpg")

        # Slice before the emptiness check so that max_frame=0 is reported as
        # "no images" instead of failing on image_files[0].
        image_files = natsorted(glob.glob(heatmap_pattern))[:max_frame]

        if not image_files:
            print(f"No images found for feature {feature_name_for_file}.")
            continue

        # Get the resolution of the first image (assuming all images are the same size)
        first_image = cv2.imread(image_files[0])
        if first_image is None:
            # cv2.imread returns None instead of raising when a file cannot be decoded
            print(f"Could not read {image_files[0]}; skipping feature {feature_name_for_file}.")
            continue
        video_resolution = (first_image.shape[1], first_image.shape[0])  # Width x Height

        # Define the codec and create VideoWriter object
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out_path = f'{plots_dir}{feature_name_for_file}.avi'
        out = cv2.VideoWriter(out_path, fourcc, frame_rate, video_resolution)

        try:
            for file in tqdm(image_files, desc=f"Creating movie for {condition} {subcondition} {feature}", leave=False):
                img = cv2.imread(file)
                if img is None:
                    print(f"Could not read {file}; frame skipped.")
                    continue
                out.write(img)  # Write the image as is, without resizing
        finally:
            # Always finalise the file, even if a frame failed to be written
            out.release()
        print(f"Video saved to {out_path}")


# Process PIV data for all conditions and subconditions, then average and save results
def process_piv_data(data_path, conditions, subconditions, feature_limits, time_intervals, skip_frames, min_frame=0, max_frame=None, plot_autocorrelation=True, frame_rate=120, heatmaps=True):
    """
    Run the PIV pipeline for every condition/subcondition and save per-condition averages.

    For each subcondition the mean and pivot tables are generated; optionally the
    heatmaps and movies are produced as well. The mean tables of all subconditions
    of a condition are then averaged element-wise and written to
    ``<data_path>/<condition>/averaged/<condition>_average.csv`` (with the index
    as first column).

    Args:
        data_path (str): Path to the data directory.
        conditions (list): Condition folder names.
        subconditions (list): Subcondition folder names, shared by all conditions.
        feature_limits (dict): Feature name -> (vmin, vmax), used for heatmaps and movies.
        time_intervals (list): Seconds between acquired frames, one entry per condition.
        skip_frames (int): Step between loaded frames; also multiplies the time interval.
        min_frame (int): First frame to load.
        max_frame (int | None): One past the last frame to load (None = all).
        plot_autocorrelation (bool): Forwarded to generate_dataframes_from_piv_data.
        frame_rate (float): Frame rate of the generated movies.
        heatmaps (bool): If true, also convert images and create heatmaps and movies.
    """
    if heatmaps and len(conditions) and len(subconditions):
        # convert_images receives the complete condition/subcondition lists, so one
        # call is enough. It used to be repeated with identical arguments for every
        # condition x subcondition pair.
        # NOTE(review): assumes convert_images does not depend on the dataframes
        # generated below - confirm against its definition.
        convert_images(data_path, conditions, subconditions, max_frame=None, brightness_factor=1, contrast_factor=1, skip_frames=skip_frames)

    for i, condition in tqdm(enumerate(conditions), desc="Processing PIV data", total=len(conditions), leave=True):
        # Effective time between two loaded frames
        time_interval = time_intervals[i] * skip_frames
        results = []
        for subcondition in tqdm(subconditions, desc=f"Processing subconditions for {condition}", leave=False):
            m, p = generate_dataframes_from_piv_data(data_path, condition, subcondition, min_frame, max_frame, skip_frames, plot_autocorrelation, time_interval)
            results.append(m)

            if heatmaps:
                generate_heatmaps_from_dataframes(p, data_path, condition, subcondition, feature_limits, time_interval)
                create_movies_PIV(data_path, condition, subcondition, frame_rate, feature_limits=feature_limits, max_frame=max_frame)

        if not results:
            # Nothing to average (no subconditions); avoid dividing by zero
            print(f"Error: No subconditions processed for {condition}; no average saved.")
            continue

        # Averaging and saving the results for the current condition
        save_path = os.path.join(data_path, condition, 'averaged')
        average_df = sum(results) / len(results)

        os.makedirs(save_path, exist_ok=True)  # Ensure the directory exists
        average_df.to_csv(os.path.join(save_path, f"{condition}_average.csv"))
        
        
        
        
# Generate PCA from PIVlab output
def plot_pca(dfs, data_path, conditions, subconditions, features):
    """
    Draw the two-component PCA trajectory of every dataframe on one shared figure.

    A separate PCA is fitted to each dataframe, using the ``features`` columns as
    given (no rescaling). Consecutive frames are joined by line segments whose
    opacity rises from 0.001 to 1 with the frame index.

    The figure is saved to
    ``<data_path>/<conditions[-1]>/<subconditions[-1]>/plots_PIV/PCA.jpg``.

    Args:
        dfs (list): Dataframes, one per (condition, subcondition) pair.
        data_path (str): Path to the data directory.
        conditions (list): Condition name for each dataframe.
        subconditions (list): Subcondition name for each dataframe.
        features (list): Column names fed into the PCA.
    """
    plt.figure(figsize=(10, 6))

    # One colour per entry of `conditions`, taken from Seaborn's "colorblind" palette
    sns.set_palette("colorblind", color_codes=True)
    palette = sns.color_palette("colorblind", n_colors=len(conditions))

    for group_index, (df, condition, subcondition) in enumerate(zip(dfs, conditions, subconditions)):
        group_color = palette[group_index]

        projected = PCA(n_components=2).fit_transform(df.loc[:, features])
        trajectory = pd.DataFrame(data=projected, columns=['principal component 1', 'principal component 2'])
        first_component = trajectory['principal component 1']
        second_component = trajectory['principal component 2']

        # Opacity increases linearly with the frame index
        frame_count = trajectory.shape[0]
        segment_alphas = np.linspace(0.001, 1, frame_count)

        # Each segment is drawn separately so it can carry its own opacity
        for end in range(1, frame_count):
            segment = slice(end - 1, end + 1)
            plt.plot(first_component[segment], second_component[segment],
                     alpha=segment_alphas[end], linestyle='-', linewidth=2, color=group_color)

        # Mark the individual frames on top of the trajectory
        plt.scatter(first_component, second_component,
                    alpha=0.5, label=f'{condition}_{subcondition}', s=10, color=group_color)

    plt.xlabel('Principal Component 1')
    plt.ylabel('Principal Component 2')
    plt.title('PCA of PIV Features (All Samples)')
    plt.legend()
    plt.grid(True)

    pca_image_path = os.path.join(data_path, conditions[-1], subconditions[-1], "plots_PIV", "PCA.jpg")
    os.makedirs(os.path.dirname(pca_image_path), exist_ok=True)
    plt.savefig(pca_image_path, format='jpg', dpi=200)
    plt.close()



def plot_PIV_features(data_path, conditions, subconditions, features_pca, sigma=10, min_frame=0, max_frame=None):
    """
    Plot the smoothed mean PIV features of every subcondition, plus one PCA figure per condition.

    For each subcondition ``dataframes_PIV/mean_values.csv`` is loaded, smoothed and
    plotted feature by feature against time in hours and in minutes. The plots are
    saved in ``<data_path>/<condition>/<subcondition>/plots_PIV``. Subconditions
    without a CSV are reported and skipped.

    Args:
        data_path (str): Path to the data directory.
        conditions (list): List of conditions.
        subconditions (list): List of subconditions for each condition.
        features_pca (list): Column names passed to plot_pca.
        sigma (float): The standard deviation for Gaussian kernel applied for smoothing.
        min_frame (int): The minimum frame to include in the analysis.
        max_frame (int): The maximum frame to include in the analysis.
    """
    meters_to_microns = 1E6
    # (time column, x-axis label, file name suffix) of the two versions of every plot
    time_axes = (("time (h)", 'Time (hours)', "h"), ("time (min)", 'Time (minutes)', "min"))

    for condition in tqdm(conditions, desc="Plotting PIV features", leave=True):
        dfs = []
        loaded_subconditions = []  # kept aligned with dfs

        for subcondition in subconditions:
            # Construct the file path
            file_path = os.path.join(data_path, condition, subcondition, "dataframes_PIV", "mean_values.csv")
            if not os.path.exists(file_path):
                print(f"Error: Data file {file_path} does not exist.")
                continue
            df = pd.read_csv(file_path)

            # Apply Gaussian filter to everything except the first and the last three (time) columns
            df.iloc[:, 1:-3] = df.iloc[:, 1:-3].apply(lambda x: gaussian_filter1d(x, sigma=sigma))

            # Rename columns. "data type [-]_mean" is reused as the slot of the work column.
            df = df.rename(columns={
                "data type [-]_mean": "work [J]",
                "correlation length [m]_mean": "correlation length [um]",
                "velocity magnitude [m/s]_mean": "velocity magnitude [um/s]"
            })

            # The rename only changes the label: rescale the SI values so that they
            # really are in the micron units the new column names announce.
            df["correlation length [um]"] = df["correlation length [um]"] * meters_to_microns
            df["velocity magnitude [um/s]"] = df["velocity magnitude [um/s]"] * meters_to_microns

            # Cumulative work = power integrated over time, so every power sample
            # is weighted with the time step between frames (the first sample
            # uses the following step).
            time_step = df["time (s)"].diff().bfill().fillna(0)
            df["work [J]"] = (df["power [W]_mean"] * time_step).cumsum()

            # Slice the dataframe if min_frame and max_frame are provided
            df = df.iloc[min_frame:max_frame, :]

            dfs.append(df)
            loaded_subconditions.append(subcondition)

        if not dfs:
            # Nothing could be loaded for this condition
            continue

        # Plot PCA
        plot_pca(dfs, data_path, [condition] * len(dfs), loaded_subconditions, features_pca)

        # Plot individual features (selected by column position)
        for feature in dfs[0].columns[5:-3]:
            for df, subcondition in zip(dfs, loaded_subconditions):
                output_directory_plots = os.path.join(data_path, condition, subcondition, "plots_PIV")

                # Ensure the directory exists
                os.makedirs(output_directory_plots, exist_ok=True)

                for time_column, axis_label, suffix in time_axes:
                    # Every plot gets its own figure. Previously only the very first
                    # plot of a feature used this size and all later ones fell back
                    # to an implicit default-size figure.
                    plt.figure(figsize=(12, 8))
                    plt.plot(df[time_column], df[feature], marker='o', linestyle='-', markersize=1, linewidth=1, label=f'{condition}_{subcondition}')
                    plt.xlabel(axis_label)
                    plt.ylabel(feature)
                    plt.title(f"PIV - {feature}")
                    plt.grid(True, which='both', linestyle='--', linewidth=0.5)
                    plt.legend()
                    plt.savefig(os.path.join(output_directory_plots, f"{feature.split()[0]}_{suffix}.jpg"), format='jpg', dpi=200)
                    plt.close()



def plot_PIV_features_averaged(data_path, conditions, features_pca, sigma=2, min_frame=0, max_frame=None):
    """
    Generate PIV plots averaged over subconditions and save them in the 'PIV_plots_averaged' folder.

    Reads ``<data_path>/<condition>/averaged/<condition>_average.csv``; conditions
    without that file are reported and skipped.

    Args:
        data_path (str): Path to the data directory.
        conditions (list): List of conditions.
        features_pca (list): List of features (column names after renaming) to plot.
        sigma (float): The standard deviation for Gaussian kernel applied for smoothing.
        min_frame (int): The minimum frame to include in the analysis.
        max_frame (int): The maximum frame to include in the analysis.
    """
    meters_to_microns = 1E6
    # (time column, x-axis label, file name suffix) of the two versions of every plot
    time_axes = (("time (h)", 'Time (hours)', "h"), ("time (min)", 'Time (minutes)', "min"))

    for condition in tqdm(conditions, desc="Plotting averaged PIV features", leave=True):
        # Path to the averaged data
        averaged_data_path = os.path.join(data_path, condition, "averaged")
        averaged_df_file = os.path.join(averaged_data_path, f"{condition}_average.csv")

        if not os.path.exists(averaged_df_file):
            print(f"Error: Averaged dataframe {averaged_df_file} does not exist.")
            continue

        # Load the averaged dataframe
        df = pd.read_csv(averaged_df_file)

        # Apply Gaussian smoothing to everything except the first and the last three (time) columns
        df.iloc[:, 1:-3] = df.iloc[:, 1:-3].apply(lambda x: gaussian_filter1d(x, sigma=sigma))

        # Rename columns as necessary for consistency in plotting
        df = df.rename(columns={"data type [-]_mean": "work [J]",
                                "correlation length [m]_mean": "correlation length [um]",
                                "velocity magnitude [m/s]_mean": "velocity magnitude [um/s]"})

        # The rename only changes the label: rescale the SI values so that they
        # really are in the micron units the new column names announce.
        df["correlation length [um]"] = df["correlation length [um]"] * meters_to_microns
        df["velocity magnitude [um/s]"] = df["velocity magnitude [um/s]"] * meters_to_microns

        # Cumulative work = power integrated over time, so every power sample is
        # weighted with the time step between frames (the first sample uses the
        # following step).
        time_step = df["time (s)"].diff().bfill().fillna(0)
        df["work [J]"] = (df["power [W]_mean"] * time_step).cumsum()

        # Limit the frames if specified
        df = df.iloc[min_frame:max_frame, :]

        # Prepare output directory
        output_directory_plots = os.path.join(averaged_data_path, "PIV_plots_averaged")
        os.makedirs(output_directory_plots, exist_ok=True)

        # Feature plotting
        for feature in features_pca:
            for time_column, axis_label, suffix in time_axes:
                # A fresh figure per plot: the minutes plot used to be drawn on an
                # implicit default-size figure because the sized one was already closed.
                plt.figure(figsize=(12, 8))
                plt.plot(df[time_column], df[feature], marker='o', linestyle='-', markersize=1, linewidth=1)
                plt.xlabel(axis_label)
                plt.ylabel(feature)
                plt.title(f"Averaged PIV - {feature} over Subconditions")
                plt.grid(True, which='both', linestyle='--', linewidth=0.5)
                plt.savefig(os.path.join(output_directory_plots, f"{feature.split()[0]}_{suffix}.jpg"), format='jpg', dpi=200)
                plt.close()




def plot_PIV_features_combined(data_path, conditions, features_pca, sigma=2, min_frame=0, max_frame=None):
    """
    Generate combined PIV plots across all conditions and save them in 'PIV_plots/averaged_conditions'.

    Each condition contributes one curve per feature, taken from
    ``<data_path>/<condition>/averaged/<condition>_average.csv``; conditions
    without that file are reported and left out.

    Args:
        data_path (str): Path to the data directory.
        conditions (list): List of conditions.
        features_pca (list): List of features (column names after renaming) to plot.
        sigma (float): The standard deviation for Gaussian kernel applied for smoothing.
        min_frame (int): The minimum frame to include in the analysis.
        max_frame (int): The maximum frame to include in the analysis.
    """
    meters_to_microns = 1E6
    # (file name suffix, x-axis label, factor applied to the time in hours)
    time_axes = (("h", 'Time (hours)', 1), ("min", 'Time (minutes)', 60))

    # Prepare output directory for combined plots
    combined_output_dir = os.path.join(data_path, "PIV_plots", "averaged_conditions")
    os.makedirs(combined_output_dir, exist_ok=True)

    # Dictionary to store data for combined plots: feature -> condition -> (time in h, values)
    combined_data = {feature: {} for feature in features_pca}

    for condition in tqdm(conditions, desc="Collecting PIV data", leave=True):
        # Path to the averaged data
        averaged_data_path = os.path.join(data_path, condition, "averaged")
        averaged_df_file = os.path.join(averaged_data_path, f"{condition}_average.csv")

        if not os.path.exists(averaged_df_file):
            print(f"Error: Averaged dataframe {averaged_df_file} does not exist.")
            continue

        # Load the averaged dataframe
        df = pd.read_csv(averaged_df_file)

        # Apply Gaussian smoothing to everything except the first and the last three (time) columns
        df.iloc[:, 1:-3] = df.iloc[:, 1:-3].apply(lambda x: gaussian_filter1d(x, sigma=sigma))

        # Rename columns as necessary for consistency in plotting
        df = df.rename(columns={"data type [-]_mean": "work [J]",
                                "correlation length [m]_mean": "correlation length [um]",
                                "velocity magnitude [m/s]_mean": "velocity magnitude [um/s]"})

        # The rename only changes the label: rescale the SI values so that they
        # really are in the micron units the new column names announce.
        df["correlation length [um]"] = df["correlation length [um]"] * meters_to_microns
        df["velocity magnitude [um/s]"] = df["velocity magnitude [um/s]"] * meters_to_microns

        # Cumulative work = power integrated over time, so every power sample is
        # weighted with the time step between frames (the first sample uses the
        # following step).
        time_step = df["time (s)"].diff().bfill().fillna(0)
        df["work [J]"] = (df["power [W]_mean"] * time_step).cumsum()

        # Limit the frames if specified
        df = df.iloc[min_frame:max_frame, :]

        # Store data for combined plots
        for feature in features_pca:
            combined_data[feature][condition] = (df["time (h)"], df[feature])

    # Generate combined plots: one figure in hours and one in minutes per feature
    for feature in features_pca:
        for suffix, axis_label, time_factor in time_axes:
            plt.figure(figsize=(12, 8))

            for condition, (time, values) in combined_data[feature].items():
                plt.plot(time * time_factor, values, marker='o', linestyle='-', markersize=1, linewidth=1, label=condition)

            plt.xlabel(axis_label)
            plt.ylabel(feature)
            plt.title(f"Combined PIV - {feature} across Conditions")
            plt.grid(True, which='both', linestyle='--', linewidth=0.5)
            plt.legend()
            plt.savefig(os.path.join(combined_output_dir, f"combined_{feature.split()[0]}_{suffix}.jpg"), format='jpg', dpi=200)
            plt.close()


def plot_PIV_features_all_conditions_subconditions(data_path, conditions, subconditions, features_pca, sigma=2, min_frame=0, max_frame=None):
    """
    Generate PIV plots that display all conditions and subconditions together on the same plot for each feature.

    Every ``<condition>/<subcondition>/dataframes_PIV/mean_values.csv`` is loaded
    once; missing files are reported and left out of both the hours and the
    minutes plot. Figures are saved in ``PIV_plots/all_conditions_subconditions``.

    Args:
        data_path (str): Path to the data directory.
        conditions (list): List of conditions.
        subconditions (list): List of subconditions for each condition.
        features_pca (list): List of features to include in plotting.
        sigma (float): The standard deviation for Gaussian kernel applied for smoothing.
        min_frame (int): The minimum frame to include in the analysis.
        max_frame (int): The maximum frame to include in the analysis.
    """
    meters_to_microns = 1E6
    # (file name suffix, x-axis label, factor applied to the time in hours)
    time_axes = (("h", 'Time (hours)', 1), ("min", 'Time (minutes)', 60))

    # Prepare output directory for combined plots
    combined_output_dir = os.path.join(data_path, "PIV_plots", "all_conditions_subconditions")
    os.makedirs(combined_output_dir, exist_ok=True)

    # Load and pre-process every table once. Previously each file was re-read twice
    # per feature, and the second read had no existence check, so a missing file
    # crashed the minutes plot.
    curves = []  # (legend label, dataframe)
    for condition in conditions:
        for subcondition in subconditions:
            # Path to the subcondition data
            subcondition_data_path = os.path.join(data_path, condition, subcondition, "dataframes_PIV", "mean_values.csv")

            if not os.path.exists(subcondition_data_path):
                print(f"Error: Data file {subcondition_data_path} does not exist.")
                continue

            # Load the subcondition dataframe
            df = pd.read_csv(subcondition_data_path)

            # Apply Gaussian smoothing to everything except the first and the last three (time) columns
            df.iloc[:, 1:-3] = df.iloc[:, 1:-3].apply(lambda x: gaussian_filter1d(x, sigma=sigma))

            # Rename columns for consistency in plotting
            df = df.rename(columns={"data type [-]_mean": "work [J]",
                                    "correlation length [m]_mean": "correlation length [um]",
                                    "velocity magnitude [m/s]_mean": "velocity magnitude [um/s]"})

            # The rename only changes the label: rescale the SI values so that they
            # really are in the micron units the new column names announce.
            df["correlation length [um]"] = df["correlation length [um]"] * meters_to_microns
            df["velocity magnitude [um/s]"] = df["velocity magnitude [um/s]"] * meters_to_microns

            # Cumulative work = power integrated over time, so every power sample
            # is weighted with the time step between frames (the first sample
            # uses the following step).
            time_step = df["time (s)"].diff().bfill().fillna(0)
            df["work [J]"] = (df["power [W]_mean"] * time_step).cumsum()

            # Limit the frames if specified
            df = df.iloc[min_frame:max_frame, :]

            curves.append((f'{condition} - {subcondition}', df))

    for feature in features_pca:
        for suffix, axis_label, time_factor in time_axes:
            plt.figure(figsize=(12, 8))

            # Plot each subcondition on the same figure
            for label, df in curves:
                plt.plot(df["time (h)"] * time_factor, df[feature], marker='o', linestyle='-', markersize=1, linewidth=1, label=label)

            plt.xlabel(axis_label)
            plt.ylabel(feature)
            plt.title(f"All Conditions and Subconditions Combined - {feature}")
            plt.grid(True, which='both', linestyle='--', linewidth=0.5)
            plt.legend(loc='best', fontsize='small', ncol=2)  # Adjust legend to fit all entries
            plt.savefig(os.path.join(combined_output_dir, f"combined_{feature.split()[0]}_{suffix}.jpg"), format='jpg', dpi=200)
            plt.close()


In [None]:
# Call reorgTiffs_Split_dapi for condition 'G' only, with file_interval=8.
# `data_path` and `subconditions` must already be defined by earlier cells.
# NOTE(review): this cell has no execution count in the saved notebook - confirm it is still needed.
reorgTiffs_Split_dapi(data_path, ['G'], subconditions, file_interval=8)

In [9]:
# Conditions analysed by the PIV cells below; time_interval_list needs one entry per condition.
conditions = ['C', 'D', 'H']


In [21]:
# Define feature limits and other parameters
v = 2E-7  # velocity scale; in this cell it only feeds the first velocity_limits and the commented-out 'u'/'v' entries
velocity_limits = (0, v)
other_limits = (-0.0005, 0.0005)
time_interval_list = [45, 45, 45]  # time intervals in seconds between frames for each condition
skip_frames = 1 ### CHANGE THIS TO SKIP FRAMES


# NOTE: these two assignments override the fixed limits defined above, so the
# heatmaps receive vmin=None / vmax=None. Remove them to use the fixed limits.
velocity_limits = (None, None)
other_limits = (None, None)


# Feature (column of the pivot dataframe) -> (vmin, vmax) colour limits of its heatmap.
# Only the features listed here get heatmaps and movies.
feature_limits = {
    # 'u [m/s]': (-v, v),
    # 'v [m/s]': (-v, v),
    # 'data type [-]': (None, None),
    'velocity magnitude [m/s]': velocity_limits,
    'vorticity [1/s]': other_limits,
    'divergence [1/s]': other_limits,
    # 'dcev [1]': (0, 250),
    'shear [1/s]': other_limits,
    'strain [1/s]': other_limits,
    'vector direction [degrees]': (-180, 180),
}


# Features for PCA and plotting
# These names must match the columns as they are called AFTER the renaming done
# inside the plot_PIV_features* functions.
features_pca = [
    "vorticity [1/s]_mean",
    "velocity magnitude [um/s]",
    "divergence [1/s]_mean",
    "shear [1/s]_mean",
    "strain [1/s]_mean",
    "correlation length [um]", 
    "power [W]_mean",
    "work [J]",
]


In [22]:
# Process PIV data: build and save the mean tables and per-condition averages.
# Autocorrelation plots, heatmaps and movies are switched off here
# (plot_autocorrelation=False, heatmaps=False), so frame_rate is not used.
process_piv_data(
    data_path, 
    conditions, 
    subconditions, 
    feature_limits, 
    time_interval_list, 
    min_frame=0, 
    max_frame=None, 
    skip_frames=skip_frames, 
    plot_autocorrelation=False, 
    frame_rate=1, 
    heatmaps=False
    )


Processing PIV data:   0%|          | 0/3 [00:00<?, ?it/s]
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
Processing PIV data:  33%|███▎      | 1/3 [00:35<01:10, 35.32s/it]
[

In [23]:
# Plot the features of every subcondition and the PCA of each condition
# (reads the mean_values.csv files written by process_piv_data)
plot_PIV_features(
    data_path, 
    conditions, 
    subconditions, 
    features_pca, 
    min_frame=0, 
    max_frame=None
    )


Plotting PIV features: 100%|██████████| 3/3 [00:10<00:00,  3.53s/it]


In [24]:
# Plot the features averaged over subconditions, one set of figures per condition
# (reads the <condition>_average.csv files written by process_piv_data; no PCA here)
plot_PIV_features_averaged(
    data_path, 
    conditions, 
    features_pca, 
    min_frame=0, 
    max_frame=None
    )

Plotting averaged PIV features: 100%|██████████| 3/3 [00:05<00:00,  1.88s/it]


In [25]:
# Plot the subcondition-averaged features of all conditions together, one figure per feature
# (no PCA here)
plot_PIV_features_combined(
    data_path, 
    conditions, 
    features_pca, 
    min_frame=0, 
    max_frame=None
    )

Collecting PIV data:   0%|          | 0/3 [00:00<?, ?it/s]

Collecting PIV data: 100%|██████████| 3/3 [00:00<00:00, 56.45it/s]


In [26]:
# Plot every condition/subcondition pair as its own curve, one figure per feature
# (no PCA here)
plot_PIV_features_all_conditions_subconditions(
    data_path, 
    conditions, 
    subconditions, 
    features_pca, 
    min_frame=0, 
    max_frame=None
    )
