In [1]:
# file management
import glob
import os
import shutil

# data processing
import numpy as np
import pandas as pd

# plotting
import matplotlib.pyplot as plt
from skimage import io
from scipy.ndimage import gaussian_filter1d

# utilities
import multiprocessing as mp
mp.set_start_method('fork', force=True)
from tqdm import tqdm
from natsort import natsorted
import cv2

# Set up logging
import logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")


def reorgTiffsToOriginal(data_path, conditions, subconditions):
    """
    Args:
        data_path (str): Path to the data directory.
        conditions (list): List of conditions.
        subconditions (list): List of subconditions.
        
    This function renames the subconditions as PosX and moves the raw data to the "original" folder.
    """
    for condition in conditions:
        # Get the actual subconditions in the directory
        actual_subconditions = [name for name in os.listdir(os.path.join(data_path, condition)) if os.path.isdir(os.path.join(data_path, condition, name))]
        
        # Rename the actual subconditions to match the subconditions in your list
        for i, actual_subcondition in enumerate(sorted(actual_subconditions)):
            os.rename(os.path.join(data_path, condition, actual_subcondition), os.path.join(data_path, condition, subconditions[i]))
        
        for subcondition in subconditions:
            # Construct the path to the subcondition directory
            subcondition_path = os.path.join(data_path, condition, subcondition)
            
            # Create the path for the "original" directory within the subcondition directory
            original_dir_path = os.path.join(subcondition_path, "original")
            
            # Always create the "original" directory
            os.makedirs(original_dir_path, exist_ok=True)
            
            # Iterate over all files in the subcondition directory
            for filename in os.listdir(subcondition_path):
                # Check if the file is a .tif file
                if filename.endswith(".tif"):
                    # Construct the full path to the file
                    file_path = os.path.join(subcondition_path, filename)
                    
                    # Construct the path to move the file to
                    destination_path = os.path.join(original_dir_path, filename)
                    
                    # Move the file to the "original" directory
                    shutil.move(file_path, destination_path)
            print(f"Moved .tif files from {subcondition_path} to {original_dir_path}")


def ensure_output_dir(output_dir):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)


def calculate_mean_intensity(path):
    """Calculate mean intensity of an image."""
    return io.imread(path).mean()


def calculate_protein_concentration(mean_intensity, intercept, slope):
    """Calculate protein concentration in ng/ul and nM."""
    conc_ng_ul = (mean_intensity - intercept) / slope
    return conc_ng_ul


def calculate_protein_concentration_nM(conc_ng_ul, mw_kda):
    """Convert protein concentration from ng/ul to nM."""
    conc_nM = (conc_ng_ul * 1e-3) / (mw_kda * 1e3) * 1e9
    return conc_nM


def calculate_number_of_protein_molecules(protein_mass, mw_kda):
    """Calculate number of protein molecules."""
    return (protein_mass * 6e14) / (mw_kda * 1e3)


def convert_time_units(time_values_s):
    """Convert time values from seconds to minutes and hours."""
    time_values_min = time_values_s / 60
    time_values_h = time_values_s / 3600
    return time_values_s, time_values_min, time_values_h


def process_image(args):
    image_file, output_directory_path, channel, slope, intercept, vmax, time_interval, i, show_scalebar, min_frame, skip_frames, condition, subcondition = args
    # Read the image into a numpy array
    intensity_matrix = io.imread(image_file)

    if channel == "Cy5":
        matrix_to_plot = intensity_matrix
        # Use raw intensity for cy5 channel
        label = 'Fluorescence Intensity'
    else:
        # Convert intensity values to protein concentration using the calibration curve
        matrix_to_plot = calculate_protein_concentration(intensity_matrix, slope, intercept)
        matrix_to_plot = matrix_to_plot / 27000 * 1E6
        label = 'Protein concentration (nM)'

    # Plot the heatmap
    fig, ax = plt.subplots(figsize=(12, 12))
    im = ax.imshow(matrix_to_plot, cmap='gray', interpolation='nearest', vmin=0, vmax=vmax)

    if show_scalebar:
        plt.colorbar(im, ax=ax, label=label)
    plt.title(f"Time (min): {(i - min_frame) * time_interval * skip_frames / 60:.2f} \nTime (h): {(i - min_frame) * time_interval * skip_frames / 3600:.2f} \n{condition} - {subcondition} - {channel}", fontsize=20)
    plt.xlabel('x [µm]')
    plt.ylabel('y [µm]')
    plt.grid(True, color='#d3d3d3', linewidth=0.5, alpha=0.5)

    # Save the heatmap
    heatmap_filename = f"heatmap_frame_{i}.png"
    heatmap_path = os.path.join(output_directory_path, heatmap_filename)
    plt.savefig(heatmap_path, bbox_inches='tight', pad_inches=0.1, dpi=300)
    plt.close(fig)


def fluorescence_heatmap(data_path, conditions, subconditions, channel, time_interval_list, min_frame, max_frame, vmax, skip_frames=1, calibration_curve_paths=None, show_scalebar=True):
    """
    Reads each image as a matrix, creates, and saves a heatmap representing the normalized pixel-wise fluorescence intensity.

    Args:
    - data_path (str): Base directory where the images are stored.
    - conditions (list): List of conditions defining subdirectories within the data path.
    - subconditions (list): List of subconditions defining further subdirectories.
    - channel (str): Channel specifying the fluorescence ('cy5' or 'gfp').
    - time_interval_list (list): List of time intervals in seconds between frames for each condition.
    - min_frame (int): Minimum frame number to start processing from.
    - max_frame (int): Maximum frame number to stop processing at.
    - vmax (float): Maximum value for color scale in the heatmap.
    - skip_frames (int): Interval to skip frames (default is 1, meaning process every frame).
    - calibration_curve_paths (list): List of file paths for the calibration curve images.
    - show_scalebar (bool): Whether to show the color scale bar in the heatmap.
    """
    output_data_dir = os.path.join(data_path, "output_data", "movies")
    ensure_output_dir(output_data_dir)

    for idx, condition in enumerate(conditions):
        time_interval = time_interval_list[idx]

        for subcondition in subconditions:
            # Determine the directory paths based on the channel
            input_directory_path = os.path.join(data_path, condition, subcondition, "original")
            output_directory_path = os.path.join(output_data_dir, f"{condition}_{subcondition}_heatmaps_{channel}")

            # Create the output directory if it doesn't exist, or clear it if it does
            if os.path.exists(output_directory_path):
                shutil.rmtree(output_directory_path)
            os.makedirs(output_directory_path, exist_ok=True)

            # Get all .tif files in the folder
            image_files = sorted(glob.glob(os.path.join(input_directory_path, f"*{channel}*.tif")))[min_frame:max_frame:skip_frames]

            # Setup calibration curve for non-cy5 channels
            slope, intercept = None, None
            if channel != "Cy5":
                # Calibration curve data and fit
                sample_concentration_values = [0, 2, 5, 10, 20, 40, 80, 160, 320]

                if calibration_curve_paths is None or len(calibration_curve_paths) != len(sample_concentration_values):
                    raise ValueError(f"Mismatch in lengths: {len(calibration_curve_paths)} calibration images, {len(sample_concentration_values)} sample concentrations")

                with mp.Pool(mp.cpu_count()) as pool:
                    mean_intensity_calibration = pool.map(calculate_mean_intensity, calibration_curve_paths)
                slope, intercept = np.polyfit(sample_concentration_values, mean_intensity_calibration, 1)

            # Prepare arguments for multiprocessing
            args = [(image_file, output_directory_path, channel, slope, intercept, vmax, time_interval, i, show_scalebar, min_frame, skip_frames, condition, subcondition) for i, image_file in enumerate(image_files, start=min_frame)]

            # Use multiprocessing to process images
            with mp.Pool(mp.cpu_count()) as pool:
                list(tqdm(pool.imap(process_image, args), total=len(args), desc=f"Processing {condition} - {subcondition}"))


def prepare_conditions(data_path, num_reps):
    # List conditions while ignoring 'output_data'
    conditions = natsorted([
        f for f in os.listdir(data_path) 
        if os.path.isdir(os.path.join(data_path, f)) and f != 'output_data'
    ])
    
    # Generate subconditions list based on num_reps
    subconditions = [f"Rep{x}" for x in range(1, num_reps + 1)]
    
    return conditions, subconditions


def process_video_creation(args):
    condition, subcondition, images_dir, out_path, frame_rate, max_frame = args

    image_files = natsorted(glob.glob(os.path.join(images_dir, "*.png")))

    if not image_files:
        print(f"No images found for subcondition {subcondition}.")
        return

    # Limit the number of files if max_frame is specified
    image_files = image_files[:max_frame] if max_frame is not None else image_files

    # Get the resolution of the first image (assuming all images are the same size)
    first_image = cv2.imread(image_files[0])
    video_resolution = (first_image.shape[1], first_image.shape[0])  # Width x Height

    # Define the codec and create VideoWriter object
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out = cv2.VideoWriter(out_path, fourcc, frame_rate, video_resolution)

    for file in tqdm(image_files, desc=f"Creating video for {condition}", leave=False):
        img = cv2.imread(file)
        out.write(img)  # Write the image as a frame in the video

    out.release()
    print(f"Video saved to {out_path}")



def ensure_output_dir(output_dir):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

def process_video_creation(args):
    condition, subcondition, images_dir, out_path, frame_rate, max_frame = args

    image_files = natsorted(glob.glob(os.path.join(images_dir, "*.png")))

    if not image_files:
        print(f"No images found for subcondition {subcondition}.")
        return

    # Limit the number of files if max_frame is specified
    image_files = image_files[:max_frame] if max_frame is not None else image_files

    # Get the resolution of the first image (assuming all images are the same size)
    first_image = cv2.imread(image_files[0])
    video_resolution = (first_image.shape[1], first_image.shape[0])  # Width x Height

    # Define the codec and create VideoWriter object
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out = cv2.VideoWriter(out_path, fourcc, frame_rate, video_resolution)

    for file in tqdm(image_files, desc=f"Creating video for {condition} - {subcondition}", leave=False):
        img = cv2.imread(file)
        out.write(img)  # Write the image as a frame in the video

    out.release()
    print(f"Video saved to {out_path}")

def create_movies(data_path, conditions, subconditions, channel, frame_rate=30, max_frame=None, delete_temp_dirs=False):
    """
    Creates video files from heatmaps stored in the specified directory.

    Args:
    - data_path (str): Base path where the heatmaps are stored.
    - conditions (list): List of conditions defining subdirectories within the data path.
    - subconditions (list): List of subconditions defining further subdirectories.
    - channel (str): The specific channel being processed ('cy5' or 'gfp').
    - frame_rate (int): Frame rate for the output video. Defaults to 30.
    - max_frame (int, optional): Maximum number of frames to be included in the video. If None, all frames are included.
    - delete_temp_dirs (bool): If True, deletes temporary heatmap directories after movie creation.
    """
    output_data_dir = os.path.join(data_path, "output_data", "movies")
    ensure_output_dir(output_data_dir)

    args_list = []

    for condition in conditions:
        for subcondition in subconditions:
            images_dir = os.path.join(output_data_dir, f"{condition}_{subcondition}_heatmaps_{channel}")
            video_filename = f"{condition}_{subcondition}_{channel}.avi"
            out_path = os.path.join(output_data_dir, video_filename)

            # Prepare arguments for multiprocessing
            args_list.append((condition, subcondition, images_dir, out_path, frame_rate, max_frame))

    # Use multiprocessing to process video creation for all conditions and subconditions
    with mp.Pool(mp.cpu_count()) as pool:
        list(tqdm(pool.imap(process_video_creation, args_list), total=len(args_list), desc="Creating videos"))

    # Delete temporary directories if specified
    if delete_temp_dirs:
        for condition in conditions:
            for subcondition in subconditions:
                temp_dir = os.path.join(output_data_dir, f"{condition}_{subcondition}_heatmaps_{channel}")
                if os.path.exists(temp_dir):
                    shutil.rmtree(temp_dir)
                    print(f"Deleted temporary directory: {temp_dir}")


def quantify_tiffiles(data_path, conditions, subconditions, calibration_curve_paths, mw_kda_list, droplet_volume_list, time_interval_s_list):
    """Process images to calculate protein concentration and generate plots."""
    all_data = []

    # Sort the calibration curve paths
    calibration_curve_paths = sorted(calibration_curve_paths)

    # Calibration curve data and fit
    sample_concentration_values = [0, 2, 5, 10, 20, 40, 80, 160, 320]
    with mp.Pool(mp.cpu_count()) as pool:
        mean_intensity_calibration = pool.map(calculate_mean_intensity, calibration_curve_paths)
    slope, intercept = np.polyfit(sample_concentration_values, mean_intensity_calibration, 1)

    for idx, condition in enumerate(conditions):
        # Get condition-specific parameters
        mw_kda = mw_kda_list[idx]
        droplet_volume = droplet_volume_list[idx]
        time_interval_s = time_interval_s_list[idx]

        for subcondition in subconditions:
            # Construct paths based on condition and subcondition
            pattern = os.path.join(data_path, condition, subcondition, "original", "img_*********_4x_GFP_000.tif")
            paths = sorted(glob.glob(pattern))

            if not paths:
                print(f"No image files found for condition {condition}, subcondition {subcondition}.")
                continue

            # Calculate mean intensity for samples
            with mp.Pool(mp.cpu_count()) as pool:
                mean_intensity_list = list(tqdm(pool.imap(calculate_mean_intensity, paths), total=len(paths), desc=f"Calculating intensities for {condition} - {subcondition}"))

            # Calculate protein concentrations in ng/ul
            protein_concentration_list = [calculate_protein_concentration(intensity, intercept, slope) for intensity in mean_intensity_list]

            # Convert to nM
            protein_concentration_nM_list = [calculate_protein_concentration_nM(conc_ng_ul, mw_kda) for conc_ng_ul in protein_concentration_list]

            # Normalize intensities and concentrations
            min_intensity = min(mean_intensity_list)
            mean_intensity_list = np.array(mean_intensity_list) - min_intensity
            protein_concentration_list = np.array(protein_concentration_list) - min(protein_concentration_list)
            protein_concentration_nM_list = np.array(protein_concentration_nM_list) - min(protein_concentration_nM_list)

            # Time values
            time_values_s = np.arange(len(mean_intensity_list)) * time_interval_s
            time_values_s, time_values_min, time_values_h = convert_time_units(time_values_s)
            
            df = pd.DataFrame({
                "Condition": condition,
                "Subcondition": subcondition,
                "Time_s": time_values_s,
                "Time_min": time_values_min,
                "Time_h": time_values_h,
                "Mean Intensity": mean_intensity_list,
                "Protein Concentration_ng_ul": protein_concentration_list,
                "Protein Concentration_nM": protein_concentration_nM_list
            })

            # Calculate number of protein molecules
            protein_mass_list = df["Protein Concentration_ng_ul"] * droplet_volume
            df["Number of Protein Molecules"] = [calculate_number_of_protein_molecules(mass, mw_kda) for mass in protein_mass_list]

            # Calculate rate of change of protein molecules
            t_vals = np.linspace(0, (len(df) - 1) * time_interval_s, len(df))
            dp_dt = gaussian_filter1d(np.gradient(df["Number of Protein Molecules"], t_vals), sigma=2)
            df["Rate of Change of Number of Protein Molecules (PM/s)"] = dp_dt

            # Append the data for this condition and subcondition to the list
            all_data.append(df)

    # Combine all data into a single DataFrame
    combined_df = pd.concat(all_data, ignore_index=True)

    # Calculate mean for each condition across subconditions
    mean_df = combined_df.groupby(["Condition", "Time_s", "Time_min", "Time_h"]).mean(numeric_only=True).reset_index()

    # Set the output directory within the data path
    output_dir = os.path.join(data_path, "output_data")
    ensure_output_dir(output_dir)

    # Save combined results to CSV
    combined_csv_path = os.path.join(output_dir, "combined_experiment.csv")
    combined_df.to_csv(combined_csv_path, index=False)

    # Save mean results to CSV
    mean_csv_path = os.path.join(output_dir, "mean_experiment.csv")
    mean_df.to_csv(mean_csv_path, index=False)

    # Plotting
    plot_results(combined_df, mean_df, output_dir, sample_concentration_values, mean_intensity_calibration, slope, intercept)

    return combined_csv_path, mean_csv_path


def plot_results(df, mean_df, output_dir, sample_concentration_values, mean_intensity_calibration, slope, intercept):
    """Generate plots based on the processed data."""
    # Create subdirectories for plots
    single_plot_dir = os.path.join(output_dir, "experiment_plots", "single_plots")
    combined_plot_dir = os.path.join(output_dir, "experiment_plots", "combined_plots")
    mean_plot_dir = os.path.join(output_dir, "experiment_plots", "mean_plots")
    ensure_output_dir(single_plot_dir)
    ensure_output_dir(combined_plot_dir)
    ensure_output_dir(mean_plot_dir)

    # Plot calibration curve
    plt.figure(figsize=(10, 6))
    plt.plot(sample_concentration_values, mean_intensity_calibration, 'o', label='Data points', linewidth=0.75, markersize=5)
    plt.plot(sample_concentration_values, slope * np.array(sample_concentration_values) + intercept, 'r-', label=f'Fit: y = {slope:.2f}x + {intercept:.2f}', linewidth=0.75)
    plt.title('Mean Intensity vs Protein Concentration')
    plt.xlabel('Protein Concentration (ug/ml)')
    plt.ylabel('Mean Intensity')
    plt.legend()
    plt.grid(True)
    plt.savefig(os.path.join(single_plot_dir, 'mean_intensity_vs_protein_concentration.png'))
    plt.close()

    # Plot calibration curve (log scale)
    plt.figure(figsize=(10, 6))
    plt.plot(sample_concentration_values, mean_intensity_calibration, 'o', label='Data points', linewidth=0.75, markersize=5)
    plt.plot(sample_concentration_values, slope * np.array(sample_concentration_values) + intercept, 'r-', label=f'Fit: y = {slope:.2f}x + {intercept:.2f}', linewidth=0.75)
    plt.title('Mean Intensity vs Protein Concentration (Log Scale)')
    plt.xlabel('Protein Concentration (ug/ml)')
    plt.ylabel('Mean Intensity')
    plt.yscale('log')
    plt.legend()
    plt.grid(True)
    plt.savefig(os.path.join(single_plot_dir, 'mean_intensity_vs_protein_concentration_log.png'))
    plt.close()

    # Time units and concentration units to plot
    time_units = [("Time_s", "Time_s"), ("Time_min", "Time_min"), ("Time_h", "Time_h")]
    protein_concentration_units = [
        ("Protein Concentration_ng_ul", "Protein Concentration_ng_ul"),
        ("Protein Concentration_nM", "Protein Concentration_nM"),
        ("Number of Protein Molecules", "Number of Protein Molecules")
    ]

    # Plot protein concentration over time for each time and concentration unit
    for time_unit, time_label in time_units:
        for conc_unit, conc_label in protein_concentration_units:
            # Individual plots for each condition and subcondition
            for condition in df["Condition"].unique():
                # Create a directory for each condition's single plots
                condition_single_plot_dir = os.path.join(single_plot_dir, f"{condition}_single_plots")
                ensure_output_dir(condition_single_plot_dir)

                for subcondition in df[df["Condition"] == condition]["Subcondition"].unique():
                    condition_data = df[(df["Condition"] == condition) & (df["Subcondition"] == subcondition)]
                    plt.figure(figsize=(10, 6))
                    plt.plot(condition_data[time_unit], condition_data[conc_unit], 'o-', label=f'{condition} {subcondition}', linewidth=0.75, markersize=5)
                    plt.title(f'{conc_label} vs {time_label} for {condition} {subcondition}')
                    plt.xlabel(time_label)
                    plt.ylabel(conc_label)
                    plt.legend()
                    plt.grid(True)
                    plt.savefig(os.path.join(condition_single_plot_dir, f'{condition}_{subcondition}_{conc_label}_vs_{time_label}.png'))
                    plt.close()

            # Combined plots for all conditions and subconditions
            plt.figure(figsize=(10, 6))
            for condition in df["Condition"].unique():
                for subcondition in df[df["Condition"] == condition]["Subcondition"].unique():
                    condition_data = df[(df["Condition"] == condition) & (df["Subcondition"] == subcondition)]
                    plt.plot(condition_data[time_unit], condition_data[conc_unit], 'o-', label=f'{condition} {subcondition}', linewidth=0.75, markersize=5)
            plt.title(f'Combined {conc_label} vs {time_label} for all conditions')
            plt.xlabel(time_label)
            plt.ylabel(conc_label)
            plt.legend()
            plt.grid(True)
            plt.savefig(os.path.join(combined_plot_dir, f'combined_{conc_label}_vs_{time_label}.png'))
            plt.close()

            # Combined plots for all conditions and subconditions (log scale)
            plt.figure(figsize=(10, 6))
            for condition in df["Condition"].unique():
                for subcondition in df[df["Condition"] == condition]["Subcondition"].unique():
                    condition_data = df[(df["Condition"] == condition) & (df["Subcondition"] == subcondition)]
                    plt.plot(condition_data[time_unit], condition_data[conc_unit], 'o-', label=f'{condition} {subcondition}', linewidth=0.75, markersize=5)
            plt.title(f'Combined {conc_label} vs {time_label} for all conditions (Log Scale)')
            plt.xlabel(time_label)
            plt.ylabel(conc_label)
            plt.yscale('log')
            plt.legend()
            plt.grid(True)
            plt.savefig(os.path.join(combined_plot_dir, f'combined_{conc_label}_vs_{time_label}_log.png'))
            plt.close()

            # Mean plots for each condition
            plt.figure(figsize=(10, 6))
            for condition in mean_df["Condition"].unique():
                condition_mean_data = mean_df[mean_df["Condition"] == condition]
                plt.plot(condition_mean_data[time_unit], condition_mean_data[conc_unit], 'o-', label=f'{condition} Mean', linewidth=0.75, markersize=5)
            plt.title(f'Mean {conc_label} vs {time_label} for each condition')
            plt.xlabel(time_label)
            plt.ylabel(conc_label)
            plt.legend()
            plt.grid(True)
            plt.savefig(os.path.join(mean_plot_dir, f'mean_{conc_label}_vs_{time_label}.png'))
            plt.close()

            # Mean plots for each condition (log scale)
            plt.figure(figsize=(10, 6))
            for condition in mean_df["Condition"].unique():
                condition_mean_data = mean_df[mean_df["Condition"] == condition]
                plt.plot(condition_mean_data[time_unit], condition_mean_data[conc_unit], 'o-', label=f'{condition} Mean', linewidth=0.75, markersize=5)
            plt.title(f'Mean {conc_label} vs {time_label} for each condition (Log Scale)')
            plt.xlabel(time_label)
            plt.ylabel(conc_label)
            plt.yscale('log')
            plt.legend()
            plt.grid(True)
            plt.savefig(os.path.join(mean_plot_dir, f'mean_{conc_label}_vs_{time_label}_log.png'))
            plt.close()

    # Plot rate of change of protein molecules
    plt.figure(figsize=(10, 6))
    for condition in df["Condition"].unique():
        for subcondition in df[df["Condition"] == condition]["Subcondition"].unique():
            condition_data = df[(df["Condition"] == condition) & (df["Subcondition"] == subcondition)]
            plt.plot(condition_data["Time_h"], condition_data["Rate of Change of Number of Protein Molecules (PM/s)"], 'o', label=f'{condition} {subcondition}', linewidth=0.75, markersize=5)
    plt.title('Rate of Change of Number of Protein Molecules vs Time_h')
    plt.xlabel('Time_h')
    plt.ylabel('Rate of Change (PM/s)')
    plt.legend()
    plt.grid(True)
    plt.savefig(os.path.join(combined_plot_dir, 'rate_of_change_of_protein_molecules_vs_time_h.png'))
    plt.close()

    # Plot rate of change of protein molecules (log scale)
    plt.figure(figsize=(10, 6))
    for condition in df["Condition"].unique():
        for subcondition in df[df["Condition"] == condition]["Subcondition"].unique():
            condition_data = df[(df["Condition"] == condition) & (df["Subcondition"] == subcondition)]
            plt.plot(condition_data["Time_h"], condition_data["Rate of Change of Number of Protein Molecules (PM/s)"], 'o', label=f'{condition} {subcondition}', linewidth=0.75, markersize=5)
    plt.title('Rate of Change of Number of Protein Molecules vs Time_h (Log Scale)')
    plt.xlabel('Time_h')
    plt.ylabel('Rate of Change (PM/s)')
    plt.yscale('log')
    plt.legend()
    plt.grid(True)
    plt.savefig(os.path.join(combined_plot_dir, 'rate_of_change_of_protein_molecules_vs_time_h_log.png'))
    plt.close()


def create_combined_heatmap_movie_custom_grid(data_path, conditions, subconditions, channel, grid_rows=None, grid_cols=None, frame_rate=30, delete_temp_dirs=False):
    """
    Combines heatmaps from different conditions and subconditions into a single video.
    Allows specifying the number of grid rows and columns or uses an adaptive layout based on subconditions.

    Args:
    - data_path (str): Base path where the heatmaps are stored.
    - conditions (list): List of conditions defining subdirectories within the data path.
    - subconditions (list): List of subconditions defining further subdirectories.
    - channel (str): The specific channel being processed ('cy5' or 'gfp').
    - grid_rows (int, optional): Number of rows in the grid. If None, calculated adaptively.
    - grid_cols (int, optional): Number of columns in the grid. If None, calculated adaptively.
    - frame_rate (int): Frame rate for the output video. Defaults to 30.
    - delete_temp_dirs (bool): If True, deletes temporary heatmap directories after movie creation.
    """
    # Determine grid dimensions if not provided
    total_plots = len(conditions) * len(subconditions)
    
    if grid_rows is None or grid_cols is None:
        if len(subconditions) == 1:
            # Use a rectangular grid with more columns than rows when only one subcondition
            grid_cols = int(np.ceil(np.sqrt(total_plots)))
            grid_rows = int(np.ceil(total_plots / grid_cols))
            
            # Adjust columns and rows to minimize blank spaces
            while grid_cols * grid_rows >= total_plots:
                if (grid_cols - 1) * grid_rows >= total_plots:
                    grid_cols -= 1
                elif grid_cols * (grid_rows - 1) >= total_plots:
                    grid_rows -= 1
                else:
                    break
        else:
            # Use a column-row layout with conditions in columns and subconditions in rows
            grid_rows = len(subconditions)
            grid_cols = len(conditions)
    
    # Define the output directory for temporary images
    temp_img_dir = os.path.join(data_path, "output_data", "temp_images")
    ensure_output_dir(temp_img_dir)

    # Determine the number of frames based on the first condition and subcondition
    sample_image_dir = os.path.join(data_path, "output_data", "movies", f"{conditions[0]}_{subconditions[0]}_heatmaps_{channel}")
    sample_image_files = natsorted(glob.glob(os.path.join(sample_image_dir, "*.png")))
    num_frames = len(sample_image_files)

    if num_frames == 0:
        print(f"No frames to process. Check if the directories exist and contain images.")
        return

    # Loop through each frame
    for frame_index in tqdm(range(num_frames), desc="Creating combined frames"):
        fig, axes = plt.subplots(grid_rows, grid_cols, figsize=(grid_cols * 6, grid_rows * 6))
        plt.subplots_adjust(hspace=0.1, wspace=0.1)  # Adjust spacing

        # Ensure axes is always 2D
        if grid_rows == 1 and grid_cols == 1:
            axes = np.array([[axes]])
        elif grid_rows == 1 or grid_cols == 1:
            axes = np.array(axes).reshape(grid_rows, grid_cols)

        plot_index = 0

        # Loop through each condition and subcondition
        for col_idx, condition in enumerate(conditions):
            for row_idx, subcondition in enumerate(subconditions):
                # Determine the image path
                images_dir = os.path.join(data_path, "output_data", "movies", f"{condition}_{subcondition}_heatmaps_{channel}")
                image_files = natsorted(glob.glob(os.path.join(images_dir, "*.png")))

                if frame_index < len(image_files):
                    image_path = image_files[frame_index]
                    img = io.imread(image_path)

                    # Plot the image in the appropriate subplot
                    ax = axes[row_idx if len(subconditions) > 1 else plot_index // grid_cols,
                              col_idx if len(subconditions) > 1 else plot_index % grid_cols]
                    ax.imshow(img, cmap='gray', vmin=0, vmax=img.max())
                    ax.axis('off')  # Remove axes

                    plot_index += 1

        # Turn off any unused subplots
        for ax in axes.flatten()[plot_index:]:
            ax.axis('off')

        # Save the combined frame
        combined_image_path = os.path.join(temp_img_dir, f"combined_frame_{frame_index:04d}.png")
        plt.savefig(combined_image_path, bbox_inches='tight', pad_inches=0)
        plt.close(fig)

    # Compile the images into a video using OpenCV
    combined_image_files = natsorted(glob.glob(os.path.join(temp_img_dir, "combined_frame_*.png")))

    # Get the resolution of the first image
    first_image = cv2.imread(combined_image_files[0])
    height, width, layers = first_image.shape
    video_resolution = (width, height)

    # Define the codec and create a VideoWriter object
    output_data_dir = os.path.join(data_path, "output_data")
    output_filename = f"combined_heatmap_movie_{channel}.avi"
    output_file = os.path.join(output_data_dir, output_filename)
    ensure_output_dir(output_data_dir)

    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out = cv2.VideoWriter(output_file, fourcc, frame_rate, video_resolution)

    for image_file in combined_image_files:
        img = cv2.imread(image_file)
        out.write(img)  # Write the image as a frame in the video

    out.release()
    print(f"Combined video saved to {output_file}")

    # Clean up temporary images
    shutil.rmtree(temp_img_dir)

    # Delete temporary directories if specified
    if delete_temp_dirs:
        for condition in conditions:
            for subcondition in subconditions:
                temp_dir = os.path.join(data_path, "output_data", "movies", f"{condition}_{subcondition}_heatmaps_{channel}")
                if os.path.exists(temp_dir):
                    shutil.rmtree(temp_dir)
                    print(f"Deleted temporary directory: {temp_dir}")


In [2]:
# file management
import glob
import os
import shutil

# data processing
import numpy as np
import pandas as pd

# plotting
import matplotlib.pyplot as plt
from skimage import io
from scipy.ndimage import gaussian_filter1d

# utilities
import multiprocessing as mp
mp.set_start_method('fork', force=True)
from tqdm import tqdm
from natsort import natsorted
import cv2

# Set up logging
import logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")


def reorgTiffsToOriginal(data_path, conditions, subconditions):
    """
    Args:
        data_path (str): Path to the data directory.
        conditions (list): List of conditions.
        subconditions (list): List of subconditions.
        
    This function renames the subconditions as PosX and moves the raw data to the "original" folder.
    """
    for condition in conditions:
        # Get the actual subconditions in the directory
        actual_subconditions = [name for name in os.listdir(os.path.join(data_path, condition)) if os.path.isdir(os.path.join(data_path, condition, name))]
        
        # Rename the actual subconditions to match the subconditions in your list
        for i, actual_subcondition in enumerate(sorted(actual_subconditions)):
            os.rename(os.path.join(data_path, condition, actual_subcondition), os.path.join(data_path, condition, subconditions[i]))
        
        for subcondition in subconditions:
            # Construct the path to the subcondition directory
            subcondition_path = os.path.join(data_path, condition, subcondition)
            
            # Create the path for the "original" directory within the subcondition directory
            original_dir_path = os.path.join(subcondition_path, "original")
            
            # Always create the "original" directory
            os.makedirs(original_dir_path, exist_ok=True)
            
            # Iterate over all files in the subcondition directory
            for filename in os.listdir(subcondition_path):
                # Check if the file is a .tif file
                if filename.endswith(".tif"):
                    # Construct the full path to the file
                    file_path = os.path.join(subcondition_path, filename)
                    
                    # Construct the path to move the file to
                    destination_path = os.path.join(original_dir_path, filename)
                    
                    # Move the file to the "original" directory
                    shutil.move(file_path, destination_path)
            print(f"Moved .tif files from {subcondition_path} to {original_dir_path}")


def ensure_output_dir(output_dir):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)


def calculate_mean_intensity(path):
    """Calculate mean intensity of an image."""
    return io.imread(path).mean()


def calculate_protein_concentration(mean_intensity, intercept, slope):
    """Calculate protein concentration in ng/ul and nM."""
    conc_ng_ul = (mean_intensity - intercept) / slope
    return conc_ng_ul


def calculate_protein_concentration_nM(conc_ng_ul, mw_kda):
    """Convert protein concentration from ng/ul to nM."""
    conc_nM = (conc_ng_ul * 1e-3) / (mw_kda * 1e3) * 1e9
    return conc_nM


def calculate_number_of_protein_molecules(protein_mass, mw_kda):
    """Calculate number of protein molecules."""
    return (protein_mass * 6e14) / (mw_kda * 1e3)


def convert_time_units(time_values_s):
    """Convert time values from seconds to minutes and hours."""
    time_values_min = time_values_s / 60
    time_values_h = time_values_s / 3600
    return time_values_s, time_values_min, time_values_h


def process_image(args):
    image_file, output_directory_path, channel, slope, intercept, vmax, time_interval, i, show_scalebar, min_frame, skip_frames, condition, subcondition = args
    # Read the image into a numpy array
    intensity_matrix = io.imread(image_file)

    if channel == "Cy5":
        matrix_to_plot = intensity_matrix
        # Use raw intensity for cy5 channel
        label = 'Fluorescence Intensity'
    else:
        # Convert intensity values to protein concentration using the calibration curve
        matrix_to_plot = calculate_protein_concentration(intensity_matrix, slope, intercept)
        matrix_to_plot = matrix_to_plot / 27000 * 1E6
        label = 'Protein concentration (nM)'

    # Plot the heatmap
    fig, ax = plt.subplots(figsize=(12, 12))
    im = ax.imshow(matrix_to_plot, cmap='gray', interpolation='nearest', vmin=0, vmax=vmax)

    if show_scalebar:
        plt.colorbar(im, ax=ax, label=label)
    plt.title(f"Time (min): {(i - min_frame) * time_interval * skip_frames / 60:.2f} \nTime (h): {(i - min_frame) * time_interval * skip_frames / 3600:.2f} \n{condition} - {subcondition} - {channel}", fontsize=20)
    plt.xlabel('x [µm]')
    plt.ylabel('y [µm]')
    plt.grid(True, color='#d3d3d3', linewidth=0.5, alpha=0.5)

    # Save the heatmap
    heatmap_filename = f"heatmap_frame_{i}.png"
    heatmap_path = os.path.join(output_directory_path, heatmap_filename)
    plt.savefig(heatmap_path, bbox_inches='tight', pad_inches=0.1, dpi=300)
    plt.close(fig)


def fluorescence_heatmap(data_path, conditions, subconditions, channel, time_interval_list, min_frame, max_frame, vmax, skip_frames=1, calibration_curve_paths=None, show_scalebar=True):
    """
    Reads each image as a matrix, creates, and saves a heatmap representing the normalized pixel-wise fluorescence intensity.

    Args:
    - data_path (str): Base directory where the images are stored.
    - conditions (list): List of conditions defining subdirectories within the data path.
    - subconditions (list): List of subconditions defining further subdirectories.
    - channel (str): Channel specifying the fluorescence ('cy5' or 'gfp').
    - time_interval_list (list): List of time intervals in seconds between frames for each condition.
    - min_frame (int): Minimum frame number to start processing from.
    - max_frame (int): Maximum frame number to stop processing at.
    - vmax (float): Maximum value for color scale in the heatmap.
    - skip_frames (int): Interval to skip frames (default is 1, meaning process every frame).
    - calibration_curve_paths (list): List of file paths for the calibration curve images.
    - show_scalebar (bool): Whether to show the color scale bar in the heatmap.
    """
    output_data_dir = os.path.join(data_path, "output_data", "movies")
    ensure_output_dir(output_data_dir)

    for idx, condition in enumerate(conditions):
        time_interval = time_interval_list[idx]

        for subcondition in subconditions:
            # Determine the directory paths based on the channel
            input_directory_path = os.path.join(data_path, condition, subcondition, "original")
            output_directory_path = os.path.join(output_data_dir, f"{condition}_{subcondition}_heatmaps_{channel}")

            # Create the output directory if it doesn't exist, or clear it if it does
            if os.path.exists(output_directory_path):
                shutil.rmtree(output_directory_path)
            os.makedirs(output_directory_path, exist_ok=True)

            # Get all .tif files in the folder
            image_files = sorted(glob.glob(os.path.join(input_directory_path, f"*{channel}*.tif")))[min_frame:max_frame:skip_frames]

            # Setup calibration curve for non-cy5 channels
            slope, intercept = None, None
            if channel != "Cy5":
                # Calibration curve data and fit
                sample_concentration_values = [0, 2, 5, 10, 20, 40, 80, 160, 320]

                if calibration_curve_paths is None or len(calibration_curve_paths) != len(sample_concentration_values):
                    raise ValueError(f"Mismatch in lengths: {len(calibration_curve_paths)} calibration images, {len(sample_concentration_values)} sample concentrations")

                with mp.Pool(mp.cpu_count()) as pool:
                    mean_intensity_calibration = pool.map(calculate_mean_intensity, calibration_curve_paths)
                slope, intercept = np.polyfit(sample_concentration_values, mean_intensity_calibration, 1)

            # Prepare arguments for multiprocessing
            args = [(image_file, output_directory_path, channel, slope, intercept, vmax, time_interval, i, show_scalebar, min_frame, skip_frames, condition, subcondition) for i, image_file in enumerate(image_files, start=min_frame)]

            # Use multiprocessing to process images
            with mp.Pool(mp.cpu_count()) as pool:
                list(tqdm(pool.imap(process_image, args), total=len(args), desc=f"Processing {condition} - {subcondition}"))


def prepare_conditions(data_path, num_reps):
    # List conditions while ignoring 'output_data'
    conditions = natsorted([
        f for f in os.listdir(data_path) 
        if os.path.isdir(os.path.join(data_path, f)) and f != 'output_data'
    ])
    
    # Generate subconditions list based on num_reps
    subconditions = [f"Rep{x}" for x in range(1, num_reps + 1)]
    
    return conditions, subconditions


def process_video_creation(args):
    condition, subcondition, images_dir, out_path, frame_rate, max_frame = args

    image_files = natsorted(glob.glob(os.path.join(images_dir, "*.png")))

    if not image_files:
        print(f"No images found for subcondition {subcondition}.")
        return

    # Limit the number of files if max_frame is specified
    image_files = image_files[:max_frame] if max_frame is not None else image_files

    # Get the resolution of the first image (assuming all images are the same size)
    first_image = cv2.imread(image_files[0])
    video_resolution = (first_image.shape[1], first_image.shape[0])  # Width x Height

    # Define the codec and create VideoWriter object
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out = cv2.VideoWriter(out_path, fourcc, frame_rate, video_resolution)

    for file in tqdm(image_files, desc=f"Creating video for {condition}", leave=False):
        img = cv2.imread(file)
        out.write(img)  # Write the image as a frame in the video

    out.release()
    print(f"Video saved to {out_path}")



def ensure_output_dir(output_dir):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

def process_video_creation(args):
    condition, subcondition, images_dir, out_path, frame_rate, max_frame = args

    image_files = natsorted(glob.glob(os.path.join(images_dir, "*.png")))

    if not image_files:
        print(f"No images found for subcondition {subcondition}.")
        return

    # Limit the number of files if max_frame is specified
    image_files = image_files[:max_frame] if max_frame is not None else image_files

    # Get the resolution of the first image (assuming all images are the same size)
    first_image = cv2.imread(image_files[0])
    video_resolution = (first_image.shape[1], first_image.shape[0])  # Width x Height

    # Define the codec and create VideoWriter object
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out = cv2.VideoWriter(out_path, fourcc, frame_rate, video_resolution)

    for file in tqdm(image_files, desc=f"Creating video for {condition} - {subcondition}", leave=False):
        img = cv2.imread(file)
        out.write(img)  # Write the image as a frame in the video

    out.release()
    print(f"Video saved to {out_path}")

def create_movies(data_path, conditions, subconditions, channel, frame_rate=30, max_frame=None, delete_temp_dirs=False):
    """
    Creates video files from heatmaps stored in the specified directory.

    Args:
    - data_path (str): Base path where the heatmaps are stored.
    - conditions (list): List of conditions defining subdirectories within the data path.
    - subconditions (list): List of subconditions defining further subdirectories.
    - channel (str): The specific channel being processed ('cy5' or 'gfp').
    - frame_rate (int): Frame rate for the output video. Defaults to 30.
    - max_frame (int, optional): Maximum number of frames to be included in the video. If None, all frames are included.
    - delete_temp_dirs (bool): If True, deletes temporary heatmap directories after movie creation.
    """
    output_data_dir = os.path.join(data_path, "output_data", "movies")
    ensure_output_dir(output_data_dir)

    args_list = []

    for condition in conditions:
        for subcondition in subconditions:
            images_dir = os.path.join(output_data_dir, f"{condition}_{subcondition}_heatmaps_{channel}")
            video_filename = f"{condition}_{subcondition}_{channel}.avi"
            out_path = os.path.join(output_data_dir, video_filename)

            # Prepare arguments for multiprocessing
            args_list.append((condition, subcondition, images_dir, out_path, frame_rate, max_frame))

    # Use multiprocessing to process video creation for all conditions and subconditions
    with mp.Pool(mp.cpu_count()) as pool:
        list(tqdm(pool.imap(process_video_creation, args_list), total=len(args_list), desc="Creating videos"))

    # Delete temporary directories if specified
    if delete_temp_dirs:
        for condition in conditions:
            for subcondition in subconditions:
                temp_dir = os.path.join(output_data_dir, f"{condition}_{subcondition}_heatmaps_{channel}")
                if os.path.exists(temp_dir):
                    shutil.rmtree(temp_dir)
                    print(f"Deleted temporary directory: {temp_dir}")


def quantify_tiffiles(data_path, conditions, subconditions, calibration_curve_paths, mw_kda_list, droplet_volume_list, time_interval_s_list):
    """Process images to calculate protein concentration and generate plots."""
    all_data = []

    # Sort the calibration curve paths
    calibration_curve_paths = sorted(calibration_curve_paths)

    # Calibration curve data and fit
    sample_concentration_values = [0, 2, 5, 10, 20, 40, 80, 160, 320]
    with mp.Pool(mp.cpu_count()) as pool:
        mean_intensity_calibration = pool.map(calculate_mean_intensity, calibration_curve_paths)
    slope, intercept = np.polyfit(sample_concentration_values, mean_intensity_calibration, 1)

    for idx, condition in enumerate(conditions):
        # Get condition-specific parameters
        mw_kda = mw_kda_list[idx]
        droplet_volume = droplet_volume_list[idx]
        time_interval_s = time_interval_s_list[idx]

        for subcondition in subconditions:
            # Construct paths based on condition and subcondition
            pattern = os.path.join(data_path, condition, subcondition, "original", "img_*********_4x_GFP_000.tif")
            paths = sorted(glob.glob(pattern))

            if not paths:
                print(f"No image files found for condition {condition}, subcondition {subcondition}.")
                continue

            # Calculate mean intensity for samples
            with mp.Pool(mp.cpu_count()) as pool:
                mean_intensity_list = list(tqdm(pool.imap(calculate_mean_intensity, paths), total=len(paths), desc=f"Calculating intensities for {condition} - {subcondition}"))

            # Calculate protein concentrations in ng/ul
            protein_concentration_list = [calculate_protein_concentration(intensity, intercept, slope) for intensity in mean_intensity_list]

            # Convert to nM
            protein_concentration_nM_list = [calculate_protein_concentration_nM(conc_ng_ul, mw_kda) for conc_ng_ul in protein_concentration_list]

            # Normalize intensities and concentrations
            min_intensity = min(mean_intensity_list)
            mean_intensity_list = np.array(mean_intensity_list) - min_intensity
            protein_concentration_list = np.array(protein_concentration_list) - min(protein_concentration_list)
            protein_concentration_nM_list = np.array(protein_concentration_nM_list) - min(protein_concentration_nM_list)

            # Time values
            time_values_s = np.arange(len(mean_intensity_list)) * time_interval_s
            time_values_s, time_values_min, time_values_h = convert_time_units(time_values_s)
            
            df = pd.DataFrame({
                "Condition": condition,
                "Subcondition": subcondition,
                "Time_s": time_values_s,
                "Time_min": time_values_min,
                "Time_h": time_values_h,
                "Mean Intensity": mean_intensity_list,
                "Protein Concentration_ng_ul": protein_concentration_list,
                "Protein Concentration_nM": protein_concentration_nM_list
            })

            # Calculate number of protein molecules
            protein_mass_list = df["Protein Concentration_ng_ul"] * droplet_volume
            df["Number of Protein Molecules"] = [calculate_number_of_protein_molecules(mass, mw_kda) for mass in protein_mass_list]

            # Calculate rate of change of protein molecules
            t_vals = np.linspace(0, (len(df) - 1) * time_interval_s, len(df))
            dp_dt = gaussian_filter1d(np.gradient(df["Number of Protein Molecules"], t_vals), sigma=2)
            df["Rate of Change of Number of Protein Molecules (PM/s)"] = dp_dt

            # Append the data for this condition and subcondition to the list
            all_data.append(df)

    # Combine all data into a single DataFrame
    combined_df = pd.concat(all_data, ignore_index=True)

    # Calculate mean for each condition across subconditions
    mean_df = combined_df.groupby(["Condition", "Time_s", "Time_min", "Time_h"]).mean(numeric_only=True).reset_index()

    # Set the output directory within the data path
    output_dir = os.path.join(data_path, "output_data")
    ensure_output_dir(output_dir)

    # Save combined results to CSV
    combined_csv_path = os.path.join(output_dir, "combined_experiment.csv")
    combined_df.to_csv(combined_csv_path, index=False)

    # Save mean results to CSV
    mean_csv_path = os.path.join(output_dir, "mean_experiment.csv")
    mean_df.to_csv(mean_csv_path, index=False)

    # Plotting
    plot_results(combined_df, mean_df, output_dir, sample_concentration_values, mean_intensity_calibration, slope, intercept)

    return combined_csv_path, mean_csv_path


def plot_results(df, mean_df, output_dir, sample_concentration_values, mean_intensity_calibration, slope, intercept):
    """Generate plots based on the processed data."""
    # Create subdirectories for plots
    single_plot_dir = os.path.join(output_dir, "experiment_plots", "single_plots")
    combined_plot_dir = os.path.join(output_dir, "experiment_plots", "combined_plots")
    mean_plot_dir = os.path.join(output_dir, "experiment_plots", "mean_plots")
    ensure_output_dir(single_plot_dir)
    ensure_output_dir(combined_plot_dir)
    ensure_output_dir(mean_plot_dir)

    # Plot calibration curve
    plt.figure(figsize=(10, 6))
    plt.plot(sample_concentration_values, mean_intensity_calibration, 'o', label='Data points', linewidth=0.75, markersize=5)
    plt.plot(sample_concentration_values, slope * np.array(sample_concentration_values) + intercept, 'r-', label=f'Fit: y = {slope:.2f}x + {intercept:.2f}', linewidth=0.75)
    plt.title('Mean Intensity vs Protein Concentration')
    plt.xlabel('Protein Concentration (ug/ml)')
    plt.ylabel('Mean Intensity')
    plt.legend()
    plt.grid(True)
    plt.savefig(os.path.join(single_plot_dir, 'mean_intensity_vs_protein_concentration.png'))
    plt.close()

    # Plot calibration curve (log scale)
    plt.figure(figsize=(10, 6))
    plt.plot(sample_concentration_values, mean_intensity_calibration, 'o', label='Data points', linewidth=0.75, markersize=5)
    plt.plot(sample_concentration_values, slope * np.array(sample_concentration_values) + intercept, 'r-', label=f'Fit: y = {slope:.2f}x + {intercept:.2f}', linewidth=0.75)
    plt.title('Mean Intensity vs Protein Concentration (Log Scale)')
    plt.xlabel('Protein Concentration (ug/ml)')
    plt.ylabel('Mean Intensity')
    plt.yscale('log')
    plt.legend()
    plt.grid(True)
    plt.savefig(os.path.join(single_plot_dir, 'mean_intensity_vs_protein_concentration_log.png'))
    plt.close()

    # Time units and concentration units to plot
    time_units = [("Time_s", "Time_s"), ("Time_min", "Time_min"), ("Time_h", "Time_h")]
    protein_concentration_units = [
        ("Protein Concentration_ng_ul", "Protein Concentration_ng_ul"),
        ("Protein Concentration_nM", "Protein Concentration_nM"),
        ("Number of Protein Molecules", "Number of Protein Molecules")
    ]

    # Plot protein concentration over time for each time and concentration unit
    for time_unit, time_label in time_units:
        for conc_unit, conc_label in protein_concentration_units:
            # Individual plots for each condition and subcondition
            for condition in df["Condition"].unique():
                # Create a directory for each condition's single plots
                condition_single_plot_dir = os.path.join(single_plot_dir, f"{condition}_single_plots")
                ensure_output_dir(condition_single_plot_dir)

                for subcondition in df[df["Condition"] == condition]["Subcondition"].unique():
                    condition_data = df[(df["Condition"] == condition) & (df["Subcondition"] == subcondition)]
                    plt.figure(figsize=(10, 6))
                    plt.plot(condition_data[time_unit], condition_data[conc_unit], 'o-', label=f'{condition} {subcondition}', linewidth=0.75, markersize=5)
                    plt.title(f'{conc_label} vs {time_label} for {condition} {subcondition}')
                    plt.xlabel(time_label)
                    plt.ylabel(conc_label)
                    plt.legend()
                    plt.grid(True)
                    plt.savefig(os.path.join(condition_single_plot_dir, f'{condition}_{subcondition}_{conc_label}_vs_{time_label}.png'))
                    plt.close()

            # Combined plots for all conditions and subconditions
            plt.figure(figsize=(10, 6))
            for condition in df["Condition"].unique():
                for subcondition in df[df["Condition"] == condition]["Subcondition"].unique():
                    condition_data = df[(df["Condition"] == condition) & (df["Subcondition"] == subcondition)]
                    plt.plot(condition_data[time_unit], condition_data[conc_unit], 'o-', label=f'{condition} {subcondition}', linewidth=0.75, markersize=5)
            plt.title(f'Combined {conc_label} vs {time_label} for all conditions')
            plt.xlabel(time_label)
            plt.ylabel(conc_label)
            plt.legend()
            plt.grid(True)
            plt.savefig(os.path.join(combined_plot_dir, f'combined_{conc_label}_vs_{time_label}.png'))
            plt.close()

            # Combined plots for all conditions and subconditions (log scale)
            plt.figure(figsize=(10, 6))
            for condition in df["Condition"].unique():
                for subcondition in df[df["Condition"] == condition]["Subcondition"].unique():
                    condition_data = df[(df["Condition"] == condition) & (df["Subcondition"] == subcondition)]
                    plt.plot(condition_data[time_unit], condition_data[conc_unit], 'o-', label=f'{condition} {subcondition}', linewidth=0.75, markersize=5)
            plt.title(f'Combined {conc_label} vs {time_label} for all conditions (Log Scale)')
            plt.xlabel(time_label)
            plt.ylabel(conc_label)
            plt.yscale('log')
            plt.legend()
            plt.grid(True)
            plt.savefig(os.path.join(combined_plot_dir, f'combined_{conc_label}_vs_{time_label}_log.png'))
            plt.close()

            # Mean plots for each condition
            plt.figure(figsize=(10, 6))
            for condition in mean_df["Condition"].unique():
                condition_mean_data = mean_df[mean_df["Condition"] == condition]
                plt.plot(condition_mean_data[time_unit], condition_mean_data[conc_unit], 'o-', label=f'{condition} Mean', linewidth=0.75, markersize=5)
            plt.title(f'Mean {conc_label} vs {time_label} for each condition')
            plt.xlabel(time_label)
            plt.ylabel(conc_label)
            plt.legend()
            plt.grid(True)
            plt.savefig(os.path.join(mean_plot_dir, f'mean_{conc_label}_vs_{time_label}.png'))
            plt.close()

            # Mean plots for each condition (log scale)
            plt.figure(figsize=(10, 6))
            for condition in mean_df["Condition"].unique():
                condition_mean_data = mean_df[mean_df["Condition"] == condition]
                plt.plot(condition_mean_data[time_unit], condition_mean_data[conc_unit], 'o-', label=f'{condition} Mean', linewidth=0.75, markersize=5)
            plt.title(f'Mean {conc_label} vs {time_label} for each condition (Log Scale)')
            plt.xlabel(time_label)
            plt.ylabel(conc_label)
            plt.yscale('log')
            plt.legend()
            plt.grid(True)
            plt.savefig(os.path.join(mean_plot_dir, f'mean_{conc_label}_vs_{time_label}_log.png'))
            plt.close()

    # Plot rate of change of protein molecules
    plt.figure(figsize=(10, 6))
    for condition in df["Condition"].unique():
        for subcondition in df[df["Condition"] == condition]["Subcondition"].unique():
            condition_data = df[(df["Condition"] == condition) & (df["Subcondition"] == subcondition)]
            plt.plot(condition_data["Time_h"], condition_data["Rate of Change of Number of Protein Molecules (PM/s)"], 'o', label=f'{condition} {subcondition}', linewidth=0.75, markersize=5)
    plt.title('Rate of Change of Number of Protein Molecules vs Time_h')
    plt.xlabel('Time_h')
    plt.ylabel('Rate of Change (PM/s)')
    plt.legend()
    plt.grid(True)
    plt.savefig(os.path.join(combined_plot_dir, 'rate_of_change_of_protein_molecules_vs_time_h.png'))
    plt.close()

    # Plot rate of change of protein molecules (log scale)
    plt.figure(figsize=(10, 6))
    for condition in df["Condition"].unique():
        for subcondition in df[df["Condition"] == condition]["Subcondition"].unique():
            condition_data = df[(df["Condition"] == condition) & (df["Subcondition"] == subcondition)]
            plt.plot(condition_data["Time_h"], condition_data["Rate of Change of Number of Protein Molecules (PM/s)"], 'o', label=f'{condition} {subcondition}', linewidth=0.75, markersize=5)
    plt.title('Rate of Change of Number of Protein Molecules vs Time_h (Log Scale)')
    plt.xlabel('Time_h')
    plt.ylabel('Rate of Change (PM/s)')
    plt.yscale('log')
    plt.legend()
    plt.grid(True)
    plt.savefig(os.path.join(combined_plot_dir, 'rate_of_change_of_protein_molecules_vs_time_h_log.png'))
    plt.close()


def create_combined_heatmap_movie_custom_grid(data_path, conditions, subconditions, channel, grid_rows=None, grid_cols=None, frame_rate=30, delete_temp_dirs=False):
    """
    Combines heatmaps from different conditions and subconditions into a single video.
    Allows specifying the number of grid rows and columns or uses an adaptive layout based on subconditions.

    Args:
    - data_path (str): Base path where the heatmaps are stored.
    - conditions (list): List of conditions defining subdirectories within the data path.
    - subconditions (list): List of subconditions defining further subdirectories.
    - channel (str): The specific channel being processed ('cy5' or 'gfp').
    - grid_rows (int, optional): Number of rows in the grid. If None, calculated adaptively.
    - grid_cols (int, optional): Number of columns in the grid. If None, calculated adaptively.
    - frame_rate (int): Frame rate for the output video. Defaults to 30.
    - delete_temp_dirs (bool): If True, deletes temporary heatmap directories after movie creation.
    """
    # Determine grid dimensions if not provided
    total_plots = len(conditions) * len(subconditions)
    
    if grid_rows is None or grid_cols is None:
        if len(subconditions) == 1:
            # Use a rectangular grid with more columns than rows when only one subcondition
            grid_cols = int(np.ceil(np.sqrt(total_plots)))
            grid_rows = int(np.ceil(total_plots / grid_cols))
            
            # Adjust columns and rows to minimize blank spaces
            while grid_cols * grid_rows >= total_plots:
                if (grid_cols - 1) * grid_rows >= total_plots:
                    grid_cols -= 1
                elif grid_cols * (grid_rows - 1) >= total_plots:
                    grid_rows -= 1
                else:
                    break
        else:
            # Use a column-row layout with conditions in columns and subconditions in rows
            grid_rows = len(subconditions)
            grid_cols = len(conditions)
    
    # Define the output directory for temporary images
    temp_img_dir = os.path.join(data_path, "output_data", "temp_images")
    ensure_output_dir(temp_img_dir)

    # Determine the number of frames based on the first condition and subcondition
    sample_image_dir = os.path.join(data_path, "output_data", "movies", f"{conditions[0]}_{subconditions[0]}_heatmaps_{channel}")
    sample_image_files = natsorted(glob.glob(os.path.join(sample_image_dir, "*.png")))
    num_frames = len(sample_image_files)

    if num_frames == 0:
        print(f"No frames to process. Check if the directories exist and contain images.")
        return

    # Loop through each frame
    for frame_index in tqdm(range(num_frames), desc="Creating combined frames"):
        fig, axes = plt.subplots(grid_rows, grid_cols, figsize=(grid_cols * 6, grid_rows * 6))
        plt.subplots_adjust(hspace=0.1, wspace=0.1)  # Adjust spacing

        # Ensure axes is always 2D
        if grid_rows == 1 and grid_cols == 1:
            axes = np.array([[axes]])
        elif grid_rows == 1 or grid_cols == 1:
            axes = np.array(axes).reshape(grid_rows, grid_cols)

        plot_index = 0

        # Loop through each condition and subcondition
        for col_idx, condition in enumerate(conditions):
            for row_idx, subcondition in enumerate(subconditions):
                # Determine the image path
                images_dir = os.path.join(data_path, "output_data", "movies", f"{condition}_{subcondition}_heatmaps_{channel}")
                image_files = natsorted(glob.glob(os.path.join(images_dir, "*.png")))

                if frame_index < len(image_files):
                    image_path = image_files[frame_index]
                    img = io.imread(image_path)

                    # Plot the image in the appropriate subplot
                    ax = axes[row_idx if len(subconditions) > 1 else plot_index // grid_cols,
                              col_idx if len(subconditions) > 1 else plot_index % grid_cols]
                    ax.imshow(img, cmap='gray', vmin=0, vmax=img.max())
                    ax.axis('off')  # Remove axes

                    plot_index += 1

        # Turn off any unused subplots
        for ax in axes.flatten()[plot_index:]:
            ax.axis('off')

        # Save the combined frame
        combined_image_path = os.path.join(temp_img_dir, f"combined_frame_{frame_index:04d}.png")
        plt.savefig(combined_image_path, bbox_inches='tight', pad_inches=0)
        plt.close(fig)

    # Compile the images into a video using OpenCV
    combined_image_files = natsorted(glob.glob(os.path.join(temp_img_dir, "combined_frame_*.png")))

    # Get the resolution of the first image
    first_image = cv2.imread(combined_image_files[0])
    height, width, layers = first_image.shape
    video_resolution = (width, height)

    # Define the codec and create a VideoWriter object
    output_data_dir = os.path.join(data_path, "output_data")
    output_filename = f"combined_heatmap_movie_{channel}.avi"
    output_file = os.path.join(output_data_dir, output_filename)
    ensure_output_dir(output_data_dir)

    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out = cv2.VideoWriter(output_file, fourcc, frame_rate, video_resolution)

    for image_file in combined_image_files:
        img = cv2.imread(image_file)
        out.write(img)  # Write the image as a frame in the video

    out.release()
    print(f"Combined video saved to {output_file}")

    # Clean up temporary images
    shutil.rmtree(temp_img_dir)

    # Delete temporary directories if specified
    if delete_temp_dirs:
        for condition in conditions:
            for subcondition in subconditions:
                temp_dir = os.path.join(data_path, "output_data", "movies", f"{condition}_{subcondition}_heatmaps_{channel}")
                if os.path.exists(temp_dir):
                    shutil.rmtree(temp_dir)
                    print(f"Deleted temporary directory: {temp_dir}")


1.- Read the data and reorganize files

In [3]:
data_path = "../../data/080224-ACEFGH-RT/4txtl_1mt_2dna/"
number_reps = 1
conditions, subconditions = prepare_conditions(data_path, number_reps)

# Define calibration curve paths
calibration_curve_paths = sorted(glob.glob("../../data/calibration_curve/***ugml.tif"))
reorgTiffsToOriginal(data_path, conditions, subconditions)

Moved .tif files from ../../data/080224-ACEFGH-RT/4txtl_1mt_2dna/A/Rep1 to ../../data/080224-ACEFGH-RT/4txtl_1mt_2dna/A/Rep1/original
Moved .tif files from ../../data/080224-ACEFGH-RT/4txtl_1mt_2dna/C/Rep1 to ../../data/080224-ACEFGH-RT/4txtl_1mt_2dna/C/Rep1/original
Moved .tif files from ../../data/080224-ACEFGH-RT/4txtl_1mt_2dna/Control/Rep1 to ../../data/080224-ACEFGH-RT/4txtl_1mt_2dna/Control/Rep1/original
Moved .tif files from ../../data/080224-ACEFGH-RT/4txtl_1mt_2dna/E/Rep1 to ../../data/080224-ACEFGH-RT/4txtl_1mt_2dna/E/Rep1/original
Moved .tif files from ../../data/080224-ACEFGH-RT/4txtl_1mt_2dna/F/Rep1 to ../../data/080224-ACEFGH-RT/4txtl_1mt_2dna/F/Rep1/original
Moved .tif files from ../../data/080224-ACEFGH-RT/4txtl_1mt_2dna/G/Rep1 to ../../data/080224-ACEFGH-RT/4txtl_1mt_2dna/G/Rep1/original
Moved .tif files from ../../data/080224-ACEFGH-RT/4txtl_1mt_2dna/H/Rep1 to ../../data/080224-ACEFGH-RT/4txtl_1mt_2dna/H/Rep1/original


2.1.- Process gfp channel

In [4]:
channel = "GFP"
time_interval_list = [60] * len(conditions)  # time intervals in seconds between frames for each condition
min_frame = 0
max_frame = None
vmax = 500  # Set vmax based on your data's expected concentration range
skip_frames = 32 #### CHANGE 


# Call the function
fluorescence_heatmap(data_path, conditions, subconditions, channel, time_interval_list, min_frame, max_frame, vmax, skip_frames, calibration_curve_paths, show_scalebar=True)

Processing A - Rep1: 100%|██████████| 86/86 [00:18<00:00,  4.64it/s]
Processing C - Rep1: 100%|██████████| 86/86 [00:17<00:00,  4.92it/s]
Processing Control - Rep1: 100%|██████████| 86/86 [00:16<00:00,  5.17it/s]
Processing E - Rep1: 100%|██████████| 86/86 [00:17<00:00,  5.06it/s]
Processing F - Rep1: 100%|██████████| 86/86 [00:18<00:00,  4.71it/s]
Processing G - Rep1: 100%|██████████| 86/86 [00:16<00:00,  5.35it/s]
Processing H - Rep1: 100%|██████████| 86/86 [00:17<00:00,  4.84it/s]


In [5]:
# Example usage
frame_rate = 15  # frames per second
create_movies(data_path, conditions, subconditions, channel, frame_rate=frame_rate)

                                                                            3it/s]

Video saved to ../../data/080224-ACEFGH-RT/4txtl_1mt_2dna/output_data/movies/H_Rep1_GFP.avi


                                                                                  

Video saved to ../../data/080224-ACEFGH-RT/4txtl_1mt_2dna/output_data/movies/Control_Rep1_GFP.avi


                                                                            

Video saved to ../../data/080224-ACEFGH-RT/4txtl_1mt_2dna/output_data/movies/A_Rep1_GFP.avi


                                                                            

Video saved to ../../data/080224-ACEFGH-RT/4txtl_1mt_2dna/output_data/movies/G_Rep1_GFP.avi


                                                                            

Video saved to ../../data/080224-ACEFGH-RT/4txtl_1mt_2dna/output_data/movies/F_Rep1_GFP.avi


                                                                            

Video saved to ../../data/080224-ACEFGH-RT/4txtl_1mt_2dna/output_data/movies/C_Rep1_GFP.avi


                                                                            

Video saved to ../../data/080224-ACEFGH-RT/4txtl_1mt_2dna/output_data/movies/E_Rep1_GFP.avi


Creating videos: 100%|██████████| 7/7 [00:18<00:00,  2.69s/it]


In [6]:
create_combined_heatmap_movie_custom_grid(data_path, conditions, subconditions, channel, grid_rows=2, grid_cols=5, frame_rate=30)

Creating combined frames: 100%|██████████| 86/86 [06:09<00:00,  4.30s/it]


Combined video saved to ../../data/080224-ACEFGH-RT/4txtl_1mt_2dna/output_data/combined_heatmap_movie_GFP.avi


2.2.- Process cy5 channel

In [7]:

channel = "Cy5"
min_frame = 0
max_frame = None
vmax = 14E3  # Set vmax based on your data's expected concentration range
skip_frames = 32


# Call the function
fluorescence_heatmap(data_path, conditions, subconditions, channel, time_interval_list, min_frame, max_frame, vmax, skip_frames, calibration_curve_paths, show_scalebar=False)

Processing A - Rep1: 100%|██████████| 86/86 [00:25<00:00,  3.36it/s]
Processing C - Rep1: 100%|██████████| 86/86 [00:15<00:00,  5.44it/s]
Processing Control - Rep1: 100%|██████████| 86/86 [00:13<00:00,  6.50it/s]
Processing E - Rep1: 100%|██████████| 86/86 [00:14<00:00,  5.95it/s]
Processing F - Rep1: 100%|██████████| 86/86 [00:14<00:00,  6.06it/s]
Processing G - Rep1: 100%|██████████| 86/86 [00:13<00:00,  6.21it/s]
Processing H - Rep1: 100%|██████████| 86/86 [00:13<00:00,  6.25it/s]


In [8]:
# Example usage
# frame_rate = 1  # frames per second
create_movies(data_path, conditions, subconditions, channel, frame_rate=frame_rate)


                                                                            7it/s]

Video saved to ../../data/080224-ACEFGH-RT/4txtl_1mt_2dna/output_data/movies/H_Rep1_Cy5.avi


                                                                                  

Video saved to ../../data/080224-ACEFGH-RT/4txtl_1mt_2dna/output_data/movies/Control_Rep1_Cy5.avi


Creating video for F - Rep1: 100%|██████████| 86/86 [00:18<00:00,  4.83it/s]

Video saved to ../../data/080224-ACEFGH-RT/4txtl_1mt_2dna/output_data/movies/G_Rep1_Cy5.avi

                                                                            






Video saved to ../../data/080224-ACEFGH-RT/4txtl_1mt_2dna/output_data/movies/F_Rep1_Cy5.avi


Creating video for E - Rep1: 100%|██████████| 86/86 [00:18<00:00,  5.12it/s]

Video saved to ../../data/080224-ACEFGH-RT/4txtl_1mt_2dna/output_data/movies/A_Rep1_Cy5.avi


                                                                            

Video saved to ../../data/080224-ACEFGH-RT/4txtl_1mt_2dna/output_data/movies/E_Rep1_Cy5.avi

Creating videos:  14%|█▍        | 1/7 [00:18<01:53, 18.97s/it]




                                                                            

Video saved to ../../data/080224-ACEFGH-RT/4txtl_1mt_2dna/output_data/movies/C_Rep1_Cy5.avi


Creating videos: 100%|██████████| 7/7 [00:19<00:00,  2.72s/it]


In [9]:
create_combined_heatmap_movie_custom_grid(data_path, conditions, subconditions, channel, grid_rows=2, grid_cols=5, frame_rate=30)


Creating combined frames: 100%|██████████| 86/86 [06:23<00:00,  4.46s/it]


Combined video saved to ../../data/080224-ACEFGH-RT/4txtl_1mt_2dna/output_data/combined_heatmap_movie_Cy5.avi


3.- Generate .csv files with gene expression data

In [10]:

# Example usage

mw_kda_list = [100] * len(conditions)
droplet_volume_list = [2] * len(conditions)
time_interval_list = [60] * len(conditions)

# Quantify tiff files
quantify_tiffiles(data_path, conditions, subconditions, calibration_curve_paths, mw_kda_list, droplet_volume_list, time_interval_list)

No image files found for condition A, subcondition Rep1.
No image files found for condition C, subcondition Rep1.
No image files found for condition Control, subcondition Rep1.
No image files found for condition E, subcondition Rep1.
No image files found for condition F, subcondition Rep1.
No image files found for condition G, subcondition Rep1.
No image files found for condition H, subcondition Rep1.


ValueError: No objects to concatenate