# In [19]:
# File management
import glob
import os
import shutil
import csv

# Data processing
import numpy as np
import pandas as pd

# Plotting
import matplotlib.pyplot as plt
from skimage import io
from scipy.integrate import solve_ivp
from scipy.optimize import curve_fit, minimize
from scipy.ndimage import gaussian_filter1d
from scipy.stats import norm
from PIL import Image, ImageEnhance, ImageOps

# Utilities
import multiprocessing as mp
from multiprocessing import Pool, cpu_count
mp.set_start_method('fork', force=True)
from ipywidgets import interact, FloatSlider, Layout, interactive
import random
from tqdm import tqdm
import itertools
import cv2
from natsort import natsorted
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import seaborn as sns

# Set up logging
import logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")



def reorgTiffsToOriginal(data_path, conditions, subconditions):
    """
    Rename each condition's sub-directories to the given subcondition names and
    move their raw .tif files into an "original" sub-folder.

    The existing sub-directories of every condition are paired with
    `subconditions` in natural order (so "Pos2" comes before "Pos10").

    Args:
        data_path (str): Path to the data directory.
        conditions (list): List of conditions (directory names under data_path).
        subconditions (list): List of target subcondition names.

    Raises:
        IndexError: If a condition contains more sub-directories than there are
            subcondition names. Raised before anything is renamed, so the
            directory tree is never left half-renamed.
    """
    import re  # local import: only needed for the natural-sort key below

    def _natural_key(name):
        # re.split with a capturing group alternates text, digits, text, ...
        # so odd positions are always digit runs and compare as integers.
        return [int(token) if position % 2 else token
                for position, token in enumerate(re.split(r"(\d+)", name))]

    for condition in conditions:
        condition_path = os.path.join(data_path, condition)

        # Get the actual subconditions in the directory, in natural order
        actual_subconditions = sorted(
            (name for name in os.listdir(condition_path)
             if os.path.isdir(os.path.join(condition_path, name))),
            key=_natural_key,
        )

        if len(actual_subconditions) > len(subconditions):
            raise IndexError(
                f"{condition_path} contains {len(actual_subconditions)} directories "
                f"but only {len(subconditions)} subcondition names were given."
            )

        # Rename the actual subconditions to match the subconditions in the list
        for actual_subcondition, target_name in zip(actual_subconditions, subconditions):
            if actual_subcondition != target_name:
                os.rename(os.path.join(condition_path, actual_subcondition),
                          os.path.join(condition_path, target_name))

        for subcondition in subconditions:
            subcondition_path = os.path.join(condition_path, subcondition)
            original_dir_path = os.path.join(subcondition_path, "original")

            # Always create the "original" directory (also creates a missing
            # subcondition directory, as before)
            os.makedirs(original_dir_path, exist_ok=True)

            for filename in os.listdir(subcondition_path):
                # Case-insensitive so ".TIF" files are picked up as well,
                # matching the check used when splitting the DAPI channel.
                if filename.lower().endswith(".tif"):
                    shutil.move(os.path.join(subcondition_path, filename),
                                os.path.join(original_dir_path, filename))
            print(f"Moved .tif files from {subcondition_path} to {original_dir_path}")


def prepare_conditions(data_path):
    """
    Prepare conditions and subconditions, naming subconditions 'RepX' where X is the index.

    Directories that this pipeline writes itself ('output_data' and 'PIV_plots'
    under data_path, 'averaged' inside a condition) are ignored so that
    re-running after an analysis does not count them as conditions or replicates.

    Args:
        data_path (str): Path to the data directory.

    Returns:
        conditions (list): Naturally sorted list of condition names.
        subconditions (list): List of subcondition names 'Rep1', 'Rep2', ...
            sized from the number of sub-directories in the first condition.
            Both lists are empty when data_path contains no condition directory.
    """
    # Result folders created by other steps of this pipeline, not raw data.
    generated_top_level = ('output_data', 'PIV_plots')
    generated_in_condition = ('averaged',)

    conditions = natsorted([
        f for f in os.listdir(data_path)
        if os.path.isdir(os.path.join(data_path, f)) and f not in generated_top_level
    ])

    # Nothing to analyse: return empty lists instead of failing on conditions[0].
    if not conditions:
        return [], []

    # Determine the number of subconditions by counting directories in the first condition
    first_condition_path = os.path.join(data_path, conditions[0])
    num_subconditions = sum(
        1 for f in os.listdir(first_condition_path)
        if os.path.isdir(os.path.join(first_condition_path, f)) and f not in generated_in_condition
    )

    # Name subconditions 'RepX' where X is the index (1-based)
    subconditions = [f'Rep{i+1}' for i in range(num_subconditions)]

    return conditions, subconditions




def reorgTiffs_Split_dapi(data_path, conditions, subconditions, file_interval=None):
    """
    Copy 'DAPI' images from each subcondition's 'original' folder into a
    'dapi-<interval>x' folder, taking every `file_interval`-th file.

    Args:
        data_path (str): Path to the data directory.
        conditions (list): List of conditions.
        subconditions (list): List of subconditions.
        file_interval (int or None): Copy every nth file. None copies every
            file (same as 1).

    Raises:
        ValueError: If file_interval is smaller than 1.
    """
    # Resolve the default once, before any path is built, so every
    # subcondition gets the same 'dapi-1x' folder name.
    if file_interval is None:
        file_interval = 1
    if file_interval < 1:
        raise ValueError(f"file_interval must be a positive integer, got {file_interval!r}")

    for condition in conditions:
        for subcondition in subconditions:
            subcondition_path = os.path.join(data_path, condition, subcondition)
            original_dir_path = os.path.join(subcondition_path, "original")

            if not os.path.exists(original_dir_path):
                print(f"Error: The original directory {original_dir_path} does not exist.")
                continue

            # Create the directory for the DAPI channel
            dapi_dir = os.path.join(subcondition_path, f"dapi-{file_interval}x")
            os.makedirs(dapi_dir, exist_ok=True)

            # .tif files whose name contains 'dapi' (case insensitive), in sorted order
            dapi_files = [f for f in sorted(os.listdir(original_dir_path))
                          if f.lower().endswith(".tif") and "dapi" in f.lower()]
            selected_files = dapi_files[::file_interval]

            # If all expected files are already copied, skip this subcondition
            already_copied_files = set(os.listdir(dapi_dir))
            if all(filename in already_copied_files for filename in selected_files):
                print(f"Skipping {subcondition} as the expected output is already present.")
                continue

            for filename in selected_files:
                shutil.copy(os.path.join(original_dir_path, filename),
                            os.path.join(dapi_dir, filename))

            print(f"Copied every {file_interval}th 'DAPI' file from {original_dir_path} into {dapi_dir}.")



# Convert a single image (helper function for multiprocessing)
def process_single_image(file_name, output_dir, brightness_factor, contrast_factor, num_digits, i):
    """
    Convert one image to an 8-bit grayscale 2048x2048 LZW-compressed TIFF.

    Args:
        file_name (str): Path of the image to convert.
        output_dir (str): Directory the converted TIFF is written to.
        brightness_factor (float): Factor passed to the brightness enhancer.
        contrast_factor (float): Factor passed to the contrast enhancer.
        num_digits (int): Width the 1-based index is zero-padded to in the file name.
        i (int): 0-based position of the image in the sequence.
    """
    # Close the source file as soon as the pixels are converted; the function
    # runs in pool workers over many files and would otherwise leave every
    # handle open until garbage collection.
    with Image.open(file_name) as source_image:
        image = source_image.convert("L")

    image_resized = image.resize((2048, 2048), Image.LANCZOS)

    image_brightened = ImageEnhance.Brightness(image_resized).enhance(brightness_factor)
    image_contrasted = ImageEnhance.Contrast(image_brightened).enhance(contrast_factor)

    # Output names are 1-based and zero-padded: converted_image_0001.tif, ...
    padded_index = str(i + 1).zfill(num_digits)
    base_file_name = f'converted_image_{padded_index}.tif'
    processed_image_path = os.path.join(output_dir, base_file_name)
    image_contrasted.save(processed_image_path, format='TIFF', compression='tiff_lzw')


# Convert PIVlab images to the right size using multiprocessing
def convert_images(data_path, conditions, subconditions, max_frame, brightness_factor=1, contrast_factor=1, skip_frames=1):
    """
    Convert the JPEG frames in every '<condition>/<subcondition>/piv_movie'
    folder into TIFFs stored in the sibling 'piv_movie_converted' folder.

    Frames are naturally sorted, truncated to the first `max_frame` entries
    (when max_frame is truthy), thinned to every `skip_frames`-th one and
    handed to `process_single_image` on a pool with one worker per CPU core.
    Existing output does not stop the conversion; it only triggers a notice.
    """
    for condition in tqdm(conditions, desc="Conditions", leave=False):
        for subcondition in tqdm(subconditions, desc="Subconditions", leave=False):
            subcondition_dir = os.path.join(data_path, condition, subcondition)
            input_dir = os.path.join(subcondition_dir, "piv_movie")
            output_dir = os.path.join(subcondition_dir, "piv_movie_converted")
            os.makedirs(output_dir, exist_ok=True)

            # Select the frames to convert
            frames = natsorted(glob.glob(os.path.join(input_dir, '*.jpg')))
            if max_frame:
                frames = frames[:max_frame]
            frames = frames[::skip_frames]

            # Only the number of existing outputs matters for the notice
            existing_count = len(glob.glob(os.path.join(output_dir, '*.tif')))
            if existing_count >= len(frames):
                print(f"Conversion might already be completed or partial for {output_dir}. Continuing...")

            num_digits = len(str(len(frames)))
            jobs = [
                (frame_path, output_dir, brightness_factor, contrast_factor, num_digits, index)
                for index, frame_path in enumerate(frames)
            ]

            # One worker per available core
            with Pool(cpu_count()) as pool:
                finished = pool.starmap(process_single_image, jobs)
                list(tqdm(finished, total=len(frames), desc="Converting Images", leave=False))


# Helper function to plot autocorrelation
def plot_autocorrelation_values(data_path, condition, subcondition, frame_id, lambda_tau, results, fitted_values, intervector_distance_microns):
    """
    Save a plot of the measured autocorrelation, its fitted decay and the
    correlation length for one frame.

    The image is written to
    '<data_path>/<condition>/<subcondition>/autocorrelation_plots/autocorrelation_frame_<frame_id>.jpg'.

    Args:
        lambda_tau: Position of the vertical marker; labelled in µm.
        results: Autocorrelation values, one per lag.
        fitted_values: Fitted curve evaluated at the same lags.
        intervector_distance_microns: Lag spacing; it is multiplied by 1E6 to
            build the µm axis, so despite its name it is presumably given in
            metres - confirm against the caller.
    """
    target_dir = os.path.join(data_path, condition, subcondition, "autocorrelation_plots")
    os.makedirs(target_dir, exist_ok=True)

    lag_positions = np.arange(len(results)) * intervector_distance_microns * 1E6

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(lag_positions, results, label='Autocorrelation Values', marker='o', linestyle='-', markersize=5)
    ax.plot(lag_positions, fitted_values, label='Fitted Exponential Decay', linestyle='--', color='red')
    ax.axvline(x=lambda_tau, color='green', linestyle='-.', label=f'Correlation Length = {lambda_tau:.2f} µm')

    ax.set_xlabel('Scaled Lag (µm)')
    ax.set_ylabel('Autocorrelation')
    ax.set_title(f'Autocorrelation Function and Fitted Exponential Decay (Frame {frame_id})')
    ax.legend()
    ax.grid(True, which='both', linestyle='--', linewidth=0.5)

    fig.tight_layout()

    fig.savefig(os.path.join(target_dir, f'autocorrelation_frame_{frame_id}.jpg'), dpi=200, format='jpg')
    plt.close(fig)


# Helper function to calculate correlation length
def correlation_length(data_frame):
    """
    Estimate the spatial correlation length of the velocity-magnitude field of one frame.

    The field is pivoted onto its (y, x) grid, mean-centred, and its
    autocorrelation is computed via FFT. The autocorrelation is sampled along
    the grid diagonal, normalised to start at 1 and fitted with
    A * exp(-x / B) + C. The correlation length is the lag at which the fitted
    curve equals 0.3, scaled by the grid spacing.

    Args:
        data_frame (pd.DataFrame): Needs the columns 'x [m]', 'y [m]' and
            'velocity magnitude [m/s]', one row per grid point.

    Returns:
        tuple: (lambda_tau, results, fitted_values, intervector_distance_microns)
            lambda_tau: Correlation length in the units of 'y [m]', or NaN when
                the fit fails or never reaches 0.3.
            results: Normalised diagonal autocorrelation, min(grid shape) // 2 values.
            fitted_values: Fitted curve at the same lags (NaN when the fit failed).
            intervector_distance_microns: Spacing between neighbouring grid rows,
                in the units of 'y [m]' (the name is historical).
    """
    # Reshape to a 2D grid; force float so the in-place centring below also
    # works when the input column holds integers.
    v = data_frame.pivot(index='y [m]', columns='x [m]', values="velocity magnitude [m/s]").values.astype(float)
    v -= np.mean(v)  # Centering the data

    # FFT to find the power spectrum and compute the autocorrelation
    fft_v = np.fft.fft2(v)
    autocorr = np.real(np.fft.ifft2(fft_v * np.conj(fft_v)))
    autocorr = autocorr / np.max(autocorr)  # Normalize the autocorrelation

    # Autocorrelation along the diagonal, averaged over the +r and -r lags
    r_values = min(v.shape) // 2
    lags = np.arange(r_values)
    results = (autocorr[lags, lags] + autocorr[-lags, -lags]) / 2

    # Normalize the results to start from 1
    results /= results[0]

    def exponential_decay(x, A, B, C):
        return A * np.exp(-x / B) + C

    try:
        params, _ = curve_fit(exponential_decay, lags, results, p0=(1, 10, 0), maxfev=5000)
    except (RuntimeError, ValueError, TypeError):
        # RuntimeError: no convergence; ValueError: NaN/inf in the data (e.g. a
        # constant field); TypeError: fewer data points than fit parameters.
        params = [np.nan, np.nan, np.nan]  # Use NaN to indicate the fit failed

    A, B, C = params
    fitted_values = exponential_decay(lags, *params)

    # N grid rows span (N - 1) intervals, so the spacing is extent / (N - 1).
    n_rows = v.shape[0]
    y_extent = data_frame["y [m]"].max() - data_frame["y [m]"].min()
    intervector_distance_microns = y_extent / (n_rows - 1) if n_rows > 1 else np.nan

    # Solve A * exp(-x / B) + C = 0.3 for x. The logarithm only exists when
    # (0.3 - C) / A is positive; anything else means the curve never reaches 0.3.
    lambda_tau = np.nan
    if B > 0 and A != 0:
        ratio = (0.3 - C) / A
        if ratio > 0:
            lambda_tau = -B * np.log(ratio) * intervector_distance_microns

    return lambda_tau, results, fitted_values, intervector_distance_microns


# Load PIV data from PIVlab into dataframes
def load_piv_data(data_path, condition, subcondition, min_frame=0, max_frame=None, skip_frames=1):
    """
    Read the PIVlab text exports of one subcondition into a list of DataFrames.

    Files matching '<data_path>/<condition>/<subcondition>/piv_data/PIVlab_*.txt'
    are sorted by name and sliced with [min_frame:max_frame:skip_frames]. Each
    file is parsed with its first two rows skipped, missing values are replaced
    by 0 and four PIVlab column names are mapped to the names used in this module.

    Returns:
        list: One DataFrame per selected file, in file-name order.
    """
    column_aliases = {
        "magnitude [m/s]": "velocity magnitude [m/s]",
        "simple shear [1/s]": "shear [1/s]",
        "simple strain [1/s]": "strain [1/s]",
        "Vector type [-]": "data type [-]"
    }

    file_pattern = os.path.join(data_path, condition, subcondition, "piv_data", "PIVlab_****.txt")
    selected_files = sorted(glob.glob(file_pattern))[min_frame:max_frame:skip_frames]

    progress = tqdm(selected_files, desc=f"Loading PIV data for {condition} {subcondition}", leave=False)
    return [
        pd.read_csv(path, skiprows=2).fillna(0).rename(columns=column_aliases)
        for path in progress
    ]

# Generate dataframes from PIV data with time intervals applied
def generate_dataframes_from_piv_data(data_path, condition, subcondition, min_frame=0, max_frame=None, skip_frames=1, plot_autocorrelation=True, time_interval=1):
    """
    Build the per-frame mean table and the per-frame feature matrices for one
    subcondition and save both as CSV.

    Args:
        data_path (str): Path to the data directory.
        condition (str): Condition directory name.
        subcondition (str): Subcondition directory name.
        min_frame, max_frame, skip_frames: Forwarded to load_piv_data to select frames.
        plot_autocorrelation (bool): When True, an autocorrelation plot is saved per frame.
        time_interval (float): Multiplied with the frame index to obtain 'time (s)'
            and with the cumulative velocity to obtain 'distance [m]'.

    Returns:
        tuple: (mean_data_frame, pivot_data_frame). mean_data_frame has one row
        per loaded frame with '<column>_mean' columns plus 'frame' and the three
        time columns; pivot_data_frame has one row per frame holding a 2D array
        per feature plus 'frame'. They are also written to
        '<subcondition>/dataframes_PIV/mean_values.csv' and 'features_matrices.csv'.
    """
    output_directory_dfs = os.path.join(data_path, condition, subcondition, "dataframes_PIV")
    os.makedirs(output_directory_dfs, exist_ok=True)

    # Load PIV data
    data_frames = load_piv_data(data_path, condition, subcondition, min_frame, max_frame, skip_frames)

    # Calculating mean values with valid vectors only
    mean_values = []
    for frame_id, data_frame in enumerate(tqdm(data_frames, desc=f"Generating dataframes for {condition} {subcondition}", leave=False)):
        lambda_tau, results, fitted_values, intervector_distance_microns = correlation_length(data_frame)
        if plot_autocorrelation:
            # lambda_tau is scaled by 1E6 for the plot only; the stored column keeps the unscaled value
            plot_autocorrelation_values(data_path, condition, subcondition, frame_id, lambda_tau * 1E6, results, fitted_values, intervector_distance_microns)
        # This adds a column to the frame stored in data_frames (in place), so it
        # becomes the last column of every loaded frame.
        data_frame["correlation length [m]"] = lambda_tau
        # Rebinds the local name only: rows with data type 1 are kept for the mean,
        # the frame stored in data_frames keeps all rows.
        data_frame = data_frame[data_frame["data type [-]"] == 1]
        mean_values.append(data_frame.mean(axis=0))

    # Creating mean DataFrame; 'frame' is the 0-based position in the loaded list
    mean_data_frame = pd.DataFrame(mean_values)
    mean_data_frame.reset_index(drop=False, inplace=True)
    mean_data_frame.rename(columns={'index': 'frame'}, inplace=True)

    # Shift the mean velocity magnitude so that its minimum over all frames is zero
    mean_data_frame["velocity magnitude [m/s]"] = mean_data_frame["velocity magnitude [m/s]"] - mean_data_frame["velocity magnitude [m/s]"].min()

    # add a column with total distance travelled (cumulative velocity * time step, shifted to start at its minimum)
    mean_data_frame["distance [m]"] = mean_data_frame["velocity magnitude [m/s]"].cumsum() * time_interval
    mean_data_frame["distance [m]"] = mean_data_frame["distance [m]"] - mean_data_frame["distance [m]"].min()

    # Calculate power and add to DataFrame
    volume = 2E-9  # m^3 (i.e. 2 µl) - presumably the sample volume; confirm
    viscosity = 1E-3  # Pa*s (i.e. 1 mPa*s) - presumably water; confirm
    mean_data_frame["power [W]"] = volume * viscosity * (mean_data_frame["velocity magnitude [m/s]"]/mean_data_frame["correlation length [m]"])**2

    # Scale time appropriately using the provided time_interval
    mean_data_frame["time (s)"] = mean_data_frame["frame"] * time_interval
    mean_data_frame["time (min)"] = mean_data_frame["time (s)"] / 60
    mean_data_frame["time (h)"] = mean_data_frame["time (min)"] / 60

    # Creating pivot matrices for each feature.
    # [:-1] drops the last column, which is the 'correlation length [m]' column added in the loop above.
    features = data_frames[0].columns[:-1]
    pivot_matrices = {feature: [] for feature in features}

    for data_frame in data_frames:
        # One (y, x) matrix per feature for this frame
        temporary_dictionary = {feature: data_frame.pivot(index='y [m]', columns='x [m]', values=feature).values for feature in features}
        for feature in features:
            pivot_matrices[feature].append(temporary_dictionary[feature])

    pivot_data_frame = pd.DataFrame(pivot_matrices)

    # Adjusting column names in mean_data_frame: every column except frame/time gets a '_mean' suffix
    mean_data_frame.columns = [f"{column}_mean" if column not in ["frame", "time (s)", "time (min)", "time (h)"] else column for column in mean_data_frame.columns]

    # Adding frame column to pivot_data_frame (it becomes the last column)
    pivot_data_frame["frame"] = mean_data_frame["frame"].values

    # Save DataFrames to CSV
    mean_df_output_path = os.path.join(output_directory_dfs, "mean_values.csv")
    mean_data_frame.to_csv(mean_df_output_path, index=False)

    pivot_df_output_path = os.path.join(output_directory_dfs, "features_matrices.csv")
    pivot_data_frame.to_csv(pivot_df_output_path, index=False)

    return mean_data_frame, pivot_data_frame



# Plot the PIVlab output as heatmaps
def generate_heatmaps_from_dataframes(df, data_path, condition, subcondition, feature_limits, time_interval=3):
    """
    Overlay every per-frame feature matrix on its converted PIV image and save it as a JPEG.

    For each feature in `feature_limits` and each row j of `df`, the image
    'piv_movie_converted/converted_image_*.tif' number j (sorted by name) is
    drawn with the feature matrix on top, and the figure is saved to
    'heatmaps_PIV/<name>/<name>_heatmap_<j>.jpg', where <name> is the first
    word of the feature.

    Args:
        df (pd.DataFrame): One row per frame; each feature cell holds a 2D array.
            The last column is read as the frame number for the title
            (presumably the 'frame' column - confirm against the caller).
        data_path (str): Path to the data directory.
        condition (str): Condition directory name.
        subcondition (str): Subcondition directory name.
        feature_limits (dict): Maps feature column name -> (vmin, vmax) colour limits.
        time_interval (float): Seconds per frame, used for the title only.

    Raises:
        IndexError: If there are fewer converted images than rows in df.
    """
    # The background frames are the same for every feature and every row, so
    # the directory is listed and sorted once instead of once per heatmap.
    image_files_pattern = f"{data_path}/{condition}/{subcondition}/piv_movie_converted/converted_image_****.tif"
    image_files = sorted(glob.glob(image_files_pattern))

    # Extent shared by the background image and the overlay
    extent = [-2762/2, 2762/2, -2762/2, 2762/2]

    for feature, limits in feature_limits.items():
        vmin, vmax = limits
        feature_name = feature.split()[0]

        for j in tqdm(range(len(df)), desc=f"Generating heatmaps for {condition} {subcondition} {feature}", leave=False):
            vals = df.iloc[j, df.columns.get_loc(feature)]

            output_directory_heatmaps = os.path.join(data_path, condition, subcondition, "heatmaps_PIV", feature_name, f"{feature_name}_heatmap_{j}.jpg")

            # Keep the TIFF open only while it is being drawn and saved, so
            # long runs do not accumulate open file handles.
            with Image.open(image_files[j]) as image:
                plt.figure(figsize=(10, 6))
                plt.imshow(image, cmap=None, extent=extent)  # piv image
                im = plt.imshow(vals, cmap='inferno', origin='upper', alpha=0.7, extent=extent, vmin=vmin, vmax=vmax)  # heatmap
                plt.xlabel('x [um]')
                plt.ylabel('y [um]')
                cbar = plt.colorbar(im)
                cbar.set_label(feature)
                time = df.iloc[j, -1]
                plt.title(f"PIV - {feature}  ||  time: {int(time * time_interval/60)} min -- {int(time * time_interval/3600)} hours")

                os.makedirs(os.path.dirname(output_directory_heatmaps), exist_ok=True)
                plt.savefig(output_directory_heatmaps, format='jpg', dpi=250)
                plt.close()


def create_movies_PIV(data_path, condition, subcondition, frame_rate, feature_limits=None, max_frame=None):
    """
    Assemble the saved heatmap JPEGs of each feature into an MJPG .avi movie.

    For every key of `feature_limits`, the images
    'heatmaps_PIV/<name>/<name>_heatmap_*.jpg' (<name> = first word of the
    feature) are naturally sorted and written to 'heatmaps_PIV/<name>.avi'.

    Args:
        data_path (str): Path to the data directory.
        condition (str): Condition directory name.
        subcondition (str): Subcondition directory name.
        frame_rate (float): Frames per second of the movie.
        feature_limits (dict or None): Only its keys (feature names) are used.
            None or an empty dict means there is nothing to do.
        max_frame (int or None): Use only the first max_frame images.
    """
    # The documented default is None; treat it as "no features".
    if not feature_limits:
        return

    plots_dir = f"{data_path}/{condition}/{subcondition}/heatmaps_PIV/"
    for feature in feature_limits.keys():
        feature_name_for_file = feature.split()[0]
        heatmap_dir = os.path.join(data_path, condition, subcondition, "heatmaps_PIV", feature_name_for_file, f"{feature_name_for_file}_heatmap_****.jpg")
        image_files = natsorted(glob.glob(heatmap_dir))

        if not image_files:
            print(f"No images found for feature {feature_name_for_file}.")
            continue

        # Limit the number of files if max_frame is specified
        image_files = image_files[:max_frame] if max_frame is not None else image_files

        # Get the resolution of the first image (assuming all images are the same size)
        first_image = cv2.imread(image_files[0])
        if first_image is None:
            # cv2.imread signals an unreadable file by returning None
            print(f"Could not read {image_files[0]}; skipping feature {feature_name_for_file}.")
            continue
        video_resolution = (first_image.shape[1], first_image.shape[0])  # Width x Height

        # Define the codec and create VideoWriter object
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out_path = f'{plots_dir}{feature_name_for_file}.avi'
        out = cv2.VideoWriter(out_path, fourcc, frame_rate, video_resolution)

        try:
            for file in tqdm(image_files, desc=f"Creating movie for {condition} {subcondition} {feature}", leave=False):
                img = cv2.imread(file)
                out.write(img)  # Write the image as is, without resizing
        finally:
            # Always finalise the container, even if a frame fails
            out.release()
        print(f"Video saved to {out_path}")


# Process PIV data for all conditions and subconditions, then average and save results
def process_piv_data(data_path, conditions, subconditions, feature_limits, time_intervals, skip_frames, min_frame=0, max_frame=None, plot_autocorrelation=True, frame_rate=120, heatmaps=True):
    """
    Run the PIV analysis for all conditions and subconditions, then average
    the per-subcondition mean tables of each condition and save the result.

    Args:
        data_path (str): Path to the data directory.
        conditions (list): List of conditions.
        subconditions (list): List of subconditions.
        feature_limits (dict): Feature name -> (vmin, vmax), used for heatmaps and movies.
        time_intervals (list): Seconds between acquired frames, one entry per
            condition; multiplied by skip_frames to get the analysed time step.
        skip_frames (int): Use every skip_frames-th frame.
        min_frame, max_frame: Frame range forwarded to the dataframe generation.
        plot_autocorrelation (bool): Save an autocorrelation plot per frame.
        frame_rate (float): Frame rate of the generated movies.
        heatmaps (bool): When true, also convert images, draw heatmaps and build movies.

    The averaged table is written to '<condition>/averaged/<condition>_average.csv'.
    """
    for i, condition in tqdm(enumerate(conditions), desc="Processing PIV data", total=len(conditions), leave=True):
        time_interval = time_intervals[i] * skip_frames
        results = []
        for subcondition in tqdm(subconditions, desc=f"Processing subconditions for {condition}", leave=False):
            m, p = generate_dataframes_from_piv_data(data_path, condition, subcondition, min_frame, max_frame, skip_frames, plot_autocorrelation, time_interval)
            results.append(m)

            if heatmaps:
                # Convert only the movie that is about to be plotted. Passing the
                # full condition/subcondition lists here would reconvert every
                # image of the experiment on every iteration of this loop.
                convert_images(data_path, [condition], [subcondition], max_frame=None, brightness_factor=1, contrast_factor=1, skip_frames=skip_frames)
                generate_heatmaps_from_dataframes(p, data_path, condition, subcondition, feature_limits, time_interval)
                create_movies_PIV(data_path, condition, subcondition, frame_rate, feature_limits=feature_limits, max_frame=max_frame)

        # Nothing to average when no subcondition was processed
        if not results:
            continue

        # Averaging and saving the results for the current condition
        save_path = os.path.join(data_path, condition, 'averaged')
        average_df = sum(results) / len(results)

        os.makedirs(save_path, exist_ok=True)  # Ensure the directory exists
        average_df.to_csv(os.path.join(save_path, f"{condition}_average.csv"))
        
        
        
        
# Generate PCA from PIVlab output
def plot_pca(dfs, data_path, conditions, subconditions, features):
    """
    Plot a two-component PCA trajectory for each dataframe and save the figure.

    Each dataframe is fitted with its own PCA on the `features` columns. Its
    points are drawn as a scatter plus line segments whose opacity rises from
    0.001 (first row) to 1 (last row). Dataframes, conditions and subconditions
    are paired positionally; one colour of the seaborn "colorblind" palette is
    used per pair. The figure is written to
    '<data_path>/<last condition>/<last subcondition>/plots_PIV/PCA.jpg'.
    """
    first_axis = 'principal component 1'
    second_axis = 'principal component 2'

    plt.figure(figsize=(10, 6))

    # Colours come from Seaborn's "colorblind" palette
    sns.set_palette("colorblind", color_codes=True)
    palette = sns.color_palette("colorblind", n_colors=len(conditions))

    groups = zip(dfs, conditions, subconditions)
    for group_index, (df, condition, subcondition) in enumerate(groups):
        group_color = palette[group_index]

        projected = PCA(n_components=2).fit_transform(df.loc[:, features])
        principalDf = pd.DataFrame(data=projected, columns=[first_axis, second_axis])

        # Opacity increases linearly with the row index
        num_points = principalDf.shape[0]
        alphas = np.linspace(0.001, 1, num_points)

        # One two-point segment per consecutive pair of rows
        for end in range(1, num_points):
            segment = principalDf.iloc[end - 1:end + 1]
            plt.plot(segment[first_axis], segment[second_axis],
                     alpha=alphas[end], linestyle='-', linewidth=2, color=group_color)

        # The points themselves
        plt.scatter(principalDf[first_axis], principalDf[second_axis],
                    alpha=0.5, label=f'{condition}_{subcondition}', s=10, color=group_color)

    plt.xlabel('Principal Component 1')
    plt.ylabel('Principal Component 2')
    plt.title('PCA of PIV Features (All Samples)')
    plt.legend()
    plt.grid(True)

    pca_file = os.path.join(data_path, conditions[-1], subconditions[-1], "plots_PIV", "PCA.jpg")
    os.makedirs(os.path.dirname(pca_file), exist_ok=True)
    plt.savefig(pca_file, format='jpg', dpi=250)
    plt.close()



def plot_PIV_features(data_path, conditions, subconditions, features_pca, sigma=10, min_frame=0, max_frame=None):
    """
    Plot the PCA and the time course of every mean PIV feature for each subcondition.

    For each condition, 'dataframes_PIV/mean_values.csv' of every subcondition
    is loaded, smoothed with a Gaussian filter, and sliced to
    [min_frame:max_frame]. A PCA figure is produced via plot_pca, and every
    feature column (columns 5 to -3 of the table) is plotted against time in
    hours and in minutes into '<subcondition>/plots_PIV/<name>_h.jpg' and
    '<name>_min.jpg', where <name> is the first word of the feature.

    Args:
        data_path (str): Path to the data directory.
        conditions (list): List of conditions.
        subconditions (list): List of subconditions.
        features_pca (list): Column names used for the PCA.
        sigma (float): Standard deviation of the Gaussian smoothing kernel.
        min_frame (int): First row to include.
        max_frame (int or None): Row to stop at (exclusive); None keeps all.
    """
    # (time column, x-axis label, file-name suffix) for the two plots per feature
    time_axes = (
        ("time (h)", 'Time (hours)', "h"),
        ("time (min)", 'Time (minutes)', "min"),
    )

    for condition in tqdm(conditions, desc="Plotting PIV features", leave=True):
        dfs = []

        for subcondition in subconditions:
            # Construct the file path
            file_path = os.path.join(data_path, condition, subcondition, "dataframes_PIV", "mean_values.csv")
            df = pd.read_csv(file_path)

            # Apply Gaussian filter to every column except the first and the last three
            df.iloc[:, 1:-3] = df.iloc[:, 1:-3].apply(lambda x: gaussian_filter1d(x, sigma=sigma))

            # Rename columns.
            # NOTE(review): the values are not rescaled here although the new
            # names say [um] / [um/s] - confirm the intended units.
            df = df.rename(columns={
                "data type [-]_mean": "work [J]",
                "correlation length [m]_mean": "correlation length [um]",
                "velocity magnitude [m/s]_mean": "velocity magnitude [um/s]"
            })

            # Calculate cumulative work (overwrites the renamed data-type column)
            df["work [J]"] = df["power [W]_mean"].cumsum()

            # Slice the dataframe if min_frame and max_frame are provided
            df = df.iloc[min_frame:max_frame, :]

            dfs.append(df)

        # Plot PCA
        plot_pca(dfs, data_path, [condition] * len(subconditions), subconditions, features_pca)

        # Plot individual features
        for feature in dfs[0].columns[5:-3]:
            for df, subcondition in zip(dfs, subconditions):
                output_directory_plots = os.path.join(data_path, condition, subcondition, "plots_PIV")

                # Ensure the directory exists
                os.makedirs(output_directory_plots, exist_ok=True)

                for time_column, x_label, suffix in time_axes:
                    # A fresh figure per saved plot: plt.close() below discards
                    # the current one, and plotting without a new figure would
                    # silently fall back to the default figure size.
                    plt.figure(figsize=(12, 8))
                    plt.plot(df[time_column], df[feature], marker='o', linestyle='-', markersize=1, linewidth=1, label=f'{condition}_{subcondition}')
                    plt.xlabel(x_label)
                    plt.ylabel(feature)
                    plt.title(f"PIV - {feature}")
                    plt.grid(True, which='both', linestyle='--', linewidth=0.5)
                    plt.legend()
                    plt.savefig(os.path.join(output_directory_plots, f"{feature.split()[0]}_{suffix}.jpg"), format='jpg', dpi=300)
                    plt.close()



def plot_PIV_features_averaged(data_path, conditions, features_pca, sigma=2, min_frame=0, max_frame=None):
    """
    Generate PIV plots averaged over subconditions and save them in the 'PIV_plots_averaged' folder.

    For each condition, '<condition>/averaged/<condition>_average.csv' is
    loaded, smoothed and sliced, and every feature is plotted against time in
    hours ('<name>_h.jpg') and minutes ('<name>_min.jpg'), where <name> is the
    first word of the feature. Conditions without an averaged file are
    reported and skipped.

    Args:
        data_path (str): Path to the data directory.
        conditions (list): List of conditions.
        features_pca (list): List of feature columns to plot.
        sigma (float): The standard deviation for Gaussian kernel applied for smoothing.
        min_frame (int): The minimum frame to include in the analysis.
        max_frame (int): The maximum frame to include in the analysis.
    """
    # (time column, x-axis label, file-name suffix) for the two plots per feature
    time_axes = (
        ("time (h)", 'Time (hours)', "h"),
        ("time (min)", 'Time (minutes)', "min"),
    )

    for condition in tqdm(conditions, desc="Plotting averaged PIV features", leave=True):
        # Path to the averaged data
        averaged_data_path = os.path.join(data_path, condition, "averaged")
        averaged_df_file = os.path.join(averaged_data_path, f"{condition}_average.csv")

        if not os.path.exists(averaged_df_file):
            print(f"Error: Averaged dataframe {averaged_df_file} does not exist.")
            continue

        # Load the averaged dataframe
        df = pd.read_csv(averaged_df_file)

        # Apply Gaussian smoothing to every column except the first and the last three
        df.iloc[:, 1:-3] = df.iloc[:, 1:-3].apply(lambda x: gaussian_filter1d(x, sigma=sigma))

        # Rename columns as necessary for consistency in plotting.
        # NOTE(review): the values are not rescaled here although the new
        # names say [um] / [um/s] - confirm the intended units.
        df = df.rename(columns={"data type [-]_mean": "work [J]",
                                "correlation length [m]_mean": "correlation length [um]",
                                "velocity magnitude [m/s]_mean": "velocity magnitude [um/s]"})

        df["work [J]"] = df["power [W]_mean"].cumsum()

        # Limit the frames if specified
        df = df.iloc[min_frame:max_frame, :]

        # Prepare output directory
        output_directory_plots = os.path.join(averaged_data_path, "PIV_plots_averaged")
        os.makedirs(output_directory_plots, exist_ok=True)

        # Feature plotting
        for feature in features_pca:
            for time_column, x_label, suffix in time_axes:
                # A fresh figure per saved plot: plt.close() below discards the
                # current one, and plotting without a new figure would silently
                # fall back to the default figure size.
                plt.figure(figsize=(12, 8))
                plt.plot(df[time_column], df[feature], marker='o', linestyle='-', markersize=1, linewidth=1)
                plt.xlabel(x_label)
                plt.ylabel(feature)
                plt.title(f"Averaged PIV - {feature} over Subconditions")
                plt.grid(True, which='both', linestyle='--', linewidth=0.5)
                plt.savefig(os.path.join(output_directory_plots, f"{feature.split()[0]}_{suffix}.jpg"), format='jpg', dpi=300)
                plt.close()




def plot_PIV_features_combined(data_path, conditions, features_pca, sigma=2, min_frame=0, max_frame=None):
    """
    Plot every feature for all conditions on shared axes, once against hours
    and once against minutes, and save the figures under
    '<data_path>/PIV_plots/averaged_conditions'.

    The curves come from '<condition>/averaged/<condition>_average.csv';
    conditions without that file are reported and left out.

    Args:
        data_path (str): Path to the data directory.
        conditions (list): List of conditions.
        features_pca (list): List of feature columns to plot.
        sigma (float): The standard deviation for Gaussian kernel applied for smoothing.
        min_frame (int): The minimum frame to include in the analysis.
        max_frame (int): The maximum frame to include in the analysis.
    """
    combined_output_dir = os.path.join(data_path, "PIV_plots", "averaged_conditions")
    os.makedirs(combined_output_dir, exist_ok=True)

    column_renames = {"data type [-]_mean": "work [J]",
                      "correlation length [m]_mean": "correlation length [um]",
                      "velocity magnitude [m/s]_mean": "velocity magnitude [um/s]"}

    # feature -> {condition: (time in hours, values)}
    combined_data = {feature: {} for feature in features_pca}

    for condition in tqdm(conditions, desc="Collecting PIV data", leave=True):
        averaged_data_path = os.path.join(data_path, condition, "averaged")
        averaged_df_file = os.path.join(averaged_data_path, f"{condition}_average.csv")

        if not os.path.exists(averaged_df_file):
            print(f"Error: Averaged dataframe {averaged_df_file} does not exist.")
            continue

        df = pd.read_csv(averaged_df_file)

        # Smooth every column except the first and the last three
        smoothable = df.iloc[:, 1:-3]
        df.iloc[:, 1:-3] = smoothable.apply(lambda x: gaussian_filter1d(x, sigma=sigma))

        df = df.rename(columns=column_renames)
        df["work [J]"] = df["power [W]_mean"].cumsum()

        # Keep only the requested frame range
        df = df.iloc[min_frame:max_frame, :]

        for feature in features_pca:
            combined_data[feature][condition] = (df["time (h)"], df[feature])

    # (x-axis label, file-name suffix, convert hours to minutes?)
    axis_variants = (
        ('Time (hours)', 'h', False),
        ('Time (minutes)', 'min', True),
    )

    for feature in features_pca:
        for x_label, suffix, in_minutes in axis_variants:
            plt.figure(figsize=(12, 8))

            for condition, (time, values) in combined_data[feature].items():
                x_values = time * 60 if in_minutes else time
                plt.plot(x_values, values, marker='o', linestyle='-', markersize=1, linewidth=1, label=condition)

            plt.xlabel(x_label)
            plt.ylabel(feature)
            plt.title(f"Combined PIV - {feature} across Conditions")
            plt.grid(True, which='both', linestyle='--', linewidth=0.5)
            plt.legend()
            plt.savefig(os.path.join(combined_output_dir, f"combined_{feature.split()[0]}_{suffix}.jpg"), format='jpg', dpi=300)
            plt.close()


def _load_smoothed_piv_means(csv_path, sigma, min_frame, max_frame):
    """
    Load one "mean_values.csv" file and prepare it for plotting.

    Args:
        csv_path (str): Path to the "mean_values.csv" file of one subcondition.
        sigma (float): The standard deviation for Gaussian kernel applied for smoothing.
        min_frame (int): First row (frame) to keep.
        max_frame (int): Row (frame) at which to stop; None keeps everything up to the end.

    Returns:
        pandas.DataFrame: The smoothed, renamed and frame-limited dataframe.
    """
    df = pd.read_csv(csv_path)

    # Smooth every column except the first one and the last three
    df.iloc[:, 1:-3] = df.iloc[:, 1:-3].apply(lambda column: gaussian_filter1d(column, sigma=sigma))

    # Rename columns for consistency in plotting.
    # NOTE(review): only the labels change here ([m] -> [um], [m/s] -> [um/s]); no values are
    # rescaled in this function - confirm the stored values already use these units.
    df = df.rename(columns={"data type [-]_mean": "work [J]", 
                            "correlation length [m]_mean": "correlation length [um]", 
                            "velocity magnitude [m/s]_mean": "velocity magnitude [um/s]"})

    # "work [J]" is the running sum of the power column (computed before the frame limit is applied)
    df["work [J]"] = df["power [W]_mean"].cumsum()

    # Limit the frames if specified
    return df.iloc[min_frame:max_frame, :]


def plot_PIV_features_all_conditions_subconditions(data_path, conditions, subconditions, features_pca, sigma=2, min_frame=0, max_frame=None):
    """
    Generate PIV plots that display all conditions and subconditions together on the same plot for each feature.

    For every feature two JPGs are written to "<data_path>/PIV_plots/all_conditions_subconditions":
    "combined_<first word of feature>_h.jpg" (time in hours) and
    "combined_<first word of feature>_min.jpg" (time in minutes).

    Subconditions whose "dataframes_PIV/mean_values.csv" file is missing are reported and left
    out of both plots. Each file is read and smoothed only once and reused for every feature.

    Args:
        data_path (str): Path to the data directory.
        conditions (list): List of conditions.
        subconditions (list): List of subconditions for each condition.
        features_pca (list): List of features to include in plotting.
        sigma (float): The standard deviation for Gaussian kernel applied for smoothing.
        min_frame (int): The minimum frame to include in the analysis.
        max_frame (int): The maximum frame to include in the analysis.
    """
    # Prepare output directory for combined plots
    combined_output_dir = os.path.join(data_path, "PIV_plots", "all_conditions_subconditions")
    os.makedirs(combined_output_dir, exist_ok=True)

    # Load every available subcondition once; the same data feeds the hours and the minutes plot
    labelled_frames = []
    for condition in conditions:
        for subcondition in subconditions:
            subcondition_data_path = os.path.join(data_path, condition, subcondition, "dataframes_PIV", "mean_values.csv")

            if not os.path.exists(subcondition_data_path):
                print(f"Error: Data file {subcondition_data_path} does not exist.")
                continue

            labelled_frames.append((
                f'{condition} - {subcondition}',
                _load_smoothed_piv_means(subcondition_data_path, sigma, min_frame, max_frame),
            ))

    # (file suffix, x-axis label, factor converting the "time (h)" column to the plotted unit)
    time_axes = (
        ("h", 'Time (hours)', 1),
        ("min", 'Time (minutes)', 60),
    )

    for feature in features_pca:
        for suffix, x_label, hours_to_unit in time_axes:
            plt.figure(figsize=(12, 8))

            # Plot each subcondition on the same figure
            for label, df in labelled_frames:
                plt.plot(df["time (h)"] * hours_to_unit, df[feature], marker='o', linestyle='-', markersize=1, linewidth=1, label=label)

            plt.xlabel(x_label)
            plt.ylabel(feature)
            plt.title(f"All Conditions and Subconditions Combined - {feature}")
            plt.grid(True, which='both', linestyle='--', linewidth=0.5)
            plt.legend(loc='best', fontsize='small', ncol=2)  # Adjust legend to fit all entries
            plt.savefig(os.path.join(combined_output_dir, f"combined_{feature.split()[0]}_{suffix}.jpg"), format='jpg', dpi=300)
            plt.close()


In [20]:
# Example usage: point the notebook at one experiment folder and reorganize its raw data.
# NOTE(review): both paths are machine-specific relative paths; adjust them before running elsewhere.
# Calibration-curve images, sorted by path; not referenced by the later cells in this part of the notebook.
calibration_curve_paths = sorted(glob.glob("../../../../../../mnt/c/Users/Admin/Thomson Lab Dropbox/David Larios/activedrops/main/calibration_curve/***ugml.tif"))

# Experiment folder that the following cells pass to every processing and plotting function.
data_path = "../../../../../../mnt/c/Users/Admin/Thomson Lab Dropbox/David Larios/activedrops/microscope/test/3ulTMB-0p5ulDNA_all50nM_/"
# presumably discovers the condition/subcondition folder names under data_path - verify against prepare_conditions
conditions, subconditions = prepare_conditions(data_path)

print("Conditions:", conditions)
print("Subconditions:", subconditions)
# Renames the subcondition folders on disk and moves the raw data to the "original" folder
# (per the function's docstring), so this call modifies the dataset in place.
reorgTiffsToOriginal(data_path, conditions, subconditions)

Conditions: ['kif3-beads1in10', 'kif3-beads1in100']
Subconditions: ['Rep1']
Moved .tif files from ../../../../../../mnt/c/Users/Admin/Thomson Lab Dropbox/David Larios/activedrops/microscope/082624-dapibeads-kif3/4ultxtl-1ulMT-1ulDNA50nM-1ulbeads1in10_1/kif3-beads1in10/Rep1 to ../../../../../../mnt/c/Users/Admin/Thomson Lab Dropbox/David Larios/activedrops/microscope/082624-dapibeads-kif3/4ultxtl-1ulMT-1ulDNA50nM-1ulbeads1in10_1/kif3-beads1in10/Rep1/original
Moved .tif files from ../../../../../../mnt/c/Users/Admin/Thomson Lab Dropbox/David Larios/activedrops/microscope/082624-dapibeads-kif3/4ultxtl-1ulMT-1ulDNA50nM-1ulbeads1in10_1/kif3-beads1in100/Rep1 to ../../../../../../mnt/c/Users/Admin/Thomson Lab Dropbox/David Larios/activedrops/microscope/082624-dapibeads-kif3/4ultxtl-1ulMT-1ulDNA50nM-1ulbeads1in10_1/kif3-beads1in100/Rep1/original


In [23]:
# Per the recorded output, copies every 3rd 'DAPI' file from each "original" folder into a
# "dapi-3x" folder next to it. presumably file_interval sets that step - verify against reorgTiffs_Split_dapi
reorgTiffs_Split_dapi(data_path, conditions, subconditions, file_interval=3)

Copied every 3th 'DAPI' file from ../../../../../../mnt/c/Users/Admin/Thomson Lab Dropbox/David Larios/activedrops/microscope/082624-dapibeads-kif3/4ultxtl-1ulMT-1ulDNA50nM-1ulbeads1in10_1/kif3-beads1in10/Rep1/original into ../../../../../../mnt/c/Users/Admin/Thomson Lab Dropbox/David Larios/activedrops/microscope/082624-dapibeads-kif3/4ultxtl-1ulMT-1ulDNA50nM-1ulbeads1in10_1/kif3-beads1in10/Rep1/dapi-3x.
Copied every 3th 'DAPI' file from ../../../../../../mnt/c/Users/Admin/Thomson Lab Dropbox/David Larios/activedrops/microscope/082624-dapibeads-kif3/4ultxtl-1ulMT-1ulDNA50nM-1ulbeads1in10_1/kif3-beads1in100/Rep1/original into ../../../../../../mnt/c/Users/Admin/Thomson Lab Dropbox/David Larios/activedrops/microscope/082624-dapibeads-kif3/4ultxtl-1ulMT-1ulDNA50nM-1ulbeads1in10_1/kif3-beads1in100/Rep1/dapi-3x.


In [10]:
# Define feature limits and other parameters
# Fixed ranges; note that they are overridden a few lines below.
v = 2E-7
velocity_limits = (0, v)
other_limits = (-0.0005, 0.0005)
time_interval_list = [6]  # time intervals in seconds between frames for each condition
skip_frames = 4 ### CHANGE THIS TO SKIP FRAMES
 

# These two assignments replace the fixed ranges defined above, so (None, None) is what
# actually ends up in feature_limits. Comment them out to use the fixed ranges instead.
# presumably (None, None) means "no fixed limit" downstream - verify in process_piv_data
velocity_limits = (None, None)
other_limits = (None, None)


# Limits per PIV feature, passed to process_piv_data below; commented-out entries are disabled.
feature_limits = {
    # 'u [m/s]': (-v, v), 
    # 'v [m/s]': (-v, v), 
    # 'data type [-]': (None, None),
    'velocity magnitude [m/s]': velocity_limits,
    'vorticity [1/s]': other_limits,
    'divergence [1/s]': other_limits,
    # 'dcev [1]': (0, 250),
    'shear [1/s]': other_limits,
    'strain [1/s]': other_limits,
    'vector direction [degrees]': (-180, 180),
}


# Features for PCA and plotting
# These are column names as they exist after the plotting functions have renamed/derived columns
# ("velocity magnitude [um/s]", "correlation length [um]" and "work [J]" are created there).
features_pca = [
    "vorticity [1/s]_mean",
    "velocity magnitude [um/s]",
    "divergence [1/s]_mean",
    "shear [1/s]_mean",
    "strain [1/s]_mean",
    "correlation length [um]", 
    "power [W]_mean",
    "work [J]",
]


In [11]:
# Process PIV data
# Uses the limits, time intervals and frame skipping defined in the parameter cell above.
# NOTE(review): the recorded output below reports saved heatmap videos although heatmaps=False
# is passed here, so it was probably produced by an earlier run - confirm before relying on it.
process_piv_data(
    data_path, 
    conditions, 
    subconditions, 
    feature_limits, 
    time_interval_list, 
    min_frame=0, 
    max_frame=None, 
    skip_frames=skip_frames, 
    plot_autocorrelation=False, 
    frame_rate=1, 
    heatmaps=False
    )


Processing PIV data:   0%|          | 0/1 [00:00<?, ?it/s]
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A

[A[A


[A[A[A


[A[A[A

[A[A

[A[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A


Video saved to ../../../../../../mnt/c/Users/Admin/Thomson Lab Dropbox/David Larios/activedrops/microscope/082624-dapibeads-kif3/4ultxtl-1ulMT-1ulDNA50nM-1ulbeads1in10_1//kif3/Rep1/heatmaps_PIV/velocity.avi



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A

Video saved to ../../../../../../mnt/c/Users/Admin/Thomson Lab Dropbox/David Larios/activedrops/microscope/082624-dapibeads-kif3/4ultxtl-1ulMT-1ulDNA50nM-1ulbeads1in10_1//kif3/Rep1/heatmaps_PIV/vorticity.avi



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A

Video saved to ../../../../../../mnt/c/Users/Admin/Thomson Lab Dropbox/David Larios/activedrops/microscope/082624-dapibeads-kif3/4ultxtl-1ulMT-1ulDNA50nM-1ulbeads1in10_1//kif3/Rep1/heatmaps_PIV/divergence.avi



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A

Video saved to ../../../../../../mnt/c/Users/Admin/Thomson Lab Dropbox/David Larios/activedrops/microscope/082624-dapibeads-kif3/4ultxtl-1ulMT-1ulDNA50nM-1ulbeads1in10_1//kif3/Rep1/heatmaps_PIV/shear.avi



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A

Video saved to ../../../../../../mnt/c/Users/Admin/Thomson Lab Dropbox/David Larios/activedrops/microscope/082624-dapibeads-kif3/4ultxtl-1ulMT-1ulDNA50nM-1ulbeads1in10_1//kif3/Rep1/heatmaps_PIV/strain.avi



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
Processing PIV data: 100%|██████████| 1/1 [04:08<00:00, 248.71s/it]

Video saved to ../../../../../../mnt/c/Users/Admin/Thomson Lab Dropbox/David Larios/activedrops/microscope/082624-dapibeads-kif3/4ultxtl-1ulMT-1ulDNA50nM-1ulbeads1in10_1//kif3/Rep1/heatmaps_PIV/vector.avi





In [None]:
# Plot features and PCA
# presumably produces the plots for each condition/subcondition separately - verify against plot_PIV_features
plot_PIV_features(
    data_path, 
    conditions, 
    subconditions, 
    features_pca, 
    min_frame=0, 
    max_frame=None
    )


In [None]:
# Plot the features listed in features_pca with plot_PIV_features_averaged.
# presumably averages the subconditions of each condition before plotting - verify against the function
plot_PIV_features_averaged(
    data_path, 
    conditions, 
    features_pca, 
    min_frame=0, 
    max_frame=None
    )

In [None]:
# Plot each feature with all conditions on one figure; two JPGs are saved per feature,
# one with the time axis in hours and one in minutes.
plot_PIV_features_combined(
    data_path, 
    conditions, 
    features_pca, 
    min_frame=0, 
    max_frame=None
    )

In [None]:
# Plot each feature with every condition/subcondition pair on one figure (hours and minutes
# versions); the JPGs are saved under <data_path>/PIV_plots/all_conditions_subconditions.
plot_PIV_features_all_conditions_subconditions(
    data_path, 
    conditions, 
    subconditions, 
    features_pca, 
    min_frame=0, 
    max_frame=None
    )


In [15]:
def _remove_output_dir(dir_path):
    """Recursively delete dir_path if it exists and print whether the deletion succeeded."""
    if not os.path.exists(dir_path):
        return
    try:
        shutil.rmtree(dir_path)
    except OSError as e:
        # Best effort: report the failure and keep going with the remaining directories
        print(f"Error deleting directory {dir_path}: {e}")
    else:
        print(f"Deleted directory: {dir_path}")


def delete_outputs(data_path, conditions, subconditions, output_dirs=None):
    """
    Deletes output files and directories for the given conditions and subconditions.

    With the default output_dirs=None every known output directory is removed: the
    per-subcondition outputs, the "averaged" directory of each condition, and the shared
    "combined_PIV_plots" and "PIV_plots" directories directly under data_path.

    When output_dirs is given, only those directories are removed inside each
    condition/subcondition folder; the condition-level and top-level directories are left
    untouched, so a targeted clean-up never wipes unrelated results.

    Deletion is best effort: a directory that cannot be removed is reported on stdout and the
    remaining directories are still processed.

    Args:
        data_path (str): Base directory for PIV data and output.
        conditions (list): List of conditions.
        subconditions (list): List of subconditions.
        output_dirs (list, optional): Specific output directories to delete. If None, delete all known output directories.
    """
    delete_everything = output_dirs is None

    # Default output directories to remove
    if delete_everything:
        output_dirs = [
            "piv_movie_converted",
            "autocorrelation_plots",
            "dataframes_PIV",
            "heatmaps_PIV",
            "plots_PIV",
            "averaged",
            os.path.join("PIV_plots", "averaged_conditions"),
            os.path.join("PIV_plots", "all_conditions_subconditions"),
            "combined_PIV_plots",
            "all_conditions_subconditions_combined_plots",
        ]

    for condition in conditions:
        for subcondition in subconditions:
            for output_dir in output_dirs:
                _remove_output_dir(os.path.join(data_path, condition, subcondition, output_dir))

        # Remove the averaged directory at the condition level
        if delete_everything:
            _remove_output_dir(os.path.join(data_path, condition, "averaged"))

    # An explicit output_dirs list only targets the subcondition folders handled above
    if not delete_everything:
        return

    # Remove the combined plots directories at the top level, then PIV_plots itself
    shared_dirs = (
        "combined_PIV_plots",
        os.path.join("PIV_plots", "averaged_conditions"),
        os.path.join("PIV_plots", "all_conditions_subconditions"),
        "PIV_plots",
    )
    for shared_dir in shared_dirs:
        _remove_output_dir(os.path.join(data_path, shared_dir))

# Example usage
# WARNING: this permanently removes the generated output directories under data_path
# (shutil.rmtree), so run this cell only when the PIV outputs should be regenerated.
delete_outputs(data_path, conditions, subconditions)
