In [1]:
# Import standard libraries
import os
import re
import sys
import glob

# Import data processing libraries
import pandas as pd
import numpy as np

# Import image processing libraries
import cv2
from PIL import Image, ImageEnhance, ImageOps
from scipy.ndimage import gaussian_filter

# Import plotting and visualization libraries
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns
import colorcet as cc
from scipy.optimize import curve_fit
from ipywidgets import interact, FloatSlider

# Additional utilities
from natsort import natsorted  # For natural sorting
from sklearn.decomposition import PCA


# Default RP plotting style
def set_plotting_style():
    """
    Formats plotting environment to that used in Physical Biology of the Cell,
    2nd edition. To format all plots within a script, simply execute
    `mwc_induction_utils.set_plotting_style() in the preamble.
    """
    rc = {'lines.linewidth': 1.25,
          'axes.labelsize': 8,
          'axes.titlesize': 9,
          'axes.facecolor': '#E3DCD0',
          'xtick.labelsize': 7,
          'ytick.labelsize': 7,
        #   'font.family': 'Lucida Sans Unicode',
          'grid.linestyle': '-',
          'grid.linewidth': 0.1,
          'grid.color': '#ffffff',
          'legend.fontsize': 9}
    plt.rc('text.latex', preamble=r'\usepackage{sfmath}')
    plt.rc('xtick.major', pad=-1)
    plt.rc('ytick.major', pad=-1)
    plt.rc('mathtext', fontset='stixsans', sf='sansserif')
    plt.rc('figure', figsize=[3.5, 2.5])
    plt.rc('svg', fonttype='none')
    plt.rc('legend', title_fontsize='8', frameon=True, 
           facecolor='#E3DCD0', framealpha=1)
    sns.set_style('darkgrid', rc=rc)
    sns.set_palette("colorblind", color_codes=True)
    sns.set_context('notebook', rc=rc)

set_plotting_style()

In [2]:
def plot_autocorrelation_values(lambda_tau, results, fitted_values, filename=None):
    """
    Plots the autocorrelation values and the fitted exponential decay with scaled x-axis.
    
    Parameters:
    - lambda_tau (float): Correlation length.
    - results (array): Array of autocorrelation values.
    - fitted_values (array): Array of fitted values.
    - filename (str, optional): If provided, the plot will be saved to this filename.
    
    Returns:
    - None
    """
    plt.figure(figsize=(10, 6))
    intervector_distance_microns = 21.745

    # Scale x-axis by 20
    x_values = np.arange(len(results)) * intervector_distance_microns  # Generate scaled x-coordinates by intervector distance

    # Plot autocorrelation values and fitted exponential decay with scaled x-axis
    plt.plot(x_values, results, label='Autocorrelation Values', marker='o', linestyle='-', markersize=5)
    plt.plot(x_values, fitted_values, label='Fitted Exponential Decay', linestyle='--', color='red')
    plt.axvline(x=lambda_tau, color='green', linestyle='-.', label=f'Correlation Length = {lambda_tau:.2f} µm')

    # Adding labels, title, and legend
    plt.xlabel('Scaled Lag (µm)')
    plt.ylabel('Autocorrelation')
    plt.title('Autocorrelation Function and Fitted Exponential Decay')
    plt.legend()
    plt.grid(True, which='both', linestyle='--', linewidth=0.5)
    plt.ylim(0, 1.1)

    plt.tight_layout()

    # If filename is provided, save the plot
    if filename:
        directory = os.path.dirname(filename)
        if not os.path.exists(directory):
            os.makedirs(directory)
        plt.savefig(filename, dpi=200, format='jpg')
        plt.close()
    else:
        plt.show()
        plt.close()


def df_piv(file, volume, plot_dir):
    """
    Processes a PIV (Particle Image Velocimetry) data file. It reads the data, computes various 
    parameters, and adds computed columns related to velocity, power, and correlation length.

    Args:
    - file (str): Path to the PIV data file.
    - volume (float): The volume in microliters for power calculation.
    - plot_dir (str): Directory to save the autocorrelation plot.

    Returns:
    - DataFrame: A DataFrame with added columns for computed parameters like velocity magnitudes,
      correlation length, power, and mean velocity.
    """


    # Read and preprocess the dataframe
    df = pd.read_csv(file, skiprows=2).fillna(0)
    
    # Convert measurements to micrometers and micrometers per second
    df['x [um]'] = df['x [m]'] * 1E6
    df['y [um]'] = df['y [m]'] * 1E6
    df['u [um/s]'] = df['u [m/s]'] * 1E6
    df['v [um/s]'] = df['v [m/s]'] * 1E6
    df['magnitude [um/s]'] = df['magnitude [m/s]'] * 1E6

    # Obtain square grid of velocity magnitudes
    v = df.pivot(index='y [um]', columns='x [um]', values="magnitude [um/s]").values

    # Calculate intervector distance
    intervector_distance_microns = (df["y [um]"].max() - df["y [um]"].min()) / v.shape[0]

    # Calculate the autocorrelation function with Fourier transform
    full_product = np.fft.fft2(v) * np.conj(np.fft.fft2(v))
    inverse = np.real(np.fft.ifft2(full_product)) # Real part of the inverse Fourier transform
    normalized_inverse = inverse / inverse[0, 0]   # Normalize the autocorrelation function

    # Define the number of r values and initialize an array for the results
    r_values = v.shape[0] // 2
    results = np.zeros(r_values)

    # Compute the autocorrelation for each r value
    for r in range(r_values):
        autocorrelation_value = (normalized_inverse[r, r] + normalized_inverse[-r, -r]) / (v.shape[0] * v.shape[1])
        results[r] = autocorrelation_value 

    # Normalize the results array
    results = results / results[0]

    # Fit the results to an exponential decay model
    def exponential_decay(tau, A, B, C):
        return A * np.exp(-tau / B) + C

    params, _ = curve_fit(exponential_decay, np.arange(len(results)), results, maxfev=5000)
    A, B, C = params
    fitted_values = exponential_decay(np.arange(r_values), A, B, C)

    # Compute correlation length and other parameters
    lambda_tau = -B * np.log((0.3 - C) / A) * intervector_distance_microns
    df["correlation length (µm)"] = lambda_tau

    # Calculate power
    v0 = volume * 1E-9 # µl --> m^3
    µ = 1E-3        # mPa*S
    correlation_length = lambda_tau * 1E-6 # µm --> m

    # Calculate power using the mean velocity magnitude of non-zero vectors
    df["Power (W)"] = v0 * µ * (df[df["magnitude [m/s]"] > 0]["magnitude [m/s]"].mean() / correlation_length)**2
    
    # # Calculate mean of top 30% velocity magnitudes
    # n = int(0.3 * len(df))  # Top 30% of the vectors
    # df["mean velocity [um/s]"] = df["magnitude [um/s]"].nlargest(n).mean()

    # Calculate the mean of non-zero velocity magnitudes
    df["mean velocity [um/s]"] = df[df["magnitude [um/s]"] > 0]["magnitude [um/s]"].mean()

    # # Calculate drag force
    # df["drag force (pN)"] = 6 * np.pi * µ * lambda_tau * df["magnitude [m/s]"].mean()

    # Add file name column
    df["file name"] = os.path.basename(file).split('.')[0]

    # Reorganize DataFrame
    df = pd.concat([df.iloc[:, 12:], df.iloc[:, 4:12]], axis=1)

    # Plot and save autocorrelation values
    plot_filename = os.path.join(plot_dir, os.path.basename(file).split('.')[0] + '_autocorrelation.jpg')
    plot_autocorrelation_values(lambda_tau, results, fitted_values, filename=plot_filename)

    return df



def process_and_save_piv_files(data_path, condition, subcondition, volume=2, max_frame=None, save_csv=True):
    """
    Processes PIV data files and optionally saves them into separate CSV files. The function reads files 
    from a specified directory, processes each using the df_piv function, and conditionally saves each 
    resulting DataFrame as a CSV file in a specified output directory.

    Args:
    - data_path (str): Base directory where PIV data files are stored.
    - condition (str): The specific condition (subdirectory) under which the PIV data is stored.
    - subcondition (str): The subcondition (sub-subdirectory) under which the PIV data is stored.
    - volume (float, optional): Volume parameter for the df_piv function. Defaults to 2.
    - max_frame (int, optional): Maximum number of files to process. If None, all files are processed.
    - save_csv (bool, optional): Flag to decide whether to save the processed DataFrames as CSV files. Defaults to True.

    Returns:
    - List[DataFrame]: A list of DataFrames, each corresponding to a processed PIV file.
    """
    
    # Find input directory
    input_piv_data = os.path.join(data_path, condition, subcondition, "piv_data", "PIVlab_****.txt")

    # Define output directory based on input parameters
    output_dir = os.path.join(data_path, condition, subcondition, "dataframes_PIV")
    
    # Define plot directory
    plot_dir = os.path.join(data_path, condition, subcondition, "plots/autocorrelation")
    if not os.path.exists(plot_dir):
        os.makedirs(plot_dir)

    # Ensure the output directory exists
    if save_csv and not os.path.exists(output_dir):
        os.makedirs(output_dir)
    
    # Find and process files
    files = sorted(glob.glob(input_piv_data))
    dataframes = []
    for i, file in enumerate(files[:max_frame]):
        df = df_piv(file, volume, plot_dir)  # Pass plot_dir to df_piv function
        if save_csv:
            df.to_csv(os.path.join(output_dir, f"PIV_dataframe_{i}.csv"), index=False)
        dataframes.append(df)

    return dataframes



def convert_images(data_path, condition, subcondition, max_frame=None, brightness_factor=1, contrast_factor=1):
    """
    Converts, resizes, and adjusts the brightness and contrast of images located in a specified 
    directory and saves the processed images in a new directory. The function identifies images based on 
    a specified data path, condition, and subcondition.

    This function is specifically tailored for converting PIV (Particle Image Velocimetry) movie images. 
    It supports adjustments in brightness and contrast, and checks to avoid re-processing already 
    converted images.

    Args:
    - data_path (str): Base directory where the original PIV movie images are stored.
    - condition (str): Specific condition defining a subdirectory within the data path.
    - subcondition (str): Specific subcondition defining a sub-subdirectory within the condition directory.
    - max_frame (int, optional): Maximum number of images to process. If None, all images in the directory are processed.
    - brightness_factor (float, optional): Factor to adjust the brightness of the images. Defaults to 1 (no change).
    - contrast_factor (float, optional): Factor to adjust the contrast of the images. Defaults to 1 (no change).
    """

    # Construct input and output directories based on provided path, condition, and subcondition
    input_dir = f"{data_path}{condition}/{subcondition}/piv_movie/"
    output_dir = f"{data_path}{condition}/{subcondition}/piv_movie_converted/"

    # Create the output directory if it does not exist
    os.makedirs(output_dir, exist_ok=True)

    # Gather all JPEG images from the input directory
    input_files = natsorted(glob.glob(os.path.join(input_dir, '*.jpg')))

    # Limit the processing to max_frame if specified
    input_files = input_files[:max_frame] if max_frame is not None else input_files

    # Check if the output directory already has the converted files
    output_files = natsorted(glob.glob(os.path.join(output_dir, '*.tif')))
    if len(input_files) == len(output_files):
        print(f"Conversion already completed for {output_dir}. Skipping...")
        return

    # Prepare for filename formatting
    num_digits = len(str(len(input_files)))

    # Process each image
    for i, file_name in enumerate(input_files):
        # Open and convert image to grayscale
        image = Image.open(file_name).convert("L")

        # Resize image to 2048x2048 pixels
        image_resized = image.resize((2048, 2048), Image.LANCZOS)

        # Adjust brightness and contrast
        enhancer = ImageEnhance.Brightness(image_resized)
        image_brightened = enhancer.enhance(brightness_factor)
        enhancer = ImageEnhance.Contrast(image_brightened)
        image_contrasted = enhancer.enhance(contrast_factor)

        # Prepare the filename and save the processed image
        padded_index = str(i + 1).zfill(num_digits)
        base_file_name = f'converted_image_{padded_index}.tif'
        processed_image_path = os.path.join(output_dir, base_file_name)
        image_contrasted.save(processed_image_path, format='TIFF', compression='tiff_lzw')



def overlay_heatmap_on_image(image_file, df, heatmap_data, feature, vmin, vmax, time_in_minutes, output_dir=None):
    """
    Overlays a heatmap on an image and either saves or displays the combined visualization.

    Args:
    - image_file (str): Path to the image file.
    - df (DataFrame): The DataFrame containing the PIV data.
    - heatmap_data (np.array): Data for the heatmap.
    - feature (str): Name of the feature for which the heatmap is generated.
    - vmin (float): Minimum value for colormap scaling.
    - vmax (float): Maximum value for colormap scaling.
    - time_in_minutes (float): Time in minutes for the current frame.
    - output_dir (str, optional): Directory to save the plot. If None, the plot is displayed.

    The function loads the image, applies the heatmap on top with specified parameters, and either
    saves or displays the combined image, based on the provided output directory.
    """

    # Load the image
    image = Image.open(image_file)

    # Create a plot to overlay the heatmap on the image
    plt.figure(figsize=(10, 6))
    plt.imshow(image, cmap=None, extent=[-2762/2, 2762/2, -2762/2, 2762/2])  # Display the image
    im = plt.imshow(heatmap_data, cmap='inferno', origin='lower', alpha=0.7, extent=[-2762/2, 2762/2, -2762/2, 2762/2], vmin=vmin, vmax=vmax)  # Overlay the heatmap
    plt.xlabel('x [um]')
    plt.ylabel('y [um]')
    cbar = plt.colorbar(im)
    cbar.set_label(feature)
    plt.title(f'PIV Heatmap - {df["file name"][0]} || Time: {time_in_minutes:.2f} min')

    # Save or show the plot
    if output_dir:
        os.makedirs(os.path.dirname(output_dir), exist_ok=True)
        plt.savefig(output_dir, format='jpg', dpi=250)
        plt.close()
    else:
        plt.show()



def piv_heatmap(df, feature, vmin, vmax, time_in_minutes, output_dir=None, image_file=None):
    """
    Generates a heatmap for a specific feature from PIV data and optionally overlays it on an image.

    Args:
    - df (DataFrame): The DataFrame containing the PIV data.
    - feature (str): The feature for which to generate the heatmap.
    - vmin (float): Minimum value for colormap scaling.
    - vmax (float): Maximum value for colormap scaling.
    - time_in_minutes (float): Time in minutes for the current frame.
    - output_dir (str, optional): Directory to save the plot. If None, the plot is displayed.
    - image_file (str, optional): Path to an image file on which to overlay the heatmap.

    The function creates a heatmap from the provided DataFrame and feature. If an image file is provided,
    the heatmap is overlaid on the image; otherwise, a standalone heatmap is generated.
    """

    # Extract values for the heatmap
    vals = df.pivot(index='y [um]', columns='x [um]', values=feature).values

    if image_file:
        # Overlay the heatmap on the image if an image file is provided
        overlay_heatmap_on_image(image_file, df, vals, feature, vmin, vmax, time_in_minutes, output_dir)
    else:
        # Generate a standalone heatmap
        plt.figure(figsize=(10, 6))
        im = plt.imshow(vals, cmap='viridis', origin='lower', extent=[-2762/2, 2762/2, -2762/2, 2762/2], vmin=vmin, vmax=vmax)
        plt.xlabel('x [um]')
        plt.ylabel('y [um]')
        cbar = plt.colorbar(im)
        cbar.set_label(feature)
        plt.title(f'PIV Heatmap - {df["file name"][0]} || Time: {time_in_minutes:.2f} min')

        if output_dir:
            os.makedirs(os.path.dirname(output_dir), exist_ok=True)
            plt.savefig(output_dir, format='jpg', dpi=250)
            plt.close()
        else:
            plt.show()



def generate_heatmaps_for_features(data_path, condition, subcondition, feature_limits, dfs):
    """
    Automates the generation of heatmaps for multiple features across different data frames.

    Args:
    - data_path (str): Base directory where PIV data and images are stored.
    - condition (str): Specific condition defining a subdirectory within the data path.
    - subcondition (str): Specific subcondition defining a sub-subdirectory within the condition directory.
    - feature_limits (dict): Dictionary mapping features to their corresponding value limits (vmin, vmax).
    - dfs (List[DataFrame]): List of DataFrames containing PIV data.

    This function iterates over each DataFrame in 'dfs', generating heatmaps for each feature specified
    in 'feature_limits'. The heatmaps are either saved or displayed based on the provided output directory.
    """

    # Retrieve the list of converted image files
    image_files_pattern = f"{data_path}{condition}/{subcondition}/piv_movie_converted/converted_image_***.tif"
    image_files = sorted(glob.glob(image_files_pattern))

    # Iterate over each DataFrame and feature to generate heatmaps
    for i, df in enumerate(dfs):
        for feature, limits in feature_limits.items():
            vmin, vmax = limits
            feature_name_for_file = re.sub(r"\s*\[.*?\]\s*", "", feature).replace(" ", "_").lower()
            heatmap_output = f"{data_path}{condition}/{subcondition}/plots/{feature_name_for_file}/heatmap_{i}.jpg"
            piv_heatmap(df, feature, vmin=vmin, vmax=vmax, time_in_minutes=i * 180/60, image_file=image_files[i], output_dir=heatmap_output)


def create_heatmap_movies(data_path, condition, subcondition, feature_limits, frame_rate=120, max_frame=None):
    """
    Creates heatmap video files from heatmap images stored in a specified directory.

    Args:
    - data_path (str): Base path where the heatmap images are stored.
    - condition (str): Condition under which the heatmap images are stored.
    - subcondition (str): Subcondition under which the heatmap images are stored.
    - feature_limits (dict): Dictionary specifying the limits for each feature.
    - frame_rate (int, optional): Frame rate for the output video. Defaults to 120.
    - max_frame (int, optional): Maximum number of frames to be included in the video. If None, all frames are included.

    The function reads heatmap images from the specified directory and creates a video file for each feature.
    """

    plots_dir = f"{data_path}{condition}/{subcondition}/plots/"
    for feature in feature_limits.keys():
        feature_name_for_file = re.sub(r"\s*\[.*?\]\s*", "", feature).replace(" ", "_").lower()
        heatmap_dir = f"{data_path}{condition}/{subcondition}/plots/{feature_name_for_file}/"
        heatmap_files = natsorted(glob.glob(f"{heatmap_dir}heatmap_*.jpg"))

        if not heatmap_files:
            continue

        # Limit the number of files if max_frame is specified
        heatmap_files = heatmap_files[:max_frame] if max_frame is not None else heatmap_files

        # Get the resolution of the first image (assuming all images are the same size)
        first_image = cv2.imread(heatmap_files[0])
        video_resolution = (first_image.shape[1], first_image.shape[0])  # Width x Height

        # Define the codec and create VideoWriter object
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter(f'{plots_dir}{feature_name_for_file}.avi', fourcc, frame_rate, video_resolution)

        for file in heatmap_files:
            img = cv2.imread(file)
            out.write(img)  # Write the image as is, without resizing

        out.release()




def calculate_mean_over_time(dfs, seconds_interval):
    """
    Calculates the mean of specific columns over time from a list of DataFrames.

    Args:
    - dfs (List[DataFrame]): List of DataFrames to process.
    - seconds_interval (int): Interval in seconds between each DataFrame in the list.

    Returns:
    - DataFrame: A DataFrame containing the mean values of specific columns from each DataFrame,
      along with a corresponding time column in minutes.
    """

    means_list = []

    # Iterate over each DataFrame, calculating mean for specific columns
    for df in dfs:
        # Selecting specific columns and calculating the mean
        means = df.iloc[:, 5:8].join(df.iloc[:, 11:17]).mean(axis=0)
        means_list.append(means)

    # Concatenate all Series in the list into a single DataFrame
    result_df = pd.concat(means_list, axis=1).T

    # Reset index and convert index to time in minutes
    result_df = result_df.reset_index().rename(columns={'index': 'time [min]'})
    result_df['time [min]'] = result_df['time [min]'] * seconds_interval / 60

    return result_df




def plot_mean_over_time(df, feature, output_dir=None, sigma=1):
    """
    Plots a specific feature over time from a DataFrame with a Gaussian filter applied.

    Args:
    - df (DataFrame): DataFrame containing the mean values of specific columns over time.
    - feature (str): The feature to plot.
    - output_dir (str, optional): Directory to save the plot. If None, the plot is displayed.
    - sigma (float, optional): Standard deviation of the Gaussian filter.
    """

    # Apply Gaussian filter to the feature values
    filtered_values = gaussian_filter(df[feature], sigma=sigma)

    # Create the plot
    plt.figure(figsize=(10, 6))
    plt.plot(df['time [min]'], filtered_values)
    plt.xlabel('Time [min]')
    plt.ylabel(feature)
    plt.title(f'{feature} over time')

    # Save or show the plot
    if output_dir:
        os.makedirs(os.path.dirname(output_dir), exist_ok=True)
        plt.savefig(output_dir, format='jpg', dpi=250)
        plt.close()
    else:
        plt.show()


def plot_mean_over_time_dfs(df, data_path, condition, subcondition, sigma=0):
    """
    Plots specific features over time from a list of DataFrames with a Gaussian filter applied.

    Args:
    - df (DataFrame): DataFrame containing the mean values of specific columns over time.
    - data_path (str): Base directory for saving the plots.
    - condition (str): Condition label to be included in the plot directory.
    - subcondition (str): Subcondition label to be included in the plot directory.
    - sigma (float, optional): Standard deviation of the Gaussian filter. Default is 0.
    """

    for feature in df.columns[1:]:  # Assuming the first column is 'time [min]'
        output_dir = os.path.join(data_path, condition, subcondition, "plots", "features_vs_time", f"{feature.split()[0]}.jpg")
        plot_mean_over_time(df, feature, output_dir, sigma)



def perform_pca_and_plot(dataframe, data_path, condition, subcondition):
    """
    Performs Principal Component Analysis (PCA) on the provided DataFrame and plots the first two principal components.
    Optionally, it can save the plot in the specified output directory.

    Args:
    - dataframe (DataFrame): The DataFrame to perform PCA on.
    - data_path (str): Base path for the output directory.
    - condition (str): Condition label to be included in the plot title.
    - subcondition (str): Subcondition label to be included in the plot title.

    The function applies PCA to reduce the dimensionality of the data to two principal components and then plots these components.
    The plot is titled with the provided condition and subcondition, and saved in the specified output directory.
    """

    output_dir = f"{data_path}{condition}/{subcondition}/plots/"

    # Perform PCA on the DataFrame
    pca = PCA(n_components=2)
    principalComponents = pca.fit_transform(dataframe.iloc[:, 2:])
    principalDf = pd.DataFrame(data=principalComponents, columns=['principal component 1', 'principal component 2'])

    # Plot the PCA results
    plt.figure(figsize=(10, 6))
    plt.scatter(principalDf['principal component 1'], principalDf['principal component 2'])
    plt.xlabel('Principal Component 1')
    plt.ylabel('Principal Component 2')
    plt.title(f'PCA of PIV Data - Condition: {condition}, Subcondition: {subcondition}')

    # Save the plot
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    plt.savefig(os.path.join(output_dir, "PCA.jpg"), format='jpg', dpi=250)
    plt.close()



def process_piv_data(data_path, condition, subcondition, max_frame, feature_limits, frame_rate=120):
    """
    Processes PIV data, converts images, generates heatmaps for features, and creates heatmap movies.

    Args:
    - data_path (str): Base path where the PIV data files and images are stored.
    - condition (str): Specific condition defining a subdirectory within the data path.
    - subcondition (str): Specific subcondition defining a sub-subdirectory within the condition directory.
    - max_frame (int): Maximum number of frames/files to process.
    - feature_limits (dict): Dictionary specifying the limits for each feature.
    - frame_rate (int, optional): Frame rate for the output video. Defaults to 120.
    """

    # Process and save PIV files
    process_and_save_piv_files(data_path, condition, subcondition, max_frame=max_frame, save_csv=True)

    # Load all CSV files into a list of dataframes
    saved_processed_dfs = f"{data_path}{condition}/{subcondition}/dataframes_PIV/PIV_dataframe_*.csv"
    dfs = [pd.read_csv(file) for file in sorted(glob.glob(saved_processed_dfs))]

    # Convert images
    convert_images(data_path, condition, subcondition, max_frame=max_frame)

    # Generate heatmaps for features
    generate_heatmaps_for_features(data_path, condition, subcondition, feature_limits, dfs)

    # Create heatmap movies
    create_heatmap_movies(data_path, condition, subcondition, feature_limits, max_frame=max_frame, frame_rate=frame_rate)

    return dfs


In [3]:
# Example Usage
data_path = "../../data/02-ActiveDROPSfig2-K401_Kif3/"
condition = 'k401'
subcondition = 'rep1'
max_frame = 10
frame_rate=120
seconds_interval=180

# velocity_limits = (0, 3)
# other_limits = (-0.05, 0.05)

velocity_limits = (None, None)
other_limits = (None, None)

feature_limits = {
    'magnitude [um/s]': velocity_limits,
    'vorticity [1/s]': other_limits,
    'divergence [1/s]': other_limits,
    'dcev [1]': (0, 250),
    'simple shear [1/s]': other_limits,
    'simple strain [1/s]': other_limits,
    'vector direction [degrees]': (-180, 180),
}

df_k401 = process_piv_data(data_path, condition, subcondition, max_frame, feature_limits, frame_rate=frame_rate)
df_k401

Conversion already completed for ../../data/02-ActiveDROPSfig2-K401_Kif3/k401/rep1/piv_movie_converted/. Skipping...


[            x [um]       y [um]  u [um/s]  v [um/s]  magnitude [um/s]  \
 0        23.287672    23.287672       0.0       0.0               0.0   
 1        23.287672    45.205481       0.0       0.0               0.0   
 2        23.287672    67.123285       0.0       0.0               0.0   
 3        23.287672    89.041096       0.0       0.0               0.0   
 4        23.287672   110.958907       0.0       0.0               0.0   
 ...            ...          ...       ...       ...               ...   
 16124  2784.931567  2697.260352       0.0       0.0               0.0   
 16125  2784.931567  2719.178097       0.0       0.0               0.0   
 16126  2784.931567  2741.095843       0.0       0.0               0.0   
 16127  2784.931567  2763.013588       0.0       0.0               0.0   
 16128  2784.931567  2784.931567       0.0       0.0               0.0   
 
        correlation length (µm)     Power (W)  mean velocity [um/s]  \
 0                   584.365695  1.2790

In [4]:
calculated_df_k401 = calculate_mean_over_time(df_k401, seconds_interval=seconds_interval)
calculated_df_k401

Unnamed: 0,time [min],correlation length (µm),Power (W),mean velocity [um/s],magnitude [m/s],divergence [1/s],dcev [1],simple shear [1/s],simple strain [1/s],vector direction [degrees]
0,0.0,584.365695,1.279096e-19,0.147782,4.595908e-08,3.2e-05,0.812363,-2.826084e-07,4.848941e-07,-21.614065
1,3.0,695.832591,1.8105649999999998e-20,0.066206,2.956259e-08,5.2e-05,2.937912,-3.453804e-06,-2.89827e-06,-23.990815
2,6.0,697.425771,7.792045e-21,0.043532,2.11196e-08,4.4e-05,3.091198,-2.055994e-06,-4.805635e-06,-23.361984
3,9.0,676.195683,4.8788419999999996e-21,0.033398,1.655491e-08,3.2e-05,3.168687,-2.694821e-06,-2.932018e-06,-24.81521
4,12.0,655.462042,3.072747e-21,0.025692,1.287222e-08,2.2e-05,4.519826,2.145324e-07,-1.11829e-06,-22.020229
5,15.0,738.255731,1.238132e-21,0.018369,9.264572e-09,1.5e-05,2.626691,1.354159e-07,-1.733756e-06,-20.234477
6,18.0,727.661201,8.643151e-22,0.015127,7.663339e-09,1.1e-05,3.748872,-2.487166e-08,-3.393816e-06,-29.264546
7,21.0,820.712839,2.55042e-22,0.009268,4.710668e-09,6e-06,2.540194,-3.22554e-08,-1.40638e-06,-19.768872
8,24.0,841.503196,1.662301e-22,0.007672,3.907471e-09,4e-06,2.049815,1.842483e-07,-1.670069e-06,-21.61366
9,27.0,868.466097,9.072494e-23,0.005849,2.994443e-09,3e-06,1.991725,1.045573e-07,-5.799651e-07,-7.958818


In [5]:
plot_mean_over_time_dfs(calculated_df_k401, data_path, condition, subcondition, sigma=1)

In [6]:
perform_pca_and_plot(calculated_df_k401, data_path, condition, subcondition)


In [7]:
# Example Usage
data_path = "../../data/02-ActiveDROPSfig2-K401_Kif3/"
condition = 'kif3'
subcondition = 'rep1'
# max_frame = 100
frame_rate=120
seconds_interval=3

# velocity_limits = (0, 3)
# other_limits = (-0.05, 0.05)

velocity_limits = (None, None)
other_limits = (None, None)

feature_limits = {
    'magnitude [um/s]': velocity_limits,
    'vorticity [1/s]': other_limits,
    'divergence [1/s]': other_limits,
    'dcev [1]': (0, 250),
    'simple shear [1/s]': other_limits,
    'simple strain [1/s]': other_limits,
    'vector direction [degrees]': (-180, 180),
}

df_kif3 = process_piv_data(data_path, condition, subcondition, max_frame, feature_limits, frame_rate=frame_rate)
df_kif3

Conversion already completed for ../../data/02-ActiveDROPSfig2-K401_Kif3/kif3/rep1/piv_movie_converted/. Skipping...


[            x [um]       y [um]  u [um/s]  v [um/s]  magnitude [um/s]  \
 0        23.287672    23.287672       0.0       0.0               0.0   
 1        23.287672    45.205481       0.0       0.0               0.0   
 2        23.287672    67.123285       0.0       0.0               0.0   
 3        23.287672    89.041096       0.0       0.0               0.0   
 4        23.287672   110.958907       0.0       0.0               0.0   
 ...            ...          ...       ...       ...               ...   
 16124  2784.931567  2697.260352       0.0       0.0               0.0   
 16125  2784.931567  2719.178097       0.0       0.0               0.0   
 16126  2784.931567  2741.095843       0.0       0.0               0.0   
 16127  2784.931567  2763.013588       0.0       0.0               0.0   
 16128  2784.931567  2784.931567       0.0       0.0               0.0   
 
        correlation length (µm)     Power (W)  mean velocity [um/s]  \
 0                   624.230036  9.0639

In [8]:
calculated_df_kif3 = calculate_mean_over_time(df_kif3, seconds_interval=180)
calculated_df_kif3

Unnamed: 0,time [min],correlation length (µm),Power (W),mean velocity [um/s],magnitude [m/s],divergence [1/s],dcev [1],simple shear [1/s],simple strain [1/s],vector direction [degrees]
0,0.0,624.230036,9.063998e-19,0.420232,1.866802e-07,0.000276,3.527272,5.1e-05,0.000139,22.300495
1,3.0,742.307574,5.067127e-19,0.373637,1.662822e-07,0.000211,3.513746,0.000109,1.9e-05,24.978964
2,6.0,732.723514,5.425147999999999e-19,0.38162,1.70876e-07,0.000209,3.145665,7.9e-05,-3e-05,29.019467
3,9.0,700.582171,6.890765e-19,0.411223,1.842589e-07,0.000223,3.366418,0.000122,5.5e-05,31.457099
4,12.0,709.585372,6.300487e-19,0.398269,1.793434e-07,0.000209,2.460078,0.00012,0.000101,34.862073
5,15.0,729.42111,6.194782999999999e-19,0.405954,1.834581e-07,0.000163,2.912725,0.000109,3.5e-05,33.893235
6,18.0,740.744113,6.219272999999999e-19,0.413069,1.868531e-07,0.000174,3.164158,7.6e-05,-1.7e-05,37.401712
7,21.0,752.691711,4.112442e-19,0.341312,1.547746e-07,0.000138,1.812872,8.3e-05,6e-06,33.551946
8,24.0,735.365705,4.53031e-19,0.349987,1.590556e-07,0.000139,3.257732,6.6e-05,2.4e-05,35.670133
9,27.0,745.582489,3.1860089999999995e-19,0.29758,1.356999e-07,0.000148,2.978333,7.1e-05,1.5e-05,33.117855


In [9]:
plot_mean_over_time_dfs(calculated_df_kif3, data_path, condition, subcondition, sigma=1)

perform_pca_and_plot(calculated_df_kif3, data_path, condition, subcondition)


In [11]:

def plot_mean_over_time_dfs(calculated_dfs, data_path, condition, subcondition, sigma=1):
    """
    Plots specific features over time from multiple calculated DataFrames with a Gaussian filter applied.

    Args:
    - calculated_dfs (List[DataFrame]): List of calculated DataFrames containing the mean values of specific columns over time.
    - data_path (str): Base directory for saving the plots.
    - condition (str): Condition label to be included in the plot directory.
    - subcondition (str): Subcondition label to be included in the plot directory.
    - sigma (float, optional): Standard deviation of the Gaussian filter. Default is 1.
    """
    features = calculated_dfs[0].columns[1:]  # Assuming the first column is 'time [min]'

    for feature in features:
        plt.figure(figsize=(10, 6))
        for i, df in enumerate(calculated_dfs):
            filtered_values = gaussian_filter(df[feature], sigma=sigma)
            plt.plot(df['time [min]'], filtered_values, label=f'Sample_{i+1}')
        
        plt.xlabel('Time [min]')
        plt.ylabel(feature)
        plt.title(f'{feature} over time (all samples)')
        plt.legend()
        plt.grid(True, which='both', linestyle='--', linewidth=0.5)

        output_dir = os.path.join(data_path, condition, subcondition, "plots", "combined_features", f"{feature.split()[0]}.jpg")
        os.makedirs(os.path.dirname(output_dir), exist_ok=True)
        plt.savefig(output_dir, format='jpg', dpi=250)
        plt.close()

def perform_pca_and_plot(calculated_dfs, data_path, condition, subcondition):
    """
    Performs PCA on concatenated calculated DataFrames and plots the first two principal components.

    Args:
    - calculated_dfs (List[DataFrame]): List of calculated DataFrames.
    - data_path (str): Base path for the output directory.
    - condition (str): Condition label to be included in the plot title.
    - subcondition (str): Subcondition label to be included in the plot title.
    """
    combined_df = pd.concat(calculated_dfs)
    combined_df = combined_df.drop('time [min]', axis=1)  # Drop time column for PCA

    pca = PCA(n_components=2)
    principalComponents = pca.fit_transform(combined_df)
    principalDf = pd.DataFrame(data=principalComponents, columns=['principal component 1', 'principal component 2'])

    plt.figure(figsize=(10, 6))
    plt.scatter(principalDf['principal component 1'], principalDf['principal component 2'])
    plt.xlabel('Principal Component 1')
    plt.ylabel('Principal Component 2')
    plt.title(f'PCA of Combined Data - Condition: {condition}, Subcondition: {subcondition}')

    # Construct the output path with the filename
    output_file_name = "PCA_combined.jpg"
    output_dir = os.path.join(data_path, condition, subcondition, "plots", output_file_name)

    # Print the output directory for debugging
    print("Saving plot to:", output_dir)

    # Check if the directory exists, if not, create it
    if not os.path.exists(os.path.dirname(output_dir)):
        os.makedirs(os.path.dirname(output_dir))

    # Save the plot
    plt.savefig(output_dir, format='jpg', dpi=250)
    plt.close()

calculated_df1 = calculate_mean_over_time(df_k401, seconds_interval=1)
calculated_df2 = calculate_mean_over_time(df_kif3, seconds_interval=1)
plot_mean_over_time_dfs([calculated_df1, calculated_df2], data_path, condition, subcondition, sigma=1)
perform_pca_and_plot([calculated_df1, calculated_df2], data_path, condition, subcondition)


Saving plot to: ../../data/02-ActiveDROPSfig2-K401_Kif3/kif3/rep1/plots/PCA_combined.jpg
