In [1]:
import os
import pandas as pd
## Import relevant libraries
import sys
import glob

sys.path.append('../../py_files/')
import quadrop as qd

# qd.set_plotting_style()

In [None]:
def concatenate_merged_expression_piv(data_paths, output_save_folder=None):
    """
    Concatenate the merged_expression_PIV.csv files from multiple data paths into one file.

    Each input is read from `<data_path>/output_data/merged_expression_PIV.csv`.
    Paths where that file does not exist are reported and skipped. The
    concatenated table is written to
    `<output_save_folder>/output_data/merged_expression_PIV.csv`; the target
    directory is created if it does not exist yet.

    Parameters:
    - data_paths: List of folders, each expected to contain output_data/merged_expression_PIV.csv.
    - output_save_folder: Folder under which output_data/merged_expression_PIV.csv is written.
      If None, `<data_paths[0]>/output_data` is used as that folder.

    Returns:
    - (conditions, columns): the unique values of the 'condition' column and the
      column names of the concatenated DataFrame, both as lists.
    - None when no input file was found (nothing is written in that case).
    """

    output_folder = "output_data"
    merged_file_name = "merged_expression_PIV.csv"
    output_file_name = "merged_expression_PIV.csv"

    # List to store DataFrames from each path
    dataframes = []

    # Loop over each data path
    for path in data_paths:
        merged_file_path = os.path.join(path, output_folder, merged_file_name)

        # Check if the merged file exists
        if os.path.exists(merged_file_path):
            # Read the merged_expression_PIV.csv file
            dataframes.append(pd.read_csv(merged_file_path))
            print(f"Loaded data from: {merged_file_path}")
        else:
            print(f"File not found: {merged_file_path}")

    if not dataframes:
        print("No valid files found to concatenate.")
        return

    # Concatenate all DataFrames
    concatenated_df = pd.concat(dataframes, ignore_index=True)

    # Determine the output directory
    if output_save_folder is None:
        output_save_folder = os.path.join(data_paths[0], output_folder)

    output_file_path = os.path.join(output_save_folder, output_folder, output_file_name)

    # Create the directory the file is actually written into. Creating only
    # `output_save_folder` is not enough: the file lives one level deeper, and
    # to_csv fails if that sub-folder is missing.
    os.makedirs(os.path.dirname(output_file_path), exist_ok=True)

    # Save the concatenated DataFrame to the output directory
    concatenated_df.to_csv(output_file_path, index=False)

    conditions = list(concatenated_df['condition'].unique())
    columns = list(concatenated_df.columns)

    print(f"Concatenated DataFrame saved to: {output_file_path}")

    return conditions, columns
    


# Destination root for the combined table.
output_save_folder = "../../../../Thomson Lab Dropbox/David Larios/activedrops/main/all/"

# Experiment folders to combine. Each is passed as a data path to
# concatenate_merged_expression_piv, which looks for
# output_data/merged_expression_PIV.csv inside it.
chimeras_RT = "../../../../Thomson Lab Dropbox/David Larios/activedrops/main/090124-chimeras100nM-RT/2p5ul_TMBmix-1ulDNA100nM_/"
chimeras_29C = "../../../../Thomson Lab Dropbox/David Larios/activedrops/main/090124-chimeras100nM-29C/2p5ul_TMBmix-1ulDNA100nM_"
burst = "../../../../Thomson Lab Dropbox/David Larios/activedrops/main/090824-burstMotors-RT_29C/2p5TMB-1ulDNA100nM9kif3-heal-neg-nagr_4/"
sustained_RT = "../../../../Thomson Lab Dropbox/David Larios/activedrops/main/091024-sustainedMotors-RT/2p5TMB-1ulDNA100nM_/"
sustained_29C = "../../../../Thomson Lab Dropbox/David Larios/activedrops/main/090824-sustainedMotors-29C/2p5TMB-1ulDNA100nM_2/"

# Merge all experiments into one CSV; keep the condition labels and column
# names that the function returns for use in later cells.
conditions, columns = concatenate_merged_expression_piv(
    [chimeras_RT, chimeras_29C, burst, sustained_RT, sustained_29C],
    output_save_folder=output_save_folder,
)


In [9]:
# Folder handed to the plotting helper (same value as used for the concatenation step).
output_save_folder = "../../../../Thomson Lab Dropbox/David Larios/activedrops/main/all/"

# Quantities placed on the x- and y-axis of the plot.
x_column = "time (h)"
y_column = "Protein Concentration_nM"

# Alternative axis pairs; uncomment one line to plot it instead:
# x_column, y_column = "Protein Concentration_nM", "velocity magnitude [m/s]_mean"
# x_column, y_column = "time (h)", "velocity magnitude [m/s]_mean"
# x_column, y_column = "time (h)", "power [W]_mean"
# x_column, y_column = "time (h)", "work [J]_mean"
# x_column, y_column = "time (h)", "distance [m]_mean"

# Plot the six RT conditions together; the returned object is kept as `df`
# because a later cell reads its 'condition' column.
df = qd.plot_expression_piv(
    output_save_folder,
    ['A-RT', 'C-RT', 'D-RT', 'E-RT', 'G-RT', 'H-RT'],
    x_column,
    y_column,
    sigma_x=0.1, sigma_y=10,
    x_log=False, y_log=False,
    min_frame=0, max_frame=None,
    individual_plots=False,
)

Combined plot saved at ../../../../Thomson Lab Dropbox/David Larios/activedrops/main/all/output_data/expression_piv_plots/time_(h)_vs_distance_m_mean_All_Conditions.png


In [8]:
# Display the distinct values of the 'condition' column of `df`
# (`df` is assigned from qd.plot_expression_piv in an earlier cell).
df['condition'].unique() 

Unnamed: 0,condition,subcondition,time (s),Time_min,Time_h,Mean Intensity,Protein Concentration_ng_ul,Protein Concentration_nM,Number of Protein Molecules,Rate of Change of Protein Molecules per Second,...,dcev [1]_mean,shear [1/s]_mean,strain [1/s]_mean,vector direction [degrees]_mean,correlation length [m]_mean,distance [m]_mean,power [W]_mean,work [J]_mean,time (min),time (h)
0,A-RT,Rep1,0,0.000000,0.000000,0.376219,0.013725,0.308437,3.701249e+08,,...,15.756792,3.077541e-07,-1.858536e-07,-49.269162,0.000407,0.000000,2.668220e-21,2.668220e-21,0.000000,0.000000
4,A-RT,Rep1,160,2.666667,0.044444,0.390446,0.014244,0.320101,3.841213e+08,,...,20.310783,-9.548041e-08,-1.719639e-07,6.307186,0.000401,0.000001,7.122923e-22,3.380512e-21,2.666667,0.044444
8,A-RT,Rep1,320,5.333333,0.088889,0.460589,0.016803,0.377607,4.531281e+08,,...,21.967326,2.490213e-07,-1.393348e-07,-3.702144,0.000396,0.000002,2.511426e-22,3.631655e-21,5.333333,0.088889
12,A-RT,Rep1,480,8.000000,0.133333,0.726785,0.026515,0.595844,7.150122e+08,1.019823e+06,...,20.717637,-2.388502e-08,-7.656110e-08,41.708252,0.000390,0.000002,1.505119e-22,3.782167e-21,8.000000,0.133333
16,A-RT,Rep1,640,10.666667,0.177778,0.898246,0.032770,0.736413,8.836956e+08,1.017033e+06,...,23.892987,-1.008724e-09,2.759452e-08,-75.608392,0.000384,0.000003,5.573512e-22,4.339518e-21,10.666667,0.177778
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
93335,TiLa-29C,Rep1,44775,746.250000,12.437500,187.609889,6.844500,68.445003,8.213400e+10,-4.780217e+05,...,18.218730,-2.027834e-07,1.674991e-08,-149.345272,0.000373,0.000515,5.780155e-22,1.281437e-17,746.250000,12.437500
93336,TiLa-29C,Rep1,44820,747.000000,12.450000,187.610163,6.844510,68.445103,8.213412e+10,-6.504494e+05,...,9.176167,3.021321e-07,-1.183941e-06,-33.481257,0.000375,0.000517,1.675946e-20,1.283113e-17,747.000000,12.450000
93337,TiLa-29C,Rep1,44865,747.750000,12.462500,187.450609,6.838689,68.386894,8.206427e+10,-7.647250e+05,...,13.086143,-5.320987e-07,5.731274e-08,121.374871,0.000378,0.000517,1.051549e-21,1.283218e-17,747.750000,12.462500
93338,TiLa-29C,Rep1,44910,748.500000,12.475000,186.787342,6.814492,68.144916,8.177390e+10,-5.726964e+05,...,9.385167,-1.143063e-06,2.398268e-06,149.178348,0.000380,0.000518,9.494846e-21,1.284168e-17,748.500000,12.475000


In [None]:
# Compare H-RT against H-29C for a series of y-quantities, reusing `x_column`
# and `output_save_folder` from the earlier cells. The quantities are grouped
# by whether they are drawn on a linear (y_log=False) or logarithmic
# (y_log=True) y-axis; every other plotting argument is the same for all of them.
for y_log, y_columns in (
    (
        False,
        [
            "Protein Concentration_nM",
            "velocity magnitude [m/s]_mean",
            "distance [m]_mean",
            "Rate of Change of Protein Molecules per Second",
            "Translation Rate aa_s",
            "correlation length [m]_mean",
        ],
    ),
    (
        True,
        [
            "power [W]_mean",
            "work [J]_mean",
            # "Number of Protein Molecules",
        ],
    ),
):
    for y_column in y_columns:
        qd.plot_expression_piv(
            output_save_folder,
            ["H-RT", "H-29C"],
            x_column,
            y_column,
            sigma_x=0.1, sigma_y=10,
            x_log=False, y_log=y_log,
            min_frame=0, max_frame=None,
            individual_plots=False,
        )

In [None]:

# Columns used as PCA input features. Entries that are commented out are
# currently excluded from the analysis.
features_pca = [
    "vorticity [1/s]_mean",
    "divergence [1/s]_mean",
    "shear [1/s]_mean",
    "strain [1/s]_mean",
    "power [W]_mean",
    'vector direction [degrees]_mean',
    "Protein Concentration_nM",
    # "velocity magnitude [m/s]_mean",
    # "distance [m]_mean",
    # "correlation length [m]_mean",
    # "work [J]_mean",
]

# Run the PCA helper over every condition returned by the concatenation step,
# restricted to the 'Rep1' subcondition.
qd.plot_pca_expression_piv(
    output_save_folder,
    conditions=conditions,
    subconditions=['Rep1'],
    features=features_pca,
    sigma=1,
)
