In [1]:
import os
import pandas as pd
## Import relevant libraries
import sys
import glob

sys.path.append('../../py_files/')
import quadrop as qd

# qd.set_plotting_style()

In [2]:
def concatenate_merged_expression_piv(
    data_paths,
    output_save_folder=None,
    output_folder="output_data",
    merged_file_name="merged_expression_PIV.csv",
    output_file_name="merged_expression_PIV.csv",
):
    """
    Concatenate the merged expression/PIV CSV files from several data paths into one file.

    Each input is read from ``<data_path>/<output_folder>/<merged_file_name>``. Paths
    whose file is missing are reported and skipped. The concatenated table is written
    to ``<output_save_folder>/<output_folder>/<output_file_name>``; that directory is
    created if it does not exist yet.

    Parameters:
    - data_paths: List of base paths, each holding an `output_folder` subfolder with the merged file.
    - output_save_folder: Base folder for the concatenated file. If None, the first entry of
      `data_paths` is used, so the result lands in the first data path's output folder.
      NOTE: with the default file names this overwrites that path's own input file, and a
      second run would then read the already-concatenated rows again.
    - output_folder: Name of the subfolder (under each base path) that stores the CSV files.
    - merged_file_name: Name of the merged expression PIV file to read.
    - output_file_name: Name of the output concatenated file to save.

    Returns:
    - (conditions, columns): the unique values of the 'condition' column in order of first
      appearance, and the column names of the concatenated table. Both lists are empty
      when none of the input files was found (nothing is written in that case).
    """
    # List to store DataFrames from each path
    dataframes = []

    # Loop over each data path
    for path in data_paths:
        merged_file_path = os.path.join(path, output_folder, merged_file_name)

        # Only regular files are read; anything else is reported as missing
        if os.path.isfile(merged_file_path):
            dataframes.append(pd.read_csv(merged_file_path))
            print(f"Loaded data from: {merged_file_path}")
        else:
            print(f"File not found: {merged_file_path}")

    if not dataframes:
        print("No valid files found to concatenate.")
        # Keep the return shape stable so `conditions, columns = ...` still unpacks.
        return [], []

    # Concatenate all DataFrames
    concatenated_df = pd.concat(dataframes, ignore_index=True)

    # Determine the base folder, then the directory the file is actually written to.
    if output_save_folder is None:
        output_save_folder = data_paths[0]
    output_dir = os.path.join(output_save_folder, output_folder)

    # Create the real target directory (not just its parent) before writing.
    os.makedirs(output_dir, exist_ok=True)

    # Save the concatenated DataFrame to the output directory
    output_file_path = os.path.join(output_dir, output_file_name)
    concatenated_df.to_csv(output_file_path, index=False)

    conditions = list(concatenated_df['condition'].unique())
    columns = list(concatenated_df.columns)

    print(f"Concatenated DataFrame saved to: {output_file_path}")

    return conditions, columns
    


# Base folder that receives the pooled table.
output_save_folder = "../../../../Thomson Lab Dropbox/David Larios/activedrops/main/all/"

# One base folder per experiment; the merged CSV is looked up inside each of them.
k401 = "../../../../Thomson Lab Dropbox/David Larios/activedrops/main/101324-k401-titration-rt/2p5TMB-1ulDNA_/"
kif3 = "../../../../Thomson Lab Dropbox/David Larios/activedrops/main/100624-kif3-titration-RT/2p5ulTMB-1ulDNAXnM_/"
chimeraD = "../../../../Thomson Lab Dropbox/David Larios/activedrops/main/110324-D_titration-RT/2p5TMB-1ulDNA_1/"
chimerasCEG = "../../../../Thomson Lab Dropbox/David Larios/activedrops/ubuntu/111624-C-E-G-RT/2p5ulTMB-0p5MT-1ulDNA_/"
experiment_paths = [k401, kif3, chimeraD, chimerasCEG]

# Pool the experiments into one CSV; the returned condition and column names
# are reused by the plotting cells further down.
conditions, columns = concatenate_merged_expression_piv(experiment_paths, output_save_folder)


Loaded data from: ../../../../Thomson Lab Dropbox/David Larios/activedrops/main/101324-k401-titration-rt/2p5TMB-1ulDNA_/output_data/merged_expression_PIV.csv
Loaded data from: ../../../../Thomson Lab Dropbox/David Larios/activedrops/main/100624-kif3-titration-RT/2p5ulTMB-1ulDNAXnM_/output_data/merged_expression_PIV.csv
Loaded data from: ../../../../Thomson Lab Dropbox/David Larios/activedrops/main/110324-D_titration-RT/2p5TMB-1ulDNA_1/output_data/merged_expression_PIV.csv
Loaded data from: ../../../../Thomson Lab Dropbox/David Larios/activedrops/ubuntu/111624-C-E-G-RT/2p5ulTMB-0p5MT-1ulDNA_/output_data/merged_expression_PIV.csv
Concatenated DataFrame saved to: ../../../../Thomson Lab Dropbox/David Larios/activedrops/main/all/output_data/merged_expression_PIV.csv


In [3]:
# Display the condition names returned by concatenate_merged_expression_piv.
conditions

['K401_0p625nM-RT',
 'K401_1p25nM-RT',
 'K401_2p5nM-RT',
 'K401_5nM-RT',
 'K401_10nM-RT',
 'K401_20nM-RT',
 'K401_40nM-RT',
 'K401_80nM-RT',
 'K401_160nM-RT',
 'negative',
 'Kif3_0p625nM_1-RT',
 'Kif3_0p625nM_2-RT',
 'Kif3_1p25nM_1-RT',
 'Kif3_1p25nM_2-RT',
 'Kif3_2p5nM_1-RT',
 'Kif3_2p5nM_2-RT',
 'Kif3_5nM_1-RT',
 'Kif3_5nM_2-RT',
 'Kif3_10nM_1-RT',
 'Kif3_10nM_2-RT',
 'Kif3_20nM_1-RT',
 'Kif3_20nM_2-RT',
 'Kif3_40nM_1-RT',
 'Kif3_40nM_2-RT',
 'Kif3_80nM_1-RT',
 'Kif3_80nM_2-RT',
 'Kif3_160nM_1-RT',
 'Kif3_160nM_2-RT',
 'D_160nM',
 'D_80nM',
 'D_40nM',
 'D_20nM',
 'D_10nM',
 'D_5nM',
 'D_2p5nM',
 'D_1p25nM',
 'D_0p625nM',
 'D_0p312nM',
 'D_0p156nM',
 'C_1p25nM',
 'C_2p5nM',
 'C_5nM',
 'C_10nM',
 'C_20nM',
 'C_40nM',
 'C_80nM',
 'C_160nM',
 'E_1p25nM',
 'E_2p5nM',
 'E_5nM',
 'E_10nM',
 'E_20nM',
 'E_40nM',
 'E_80nM',
 'E_160nM',
 'G_1p25nM',
 'G_2p5nM',
 'G_5nM',
 'G_10nM',
 'G_20nM',
 'G_40nM',
 'G_80nM',
 'G_160nM']

In [4]:
# Column pair for this plot. Other (x, y) pairs that were tried here:
#   ("Protein Concentration_nM", "velocity magnitude [m/s]_mean")
#   ("time (h)", "velocity magnitude [m/s]_mean")
#   ("time (h)", "power [W]_mean")
#   ("time (h)", "work [J]_mean")
#   ("time (h)", "distance [m]_mean")
x_column = "time (h)"
y_column = "Protein Concentration_nM"

# Base folder holding the concatenated table.
output_save_folder = "../../../../Thomson Lab Dropbox/David Larios/activedrops/main/all/"

# Options forwarded unchanged to qd.plot_expression_piv
# (sigma_x / sigma_y are presumably smoothing widths - confirm in quadrop).
plot_options = {
    "sigma_x": 0.1,
    "sigma_y": 10,
    "x_log": False,
    "y_log": False,
    "min_frame": 0,
    "max_frame": None,
    "individual_plots": False,
}

# Keep the returned frame: a later cell inspects its 'condition' column.
df = qd.plot_expression_piv(output_save_folder, conditions, x_column, y_column, **plot_options)

No data available for condition: K401_0p625nM-RT
No data available for condition: K401_1p25nM-RT
No data available for condition: K401_2p5nM-RT
No data available for condition: K401_5nM-RT
No data available for condition: K401_10nM-RT
No data available for condition: negative
No data available for condition: Kif3_0p625nM_1-RT
No data available for condition: Kif3_0p625nM_2-RT
No data available for condition: D_0p312nM
No data available for condition: D_0p156nM
No data available for condition: E_1p25nM
No data available for condition: E_2p5nM
No data available for condition: E_5nM
No data available for condition: E_10nM
No data available for condition: E_20nM
No data available for condition: E_40nM
No data available for condition: E_80nM
No data available for condition: E_160nM
Combined plot saved at ../../../../Thomson Lab Dropbox/David Larios/activedrops/main/all/output_data/expression_piv_plots/time_(h)_vs_Protein_Concentration_nM_All_Conditions.png


In [6]:
# Conditions present in the DataFrame returned by qd.plot_expression_piv.
df['condition'].unique() 

array(['K401_20nM-RT', 'K401_40nM-RT', 'K401_80nM-RT', 'K401_160nM-RT',
       'Kif3_1p25nM_1-RT', 'Kif3_1p25nM_2-RT', 'Kif3_2p5nM_1-RT',
       'Kif3_2p5nM_2-RT', 'Kif3_5nM_1-RT', 'Kif3_5nM_2-RT',
       'Kif3_10nM_1-RT', 'Kif3_10nM_2-RT', 'Kif3_20nM_1-RT',
       'Kif3_20nM_2-RT', 'Kif3_40nM_1-RT', 'Kif3_40nM_2-RT',
       'Kif3_80nM_1-RT', 'Kif3_80nM_2-RT', 'Kif3_160nM_1-RT',
       'Kif3_160nM_2-RT', 'D_160nM', 'D_80nM', 'D_40nM', 'D_20nM',
       'D_10nM', 'D_5nM', 'D_2p5nM', 'D_1p25nM', 'D_0p625nM', 'C_1p25nM',
       'C_2p5nM', 'C_5nM', 'C_10nM', 'C_20nM', 'C_40nM', 'C_80nM',
       'C_160nM', 'G_1p25nM', 'G_2p5nM', 'G_5nM', 'G_10nM', 'G_20nM',
       'G_40nM', 'G_80nM', 'G_160nM'], dtype=object)

In [None]:
# Unexecuted repeat of the same check: conditions present in `df`.
df['condition'].unique() 

In [10]:
# Quantities plotted against x_column, one qd.plot_expression_piv call per entry.
y_columns = [
    "Protein Concentration_nM",
    "velocity magnitude [m/s]_mean",
    "distance [m]_mean",
    "Rate of Change of Protein Molecules per Second",
    "Translation Rate aa_s",
    "correlation length [m]_mean",
]

# Settings shared by every call in this cell.
shared_plot_kwargs = dict(
    sigma_x=0.1,
    sigma_y=10,
    x_log=False,
    y_log=False,
    min_frame=0,
    max_frame=None,
    individual_plots=False,
)

# NOTE: the loop rebinds the notebook-level `y_column`; after this cell it holds
# the last entry of y_columns.
for y_column in y_columns:
    qd.plot_expression_piv(output_save_folder, conditions, x_column, y_column, **shared_plot_kwargs)

# Disabled variant kept for reference: the same call with y_log=True for
# y_columns = ["power [W]_mean", "work [J]_mean"] (optionally also
# "Number of Protein Molecules") and conditions ["H-RT", "H-29C"].

No data available for condition: K401_0p625nM-RT
No data available for condition: K401_1p25nM-RT
No data available for condition: K401_2p5nM-RT
No data available for condition: K401_5nM-RT
No data available for condition: K401_10nM-RT
No data available for condition: negative
No data available for condition: Kif3_0p625nM_1-RT
No data available for condition: Kif3_0p625nM_2-RT
No data available for condition: D_0p312nM
No data available for condition: D_0p156nM
No data available for condition: E_1p25nM
No data available for condition: E_2p5nM
No data available for condition: E_5nM
No data available for condition: E_10nM
No data available for condition: E_20nM
No data available for condition: E_40nM
No data available for condition: E_80nM
No data available for condition: E_160nM
Combined plot saved at ../../../../Thomson Lab Dropbox/David Larios/activedrops/main/all/output_data/expression_piv_plots/time_(h)_vs_Protein_Concentration_nM_All_Conditions.png
No data available for condition: K

In [8]:

# Columns fed to the PCA. Currently left out: "velocity magnitude [m/s]_mean",
# "distance [m]_mean", "correlation length [m]_mean" and "work [J]_mean".
features_pca = [
    "vorticity [1/s]_mean",
    "divergence [1/s]_mean",
    "shear [1/s]_mean",
    "strain [1/s]_mean",
    "power [W]_mean",
    "vector direction [degrees]_mean",
    "Protein Concentration_nM",
]

# PCA over every condition, restricted to the 'Rep1' subcondition; the plots are
# written by qd.plot_pca_expression_piv.
qd.plot_pca_expression_piv(
    output_save_folder,
    conditions=conditions,
    subconditions=["Rep1"],
    features=features_pca,
    sigma=1,
)


Available columns in the DataFrame: Index(['condition', 'subcondition', 'time (s)', 'Time_min', 'Time_h',
       'Mean Intensity', 'Protein Concentration_ng_ul',
       'Protein Concentration_nM', 'Number of Protein Molecules',
       'Rate of Change of Protein Molecules per Second',
       'Translation Rate aa_s', 'Unnamed: 0', 'frame', 'x [m]_mean',
       'y [m]_mean', 'u [m/s]_mean', 'v [m/s]_mean', 'data type [-]_mean',
       'vorticity [1/s]_mean', 'velocity magnitude [m/s]_mean',
       'divergence [1/s]_mean', 'dcev [1]_mean', 'shear [1/s]_mean',
       'strain [1/s]_mean', 'vector direction [degrees]_mean',
       'correlation length [m]_mean', 'distance [m]_mean', 'power [W]_mean',
       'work [J]_mean', 'time (min)', 'time (h)'],
      dtype='object')
PCA plot with gradient lines saved at ../../../../Thomson Lab Dropbox/David Larios/activedrops/main/all/output_data/expression_piv_plots/PCA_plot_all_conditions_gradient.jpg
Individual PCA plot for K401_160nM-RT_Rep1 saved at

In [14]:
# Scratch value; `x` is not referenced by any analysis cell visible in this notebook.
x = 1000

In [15]:
# Echo the scratch value `x`.
x

1000