In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from scipy.ndimage import gaussian_filter
import os
import glob
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.io import output_notebook
# Configure Bokeh so that subsequent show() calls render plots inline in the
# notebook's output cells.
output_notebook()


This notebook presents a Principal Component Analysis (PCA) of the K401-biotin classical assay. The assay was run at three different K401-biotin concentrations (125mM, 250mM, and 500mM). Each mixture consisted of 4µl energy mix, 1µl K401-biotin, 1µl 12mM GMP-cpp microtubules, 0.5µl 30% fluorobright YFP beads, and 0.5µl 1mg/ml Streptavidin.

The imaging data were acquired on a confocal microscope with RFP and YFP channels. The RFP channel was used to track the microtubules, and the YFP channel was used to track the beads. Images were taken every 3 seconds for 143 minutes. The image sequences were processed with PIVlab in MATLAB, and the resulting per-frame vector fields are analyzed in the cells below.

In [2]:
import pandas as pd
import glob
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, HoverTool

def mean_values_PIV(file_pattern, seconds_interval):
    """
    Average every column of each PIV text file and stack the results over time.

    Each file matched by ``file_pattern`` is read as CSV after skipping its
    first two lines; missing values are replaced by 0 *before* averaging, so
    empty cells count as zeros in the mean. One output row is produced per
    file, in lexicographically sorted filename order.

    Parameters
    ----------
    file_pattern : str
        Glob pattern selecting the per-frame text files.
    seconds_interval : float
        Seconds between consecutive files; used to build the time axis.

    Returns
    -------
    pandas.DataFrame
        One row per file. The first column, ``'time [min]'``, is the row
        position multiplied by ``seconds_interval / 60``; the remaining
        columns hold the per-file column means. If no file matches, the
        frame has zero rows and only the ``'time [min]'`` column.
    """
    # Sorted so that the row position can stand in for the frame number.
    files = sorted(glob.glob(file_pattern))

    # Collect the per-file means first and build the frame once; growing a
    # DataFrame with pd.concat inside the loop copies all rows every iteration.
    per_file_means = [
        pd.read_csv(file, skiprows=2).fillna(0).mean()
        for file in files
    ]
    mean_values_df = pd.DataFrame(per_file_means).reset_index(drop=True)

    # Row position -> elapsed minutes, stored as the leading column.
    elapsed_minutes = mean_values_df.index.to_numpy() * seconds_interval / 60
    mean_values_df.insert(0, 'time [min]', elapsed_minutes)

    return mean_values_df


def calculate_alpha(time, min_time, max_time, base_alpha=0.1, alpha_span=0.5):
    """Map a time onto an opacity that grows linearly with elapsed time.

    ``time`` is rescaled to [0, 1] between ``min_time`` and ``max_time`` and
    then mapped to ``base_alpha + alpha_span * normalized_time``. With the
    defaults the result ranges from 0.1 (at ``min_time``) to 0.6 (at
    ``max_time``).

    Parameters
    ----------
    time : scalar or array-like supporting arithmetic
        Time value(s) to convert.
    min_time, max_time : scalar
        Bounds of the time range.
    base_alpha : float, optional
        Opacity assigned to ``min_time``.
    alpha_span : float, optional
        Opacity added on top of ``base_alpha`` at ``max_time``.

    Returns
    -------
    Same shape as ``time``
        The opacity value(s). When ``max_time == min_time`` the normalized
        time is taken to be 0, so ``base_alpha`` is returned instead of
        dividing by zero.
    """
    time_range = max_time - min_time
    if time_range == 0:
        # Degenerate range: avoid 0/0 while keeping the shape of `time`.
        normalized_time = (time - min_time) * 0
    else:
        normalized_time = (time - min_time) / time_range
    return base_alpha + alpha_span * normalized_time

def process_and_plot_pca(file_patterns, colors, legend_labels, seconds_intervals,
                         title='PCA of K401-Biotin. Velocity, Vorticity, Divergence, Simple Shear, Simple Strain'):
    """Plot the first two principal components of several PIV datasets in one Bokeh figure.

    For every dataset the per-file mean values are loaded with
    ``mean_values_PIV``, the selected feature columns are standardized, and a
    two-component PCA is fitted. Each dataset is drawn as its own scatter
    series whose point opacity increases with time (see ``calculate_alpha``).

    Note that the scaler and the PCA are fitted separately for each dataset,
    so the PC1/PC2 axes of different series are not a shared projection.

    Parameters
    ----------
    file_patterns : sequence of str
        One glob pattern per dataset.
    colors : sequence of str
        Marker color for each dataset.
    legend_labels : sequence of str
        Legend entry for each dataset.
    seconds_intervals : sequence of float
        Seconds between consecutive files, per dataset.
    title : str, optional
        Figure title; defaults to the original K401-Biotin title.

    The four sequences are consumed with ``zip``, so iteration stops at the
    shortest one.

    Raises
    ------
    FileNotFoundError
        If a pattern matches no files.
    """
    output_notebook()
    p = figure(title=title,
               x_axis_label='Principal Component 1', y_axis_label='Principal Component 2',
               width=800, height=600)

    features = ['v [m/s]', 'vorticity [1/s]', 'magnitude [m/s]', 'divergence [1/s]', 'simple shear [1/s]', 'simple strain [1/s]']

    for file_pattern, color, label, seconds_interval in zip(file_patterns, colors, legend_labels, seconds_intervals):
        df = mean_values_PIV(file_pattern, seconds_interval)
        if df.empty:
            # Fail with the offending pattern rather than a KeyError on the feature columns.
            raise FileNotFoundError(f"No PIV files matched pattern: {file_pattern!r}")

        # Standardize, then project onto the first two principal components.
        standardized_data = StandardScaler().fit_transform(df[features])
        pca_result = PCA(n_components=2).fit_transform(standardized_data)
        pca_df = pd.DataFrame(data=pca_result, columns=['PC1', 'PC2'])
        pca_df['time [min]'] = df['time [min]']

        # Opacity encodes time: later frames are drawn more opaque.
        min_time = pca_df['time [min]'].min()
        max_time = pca_df['time [min]'].max()
        alphas = calculate_alpha(pca_df['time [min]'], min_time, max_time)

        source = ColumnDataSource(data=dict(PC1=pca_df['PC1'], PC2=pca_df['PC2'], time=pca_df['time [min]'], alpha=alphas))

        # scatter() draws circle markers by default; circle(size=...) is deprecated in recent Bokeh.
        p.scatter('PC1', 'PC2', color=color, legend_label=label, size=2.5, alpha='alpha', source=source)

    # A single hover tool covers every series; adding one per dataset would
    # stack duplicate tooltips on each point.
    p.add_tools(HoverTool(tooltips=[('Time [min]', '@time'), ('Alpha', '@alpha')]))

    p.legend.click_policy = 'hide'
    show(p)

# Example usage: K401-biotin concentration series, one PIV export folder per concentration.
# NOTE(review): the data folders are named in nM while the legend labels say mM —
# confirm which unit is intended.
file_patterns = [
    f"../../data/01-k401-biotin_strep/{folder}/2ul/piv_data/PIVlab_****.txt"
    for folder in ("125nM", "250nM", "500nM")
]
colors = ['green', 'blue', 'red']
legend_labels = ['125mM', '250mM', '500mM']
seconds_intervals = [3] * 3  # same frame interval (in seconds) for all three datasets

process_and_plot_pca(file_patterns, colors, legend_labels, seconds_intervals)

In [3]:
# One row per dataset: (PIV file pattern, marker color, legend label, seconds between frames).
# The two datasets use different frame intervals.
file_patterns, colors, legend_labels, seconds_intervals = map(list, zip(
    ("../../data/02-ActiveDROPSfig2-K401_Kif3/k401/rep1/piv_data/PIVlab_****.txt", 'blue', 'k401', 180),
    ("../../data/02-ActiveDROPSfig2-K401_Kif3/kif3/rep1/piv_data/PIVlab_****.txt", 'orange', 'kif3', 3),
))

process_and_plot_pca(file_patterns, colors, legend_labels, seconds_intervals)
