In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from collections import Counter
import ast 
import seaborn as sns
from scipy.optimize import curve_fit
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
import statsmodels.api as sm
from sympy import symbols, integrate, lambdify, diff
import scipy.integrate as spi
from sklearn.preprocessing import MinMaxScaler
from scipy.stats import f_oneway, kruskal
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import pairwise_tukeyhsd
import statsmodels.api as sm
from matplotlib.lines import Line2D
from matplotlib.ticker import FuncFormatter
from mpl_toolkits.axes_grid1 import make_axes_locatable


# Use seaborn's "pastel" palette as the default color cycle for the plots below.
sns.set_palette("pastel")

### Normalize df

In [None]:
def scale_csv_data(file_path, columns_to_scale):
    """Load a CSV and min-max scale the list-valued columns in `columns_to_scale`.

    Every cell of a scaled column must hold the string form of a Python list.
    The values of one column are pooled across all rows before scaling, so the
    whole column shares a single range, and the rescaled lists are written back
    as strings.

    Parameters
    ----------
    file_path : path handed to `pd.read_csv`.
    columns_to_scale : iterable of column names to rescale.

    Returns
    -------
    The loaded DataFrame with the requested columns replaced by stringified,
    rescaled lists.
    """
    frame = pd.read_csv(file_path)
    min_max = MinMaxScaler()

    def _rescale(series):
        parsed = series.apply(ast.literal_eval)
        pooled = np.concatenate(parsed.values)
        rescaled = min_max.fit_transform(pooled.reshape(-1, 1)).flatten()
        # Cut the pooled vector back into one chunk per row.
        boundaries = np.cumsum([len(values) for values in parsed])[:-1]
        return [str(chunk.tolist()) for chunk in np.split(rescaled, boundaries)]

    for name in columns_to_scale:
        frame[name] = _rescale(frame[name])

    return frame

### Histogram for Germination Frame Frequency

In [None]:
def germination_frequency(plots_folder: str, GERMINATION: list, output: int = 1) -> None:
  '''
  Plot a histogram of germination frames with the dormant percentage overlaid.

  plots_folder: path prefix; the figure is saved as
      plots_folder + "germination_percentage.jpg".
  GERMINATION: pandas Series of per-spore lists; a spore's germination frame
      is the position of the first 1 in its list.
  output: when 1, the per-frame germination counts are printed.

  The list of dormant percentages (one entry per drop) is always printed.
  '''
  frames_shown: int = 200
  bin_number = 24

  # germination frame per spore, as a list index
  germination_frames_list = GERMINATION.apply(lambda flags: flags.index(1))

  # frame -> number of spores germinating at that frame, in ascending frame order
  frame_dict = dict(sorted(Counter(germination_frames_list).items()))
  if output == 1:
    for frame_number, count in frame_dict.items():
      print(f"Frame: {frame_number}, Frequency: {count}")

  # histogram with dashed vertical grid lines at the x ticks
  fig, ax1 = plt.subplots()
  ax1.grid(color='silver', linewidth=1, linestyle="dashed", which="both", axis="x")
  sns.histplot(germination_frames_list, bins=bin_number, label="Germination Events", ax=ax1, color="powderblue")
  ax1.set_xlabel("Hour", fontsize=16)
  ax1.set_ylabel("Germination Event Frequency", fontsize=16)
  ax1.set_xlim([0, frames_shown])
  ax1.set_xticks([11, 35, 59, 83, 107, 131, 155, 179, 203], [1, 3, 5, 7, 9, 11, 13, 15, 17])

  # dormant percentage per frame; frame 0 is fixed at 100
  total_spores = sum(frame_dict.values())
  remaining = total_spores
  percent = 100
  frames, percents, percent_plot = [0], [100], [100]
  for frame_number in range(1, frames_shown + 1):
    germinated_here = frame_dict.get(frame_number)
    if germinated_here is not None:
      remaining -= germinated_here
      percent = remaining / total_spores * 100
      percent_plot.append(percent)
    percents.append(percent)
    frames.append(frame_number)

  # percentage curve on a secondary y axis
  ax2 = ax1.twinx()
  sns.lineplot(x=frames, y=percents, ax=ax2, linestyle="--", linewidth=2, label="Dormant Percentage", color="tomato")

  # zero-effect line at x=11 whose only job is to add a legend entry
  ax2.axvline(x=11, color='silver', label='Germinant Exposure', linewidth=1, linestyle="dashed")

  ax2.set_ylabel("Dormant Percentage", fontsize=16)
  ax2.set_yticks([0, 25, 50, 75, 100])
  ax2.tick_params(axis='y', labelsize=12)
  ax2.yaxis.set_major_formatter(FuncFormatter(lambda y, _: f'{int(y)}%'))

  # single legend combining both axes
  hist_handles, hist_labels = ax1.get_legend_handles_labels()
  line_handles, line_labels = ax2.get_legend_handles_labels()
  ax2.legend(handles=hist_handles + line_handles, labels=hist_labels + line_labels, loc='best', fontsize=12)

  plt.tight_layout()
  plt.savefig(plots_folder + "germination_percentage.jpg")
  print(f"Percentages: {percent_plot}")

### Descriptive Statistics

Group together spores and create a boxplot at each timestep for all spores

In [None]:
def group_by_germination(df, feature_str: list[str]):
  '''
  Parse list-valued columns and group spores by germination frame.

  df: original data frame; it is copied, not modified.
  feature_str: names of columns whose cells are parsed from strings into
      Python lists. Cells that are already lists are left untouched.

  Returns (grouped_data, data_stats): data_stats is the parsed copy with an
  added "Germination Frame" column (position of the first 1 in the spore's
  GERMINATION list); grouped_data is data_stats grouped by that column.
  '''
  def _as_list(cell):
    # Cells read from CSV are strings; cells parsed earlier are already lists.
    return ast.literal_eval(cell) if isinstance(cell, str) else cell

  data_stats = df.copy()

  for feature in feature_str:
    data_stats[feature] = data_stats[feature].apply(_as_list)

  # Works whether or not "GERMINATION" was listed in feature_str.
  data_stats['Germination Frame'] = data_stats['GERMINATION'].apply(lambda flags: _as_list(flags).index(1))
  grouped_data = data_stats.groupby("Germination Frame")

  return grouped_data, data_stats

In [None]:
def get_min_max(df, feature: str) -> tuple:
    """Return (minimum, maximum) over every value of a list-valued column.

    Parameters
    ----------
    df : DataFrame containing the column.
    feature : column name; each cell is a list or the string form of one.

    Returns
    -------
    A 2-tuple (smallest value, largest value) across all rows.

    Raises
    ------
    ValueError if the column has no rows or a row's list is empty.
    """
    rows = df[feature].apply(
        lambda cell: ast.literal_eval(cell) if isinstance(cell, str) else cell
    )
    row_minima = [min(row) for row in rows]
    row_maxima = [max(row) for row in rows]
    return min(row_minima), max(row_maxima)


def box_plot_statistics(germ_time, data, column: str, feature: str, y_minmax: [int, int], show_plot = 1):
    """Save a per-frame boxplot of one feature for spores sharing a germination frame.

    Parameters
    ----------
    germ_time : germination frame of the group; drawn as a dashed red line
        and used in the title and file name.
    data : DataFrame-like; `data[column]` yields one per-frame sequence per spore.
    column : name of the column holding the sequences.
    feature : label used for the y axis, title and file name.
    y_minmax : (lower, upper) y-axis limits.
    show_plot : accepted for interface compatibility; the figure is always
        saved and then closed.

    The image is written under the module-level `plots_folder`.
    """
    min_feature, max_feature = y_minmax[0], y_minmax[1]

    # One row per spore, one column per frame; melt to long format for seaborn.
    aligned_df = pd.DataFrame([pd.Series(series) for series in data[column]])
    melted_aligned_df = aligned_df.melt(var_name='Frame', value_name=feature)

    # plotting
    plt.figure(figsize=(4, 3))
    sns.boxplot(x='Frame', y=feature, data=melted_aligned_df)
    plt.axvline(x=germ_time, color='red', linestyle='--', label=f'Germination Frame {germ_time}')
    plt.title(f'{feature} Statistics for Spores Germinated at Frame {germ_time}')
    plt.xlabel('Frame')
    plt.xticks([0, 50, 100, 150, 200, 250])
    plt.ylabel(feature)
    plt.ylim([min_feature, max_feature])
    plt.legend()
    # Format both parts inside the f-string so a numeric germ_time does not
    # raise TypeError the way `feature + germ_time` would.
    plt.savefig(f"{plots_folder} boxplot_ {feature}{germ_time} .jpg")
    plt.close()

### Spatial

In [None]:
def plot_xy_germinationframe(df, show_plot = 1):  
  '''
  Scatter spores at their (X_POSITION, Y_POSITION), colored by germination frame.

  df: must provide the columns 'X_POSITION', 'Y_POSITION' and 'Germination Frame'.
  show_plot: when not 1, one extra plt.close() is issued after the figure
      has already been closed.

  The figure is saved as "germinationframe_location.jpg" under the
  module-level plots_folder and then closed.
  '''
  fig, ax = plt.subplots(figsize=(6, 3))

  points = ax.scatter(df['X_POSITION'], df['Y_POSITION'], c=df['Germination Frame'], cmap='gist_ncar', s=30)
  fig.colorbar(points, ax=ax, label='Germination Frame')
  ax.set_xlabel('X')
  ax.set_ylabel('Y')

  # flip the y axis so larger Y values are drawn lower
  ax.invert_yaxis()
  ax.set_title('Spores by Germination Frame')
  ax.grid(True)
  fig.savefig(f"{plots_folder}germinationframe_location.jpg")

  plt.close(fig)
  if show_plot != 1:
    plt.close()
  # TODO: validate this by overlaying on the tiff

### Signal Summation

In [None]:
def total_signal(df, INTENSITY, GERMINATION):
  '''
  Scatter-plot each spore's germination frame (x) against the sum of its
  intensities over the frames before germination (y).

  df: data frame; a "Germination Frame" column is added to it in place.
  INTENSITY: per-spore intensity sequences, in the same row order as df.
  GERMINATION: pandas Series of per-spore lists; the germination frame is
      the position of the first 1.

  The figure is saved as "germinationframe_sumsignal.jpg" under the
  module-level plots_folder and then closed.
  '''
  df['Germination Frame'] = GERMINATION.apply(lambda flags: flags.index(1))

  # Pair rows by position, so the result does not depend on df carrying a
  # default 0..n-1 index.
  germination_totalintensity = [
    [germination_frame, np.sum(spore_intensities[0: germination_frame])]
    for spore_intensities, germination_frame in zip(INTENSITY, df['Germination Frame'])
  ]

  # reshape keeps the array two-dimensional even when there are no spores
  germination_totalintensity_np = np.array(germination_totalintensity).reshape(-1, 2)

  plt.figure(figsize=(4, 3))
  plt.scatter(germination_totalintensity_np[:, 0], germination_totalintensity_np[:, 1])
  plt.xlabel("Germination Frame")
  plt.ylabel("Sum Intensity")
  plt.savefig(plots_folder + "germinationframe_sumsignal.jpg")
  plt.close()

### Swarmplot of feature for initial, last, and average value before germination

In [None]:
def subplot_swarmplot(arrays, y_label, subplot_title, min_max, framerange, show_plots=1):
    """Box + swarm plot of (last - initial) feature value per germination frame.

    Parameters
    ----------
    arrays : (avg_array, initial_array, last_array); each is a 2-column array
        of [germination frame, value] rows.
    y_label : accepted for interface compatibility; not used.
    subplot_title : feature name used in the y-axis label and the file name.
    min_max : accepted for interface compatibility; the y limits are taken
        from the plotted differences instead.
    framerange : sequence of (first_frame, last_frame) pairs; each pair gets
        its own color.
    show_plots : when 1 the figure is shown; otherwise it is closed after
        being saved.

    Returns
    -------
    A copy of avg_array whose second column holds last - initial.
    """
    avg_array, initial_array, last_array = arrays

    # Difference between initial value and last value before germination
    diff_array = avg_array.copy()
    diff_array[:, 1] = last_array[:, 1] - initial_array[:, 1]

    fig, axs = plt.subplots(1, 1, figsize=(10, 10))
    palette = sns.color_palette("pastel", len(framerange))

    data = pd.DataFrame(diff_array, columns=['Germination Frame', 'Value'])
    data['Germination Frame'] = data['Germination Frame'].astype(int)  # Convert to integers
    for color, frames in zip(palette, framerange):
        # NOTE(review): Series.between includes both end points by default, so a
        # frame equal to a shared boundary lands in two groups -- confirm intended.
        group_indices = data['Germination Frame'].between(frames[0], frames[1])
        sns.boxplot(x='Germination Frame', y='Value', data=data[group_indices], ax=axs, color=color, width=0.8)
        sns.swarmplot(x='Germination Frame', y='Value', data=data[group_indices], ax=axs, color=color, edgecolor='black', size=4)

    axis_fontsize = 32
    axs.set_xlabel("Germination Frame", fontsize=axis_fontsize)
    axs.set_ylabel(f"Change in {subplot_title}", fontsize=axis_fontsize)
    axs.tick_params(axis='x', labelsize=16)  # Change x-axis tick size
    axs.tick_params(axis='y', labelsize=16)  # Change y-axis tick size

    # Legend: one empty-data marker per group; the last group is labelled 8.
    last_group = len(framerange) - 1
    for j, color in enumerate(palette):
        exposure = 8 if j == last_group else j + 1  # HARDCODED
        axs.plot([], [], 'o', label=f'Germinant Exposure: {exposure}', color=color, markersize=5)

    # y-axis limits span the plotted differences
    axs.set_ylim([min(diff_array[:, 1]), max(diff_array[:, 1])])
    axs.legend(fontsize=20, loc="best", shadow=True)

    plt.tight_layout()
    plt.savefig(f"{plots_folder}boxplots_swarmplots_diff_{subplot_title}.jpg")
    if show_plots == 1:
        plt.show()
    else:
        plt.close(fig)

    return diff_array

### Retrieve the Average, Initial, and Last Value of a Feature before Germination

In [None]:
def germinationframe_feature(df, FEATURE: list, GERMINATION: list):
  '''
  Summarize one feature over the frames before each spore's germination.

  df: data frame; a "Germination Frame" column is added to it in place.
  FEATURE: per-spore sequences of the feature, addressable by 0..len(df)-1.
  GERMINATION: pandas Series of per-spore lists; the germination frame is
      the position of the first 1.

  Returns [avg, initial, last], three arrays with one
  [germination frame, value] row per spore:
    avg     - mean over all pre-germination frames
    initial - mean over the first 11 pre-germination frames
    last    - mean over the slice [-5:-1] of the pre-germination frames
  '''
  df['Germination Frame'] = GERMINATION.apply(lambda flags: flags.index(1))

  averages, initials, lasts = [], [], []
  for row_label in range(len(df)):
    germ_frame = df.loc[row_label, "Germination Frame"]
    pre_germination = FEATURE[row_label][:germ_frame]

    # NOTE(review): [-5:-1] leaves out the final pre-germination frame --
    # confirm that this is intended.
    summaries = (
      np.mean(pre_germination),
      np.mean(pre_germination[:11]),
      np.mean(pre_germination[-5:-1]),
    )
    for bucket, value in zip((averages, initials, lasts), summaries):
      bucket.append(np.array([germ_frame, value]))

  return [np.array(averages), np.array(initials), np.array(lasts)]
    

### Fraction of two features

In [None]:
def intensity_area_ratio(df, INTENSITY, AREA):
  '''
  Divide intensity by area frame by frame for every spore.

  df: accepted for interface compatibility; not used.
  INTENSITY: per-spore intensity sequences (pandas Series or list).
  AREA: per-spore area sequences as a pandas Series or list, or a DataFrame
      whose rows hold either one list cell or one area value per column.

  Returns a DataFrame with one row per spore and one column per frame.
  '''
  # Normalize each input on its own, so the INTENSITY conversion does not
  # depend on the type of AREA.
  if isinstance(INTENSITY, pd.Series):
    INTENSITY = INTENSITY.to_list()

  if isinstance(AREA, pd.Series):
    AREA = AREA.to_list()
  elif isinstance(AREA, pd.DataFrame):
    AREA = AREA.values.tolist()

  RATIOS = []
  for spore_index in range(len(INTENSITY)):
    spore_intensity = INTENSITY[spore_index]
    spore_area = AREA[spore_index]

    # A one-column DataFrame row arrives as [[a0, a1, ...]]; unwrap it, but
    # leave a genuine single-frame series such as [a0] alone.
    if len(spore_area) == 1 and isinstance(spore_area[0], (list, tuple, np.ndarray)):
      spore_area = spore_area[0]

    RATIOS.append([
      spore_intensity[frame_index] / spore_area[frame_index]
      for frame_index in range(len(spore_intensity))
    ])
  return pd.DataFrame(RATIOS)

### Fitting a Polynomial Model to each spore

In [None]:
def polynomial_model(x, *coefficients):
    """Evaluate a polynomial at x; coefficients[i] multiplies x**i.

    Returns 0 when no coefficients are given.
    """
    return sum(coefficient * x ** power for power, coefficient in enumerate(coefficients))

def fit_polynomial_model(df, INTENSITY, GERMINATION, yaxis, degree, show_plots=1):
    """Fit a polynomial to each spore's intensity up to its germination frame.

    For every spore the polynomial (see `polynomial_model`) is fitted to the
    frames 0..germination frame inclusive. Its derivative at each of those
    frames and its integral from 0 to each frame before germination are
    stored on `df` as stringified lists. One three-panel figure (fit,
    derivative, integral) is saved per germination frame.

    Parameters
    ----------
    df : data frame, modified in place: "Germination Frame", "INTEGRAL",
        "DERIVATIVE" and "FITTED_POLYNOMIAL" columns are (re)created. The
        index labels are used as positions into INTENSITY.
    INTENSITY : pandas Series of per-spore intensity sequences.
    GERMINATION : pandas Series of per-spore lists; the germination frame is
        the position of the first 1.
    yaxis : y-axis limits for the fit panel.
    degree : polynomial degree; degree + 1 coefficients are fitted.
    show_plots : when 1, plt.show() is called at the end; otherwise
        plt.close().

    Returns
    -------
    The same `df` object.
    """
    plt.clf()
    df['Germination Frame'] = GERMINATION.apply(lambda x: x.index(1))
    INTENSITY_LIST = INTENSITY.to_list()

    df["INTEGRAL"] = None
    df["DERIVATIVE"] = None
    df["FITTED_POLYNOMIAL"] = None

    initial_guess = [1] * (degree + 1)

    for germination_frame, group in df.groupby('Germination Frame'):
        fig, axs = plt.subplots(1, 3, figsize=(12, 3))
        sns.set_palette("pastel")

        # Identical for every spore in the group.
        frames_until_germination = list(range(germination_frame + 1))
        frames_before_germination = frames_until_germination[0: -1]

        for spore_index in group.index:
            intensities = INTENSITY_LIST[spore_index]
            frames = list(range(len(intensities)))
            intensity_until_germination = intensities[:germination_frame + 1]

            # fitting
            parameters_untilgermination, _ = curve_fit(
                polynomial_model,
                frames_until_germination,
                intensity_until_germination,
                p0=initial_guess,
            )
            fitted_until_germination = polynomial_model(
                np.array(frames_until_germination), *parameters_untilgermination
            )

            # The symbolic derivative/integral depend only on the fitted
            # coefficients, so build them once per spore rather than per frame.
            derivative_fcn = derivative_function(parameters_untilgermination)
            integral_fcn = integral_function(parameters_untilgermination)

            # derivative up to and including the germination frame;
            # float() keeps the stored string parseable by ast.literal_eval
            derivatives = [float(derivative_fcn(frame)) for frame in frames_until_germination]
            df.loc[spore_index, "DERIVATIVE"] = str(derivatives)

            # integral from 0 up to each frame before the germination frame
            integral_at_zero = integral_fcn(0)
            integrals = [
                float(integral_fcn(frame) - integral_at_zero)
                for frame in frames_before_germination
            ]
            df.loc[spore_index, "INTEGRAL"] = str(integrals)

            # plotting raw data and fitted line
            sns.lineplot(ax=axs[0], x=frames, y=intensities, color="lightgrey")
            sns.lineplot(ax=axs[0], x=frames_until_germination, y=fitted_until_germination)
            axs[0].axvline(germination_frame, color="black", linestyle="--")

            # plotting derivative
            sns.lineplot(ax=axs[1], x=frames_until_germination, y=derivatives)
            axs[1].axvline(germination_frame, color="black", linestyle="--")

            # plotting integral
            sns.lineplot(ax=axs[2], x=frames_before_germination, y=integrals)

        axs[0].set_xlabel('Frames')
        axs[0].set_ylabel('Intensity')
        axs[0].set_ylim(yaxis)
        axs[0].set_title(f'Fitted Polynomials for GF {germination_frame}')

        axs[1].set_xlabel('Frames')
        axs[1].set_xlim([0, len(INTENSITY[0])])
        axs[1].set_ylabel("Intensity Derivative")
        axs[1].set_ylim([-5, 25])
        axs[1].set_title(f'Derivatives for GF {germination_frame}')

        axs[2].set_xlim([0, len(INTENSITY[0])])
        axs[2].set_ylim([0, 3000])
        axs[2].set_title(f'Integrals for GF {germination_frame}')
        plt.tight_layout()

        fig.savefig(f"{plots_folder} germinationframe {germination_frame}_fittedpolynomial.jpg")

        plt.close(fig)

    if show_plots == 1:
        plt.show()
    else:
        plt.close()
    plt.clf()
    return df

def integral_function(parameters):
    """Return a NumPy-backed callable for the antiderivative of a polynomial.

    parameters[i] is the coefficient of x**i. The antiderivative is the one
    produced by sympy's indefinite `integrate`; callers subtract its value
    at 0 to get a definite integral.
    """
    x = symbols('x')

    # build the polynomial symbolically, term by term
    polynomial_expr = 0
    for power, coefficient in enumerate(parameters):
        polynomial_expr = polynomial_expr + coefficient * x**power

    return lambdify(x, integrate(polynomial_expr, x), 'numpy')

def derivative_function(parameters):
    """Return a NumPy-backed callable for the first derivative of a polynomial.

    parameters[i] is the coefficient of x**i.
    """
    x = symbols("x")

    # build the polynomial symbolically, term by term
    polynomial_expr = 0
    for power, coefficient in enumerate(parameters):
        polynomial_expr = polynomial_expr + coefficient * x**power

    return lambdify(x, diff(polynomial_expr, x), 'numpy')

### Heatmaps

In [None]:
def heatmap(df, arrays, DIFF, feature, show_plots = 1):
    """Plot and save the correlation heatmap of one feature's pre-germination summaries.

    Parameters
    ----------
    df : accepted for interface compatibility; not used.
    arrays : (AVG, INITIAL, LAST); each is a 2-column array of
        [germination frame, value] rows.
    DIFF : 2-column array whose second column is the last - initial difference.
    feature : feature name used in the title.
    show_plots : when 1 the figure is shown after being saved.

    The image is written as "interfeature_heatmap.jpg" under the module-level
    `plots_folder`.
    """
    AVG, INITIAL, LAST = arrays

    summary = pd.DataFrame({
        "Germination Frame": list(AVG[:, 0]),
        "Average": list(AVG[:, 1]),
        "Initial": list(INITIAL[:, 1]),
        "Last": list(LAST[:, 1]),
        "Difference": list(DIFF[:, 1]),
    })
    correlation = summary.corr()

    # plot heatmap
    fig = plt.figure(figsize=(5, 3))
    sns.heatmap(correlation, annot=True, vmin=-1, vmax=1)
    plt.title(f"{feature} Correlation before Germination")

    # Save first: after show()/close() the figure may no longer hold the
    # heatmap, which would write an empty image.
    fig.savefig(f"{plots_folder}interfeature_heatmap.jpg")

    if show_plots == 1:
        plt.show()
    plt.close(fig)

Pearson Correlation Coefficient:

### Main Heatmap

In [None]:
def plain_heatmap(df, features_list, plot_title, custom_labels, bold_labels):
  '''
  Plot the feature-by-feature correlation matrix averaged over all spores.

  df: one row per spore, one column per feature; each cell is the string
      form of that feature's values over time.
  features_list: columns to correlate.
  plot_title: file name stem; the figure is saved as
      plots_folder + plot_title + ".jpg".
  custom_labels: mapping from column name to display label.
  bold_labels: display labels to render in bold (may be empty or None).
  '''
  plt.clf()

  # one correlation matrix per spore
  correlation_matrices = []
  for spore_index in range(len(df)):
    per_feature = [ast.literal_eval(df.loc[spore_index, feature]) for feature in features_list]
    spore_df = pd.DataFrame(per_feature).T
    spore_df.columns = features_list
    correlation_matrices.append(spore_df.corr())

  # element-wise mean over spores, relabelled for display
  avg_correlation_df = pd.DataFrame(
    np.mean(correlation_matrices, axis=0), index=features_list, columns=features_list
  ).rename(columns=custom_labels, index=custom_labels)

  # heatmap without seaborn's own colorbar; a matching one is added below
  plt.figure(figsize = ((14,13)))
  ax = sns.heatmap(avg_correlation_df, annot=True, vmin=0, vmax=1, cmap="coolwarm",
                   xticklabels=True, yticklabels=True, annot_kws={"size": 15},
                   cbar_kws={'label': 'Pearson Correlation Coefficient'}, cbar = False)
  ax.set_aspect('equal')

  # tick label size, with selected labels in bold
  for tick in list(ax.get_xticklabels()) + list(ax.get_yticklabels()):
    if bold_labels and tick.get_text() in bold_labels:
      tick.set_weight('bold')
    tick.set_fontsize(24)

  # padding between labels and heatmap
  for axis_name in ('x', 'y'):
    ax.tick_params(axis=axis_name, pad=10)

  # colorbar in its own axes so it has the same height as the heatmap
  cax = make_axes_locatable(ax).append_axes("right", size="5%", pad=0.5)
  cbar = plt.colorbar(ax.collections[0], cax=cax)
  cbar.ax.tick_params(labelsize=20)
  colorbar_label = cbar.ax.yaxis.label
  colorbar_label.set_size(25)
  colorbar_label.set_text('Pearson Correlation Coefficient')
  cbar.ax.yaxis.labelpad = 15

  # save
  plt.tight_layout()
  plt.savefig(f"{plots_folder + plot_title}.jpg")
  plt.show()

In [None]:
def initial_value_heatmap(df, features_list, plot_title):
  '''
  Save a heatmap of the correlations between the columns of df.

  df: numeric data frame, one column per quantity.
  features_list: accepted for interface compatibility; not used.
  plot_title: file name stem; the figure is saved as
      plots_folder + plot_title + ".jpg".
  '''
  pairwise_correlation = df.corr()
  plt.clf()
  colorbar_options = {'label': 'Pearson Correlation Coefficient'}
  heatmap_axes = sns.heatmap(
    pairwise_correlation,
    annot=True,
    vmin=-1,
    vmax=1,
    cmap="coolwarm",
    cbar_kws=colorbar_options,
  )
  heatmap_axes.set_aspect('equal')
  plt.tight_layout()
  output_path = f"{plots_folder + plot_title}.jpg"
  plt.savefig(output_path)

Tukey Analysis Function:

In [None]:
def tukey_analysis(df, feature: list[str], frameranges: list[tuple]):
    """Run a one-way ANOVA and Tukey HSD test of a feature across frame groups.

    Parameters
    ----------
    df : data frame addressable by 0..len(df)-1, with one value per spore in
        the `feature` column and the germination frame in "GERMINATION".
    feature : name of the column to compare (used as a single column label).
    frameranges : (start, stop) pairs; a spore belongs to a group when its
        germination frame lies in range(start, stop), i.e. stop is excluded.

    Both results are printed; nothing is returned.
    """
    print(f"doing ANOVA and tukey analysis for {feature}...")
    dataframe_rows = []
    frame_groups = [[] for _ in frameranges]

    for group_i, framerange in enumerate(frameranges):
        group_frames = range(framerange[0], framerange[1])
        for spore_index in range(len(df)):
            spore_info = df.loc[spore_index, feature]
            spore_germ = df.loc[spore_index, "GERMINATION"]
            if spore_germ in group_frames:
                frame_groups[group_i].append(spore_info)
                dataframe_rows.append([spore_info, str(framerange)])

    anova_result = f_oneway(*frame_groups)
    print(f"ANOVA one-way results for {feature}:")
    print(f"{anova_result}")

    grouped_values = pd.DataFrame(dataframe_rows, columns=[feature, "Frame_Group"])
    tukey = pairwise_tukeyhsd(endog=grouped_values[feature],
                              groups=grouped_values["Frame_Group"],
                              alpha=0.05)
    print(f"Tukey Analysis:")
    # Print the result itself, not just the heading above it.
    print(tukey)

### Summed Intensity by Frame

In [None]:
def sum_intensity(GERMINATION, INTENSITY, time_between_frames: int):
  '''
  Plot each spore's running sum of intensity * time_between_frames over the
  frames before its germination frame.

  GERMINATION: pandas Series of per-spore lists; the germination frame is
      the position of the first 1.
  INTENSITY: per-spore intensity sequences, in the same order as GERMINATION.
  time_between_frames: factor applied to every intensity before summing.

  The figure is saved as "time_summation.jpg" under the module-level
  plots_folder.
  '''
  # Computed once for all spores instead of once per spore.
  germination_frames = GERMINATION.apply(lambda x: x.index(1))

  summed_intensity_allspores = []
  for spore_index, spore_row in enumerate(INTENSITY):
    # enumerate yields positions, so look the frame up by position as well
    germination_frame = int(germination_frames.iloc[spore_index])
    summed_intensity = 0
    summed_intensities = []
    for intensity_value in spore_row[0: germination_frame]:
      summed_intensity += intensity_value * time_between_frames
      summed_intensities.append(summed_intensity)
    summed_intensity_allspores.append(summed_intensities)

  # rows are padded with NaN up to the longest pre-germination run
  sum_intensity_df = pd.DataFrame(summed_intensity_allspores)
  plt.clf()
  for row_index in range(len(sum_intensity_df)):
    row = list(sum_intensity_df.iloc[row_index, :])
    xs = range(len(row))
    sns.lineplot(x = xs, y = row)
  plt.savefig(f"{plots_folder}time_summation.jpg")
  plt.clf()


### Plot Features Lineplots by Germination Frame and over Time 

In [None]:
def plot_feature_germ_frames(GERMINATION, FEATURE, feature: str):
    """Show one line plot per germination frame with that frame's spores.

    Parameters
    ----------
    GERMINATION : pandas Series of per-spore lists; the germination frame is
        the position of the first 1.
    FEATURE : per-spore feature sequences, in the same order as GERMINATION.
    feature : y-axis label.
    """
    frames_by_spore = GERMINATION.apply(lambda flags: flags.index(1))

    # one figure per distinct germination frame
    for germination_frame in frames_by_spore.unique():
        plt.figure()
        plt.title(f"Germination Frame: {germination_frame}")
        for spore_index, spore_row in enumerate(FEATURE):
            if frames_by_spore.iloc[spore_index] != germination_frame:
                continue
            sns.lineplot(x = range(len(spore_row)), y = spore_row)
        plt.axvline(x = germination_frame, color = "lightgrey", linestyle = '--', label = "Germination")
        plt.xlabel("Frame")
        plt.ylabel(feature)
        plt.show()

### Sample Features

In [None]:
def plot_feature_example(GERMINATION, FEATURE, feature: str, frames: list[int]):
    """Plot one example spore per requested germination frame.

    For each frame in `frames` the third spore (in iteration order) that
    germinates at that frame is drawn: a thick line before germination and a
    thinner line of the same color afterwards. Frames with fewer than three
    such spores get no line.

    Parameters
    ----------
    GERMINATION : pandas Series of per-spore lists; the germination frame is
        the position of the first 1.
    FEATURE : per-spore feature sequences, in the same order as GERMINATION.
    feature : y-axis label, also used in the saved file name.
    frames : germination frames to illustrate; the first five also get a
        dashed vertical line at frame - 1.
    """
    # plot initializing
    plt.clf()
    palette = sns.color_palette('Spectral', n_colors=len(frames))
    plt.figure(figsize=(12,8))

    # germinant intervals
    for exposure_frame in frames[0:5]:
      plt.axvline(x = exposure_frame - 1, color='silver', linestyle='--', linewidth = 2)
    custom_handles = [
      plt.Line2D([0], [0], color='silver', linestyle='dashed', label="Germinant Exposure", linewidth = 2)
    ]

    # walk the frames from last to first, drawing the third match of each
    for frame in reversed(frames):
      matches_seen = 0
      for spore_index, feature_row in enumerate(FEATURE):
        germination_frame = GERMINATION.iloc[spore_index].index(1)
        if germination_frame != frame:
          continue
        matches_seen += 1
        if matches_seen != 3:
          continue

        frame_index = frames.index(frame)
        colorline = palette[frame_index % len(palette)]
        spore_name = f"Spore {frame_index + 1}"
        # before germination
        sns.lineplot(x=range(germination_frame), y=feature_row[:germination_frame], linewidth=10, color = colorline, label = spore_name)
        # after germination (starts one frame early so the two lines join)
        sns.lineplot(x = range(germination_frame-1, len(feature_row)), y=feature_row[germination_frame-1:], linestyle = '-', color = colorline, linewidth = 3)
        # legend entry for this spore
        custom_handles.append(plt.Line2D([0], [0], color=colorline, lw=4, linestyle='None', marker='o', label=spore_name))
        break

    # axis fonts size
    axis_fontsize = 32
    plt.xlabel('Frame', fontsize=axis_fontsize)
    plt.ylabel(f"{feature}", fontsize=axis_fontsize)

    # axis ticks and limits
    plt.xticks(fontsize = 20)
    plt.yticks(fontsize = 20)
    plt.ylim(0, 1)

    # legend, listed in reverse order of insertion
    custom_handles.reverse()
    plt.legend(handles=custom_handles, loc='best', shadow = True, fontsize=20)
    plt.tight_layout(rect=[0, 0, 0.85, 1])

    plt.savefig(f"{plots_folder}sample_{feature}.jpg")
    plt.show()

### Plot Features Against Each Other Contour Plot


In [None]:
def plot_features(FEATURE1: np.array, FEATURE2:np.array, features_list: list[str], plot_title):
    """Scatter one feature against another, colored by germination frame.

    Parameters
    ----------
    FEATURE1, FEATURE2 : 2-column arrays of [germination frame, value] rows;
        the frames are read from FEATURE1.
    features_list : [x-axis label, y-axis label]; both also go into the
        saved file name.
    plot_title : accepted for interface compatibility; not used.
    """
    fig, ax = plt.subplots(figsize = (10,10))

    frame_colors = FEATURE1[:, 0]
    points = ax.scatter(
        FEATURE1[:, 1],
        FEATURE2[:, 1],
        c = frame_colors,
        cmap = "turbo",
        s = 60,
        edgecolor = "black",
    )
    fig.colorbar(points, ax=ax)
    ax.set_xlabel(features_list[0], fontsize = 16)
    ax.set_ylabel(features_list[1], fontsize = 16)
    fig.savefig(f"{plots_folder}contourplot_{features_list[0]}_against{features_list[1]}.jpg")
    plt.show()

### Main 

In [None]:
def Main(df, plots_dir, time_between_frames, combined_df = 0):
  '''
  Run the full germination analysis for one dataset and save the plots.

  df: data frame whose feature columns hold stringified per-frame lists
      (parsed here with ast.literal_eval). It is modified in place by the
      helpers, which add "Germination Frame", "INTEGRAL", "DERIVATIVE" and
      "FITTED_POLYNOMIAL" columns.
  plots_dir: output folder prefix; stored in the module-level plots_folder
      that the plotting helpers read.
  time_between_frames: factor forwarded to sum_intensity.
  combined_df: only referenced by commented-out code at present.
  '''
  global plots_folder
  plots_folder = plots_dir
  print(f"data has columns {list(df.columns)}...")
  print(f"analyzing {len(df)} spores...")

  framerange = [(0, 25), (25, 50), (50, 75), (75, 100), (100, 300)]

  # turning model data into Series of lists instead of strings that look like lists
  INTENSITY = df["INTENSITY"].apply(ast.literal_eval)
  AREA = df["AREA"].apply(ast.literal_eval)
  GERMINATION = df["GERMINATION"].apply(ast.literal_eval)
  ELLIPSE_MINOR = df["ELLIPSE MINOR"].apply(ast.literal_eval)
  ELLIPSE_MAJOR = df["ELLIPSE MAJOR"].apply(ast.literal_eval)
  PERIMETER = df["PERIMETER"].apply(ast.literal_eval)
  CIRCULARITY = df["CIRCULARITY"].apply(ast.literal_eval)
  GERMINANT_EXPOSURE = df["GERMINANT EXPOSURE"].apply(ast.literal_eval)
  ELLIPSE_RATIO = df["ELLIPSE ASPECT RATIO"].apply(ast.literal_eval)

  # germination frequency
  germination_frequency(plots_folder, GERMINATION, 1)

  # group data by germination frame
  # groups, data_stats = group_by_germination(df, ["INTENSITY", "AREA", "GERMINATION"])

  # used for plot limits
  min_intensity, max_intensity = get_min_max(df, "INTENSITY")
  min_area, max_area = get_min_max(df, "AREA")
  min_ellipse_minor, max_ellipse_minor = get_min_max(df, "ELLIPSE MINOR")
  min_ellipse_major, max_ellipse_major = get_min_max(df, "ELLIPSE MAJOR")
  min_perimeter, max_perimeter = get_min_max(df, "PERIMETER")
  min_circularity, max_circularity = get_min_max(df, "CIRCULARITY")
  min_ellipse, max_ellipse = get_min_max(df, "ELLIPSE ASPECT RATIO")

  #plot x, y, and germination frame
  #if combined_df == 0:
    #plot_xy_germinationframe(data_stats)

  #---------------------feature boxplots

  #intensity
  intensity_arrays = germinationframe_feature(df, INTENSITY, GERMINATION)
  diff_intensity = subplot_swarmplot(intensity_arrays, "Intensity", "Electrochemical Potential", [min_intensity, max_intensity], framerange)

  #plot measure of area
  area_arrays = germinationframe_feature(df, AREA, GERMINATION)
  diff_area = subplot_swarmplot(area_arrays, "Area", "Size", [min_area, max_area], framerange)

  # plot measure of ellipse minors
  minor_arrays = germinationframe_feature(df, ELLIPSE_MINOR, GERMINATION)
  diff_minor = subplot_swarmplot(minor_arrays, "Ellipse Minor", "Ellipse Minor", [min_ellipse_minor, max_ellipse_minor], framerange)

  # plot measure of ellipse majors (uses the major-axis arrays and its own result name)
  major_arrays = germinationframe_feature(df, ELLIPSE_MAJOR, GERMINATION)
  diff_major = subplot_swarmplot(major_arrays, "Ellipse Major", "Ellipse Major", [min_ellipse_major, max_ellipse_major], framerange, 0)

  #ellipse aspect
  ellipse_arrays = germinationframe_feature(df, ELLIPSE_RATIO, GERMINATION)
  diff_ellipseratio = subplot_swarmplot(ellipse_arrays, "Ellipse Aspect Ratio", "Ellipse Aspect Ratio", [min_ellipse, max_ellipse], framerange, 0)

  #perimeter
  perimeter_arrays = germinationframe_feature(df, PERIMETER, GERMINATION)
  diff_perim = subplot_swarmplot(perimeter_arrays, "Perimeter", "Perimeter", [min_perimeter, max_perimeter], framerange, 0)

  #circularity
  circularity_arrays = germinationframe_feature(df, CIRCULARITY, GERMINATION)
  diff_circularity = subplot_swarmplot(circularity_arrays, "Circularity", "Circularity", [min_circularity, max_circularity], framerange, 0)

  #--------------------------- FITTED POLYNOMIAL
  # fit_polynomial_model writes the INTEGRAL / DERIVATIVE columns onto df
  fit_polynomial_model(df, INTENSITY, GERMINATION, [min_intensity, max_intensity], 7)
  INTEGRAL = df["INTEGRAL"].apply(ast.literal_eval)
  DERIVATIVE = df["DERIVATIVE"].apply(ast.literal_eval)

  # derivative of the fitted polynomial plotted against the intensity itself
  for i in range(len(DERIVATIVE)):
    sns.lineplot(x = INTENSITY[i][:len(DERIVATIVE[i])], y = DERIVATIVE[i])
  plt.show()

  # Columns:
  #      ['Unnamed: 0', 'INTENSITY', 'AREA', 'GERMINANT EXPOSURE', 'GERMINATION',
  #      'ELLIPSE MINOR', 'ELLIPSE MAJOR', 'PERIMETER', 'CIRCULARITY',
  #      'FRAME LIST', 'X_POSITION', 'Y_POSITION', 'Germination Frame',
  #      'INTEGRAL', 'DERIVATIVE']

  #heatmap between average correlations between features over all time
  heatmap_labels = {
    'GERMINATION': 'Germination',
    'INTENSITY': 'Intensity',
    "AREA": "Area",
    "ELLIPSE MINOR": "Ellipse Minor",
    "ELLIPSE MAJOR": "Ellipse Major",
    "PERIMETER": "Perimeter",
    "CIRCULARITY": "Circularity",
    "GERMINANT EXPOSURE": "Germinant Exposure",
    "ELLIPSE ASPECT RATIO": "Ellipse Aspect Ratio"
    }
  bold_labels = ["Intensity", "Area", "Ellipse Minor", "Ellipse Major", "Perimeter"]
  plain_heatmap(df, ["GERMINATION", "INTENSITY", "AREA", "ELLIPSE MINOR", "ELLIPSE MAJOR", "PERIMETER", "CIRCULARITY", "GERMINANT EXPOSURE", "ELLIPSE ASPECT RATIO"], "Feature Correlations over Time", heatmap_labels, bold_labels)

  #heatmap between initial feature values and germination frames
  features_columns = ["GERMINATION", "INTENSITY", "AREA", "ELLIPSE MINOR", "ELLIPSE MAJOR", "PERIMETER", "CIRCULARITY", "ELLIPSE ASPECT RATIO"]

  germination_array = intensity_arrays[1][:, 0]
  df_initial_features = pd.DataFrame({
    'GERMINATION': germination_array,
    "INTENSITY": intensity_arrays[1][:, 1],
    "AREA": area_arrays[1][:, 1],
    "ELLIPSE_MINOR": minor_arrays[1][:, 1],
    "ELLIPSE_MAJOR": major_arrays[1][:, 1],
    "PERIMETER":perimeter_arrays[1][:, 1],
    "CIRCULARITY":circularity_arrays[1][:, 1],
    "ELLIPSE_ASPECT_RATIO": ellipse_arrays[1][:, 1]
  })
  initial_value_heatmap(df_initial_features, features_columns, "Initial Feature Correlations")

  for feature in df_initial_features.columns.tolist():
    tukey_analysis(df_initial_features, feature, framerange)

  df_last_features = pd.DataFrame({
    'GERMINATION': germination_array,
    "INTENSITY": intensity_arrays[2][:, 1],
    "AREA": area_arrays[2][:, 1],
    "ELLIPSE_MINOR": minor_arrays[2][:, 1],
    "ELLIPSE_MAJOR": major_arrays[2][:, 1],
    "PERIMETER":perimeter_arrays[2][:, 1],
    "CIRCULARITY":circularity_arrays[2][:, 1],
    "ELLIPSE_ASPECT_RATIO": ellipse_arrays[2][:, 1]
  })

  #heatmap between last feature values and germination frames
  initial_value_heatmap(df_last_features, features_columns, "Last Value of Feature before Germination Correlations")

  #sum intensity
  sum_intensity(GERMINATION, INTENSITY, time_between_frames)

  #plot features grouped by germination frame
  #plot_feature_germ_frames(GERMINATION, INTENSITY, "Intensity")

  ### plot one from each frame
  #plot_feature_example(GERMINATION, INTENSITY, "Electrochemical Potential", [14, 38, 62, 86])
  #plot_feature_example(GERMINATION, AREA, "Size", [14, 38, 62, 86])
  #plot_feature_example(GERMINATION, ELLIPSE_MINOR, "Ellipse Minor", [14, 38, 62, 86])
  #plot_feature_example(GERMINATION, ELLIPSE_MAJOR, "Ellipse Major", [14, 38, 62, 86])
  #plot_feature_example(GERMINATION, PERIMETER, "Perimeter", [14, 38, 62, 86])
  #plot_feature_example(GERMINATION, CIRCULARITY, "Circularity", [14, 38, 62, 86])
  #plot_feature_example(GERMINATION, ELLIPSE_RATIO, "Ellipse Aspect Ratio", [14, 38, 62, 86])

  #plot_features(intensity_arrays[1], area_arrays[1], ["Initial Intensity", "Initial Area"], "Initial Values")
  #plot_features(intensity_arrays[1], minor_arrays[1], ["Initial Intensity", "Initial Ellipse Minor"], "Initial Values")

  #plot_features(intensity_arrays[2], area_arrays[2], ["Final Intensity", "Final Area"], "Last Values")
  #plot_features(intensity_arrays[2], minor_arrays[2], ["Final Intensity", "Final Ellipse Minor"], "Last Values")

  #plot_features(diff_intensity, diff_area, ["Difference in Final and Initial Intensity", "Difference in Final and Initial Area"], "Difference Values")
  #plot_features(diff_intensity, diff_minor, ["Difference in Final and Initial Intensity", "Difference in Final and Initial Ellipse Minor"], "Difference Values")

### M4581_s1 Analysis

In [None]:
# Inputs for the M4581_s1 dataset. `csv_path` is reused by the combined-dataset
# cell further down; `plots_folder` is also the module-level name that Main()
# reassigns and the plotting helpers read.
csv_path = "/Users/alexandranava/Desktop/Spores/M4581_s1/Analysis/V3/M4581_s1_Model_Data.csv"
plots_folder = "/Users/alexandranava/Desktop/Spores/M4581_s1/Analysis/V3/Plots/"
time_between_frames = 5

# List-valued columns that scale_csv_data rescales (each column on its own range).
columns_to_scale = ['INTENSITY', 'AREA', 'GERMINANT EXPOSURE', 'GERMINATION',
                    'ELLIPSE MINOR', 'ELLIPSE MAJOR', 'PERIMETER', 'CIRCULARITY', "ELLIPSE ASPECT RATIO"]

M4581_s1_df  = scale_csv_data(csv_path, columns_to_scale)

# Unscaled alternative, kept for reference:
#M4581_s1_df = pd.read_csv(csv_path)
Main(M4581_s1_df, plots_folder, time_between_frames)


### M4576_s2 Analysis

In [None]:
# Inputs for the M4576_s2 dataset. `csv_path2` is reused by the
# combined-dataset cell further down.
csv_path2 = "/Users/alexandranava/Desktop/Spores/M4576_s2/M4576_s2_Model_Data.csv"
plots_folder2 = "/Users/alexandranava/Desktop/Spores/M4576_s2/Analysis/Plots/"
time_between_frames2 = 5

# List-valued columns that scale_csv_data rescales (each column on its own range).
columns_to_scale = ['INTENSITY', 'AREA', 'GERMINANT EXPOSURE', 'GERMINATION',
                    'ELLIPSE MINOR', 'ELLIPSE MAJOR', 'PERIMETER', 'CIRCULARITY', "ELLIPSE ASPECT RATIO"]

M4576_s2_df  = scale_csv_data(csv_path2, columns_to_scale)

# Unscaled alternative, kept for reference:
#M4567_s2_df = pd.read_csv(csv_path2)
Main(M4576_s2_df, plots_folder2, time_between_frames2)

### Both datasets: 

In [None]:
# Combined analysis of both datasets. Relies on `csv_path` and `csv_path2`
# from the two per-dataset cells above.
plots_folder_combined = "/Users/alexandranava/Desktop/Spores/M4581_s1andM4576_s2/plots/"

columns_to_scale = ['INTENSITY', 'AREA', 'GERMINANT EXPOSURE', 'GERMINATION',
                    'ELLIPSE MINOR', 'ELLIPSE MAJOR', 'PERIMETER', 'CIRCULARITY', "ELLIPSE ASPECT RATIO"]

# Each dataset is re-read and scaled on its own before stacking.
# NOTE(review): the second variable is spelled M4567 while the dataset path
# says M4576 -- looks like a typo in the name; it is used consistently here.
M4581_s1_scaleddf  = scale_csv_data(csv_path, columns_to_scale)
M4567_s2_scaleddf = scale_csv_data(csv_path2, columns_to_scale)

# Stack the rows and renumber the index 0..n-1; the helpers called by Main
# address rows with df.loc[i] for i in range(len(df)).
df_stacked = pd.concat([M4581_s1_scaleddf, M4567_s2_scaleddf], axis=0)
df_stacked.reset_index(drop=True, inplace=True)

Main(df_stacked, plots_folder_combined, 5, combined_df = 1)