In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import ast 
import math 
from sklearn.preprocessing import MinMaxScaler


In [None]:
# Load the raw (unscaled) model data for experiment M4581_s1.
# NOTE(review): `df` is reassigned from scale_csv_data(...) in the "Initializing" cell,
# so this unscaled frame is only live until that cell runs.
df = pd.read_csv("/Users/alexandranava/Desktop/Spores/M4581_s1/Analysis/V3/M4581_s1_Model_Data.csv")
# NOTE(review): this binds a path *string*, not a DataFrame, and the directory (M4576_s1)
# does not match the file name (M4581_s1_Model_Data.csv) -- looks like a leftover.
# `df_test` is reassigned to a scaled DataFrame in the "Initializing" cell.
df_test = "/Users/alexandranava/Desktop/Spores/M4576_s1/Analysis/V3/M4581_s1_Model_Data.csv"


### Initializing

In [None]:
# Acquisition / exposure schedule. The three time values are divided by one another in
# calculate_exposure, so they must share a unit -- presumably minutes; TODO confirm.
# Time elapsed between two consecutive frames.
time_between_frames = 5
# Time of the first germinant exposure.
initial_exposure = 60
# Time between consecutive germinant exposures.
time_between_exposure = 120
# Upper bound (in frames) used when enumerating exposure frames.
num_frames = 276
# Columns handed to scale_csv_data; each is parsed there as a stringified list per row.
columns_to_scale = ['INTENSITY', 'AREA', 'GERMINANT EXPOSURE', 'GERMINATION',
                    'ELLIPSE MINOR', 'ELLIPSE MAJOR', 'PERIMETER', 'CIRCULARITY', "ELLIPSE ASPECT RATIO"]
def scale_csv_data(file_path: str, columns_to_scale: list[str]):
  """Read a CSV and min-max scale the list-valued columns named in ``columns_to_scale``.

  Every cell of a scaled column must hold the string form of a Python list
  (it is parsed with ``ast.literal_eval``). For one column, the lists of all
  rows are pooled, scaled together with ``MinMaxScaler`` (so the whole column
  shares one min and one max), cut back into per-row lists and stored again
  as strings.

  Args:
    file_path: Path of the CSV file to read.
    columns_to_scale: Names of the columns to rescale.

  Returns:
    The DataFrame read from ``file_path`` with the listed columns replaced by
    their scaled, stringified lists.
  """
  frame = pd.read_csv(file_path)
  scaler = MinMaxScaler()

  def scale_column(column):
      parsed = column.apply(ast.literal_eval)
      pooled = np.concatenate(parsed.values)
      # fit_transform refits the scaler on every call, so each column gets its own range.
      rescaled = scaler.fit_transform(pooled.reshape(-1, 1)).flatten()
      # Row k of the column occupies bounds[k]:bounds[k + 1] of the pooled vector.
      bounds = np.concatenate(([0], np.cumsum([len(row) for row in parsed])))
      return [str(rescaled[lo:hi].tolist()) for lo, hi in zip(bounds[:-1], bounds[1:])]

  for name in columns_to_scale:
      frame[name] = scale_column(frame[name])

  return frame

# Training experiment: read and min-max scale the list-valued feature columns.
csv_path = "/Users/alexandranava/Desktop/Spores/M4581_s1/Analysis/V3/M4581_s1_Model_Data.csv"
df  = scale_csv_data(csv_path, columns_to_scale)

# Test experiment. scale_csv_data fits its scaler on the file it reads, so `df` and
# `df_test` are each scaled against their own min/max rather than a shared one.
# NOTE(review): the chained assignment below also rebinds `csv_path` to the test file;
# that looks unintended -- confirm nothing later relies on `csv_path`.
csv_path_test = csv_path = "/Users/alexandranava/Desktop/Spores/M4576_s2/M4576_s2_Model_Data.csv"
df_test = scale_csv_data(csv_path_test, columns_to_scale)

def calculate_exposure(num_frames, initial_exposure, time_between_exposure, time_between_frames) -> tuple[float, float, list[float]]:
  """Convert the germinant-exposure schedule from time units into frame indices.

  Args:
    num_frames: Exclusive upper bound for the exposure frames generated after the first one.
    initial_exposure: Time of the first exposure (same unit as ``time_between_frames``).
    time_between_exposure: Time between consecutive exposures (same unit).
    time_between_frames: Time between consecutive frames.

  Returns:
    ``(first_frame_exposure, frames_between_exposures, exposure_frames)``. All values
    are floats because they come from true division. ``exposure_frames`` always starts
    with the first exposure frame, even if that frame is not below ``num_frames``.

  Raises:
    ValueError: If the spacing between exposures is not positive in frames.
    ZeroDivisionError: If ``time_between_frames`` is 0.
  """
  frames_between_exposures = time_between_exposure/time_between_frames
  first_frame_exposure = initial_exposure/time_between_frames - 1

  # With a zero or negative step the loop below would never get past num_frames.
  if frames_between_exposures <= 0:
    raise ValueError(
      f"exposure spacing must be positive, got {frames_between_exposures} frames"
    )

  print(f"first germinant exposure: {first_frame_exposure}")
  print(f"time between germinant exposures: {frames_between_exposures}")

  exposure_frames = [first_frame_exposure]

  while exposure_frames[-1] + frames_between_exposures < num_frames:
    exposure_frames.append(exposure_frames[-1] + frames_between_exposures)

  return first_frame_exposure, frames_between_exposures, exposure_frames

# Derive the exposure schedule (in frames) from the constants defined at the top of this cell.
# The first two results are read as globals by add_germinant_exposures.
first_frame_exposure, frames_between_exposures, germinant_exposures = calculate_exposure(num_frames, initial_exposure, time_between_exposure, time_between_frames)
print(f"Germinant exposures: {germinant_exposures}")

# Every exposure frame shifted by 2 frames.
# NOTE(review): not referenced again in the visible part of the notebook.
sample_germination_frames = [frame + 2 for frame in germinant_exposures]
def add_germination_index(df):
    """Add a ``GERMINATION_INDEX`` column: the first frame at which each spore is germinated.

    Each ``GERMINATION`` cell is parsed with ``ast.literal_eval`` as a list of per-frame
    flags, and the position of the first element equal to 1 is stored (``1.0`` matches too).

    Args:
        df: DataFrame with a ``GERMINATION`` column of stringified lists. Modified in place.

    Returns:
        The same ``df`` object, so the call can be used like ``add_germinant_exposures``.

    Raises:
        ValueError: If a row's list contains no 1 (raised by ``list.index``).
    """
    germination_flags = df["GERMINATION"].apply(ast.literal_eval)
    df["GERMINATION_INDEX"] = germination_flags.apply(lambda flags: flags.index(1))
    return df

# Adds the GERMINATION_INDEX column to both frames in place.
add_germination_index(df)
add_germination_index(df_test)
def add_germinant_exposures(df, first_exposure_frame=None, exposure_interval=None):
  """Add a ``GERMINANT_EXPOSURES`` column: the exposure count at each spore's germination frame.

  The count is ``floor((germination_frame - first_exposure_frame) / exposure_interval) + 1``,
  so a frame before the first exposure gives 0 (or less).

  Args:
    df: DataFrame with a ``GERMINATION_INDEX`` column. Modified in place.
    first_exposure_frame: Frame of the first exposure. Defaults to the notebook-level
      ``first_frame_exposure``.
    exposure_interval: Frames between exposures. Defaults to the notebook-level
      ``frames_between_exposures``.

  Returns:
    The same ``df`` object with the new column.
  """
  # Fall back to the notebook globals so existing one-argument calls behave as before.
  if first_exposure_frame is None:
    first_exposure_frame = first_frame_exposure
  if exposure_interval is None:
    exposure_interval = frames_between_exposures

  df["GERMINANT_EXPOSURES"] = [
    math.floor((frame_number - first_exposure_frame)/exposure_interval) + 1
    for frame_number in df["GERMINATION_INDEX"]
  ]
  return df

# Adds the GERMINANT_EXPOSURES column; the function mutates and returns the frame it was given.
df = add_germinant_exposures(df)
df_test = add_germinant_exposures(df_test)


### Plot three features, with a fourth as color

In [None]:
%matplotlib widget

from skspatial.objects import Points
from skspatial.objects import Plane
from skspatial.plotting import plot_3d
from sklearn.metrics import mean_squared_error



def size_stats_relationship(df, input_features, frames_considered):
  """Fit a plane to (ellipse minor, area, perimeter) of dormant frames and plot it in 3D.

  For every spore the first ``frames_considered`` entries of each feature list are
  pooled. Only frames whose germination value is below 1 are kept. A best-fit plane
  is computed for the kept points, the RMSE of the point-to-plane distances and the
  plane are printed, and the points are drawn coloured by intensity.

  Args:
    df: DataFrame whose "GERMINATION", "ELLIPSE MINOR", "AREA", "PERIMETER" and
      "INTENSITY" cells are stringified lists.
    input_features: Not read by this function; the plotted columns are fixed.
    frames_considered: Number of leading frames taken from each spore.
  """
  def pooled(column_name):
    # Concatenate the leading frames of every spore for one column.
    values = []
    for spore_i in range(len(df)):
      values.extend(ast.literal_eval(df[column_name].iloc[spore_i])[:frames_considered])
    return values

  germination = pooled("GERMINATION")
  ellipse_minor = pooled("ELLIPSE MINOR")
  area = pooled("AREA")
  perimeter = pooled("PERIMETER")
  intensity = pooled("INTENSITY")

  # Positions of the frames in which the spore has not germinated yet.
  dormant_frames = [i for i, germ_status in enumerate(germination) if germ_status < 1]
  ellipse_minor_dormant = [ellipse_minor[i] for i in dormant_frames]
  area_dormant = [area[i] for i in dormant_frames]
  perimeter_dormant = [perimeter[i] for i in dormant_frames]
  intensity_dormant = [intensity[i] for i in dormant_frames]

  points = Points(np.column_stack((ellipse_minor_dormant, area_dormant, perimeter_dormant)))
  plane = Plane.best_fit(points)
  distances = np.array([plane.distance_point(point) for point in points])

  # RMSE of the distances against zero, i.e. the root of their mean square.
  rmse = np.sqrt(mean_squared_error(np.zeros_like(distances), distances))
  print(f"RMSE of points to plane: {rmse}")
  print(plane)

  point_layer = points.plotter(c=intensity_dormant, s=10, depthshade=False, cmap='viridis')
  plane_layer = plane.plotter(alpha=0.1, lims_x=(-0.4, 0.4), lims_y=(-0.4, 0.4))
  fig, ax = plot_3d(point_layer, plane_layer)
  ax.set_xlabel("Ellipse Minor")
  ax.set_ylabel("Area")
  ax.set_zlabel("Perimeter")

  # The first collection on the axes supplies the intensity colour mapping.
  cbar = plt.colorbar(ax.collections[0], ax=ax, shrink = 0.5, pad = 0.15)
  cbar.set_label("Intensity")

  plt.title(f"Physiological Features over Initial {frames_considered} Frames")
  plt.show()

# Used only as a slice bound ([:frames_considered]); a bound past the end of a list keeps
# the whole list. 289 is larger than num_frames (276) set in the "Initializing" cell.
frames_considered = 289
# The feature-name list is passed as `input_features`, which the function does not read.
size_stats_relationship(df, ["ELLIPSE MINOR", "AREA", "PERIMETER", "INTENSITY", "GERMINATION"], frames_considered)

### Minimize ||m - (ax^2 + by^2 + cz^2)||

In [None]:
%matplotlib inline
from scipy.optimize import minimize


def func(coef, x, y, z, m):
    a = coef[0]
    b = coef[1]
    c = coef[2]
    return np.linalg.norm(m - a*x**2 + b*y**2 + c*z**2)

def fit_4d(df):
  #fitting m = ax^2 + by^2 + cz^2
  ellipse_minor_dormant = []
  area_dormant = []
  perimeter_dormant = []
  intensity_dormant = []

  for spore_i in range(len(df)):
    germination_frame = df["GERMINATION_INDEX"].iloc[spore_i]
    ellipse_minor_dormant += ast.literal_eval(df["ELLIPSE MINOR"].iloc[spore_i])[8:germination_frame]
    area_dormant += ast.literal_eval(df["AREA"].iloc[spore_i])[8:germination_frame]
    perimeter_dormant += ast.literal_eval(df["PERIMETER"].iloc[spore_i])[8:germination_frame]
    intensity_dormant += ast.literal_eval(df["INTENSITY"].iloc[spore_i])[8:germination_frame]

  x = np.array(area_dormant)
  y = np.array(perimeter_dormant)
  z = np.array(ellipse_minor_dormant)
  m = np.array(intensity_dormant)
  res = minimize(func, [1, 1, 1], args=(x, y, z, m))
  a, b, c = res.x
  return a, b, c

a, b, c = fit_4d(df)
print(f"a = {a}, b = {b}, c = {c}")




In [None]:
plt.clf()

def calculate_rmse(list1, list2):
    list1 = np.array(list1)
    list2 = np.array(list2)
    rmse = np.sqrt(np.mean((list1 - list2) ** 2))
    return rmse

def model_intensity(a,b,c, x, y, z):
  return a*x**2 + b*y**2 + c*z**2
  
def run_model(df):
 rmses = []
 residuals = []
 for spore_row in range(len(df)):

  spore_data = df.iloc[spore_row][["AREA", "PERIMETER", "ELLIPSE MINOR", "INTENSITY"]]
  germination_frame = df["GERMINATION_INDEX"].iloc[spore_row]

  spore_intensity = ast.literal_eval(spore_data["INTENSITY"])
  spore_area = ast.literal_eval(spore_data["AREA"])
  spore_perimeter = ast.literal_eval(spore_data["PERIMETER"])
  spore_minor = ast.literal_eval(spore_data["ELLIPSE MINOR"])

  pred_intensity = [-model_intensity(a,b,c,spore_area[t], spore_perimeter[t], spore_minor[t]) for t in range(len(spore_intensity))]
  # Create a figure with two subplots side by side
  fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(8, 4))

  # Plot actual vs predicted intensity on the left subplot
  #ax1.scatter(pred_intensity[:germination_frame], spore_intensity[:germination_frame], label="Actual", color="blue")
  ax1.plot(range(germination_frame), spore_intensity[:germination_frame], label="Actual", color="blue")
  ax1.plot(range(germination_frame), pred_intensity[:germination_frame], label="Predicted", color="red")
  ax1.set_title(f"Spore {spore_row + 1} Intensity")
  ax1.set_xlabel("Time Frame")
  ax1.set_ylim(0, 0.8)
  ax1.set_ylabel("Intensity")
  ax1.legend()

  # Calculate RMSE
  rmse = calculate_rmse(pred_intensity[:germination_frame], spore_intensity[:germination_frame])
  rmses.append(rmse)

  # Plot residuals on the right subplot
  residuals = np.array(spore_intensity[:germination_frame]) - np.array(pred_intensity[:germination_frame])
  ax2.stem(range(len(residuals)), residuals, basefmt=" ", label = f"RMSE = {rmse:.2f}")
  ax2.set_ylim(-.3, .3)
  ax2.set_title(f"Spore {spore_row + 1} Residuals")
  ax2.set_xlabel("Time Frame")
  ax2.set_ylabel("Residuals")
  ax2.legend()

  # Show the combined plots
  plt.tight_layout()
  plt.show()
 return rmses 

# run_model shows one figure per spore and returns the per-spore RMSEs.
# The coefficients a, b, c were fit on `df` only, so `df_test` is evaluated with
# coefficients it did not contribute to.
predicted_rmses = run_model(df)
predicted_rmses_test = run_model(df_test)

In [None]:
# Mean per-spore RMSE for each experiment.
print(f"average rmse (Train): {np.average(predicted_rmses)}")
print(f"average rmse (Test): {np.average(predicted_rmses_test)}")

# Long-format table: one row per spore, labelled with the experiment it came from.
# The label order must match the concatenation order of the RMSE values.
rmse_data = {
    "Experiment": ["Train"] * len(predicted_rmses) + ["Test"] * len(predicted_rmses_test),
    "RMSE": np.concatenate([predicted_rmses, predicted_rmses_test])
}
rmse_df = pd.DataFrame(rmse_data)

# Side-by-side box plots of the per-spore RMSE distribution.
fig, ax = plt.subplots()
sns.boxplot(data=rmse_df, x="Experiment", y="RMSE", ax=ax, hue = "Experiment", palette = "pastel")
ax.set_ylabel("RMSE (Predicted - Actual Intensity)")
ax.set_title("RMSEs of Individual Spores until Germination")
plt.show()




### Compute Area and Perimeter given Ellipse Minor and Major

In [None]:
def compute_perimeter(minor, major):
  """Approximate an ellipse perimeter as ``2*pi*sqrt((major^2 + minor^2) / 2)``.

  Scalars and NumPy arrays are both accepted.
  NOTE(review): the formula treats the inputs as semi-axes -- confirm that
  the "ELLIPSE MINOR"/"ELLIPSE MAJOR" columns are not full axis lengths.
  """
  quadratic_mean = np.sqrt((major**2 + minor**2)/(2))
  return 2*np.pi*quadratic_mean
def compute_area(minor, major):
  """Return the ellipse area ``pi * minor * major``.

  Scalars and NumPy arrays are both accepted.
  NOTE(review): this is the area for semi-axes -- confirm that the inputs are
  not full axis lengths.
  """
  return np.pi * minor * major

In [None]:
def minor_major(df):
  """Plot each spore's ellipse minor and major values over time, up to its germination frame.

  One small figure is shown per row of ``df``.

  Args:
    df: DataFrame with ``GERMINATION_INDEX`` and stringified-list columns
      "ELLIPSE MINOR" and "ELLIPSE MAJOR".
  """
  for spore_row in range(len(df)):
    germination_frame = df["GERMINATION_INDEX"].iloc[spore_row]
    spore_minor = ast.literal_eval(df["ELLIPSE MINOR"].iloc[spore_row])
    spore_major = ast.literal_eval(df["ELLIPSE MAJOR"].iloc[spore_row])

    # One figure per spore. No figure is opened outside the loop: nothing would be
    # drawn on it and it would only render as an empty canvas.
    fig, ax = plt.subplots(figsize=(3, 2))
    frames = range(germination_frame)
    sns.lineplot(x = frames, y = spore_minor[:germination_frame], label = "Minor", color = "red", ax = ax)
    sns.lineplot(x = frames, y = spore_major[:germination_frame], label = "Major", color = "blue", ax = ax)
    ax.set_title(f"Spore {spore_row + 1}")
    ax.set_xlabel("Time Frame")
    ax.set_ylabel("Ellipse axis")
    plt.show()

# Draw the minor/major curves for every spore of the training experiment.
minor_major(df)

In [None]:
def makeup_data_yay(df):
  