In [111]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cv2
import os
import glob
from scipy.signal import find_peaks
from scipy.signal import peak_widths

run_number = 3

# Filter out the light-adapted strips

In [112]:
trial_number = 2


# output\trial_1\081624_rois_metadata_bignine.csv
metadata_all = pd.read_csv(f"./output/trial_{trial_number}/081624_rois_metadata_bignine.csv")

metadata = metadata_all.copy()

# remove the light adapted strips. 
# 1. lower case the filename
metadata['filename'] = metadata['filename'].str.lower()
print(f"Before filtering out light-adapted images there are {len(metadata['filename'])} strips in the metadata.\n")

# 2. remove rows that don't contain 'dark'
metadata = metadata[metadata['filename'].str.contains('dark')]
print(f"After filtering, there are {len(metadata['filename'])} remaining.\n")

# get the strip numbers of the dark adapted strips
strip_filenames = metadata['strip_filename'].unique()

print(f"These are the remaining strips:")
for strip in metadata['strip_filename'].unique():
    print(f"\t{strip}")
    
# get the strips that were filtered out by comparing the strip_filenames in the metadata_all with the strip_numbers
filtered_out = metadata_all[~metadata_all['strip_filename'].isin(strip_filenames)]

print(f"These are the strips that were filtered out:")
for strip in filtered_out['strip_filename'].unique():
    print(f"\t{strip}")
    
    
# create a list of the isolated numbers from the strip filenames
strip_numbers = [int(strip.strip(".png").split('_')[-1]) for strip in strip_filenames]
print(f"\nThese are the strip numbers of the remaining strips: {strip_numbers}")

Before filtering out light-adapted images there are 9 strips in the metadata.

After filtering, there are 8 remaining.

These are the remaining strips:
	strip_106.png
	strip_134.png
	strip_135.png
	strip_161.png
	strip_176.png
	strip_187.png
	strip_229.png
	strip_232.png
These are the strips that were filtered out:
	strip_101.png

These are the strip numbers of the remaining strips: [106, 134, 135, 161, 176, 187, 229, 232]


# Filter the lumen df to only include the dark-adapted strips.

In [113]:
# load the lumen datafile with pd
lumen_data = pd.read_csv(f"./output/trial_{trial_number}/csv/lumen_{run_number}.csv")

# print the length of the df 
print(f"Before filtering out the light-adapted strips, there are {len(lumen_data)} rows in the lumen data.\n")

# get the lumen data for the strips that were not filtered out
lumen_data = lumen_data[lumen_data['strip'].isin(strip_numbers)]

print(f"After filtering, there are {len(lumen_data)} remaining.\n")

# export back to disk as a csv, dark-only
lumen_data.to_csv(f"./output/trial_{trial_number}/csv/lumen_{run_number}_dark.csv", index=False)

Before filtering out the light-adapted strips, there are 342 rows in the lumen data.

After filtering, there are 312 remaining.



# Filter the membrane data to only include the dark-adapted strips.


In [114]:
membrane_data = pd.read_csv(f"./output/trial_{trial_number}/csv/membrane_{run_number}.csv")
print(f"Before filtering out the light-adapted strips, there are {len(membrane_data)} rows in the membrane data.\n")

membrane_data = membrane_data[membrane_data['strip'].isin(strip_numbers)]
print(f"After filtering, there are {len(membrane_data)} remaining.\n")

membrane_data.to_csv(f"./output/trial_{trial_number}/csv/membrane_{run_number}_dark.csv", index=False)


Before filtering out the light-adapted strips, there are 396 rows in the membrane data.

After filtering, there are 360 remaining.



# get the threshold metadata so we can reference it

In [None]:
th_metadata = pd.read_csv(f"./output/trial_{trial_number}/threshold_metadata_{run_number}.csv", index_col=0)
th_metadata["process"] = th_metadata.index
print(th_metadata.head())

# calculate the mean width of the lumen, grouped by the process

In [None]:

# we need to get the mean width of the lumen for each image, but grouped by process. this is a groupby operation
mean_widths = lumen_data.groupby('process')['lumen_width_nm'].mean()

print(len(mean_widths))
print(mean_widths.head())

# sort the mean widths
mean_widths = mean_widths.sort_values()
expected_range = [4.5, 5]

# plot the mean widths. figsize the first number is the width, the second is the height
plt.figure(figsize=(10, 15))
mean_widths.plot(kind='barh')
plt.xlabel('Mean Lumen Width (nm)')
plt.ylabel('Process')
plt.title('Mean Lumen Width by Process')

# mark a dashed vertical line at the low and high end of the expected range
plt.axvline(expected_range[0], color='r', linestyle='--')
plt.axvline(expected_range[1], color='r', linestyle='--')

plt.show()

# Take a closer look at those in the range

In [None]:
# print the processes in mean_width that are within the range +/- 0.5 nm
# adjusted_range = [expected_range[0] - 0.5, expected_range[1] + 0.5]
adjusted_range = [expected_range[0], expected_range[1]]

adj_mean_widths = mean_widths[mean_widths.between(adjusted_range[0], adjusted_range[1])]
print(adj_mean_widths)
print(type(adj_mean_widths))

In [None]:
# get the index for the processes that are within the range
adj_processes = adj_mean_widths.index
print(adj_processes)

In [None]:
# take the index of the th_metadata and create a column
th_metadata['in_range'] = th_metadata.index.isin(adj_processes)
print(len(th_metadata))

# filter the th_metadata to only include the processes that are in the range
df = th_metadata[th_metadata['in_range']].copy()
print(len(df))

# Summarize results

In [66]:
import pandas as pd
import numpy as np

def summarize_widths(filtered_df, width_column, output_csv='width_summary.csv'):
    """
    Summarizes the widths for a given filtered DataFrame (either inner/outer membranes or lumen).
    
    Parameters:
    - filtered_df (pd.DataFrame): The DataFrame filtered to either inner/outer membranes or lumen.
    - width_column (str): The name of the column containing the widths to be summarized.
    - output_csv (str): The path to the output CSV file.
    
    Returns:
    - summary_df (pd.DataFrame): The summary DataFrame containing mean widths and standard deviations.
    """
    # Initialize an empty list to store the summary data
    summary_data = []

    # For each process, collect the width for each strip and the overall mean width
    for process in filtered_df['process'].unique():
        process_df = filtered_df[filtered_df['process'] == process]
        
        # Initialize a list to store the mean widths for each strip
        strip_mean_widths = []
        strip_width_sd = []
        
        for strip in process_df['strip'].unique():
            strip_df = process_df[process_df['strip'] == strip]
            mean_width = strip_df[width_column].mean()
            width_sd = strip_df[width_column].std()
            strip_mean_widths.append(mean_width)
            strip_width_sd.append(width_sd)
            summary_data.append([process, strip, mean_width, width_sd])
        
        # Calculate the overall mean width for the process
        overall_mean_width = np.mean(strip_mean_widths)
        overall_sd = np.mean(strip_width_sd)
        summary_data.append([process, 'overall', overall_mean_width, overall_sd])

    # Create a DataFrame from the summary data
    summary_df = pd.DataFrame(summary_data, columns=['process', 'strip', 'mean width (nm)', 'sd'])

    # Round down to 2 significant figures
    summary_df['mean width (nm)'] = summary_df['mean width (nm)'].round(2)
    summary_df['sd'] = summary_df['sd'].round(2)

    # Save the DataFrame to a CSV file
    summary_df.to_csv(output_csv, index=False)

    # Print the DataFrame (optional)
    print(summary_df)
    
    return summary_df


In [None]:

# Summarize lumen widths
lumen_summary_df = summarize_widths(lumen_data, 'lumen_width_nm', 'lumen_width_summary.csv')

# statistical test of the results
Are we in the range of the observed values from literature? 

Mean lumen width: 4.7, +/- 0.8nm (I interpret this to mean that one standard deviation is 0.8nm)

Ho: The mean lumen width of the dark-adapted strips is 4.7

H1: The mean lumen width of the dark-adapted strips is not 4.7

In [94]:
import pandas as pd
import numpy as np
from scipy import stats

def perform_stat_tests(
    filtered_df,
    width_column,
    target_values=(4.7, None),
    confidence_level=0.95,
    alpha=0.05,
    output_csv='stat_test_summary.csv'
):
    """
    Performs one-sample t-tests or Z-tests comparing the mean widths to a target mean for
    a given filtered DataFrame. Calculates confidence intervals and adds a significance 
    indicator.
    
    We don't have a good estimate of the standard deviation of the population, so we will
    use the sample standard deviation as an estimate. This will make the confidence intervals
    wider than they need to be, but it's a conservative approach.
    
    Unless we have a very good reason to believe that the population standard deviation is
    close to the sample standard deviation, we should use the t-test.

    Parameters:
    - filtered_df (pd.DataFrame): The DataFrame filtered to the relevant data.
    - width_column (str): The name of the column containing the widths to be tested.
    - target_values (tuple): A tuple containing the target mean and target standard deviation (if known).
      If target standard deviation is None, a t-test is performed. If provided, a Z-test is performed.
    - alpha (float): Significance level for the tests (default is 0.05).
    - output_csv (str): The path to the output CSV file.

    Returns:
    - summary_df (pd.DataFrame): The summary DataFrame containing test statistics, p-values, confidence intervals, and significance indicator.
    """
# Unpack target values
    target_mean, target_sd = target_values

    # Initialize an empty list to store the summary data
    summary_data = []

    # For each process, perform the statistical test for each strip and the overall data
    for process in filtered_df['process'].unique():
        process_df = filtered_df[filtered_df['process'] == process]

        # Initialize lists to store results for the overall process
        overall_widths = []

        # For each strip within the process
        for strip in process_df['strip'].unique():
            strip_df = process_df[process_df['strip'] == strip]
            widths = strip_df[width_column].dropna()
            n = len(widths)
            if n > 1:
                sample_mean = widths.mean()
                sample_sd = widths.std(ddof=1)
                standard_error = sample_sd / np.sqrt(n)

                # Calculate confidence intervals
                if target_sd is not None:
                    # Z-test
                    z_critical = stats.norm.ppf(1 - (1 - confidence_level) / 2)
                    margin_of_error = z_critical * (target_sd / np.sqrt(n))
                    ci_lower = sample_mean - margin_of_error
                    ci_upper = sample_mean + margin_of_error

                    # Perform one-sample Z-test
                    z_statistic = (sample_mean - target_mean) / (target_sd / np.sqrt(n))
                    p_value = 2 * (1 - stats.norm.cdf(abs(z_statistic)))
                    test_stat = z_statistic
                    test_type = 'Z-test'
                else:
                    # t-test
                    t_critical = stats.t.ppf(1 - (1 - confidence_level) / 2, df=n - 1)
                    margin_of_error = t_critical * standard_error
                    ci_lower = sample_mean - margin_of_error
                    ci_upper = sample_mean + margin_of_error

                    # Perform one-sample t-test
                    t_statistic = (sample_mean - target_mean) / standard_error
                    p_value = 2 * stats.t.sf(abs(t_statistic), df=n - 1)
                    test_stat = t_statistic
                    test_type = 't-test'

                # Determine significance level
                if p_value <= 0.001:
                    significance = '***'
                elif p_value <= 0.01:
                    significance = '**'
                elif p_value <= 0.05:
                    significance = '*'
                else:
                    significance = 'ns'  # Not significant

                # Append the results
                summary_data.append([
                    process, strip, n, sample_mean, sample_sd,
                    ci_lower, ci_upper,
                    test_stat, p_value, significance, test_type
                ])
            else:
                # Not enough data to perform the test
                summary_data.append([
                    process, strip, n, np.nan, np.nan,
                    np.nan, np.nan,
                    np.nan, np.nan, 'N/A', 'N/A'
                ])

            # Collect widths for overall process test
            overall_widths.extend(widths)

        # Perform test for overall process data
        overall_widths = np.array(overall_widths)
        n_overall = len(overall_widths)
        if n_overall > 1:
            overall_mean = overall_widths.mean()
            overall_sd = overall_widths.std(ddof=1)
            standard_error_overall = overall_sd / np.sqrt(n_overall)

            # Calculate confidence intervals
            if target_sd is not None:
                # Z-test
                z_critical = stats.norm.ppf(1 - (1 - confidence_level) / 2)
                margin_of_error = z_critical * (target_sd / np.sqrt(n_overall))
                ci_lower = overall_mean - margin_of_error
                ci_upper = overall_mean + margin_of_error

                # Perform one-sample Z-test
                z_statistic_overall = (overall_mean - target_mean) / (target_sd / np.sqrt(n_overall))
                p_value_overall = 2 * (1 - stats.norm.cdf(abs(z_statistic_overall)))
                test_stat_overall = z_statistic_overall
                test_type_overall = 'Z-test'
            else:
                # t-test
                t_critical = stats.t.ppf(1 - (1 - confidence_level) / 2, df=n_overall - 1)
                margin_of_error = t_critical * standard_error_overall
                ci_lower = overall_mean - margin_of_error
                ci_upper = overall_mean + margin_of_error

                # Perform one-sample t-test
                t_statistic_overall = (overall_mean - target_mean) / standard_error_overall
                p_value_overall = 2 * stats.t.sf(abs(t_statistic_overall), df=n_overall - 1)
                test_stat_overall = t_statistic_overall
                test_type_overall = 't-test'

            # Determine significance level
            if p_value_overall <= 0.001:
                significance_overall = '***'
            elif p_value_overall <= 0.01:
                significance_overall = '**'
            elif p_value_overall <= 0.05:
                significance_overall = '*'
            else:
                significance_overall = 'ns'  # Not significant

            summary_data.append([
                process, 'overall', n_overall, overall_mean, overall_sd,
                ci_lower, ci_upper,
                test_stat_overall, p_value_overall, significance_overall, test_type_overall
            ])
        else:
            summary_data.append([
                process, 'overall', n_overall, np.nan, np.nan,
                np.nan, np.nan,
                np.nan, np.nan, 'N/A', 'N/A'
            ])

    # Create a DataFrame from the summary data
    summary_df = pd.DataFrame(summary_data, columns=[
        'process', 'strip', 'n', 'mean width (nm)', 'sd',
        'CI Lower', 'CI Upper',
        'test statistic', 'p-value', 'significance', 'test type'
    ])

    # Round numerical columns to 4 decimal places
    numerical_cols = [
        'mean width (nm)', 'sd', 'CI Lower', 'CI Upper',
        'test statistic', 'p-value'
    ]
    summary_df[numerical_cols] = summary_df[numerical_cols].round(4)

    # Save the DataFrame to a CSV file
    summary_df.to_csv(output_csv, index=False)

    return summary_df

In [103]:
# load the lumen datafile, already filtered to dark-adapted strips
lumen_data = pd.read_csv(f"./output/trial_{trial_number}/csv/lumen_{run_number}_dark.csv")

# Define the target mean and standard deviation from the literature
lit_mean = 4.7 # from the literature, 4.7 nm
lit_sd = 0.8 # from the literature, +/- 0.8 nm. Not super confident in this one.

# best guess populationg standard deviation, by taking the mean of the sample standard deviations?
pop_sd = lumen_data['lumen_width_nm'].std()


In [106]:


# Perform the statistical tests
summary_df = perform_stat_tests(
    filtered_df=lumen_data,
    width_column='lumen_width_nm',
    target_values=(lit_mean, None),
    confidence_level=0.95,
    output_csv='lumen_stat_test_summary.csv'
)

pd.set_option('display.width', 1000)

# filter only to strip == 'overall'
summary_df = summary_df[summary_df['strip'] == 'overall']
print(summary_df)

# save to disk 
summary_df.to_csv(f"./output/trial_{trial_number}/lumen_stat_test_summary.csv", index=False)

         process    strip   n  mean width (nm)      sd  CI Lower  CI Upper  test statistic  p-value significance test type
8   0_otsuOffset  overall  52           4.8588  0.8549    4.6208    5.0968          1.3397   0.1863           ns    t-test
17  1_otsuOffset  overall  52           4.7328  0.8374    4.4996    4.9659          0.2822   0.7790           ns    t-test
26  2_otsuOffset  overall  52           4.8588  0.8549    4.6208    5.0968          1.3397   0.1863           ns    t-test
35  3_otsuOffset  overall  52           4.7328  0.8374    4.4996    4.9659          0.2822   0.7790           ns    t-test
44  4_otsuOffset  overall  52           4.8588  0.8549    4.6208    5.0968          1.3397   0.1863           ns    t-test
53  5_otsuOffset  overall  52           4.7328  0.8374    4.4996    4.9659          0.2822   0.7790           ns    t-test


# import the membrane df and summarize the results

In [None]:
membrane_df = pd.read_csv(f"./output/trial_{trial_number}/csv/membrane_{run_number}.csv")
membrane_df.head()

# Inner membrane widths

In [107]:
# isolate the inner membrane data
inner_membrane_df = membrane_df[membrane_df['membrane_type'] == 'inner']

# Summarize inner membrane widths
inner_summary_df = summarize_widths(inner_membrane_df, 'membrane_width_nm', 'inner_membrane_width_summary.csv')


         process    strip  mean width (nm)    sd
0   0_otsuOffset      101            11.77  0.74
1   0_otsuOffset      106            10.91  0.60
2   0_otsuOffset      134            11.38  0.17
3   0_otsuOffset      135            11.37  1.14
4   0_otsuOffset      161            11.22  0.32
5   0_otsuOffset      176            11.79  0.37
6   0_otsuOffset      187            12.66  0.30
7   0_otsuOffset      229            10.83  0.44
8   0_otsuOffset      232            11.18  0.71
9   0_otsuOffset  overall            11.46  0.53
10  1_otsuOffset      101            11.90  0.70
11  1_otsuOffset      106            11.07  0.54
12  1_otsuOffset      134            11.60  0.09
13  1_otsuOffset      135            11.42  1.17
14  1_otsuOffset      161            11.41  0.34
15  1_otsuOffset      176            11.90  0.35
16  1_otsuOffset      187            12.74  0.35
17  1_otsuOffset      229            11.01  0.34
18  1_otsuOffset      232            11.34  0.73
19  1_otsuOffset  ov

In [110]:
# expected values for the membrane width
#11.2 to 11.7 nm
lit_mean = np.mean([11.2, 11.7])
print(lit_mean)

# Perform the statistical tests
summary_df = perform_stat_tests(
    filtered_df=inner_membrane_df,
    width_column='membrane_width_nm',
    target_values=(lit_mean, None),
    confidence_level=0.95,
    output_csv='lumen_stat_test_summary.csv'
)

pd.set_option('display.width', 1000)

# filter only to strip == 'overall'
summary_df = summary_df[summary_df['strip'] == 'overall']
print(summary_df)

# save to disk 
summary_df.to_csv(f"./output/trial_{trial_number}/inner_membrane_stat_test_summary.csv", index=False)

11.45
         process    strip   n  mean width (nm)      sd  CI Lower  CI Upper  test statistic  p-value significance test type
9   0_otsuOffset  overall  48          11.3690  0.7249   11.1585   11.5795         -0.7739   0.4429           ns    t-test
19  1_otsuOffset  overall  48          11.5168  0.7012   11.3132   11.7204          0.6600   0.5125           ns    t-test
29  2_otsuOffset  overall  48          11.3690  0.7249   11.1585   11.5795         -0.7739   0.4429           ns    t-test
39  3_otsuOffset  overall  48          11.5168  0.7012   11.3132   11.7204          0.6600   0.5125           ns    t-test
49  4_otsuOffset  overall  48          11.3690  0.7249   11.1585   11.5795         -0.7739   0.4429           ns    t-test
59  5_otsuOffset  overall  48          11.5168  0.7012   11.3132   11.7204          0.6600   0.5125           ns    t-test


# outer membrane widths


In [None]:
# isolate the outer membrane data
outer_membrane_df = membrane_df[membrane_df['membrane_type'] == 'outer']

# Summarize outer membrane widths
outer_summary_df = summarize_widths(outer_membrane_df, 'membrane_width_nm', 'outer_membrane_width_summary.csv')

# Lets provide some visual feedback, showing the contours of the membranes on the original roi images.

In [47]:

def getMembraneImage(process_folder: str, strip_name: str) -> np.ndarray:
    """
    Get the membrane image for a given process.
    
    Args:
        process_folder: The folder of the process (e.g., ./output/trial_1/processed_images/process_1).
        strip_name: The name of the strip image file (e.g., 'strip_101.png').
    
    Returns:
        The membrane image as a NumPy array.
    """
    # Get the path to the membrane image
    membrane_image_path = os.path.join(process_folder, 'membrane', strip_name)
    
    # Load the membrane image
    membrane_image = cv2.imread(membrane_image_path, cv2.IMREAD_GRAYSCALE)
    
    return membrane_image

def getRawImage(roi_folder: str, strip_name: str) -> np.ndarray:
    """
    Get the raw image for a given strip.
    
    Args:
        strip_name: The name of the strip.
    
    Returns:
        The raw image as a NumPy array, converted to RGB.
        
    """
    # Get the path to the raw image
    raw_image_path = os.path.join(roi_folder, strip_name)
    
    # Load the raw image
    raw_image = cv2.imread(raw_image_path, cv2.IMREAD_GRAYSCALE)
    
    # convert to a rgb image
    raw_image = cv2.cvtColor(raw_image, cv2.COLOR_GRAY2RGB)
    
    return raw_image


In [None]:
# look at the images in the poi folder
# D:\repos\gapfinder_images\output\trial_1\poi\106_otsuOffset\membrane\strip_101.png

# get the contents of the poi folder, but only the folder names
poi_folder = f"./output/trial_{trial_number}/processed_images"
roi_folder = f"./output/trial_{trial_number}/rois"

process_folders = [f.path for f in os.scandir(poi_folder) if f.is_dir()]

# get the strip filenames from the metadata bignine
metadata = pd.read_csv(f"./output/trial_{trial_number}/081624_rois_metadata_bignine.csv")
strip_filenames = metadata['strip_filename'].unique()

# create a dict of the roi_images and their filenames
roi_images = {}

for strip in strip_filenames:
    roi_images[strip] = getRawImage(roi_folder, strip)
    
print(len(roi_images))
for key in roi_images.keys():
    print(key)

# Produce images that show the contours on the raw images
for each of the processes, we will get the membrane image, get the contours, and then draw the contours on top of the original image.
Then save that image in the process folder, in the subfolder "contours". The filename will be the strip filename.

In [None]:

def getMembraneDf(poi_folder: str, strip: str) -> pd.DataFrame:
    """
    Get the membrane data for the POI.
    
    Args:
        poi_folder: The folder of the POI (e.g., ./output/trial_1/poi/process_1).
    
    Returns:
        A DataFrame containing the membrane data.
    """
    # Get the path to the membrane data file
    membrane_data_path = os.path.join(poi_folder, 'grana_data_membrane.csv')
    print(f"Membrane data path: {membrane_data_path}")
    # Load the membrane data
    membrane_data = pd.read_csv(membrane_data_path) 
    print(f"len(membrane_data): {len(membrane_data)}")
    # isolate the numeric portion of the strip 
    strip1 = int(strip.split('_')[-1].strip('.png')) 
    print(f"Strip: {strip1}")
    
    # filter to only include the strip
    membrane_data = membrane_data[membrane_data['strip'] == strip1]
    
    return membrane_data


def drawContours(membrane_image, raw_image: np.ndarray, color: tuple = (0, 255, 0), thickness: int = 1, df: pd.DataFrame = None) -> np.ndarray:
    """
    Draw the contours of the membrane on the raw image.
    
    Args:
        membrane_image: The membrane image as a NumPy array.
        raw_image: The raw image as a NumPy array.
        color: The color of the contours.
    
    Returns:
        The raw image with the contours drawn on it.
    """
    
    # Find the contours in the membrane image. Membrane is white, background is black
    contours, _ = cv2.findContours(membrane_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    
    # Draw the contours on the raw image (-1 means all contours)
    raw_image_with_contours = cv2.drawContours(raw_image.copy(), contours, -1, color, thickness)
    
    if df is not None:
        # get the peaks from the df
        peaks = df['peaks'].values
        # for each peak, plot the peak number at the y value of the peak
        for i, peak in enumerate(peaks):
            
            # draw a dashed blue line at the peak
            cv2.line(raw_image_with_contours, (0, peak), (raw_image_with_contours.shape[1], peak), (0, 0, 255), 1, cv2.LINE_8, 0)    
        
            # add a text label for the peak, in red
            cv2.putText(raw_image_with_contours, f"{i}", (raw_image_with_contours.shape[1] - 10, peak), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 1)
            

    return raw_image_with_contours

# Get the membrane image for the first process and strip
#first process in process_folders
process = process_folders[0]
strip = strip_filenames[0]
df = getMembraneDf(process, strip)

print(process)
membrane_image = getMembraneImage(process, strip)
print(membrane_image.shape)

# Get the raw image for the strip
raw_image = getRawImage(roi_folder, strip)
print(raw_image.shape)

# Draw the contours of the membrane on the raw image
raw_image_with_contours = drawContours(membrane_image, raw_image, color=(0, 255, 0), thickness=1)

# get the peak values for this strip and process from the membrane data
peaks = df['peaks'].values
print(peaks)

print(raw_image_with_contours.shape)
# Display the raw image with contours
plt.figure(figsize=(5, 5))

plt.imshow(raw_image_with_contours)
plt.axis('off')
plt.title('Raw Image with Contours')
plt.show()

# now do that for each process folder, for each strip name

In [None]:
from itertools import product

combinations = list(product(process_folders, strip_filenames))

for process, strip in combinations:
    membrane_image = getMembraneImage(process, strip)
    raw_image = getRawImage(roi_folder, strip)
    df = getMembraneDf(process, strip)
    raw_image_with_contours = drawContours(membrane_image, raw_image, color=(0, 255, 0), thickness=1)
    
    # create an output folder for the raw_vs_contours images
    raw_vs_contours_folder = os.path.join(process, 'raw_vs_contours')
    os.makedirs(raw_vs_contours_folder, exist_ok=True)
    raw_vs_contours_path = os.path.join(raw_vs_contours_folder, strip)
    
    # use opencv to write the image to the process_folder/contours folder
    contours_folder = os.path.join(process, 'contours')
    os.makedirs(contours_folder, exist_ok=True)
    contours_image_path = os.path.join(contours_folder, strip)

    cv2.imwrite(contours_image_path, raw_image_with_contours)
    print(f"Image written to {contours_image_path}")

    fig, axes = plt.subplots(1, 2, figsize=(8, 5))


    # Plot raw_image on the left
    axes[0].imshow(raw_image, cmap='gray')
    axes[0].axis('off')
    axes[0].set_title('Raw Image')

    # Plot raw_image_with_contours on the right
    axes[1].imshow(raw_image_with_contours)
    axes[1].axis('off')
    axes[1].set_title('Raw Image with Contours')

    if df is not None:
        # get the peaks from the df
        peaks = df['peaks'].values
        # for each peak, plot the peak number at the y value of the peak
        for i, peak in enumerate(peaks):
            # draw a dashed line at the peak
            axes[1].plot([0, raw_image_with_contours.shape[1]], [peak, peak], 'r--')
            # add a text label for the peak, in red
            axes[1].text(raw_image_with_contours.shape[1] + 1, peak, f"{i}", color='red', fontsize=12)
            
            # add the vertical line showing the peak width, in blue, at center of peak
            peak_width = df['membrane_width'].values[i]
            peak_center = raw_image.shape[1] // 2
            axes[1].plot([peak_center, peak_center], [peak - peak_width / 2, peak + peak_width / 2], 'b')
            
            # draw a horizontal line at the top and bottom of each of those peaks
            axes[1].plot([peak_center - 2.5, peak_center + 2.5], [peak - peak_width / 2, peak - peak_width / 2], 'b')
            axes[1].plot([peak_center -2.5, peak_center + 2.5], [peak + peak_width / 2, peak + peak_width / 2], 'b')
            

    plt.suptitle(f'Process: {process}, Strip: {strip}')
    plt.tight_layout()
    
    plt.savefig(raw_vs_contours_path)
    plt.show()