In [1]:

import pandas
import numpy as np
import matplotlib.pyplot as plt
import cv2
import os
import glob
from scipy.signal import find_peaks
from scipy.signal import peak_widths
import pandas as pd
import os
# strip,grana_height,grana_height_nm,num_lumen,repeat_distance,repeat_distance_nm,px_per_nm,nm_per_px, scale, scale_pixels
# Include the scale and scale_pixels in the data export. 
# Also, include for each lumen and membrane: width, 
import pandas as pd
import numpy as np

def get_image_list(directory:str, image_type:str) -> list:
    filenames = glob.glob(f"{directory}/*.{image_type}")
    
    return [os.path.normpath(f) for f in filenames]

def get_processed_image(image_path, trial, process_name, invert = False):
    filename = os.path.basename(image_path)
    image =  cv2.imread(f"./output/trial_{trial}/processed_images/masks/{process_name}/{os.path.basename(image_path)}", cv2.IMREAD_GRAYSCALE)
    
    if invert:
        image = cv2.bitwise_not(image)
        
    return image

def get_subdirectories(directory):
    return [f.path for f in os.scandir(directory) if f.is_dir()]

def get_process_names(directory):
    return [os.path.basename(f) for f in get_subdirectories(directory)]

def load_images_for_given_process(image_path, trial_number, process_name):
    """
        Load images from the given filenames. Returns raw image and the processed image.
        
        The processed image is inverted so that the lumen/stroma is black and the membrane as white.
        This is to aid in the contouring process, which sees the area of the image as the area within the contour.
    """
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    try:
        p_image = cv2.imread(f"./output/trial_{trial_number}/masks/{process_name}/{os.path.basename(image_path)}", cv2.IMREAD_GRAYSCALE)
    except:
        p_image = None

    if p_image is None:
        raise ValueError(f"Processed image not found for {image_path}")

    return image, cv2.bitwise_not(p_image)

def get_original_filename(image_name, metadata_filename):
    """
        Search the metadata file for the original filename of the image that the strip was taken from, and return it.
    """
    image_metadata = pd.read_csv(metadata_filename)
    
    image_name = os.path.normpath(image_name)
    print(image_name)
    
    #output\trial_1\rois\strip_101.png
    image_name = os.path.basename(image_name)
    
    #strip_101.png
    strip_number = int(image_name.strip(".png").split("_")[1])
    
    
    # if image_metadata is none, thorw an error
    if image_metadata is None:
        raise ValueError("Image metadata is None")
    
    image_df = image_metadata[image_metadata['strip'] == strip_number].to_dict(orient='records')[0]
    return image_df["filename"]


def get_image_conversion_factors(image_name:str, conversion_df_filename: str, metadata_filename: str) -> dict:
    """ 
        Returns the dict with the nm_per_pixel and pixel_per_nm values for the given image name.
    """

    image_raw_filename = get_original_filename(image_name, metadata_filename)

    conversion_df = pd.read_csv(conversion_df_filename)
    
    conversion_df['filename'] = conversion_df['filename'].map(os.path.normpath)
    
    filename = os.path.normpath(image_raw_filename)#
    
    image_conversion_factors = conversion_df[conversion_df['filename'] == filename].to_dict(orient='records')[0]

    return {"nm_per_pixel": image_conversion_factors['nm_per_pixel'], 
            "pixel_per_nm": image_conversion_factors['pixel_per_nm'], 
            "scale": image_conversion_factors['scale'],
            "scale_pixels": image_conversion_factors['scale_pixels']}


def convert_nm_to_pixel(nm_value, nm_per_pixel):
    return nm_value / nm_per_pixel

def convert_pixel_to_nm(pixel_value, pixel_per_nm):
    return pixel_value / pixel_per_nm

def create_image_dict(image_name, metadata:dict) -> dict:
    """ retrieve the images for the given image name, trial number and process name, and return them in a dictionary """
        
    # p_image has the lumen/stroma as black and the membrane as white. We want to extract the membrane
    image, p_image = load_images_for_given_process(image_name, metadata["trial_number"], metadata["process_name"])

    # start with a black image for saving the membrane contours to
    membrane_image = np.zeros_like(p_image)    
    
    # create the contours based on the processed image
    membrane_contours = get_filtered_contours(p_image, min_area=100, max_area=np.Infinity)
    
    # draw the contours on the image
    cv2.drawContours(membrane_image, membrane_contours, -1, (255, 0, 0), -1)
    
    # invert the image so that the membrane is white and the lumen/stroma is black
    lumen_image = cv2.bitwise_not(membrane_image)

    convert_dict = get_image_conversion_factors(image_name, metadata["conversion_df_filename"], metadata["metadata_filename"])

    strip_name = os.path.basename(image_name).split(".png")[0]

    image_dict = {
        "strip_name": strip_name,
        "image_name": image_name,
        "image": image,
        "p_image": p_image,
        "lumen": lumen_image,
        "membrane": membrane_image,
        "nm_per_pixel": convert_dict["nm_per_pixel"],
        "pixel_per_nm": convert_dict["pixel_per_nm"],
        "scale": convert_dict["scale"],
        "scale_pixels": convert_dict["scale_pixels"],
    }
    
    return image_dict


def get_filtered_contours(image, min_area=0, max_area=np.Infinity, contour_method : int = cv2.RETR_EXTERNAL, contour_approximation : int = cv2.CHAIN_APPROX_SIMPLE):
    """
        Calculate the contours of the white regions of the image, then filter the results
        according to the given min and max area. Return the filtered contours.
        
    """
    
    if image is None:
        raise ValueError("Image is None")
    
    if len(image.shape) > 2:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    contours, hierarchy = cv2.findContours(image, contour_method, contour_approximation)


    return [c for c in contours if min_area < cv2.contourArea(c) < max_area]

def calculate_peak_data(image_dict:dict, metadata: dict, peak_type: str = "membrane") -> dict:
    """
        Calculate the peaks and widths of the membrane histogram, and return the data in a dict.
    """
    image_name = image_dict["image_name"]
    nm_per_pixel = image_dict['nm_per_pixel']
    pixel_per_nm = image_dict['pixel_per_nm']
    input_image = image_dict[peak_type]
    chosen_height = metadata["chosen_height"]

    peak_data = {}

    histogram = np.sum(input_image, axis=1)

    peaks, _ = find_peaks(histogram)
    
    if peaks.size == 0:
        print(f"No peaks found for {image_name}")
        return None

    avg_peak_height = np.mean(histogram[peaks])
    half_height = avg_peak_height * chosen_height

    # we calculate the peak width at the half height of each peak, not at the 
    chosen_rel_height = half_height / avg_peak_height
    print(f"Chosen height: {chosen_height}, half height: {half_height}, avg peak height: {avg_peak_height}, chosen_rel_height: {chosen_rel_height}")

    peaks, _ = find_peaks(histogram, height=chosen_rel_height)

    # use a function to get width, width_height, left_ip, right ip for every individual peak
    # Take those and add them into the  lists: widths, width_heights, left_ips, right_ips
    # widths, width_heights, left_ips, right_ips = [], [], [], []
    
    # for p, peak in enumerate(peaks):
    #     # recalculate the peak width at the half height of each peak
    #     # this means we have to run peak_widths again, for each peak at the half height
    
    #     peak_width, width_height, left_ip, right_ip = peak_widths(histogram, [peak], rel_height=chosen_rel_height)
    #     print(f"Peak width: {peak_width}, width height: {width_height}, left ip: {left_ip}, right ip: {right_ip}")
        
    #     widths.append(peak_width[0])
    #     width_heights.append(width_height[0])
    #     left_ips.append(left_ip[0])
    #     right_ips.append(right_ip[0])
    widths, width_heights, left_ips, right_ips = peak_widths(histogram, peaks, rel_height=chosen_rel_height)

    peak_data["peaks"] = peaks
    peak_data["histogram"] = histogram
    peak_data["avg_peak_height"] = avg_peak_height
    peak_data["half_height"] = half_height
    peak_data["chosen_rel_height"] = chosen_rel_height
    peak_data["widths"] = widths
    peak_data["width_heights"] = width_heights
    peak_data["left_ips"] = left_ips
    peak_data["right_ips"] = right_ips
    
    return peak_data


def calculate_grana_height(membrane_data:dict) -> float:
    """
        Calculate the height of the grana stacks in px. Take the min of the left_ips and the
        max of the right_ips.
    """
        # Left ips: [24.36666667 34.17391304 48.41463415 62.39393939 77.22580645 91.818181[82]
    # Right ips: [29.16666667 43.40625    57.33333333 71.04166667 86.8        96.5       ]
    
    if membrane_data["right_ips"] is None or membrane_data["left_ips"] is None:
        print(f"No membrane data for {image_name}")
        return 0
    
    return np.max(membrane_data["right_ips"]) - np.min(membrane_data["left_ips"])


def calculate_repeat_distance(membrane_data:dict) -> float:
    """
        Calculate the distance between the membrane peaks in nm. We need to first calculate the
        float value for the center of the peaks, then calculate the difference between the peaks.
    """
    
    # todo: instead of calculating the diff from peaks, we
    # can calculate the diff from the left_ips and right_ips of the membrane. 
    # aget the center of the peaks using the left_ips and right_ips

    left_ips = membrane_data["left_ips"]
    right_ips = membrane_data["right_ips"]
    
    # zip together as pairs
    peaks = np.array(list(zip(left_ips, right_ips)))

    # the mean of each pair will get us the center of the peak. 
    center_points = np.mean(peaks, axis=1)
    
    # calculate the differences between the peaks
    repeat_distances = np.diff(center_points)
    
    # round them down to two significant figures
    repeat_distances = np.round(repeat_distances, 2)
    
    # flatten the numpy array into a list
    repeat_distances = repeat_distances.flatten()
        
    return repeat_distances

# calculate_values(membrane_data, lumen_data, image_dict, metadata)/
def calculate_grana_values(membrane_data: dict, lumen_data: dict, image_dict: dict) -> dict:
    """ 
        Take the peaks and widths of the membrane and lumen histograms, and return the grana values.
        peaks data:
            peak_data["peaks"] = peaks
            peak_data["histogram"] = histogram
            peak_data["avg_peak_height"] = avg_peak_height
            peak_data["half_height"] = half_height
            peak_data["chosen_rel_height"] = chosen_rel_height
            peak_data["widths"] = widths
            peak_data["width_heights"] = width_heights
            peak_data["left_ips"] = left_ips
            peak_data["right_ips"] = right_ips
        image_dict:
            "image_name": image_name,
            "image": image,
            "p_image": p_image,
            "lumen": lumen_image,
            "membrane": membrane_image,
            "nm_per_pixel": convert_dict["nm_per_pixel"],
            "pixel_per_nm": convert_dict["pixel_per_nm"],
            "scale": convert_dict["scale"],
            "scale_pixels": convert_dict["scale_pixels"],
    """
    # strip,grana_height,grana_height_nm,num_lumen,repeat_distance,repeat_distance_nm,px_per_nm,nm_per_px, scale, scale_pixels
    # Also, include for each lumen and membrane: width, 
    try:
        grana_values = {
            "strip": os.path.basename(image_dict["image_name"]).strip(".png").strip("strip_"),
            "image_name": image_dict["image_name"],
            # "image_dict": image_dict,
            "px_per_nm": image_dict['pixel_per_nm'],
            "nm_per_px": image_dict['nm_per_pixel'],
            "scale": image_dict['scale'],
            "scale_pixels": image_dict['scale_pixels'],
            # "membrane_data": membrane_data,
            # "lumen_data": lumen_data,
            "grana_height": calculate_grana_height(membrane_data),
            "num_lumen": len(lumen_data["peaks"]),
            "repeat_distance": calculate_repeat_distance(membrane_data),
            "lumen_width": lumen_data["widths"],
            "membrane_width": membrane_data["widths"],
            "lumen_width_heights": lumen_data['width_heights'],
            "membrane_width_heights": membrane_data['width_heights'],
        }
    except:
        print(f"Error calculating grana values for {image_dict['image_name']}")
        return None
    
    return grana_values


# def export_grana_values(grana_values: dict) -> dict:
    
#     if grana_values is None:
#         raise ValueError("grana_values is None")
    
#     return {
#         "strip": grana_values['strip_num'],
#         "grana_height" : grana_values['grana_height'],
#         "num_lumen": grana_values['num_lumen'],
#         "repeat_distance": grana_values['repeat_distance'],
#         "px_per_nm": grana_values['image_dict']['pixel_per_nm'],
#         "nm_per_px": grana_values['image_dict']['nm_per_pixel'],
#         "scale": grana_values['image_dict']['scale'],
#         "scale_pixels": grana_values['image_dict']['scale_pixels'],
#         "lumen_width": grana_values['lumen_width'],
#         "membrane_width": grana_values['membrane_width'],
#         "lumen_width_height": grana_values['lumen_data']['width_heights'],
#         "membrane_width_height": grana_values['membrane_data']['width_heights'],
#     }

def plot_histogram(grana_data: dict, metadata:dict, output_directory:str, peak_type: str = "membrane", display: bool = False):

    image_dict = grana_data["image_dict"]
    membrane_data = grana_data["membrane_data"]
    lumen_data = grana_data["lumen_data"]
    strip_name = image_dict["strip_name"]
    process_name = metadata["process_name"]
    trial_number = metadata["trial_number"]
    
    if peak_type == "membrane":
        peaks = membrane_data["peaks"]
        histogram = membrane_data["histogram"]
        half_height = membrane_data["half_height"]
        left_ips = membrane_data["left_ips"]
        right_ips = membrane_data["right_ips"]
    else:
        peaks = lumen_data["peaks"]
        histogram = lumen_data["histogram"]
        half_height = lumen_data["half_height"]
        left_ips = lumen_data["left_ips"]
        right_ips = lumen_data["right_ips"]

    ################ Plot it ################
    # plot the histogram, but make sure the background is white
    fig, ax = plt.subplots(1, 1, figsize=(5, 5), dpi=150, facecolor='w')
    plt.plot(histogram)

    # plot the peaks, and then a green line dropping down
    plt.plot(peaks, histogram[peaks], "x")
    for peak in peaks:
        plt.plot([peak, peak], [0, histogram[peak]], "--g")
        
    # use the left and right interpolated points to plot the width of the peak
    for left_ip, right_ip in zip(left_ips, right_ips):
        plt.plot([left_ip, right_ip], [half_height, half_height], "-r")

    plt.title(f"{peak_type} Histogram\n{strip_name}")
    plt.savefig(f"{output_directory}/histograms/{strip_name}_{peak_type}_histogram.png")
    
    if display:
        plt.show()
    else:
        plt.close()


def explode_grana_data(df) -> (pd.DataFrame, pd.DataFrame):
        """
        Explode the grana data! We need one row per lumen_width and repeat_distance in one df (dropping the membrane_width),
        and one row per membrane_width in another df (dropping the lumen_width and repeat_distance).
        """
        
        # Split the dataframes into two separate dataframes
        membrane_df = df.drop(columns=["lumen_width", "repeat_distance", "repeat_distance_nm"])
        lumen_df = df.drop(columns=["membrane_width", "repeat_distance_nm"])
        
        # Explode the membrane_df based on 'membrane_width'
        membrane_df = membrane_df.explode("membrane_width")
        
        # Zip the lumen_width and repeat_distance columns together as a list of tuples
        lumen_df["lumen_repeat_pairs"] = list(zip(lumen_df["lumen_width"], lumen_df["repeat_distance"]))
        
        # Explode the 'lumen_repeat_pairs' column into separate rows
        lumen_df = lumen_df.explode("lumen_repeat_pairs")
        
        # After exploding, split the tuple back into two columns
        lumen_df["lumen_width"], lumen_df["repeat_distance"] = zip(*lumen_df["lumen_repeat_pairs"])
        
        # Drop the temporary 'lumen_repeat_pairs' column
        lumen_df = lumen_df.drop(columns=["lumen_repeat_pairs"])
        
        return membrane_df, lumen_df



In [2]:


selected_trial = 1
base_path = f"./output/trial_{selected_trial}"
process_names = get_process_names(f"{base_path}/masks")
roi_directory = f"{base_path}/rois"
conversion_df_filename = "./metadata/image_scale_conversion.csv"
metadata_filename = f"{base_path}/081624_rois_metadata_bignine.csv"
processed_image_path = f"{base_path}/processed_images"

print(process_names)

['0_adaptive', '0_ostuOffset', '0_simple', '100_ostuOffset', '100_tozero', '101_ostuOffset', '101_tozero', '102_ostuOffset', '102_tozero', '103_ostuOffset', '103_tozero', '104_ostuOffset', '104_tozero', '105_ostuOffset', '105_tozero', '106_ostuOffset', '106_tozero', '107_ostuOffset', '107_tozero', '108_ostuOffset', '108_tozero', '109_ostuOffset', '109_tozero', '10_adaptive', '10_ostuOffset', '10_simple', '110_ostuOffset', '110_tozero', '111_ostuOffset', '111_tozero', '112_ostuOffset', '112_tozero', '113_ostuOffset', '113_tozero', '114_ostuOffset', '114_tozero', '115_ostuOffset', '115_tozero', '116_ostuOffset', '116_tozero', '117_ostuOffset', '117_tozero', '118_ostuOffset', '118_tozero', '119_ostuOffset', '119_tozero', '11_adaptive', '11_ostuOffset', '11_simple', '120_ostuOffset', '120_tozero', '121_ostuOffset', '121_tozero', '122_ostuOffset', '122_tozero', '123_ostuOffset', '123_tozero', '124_ostuOffset', '124_tozero', '125_ostuOffset', '125_tozero', '126_ostuOffset', '126_tozero', '12

In [3]:
process = '107_ostuOffset'

metadata = {
    "trial_number": selected_trial,
    "process_name": process,
    "conversion_df_filename" : conversion_df_filename,
    "metadata_filename" : metadata_filename,
    "images": get_image_list(directory=roi_directory, image_type="png"),
    "chosen_height": 0.5,
    }

print(f"Process: {process}")

output_directory = f"{processed_image_path}/{process}"
print(f"Creating output directory {output_directory}")
os.makedirs(f"{output_directory}/histograms/", exist_ok=True)

all_grana = []
all_grana_data= []

for image_number, image_name in enumerate(metadata["images"]):
    
    print(f"Processing image {image_number + 1} of {len(metadata['images'])}")  
    
    # load the images in their dict: image, p_image, lumen_image, membrane_image    
    image_dict = create_image_dict(image_name, metadata)

    membrane_data = calculate_peak_data(image_dict, metadata, peak_type="membrane")
    lumen_data = calculate_peak_data(image_dict, metadata, peak_type="lumen")
    
    if membrane_data is None or lumen_data is None:
        print(f"Error processing peak data for process: {process} on image: {image_name}")
        continue
    
    grana_values = calculate_grana_values(membrane_data, lumen_data, image_dict)
    
    if grana_values is not None:
        all_grana_data.append(grana_values)
        all_grana.append(grana_values)
    else:
        print(f"Error processing grana values for: {image_name}")    
    

# check to see if the all_grana_data is empty
if len(all_grana_data) == 0:
    print(f"No grana data found for {process}")
else:
    print(f"Found {len(all_grana_data)} grana data items for {process}")
    


Process: 107_ostuOffset
Creating output directory ./output/trial_1/processed_images/107_ostuOffset
Processing image 1 of 9
output\trial_1\rois\strip_101.png
Chosen height: 0.5, half height: 7650.0, avg peak height: 15300.0, chosen_rel_height: 0.5
Chosen height: 0.5, half height: 7650.0, avg peak height: 15300.0, chosen_rel_height: 0.5
Processing image 2 of 9
output\trial_1\rois\strip_106.png
Chosen height: 0.5, half height: 7650.0, avg peak height: 15300.0, chosen_rel_height: 0.5
Chosen height: 0.5, half height: 7340.357142857143, avg peak height: 14680.714285714286, chosen_rel_height: 0.5
Processing image 3 of 9
output\trial_1\rois\strip_134.png
Chosen height: 0.5, half height: 7650.0, avg peak height: 15300.0, chosen_rel_height: 0.5
Chosen height: 0.5, half height: 7235.625, avg peak height: 14471.25, chosen_rel_height: 0.5
Processing image 4 of 9
output\trial_1\rois\strip_135.png
Chosen height: 0.5, half height: 6283.928571428572, avg peak height: 12567.857142857143, chosen_rel_heig

In [4]:
# create a dataframe from the grana data
grana_df = pd.DataFrame(all_grana_data)

# Create a dataframe from the grana data
grana_df = pd.DataFrame(all_grana_data)

# add a column for identifying the membrane or lumen type
grana_df["type"] = "-"

grana_df.head()

Unnamed: 0,strip,image_name,px_per_nm,nm_per_px,scale,scale_pixels,grana_height,num_lumen,repeat_distance,lumen_width,membrane_width,lumen_width_heights,membrane_width_heights,type
0,101,output\trial_1\rois\strip_101.png,0.88,1.136364,500,440,73.481481,5,"[12.04, 14.29, 14.17, 14.8, 12.42]","[3.814285714285713, 3.7044642857142875, 4.0225...","[5.9671957671957685, 10.481250000000003, 10.68...","[7650.0, 7650.0, 7650.0, 7650.0, 7650.0]","[7650.0, 7650.0, 7650.0, 7650.0, 7650.0, 7650.0]",-
1,106,output\trial_1\rois\strip_106.png,1.095,0.913242,200,219,123.356265,7,"[14.81, 17.27, 18.41, 16.92, 16.76, 17.31, 14.5]","[5.614800759013285, 5.421921921921928, 5.71076...","[6.843068875326939, 11.542130365659773, 12.158...","[7650.0, 7650.0, 7650.0, 5992.5, 7650.0, 7140....","[7650.0, 7650.0, 7650.0, 7650.0, 7650.0, 7650....",-
2,134,output\trial_1\rois\strip_134.png,1.095,0.913242,200,219,71.613354,4,"[15.07, 17.34, 17.24, 13.79]","[4.810869565217388, 4.657142857142851, 4.78596...","[7.917701863354036, 12.592857142857149, 12.778...","[7650.0, 7650.0, 7012.5, 6630.0]","[7650.0, 7650.0, 7650.0, 7650.0, 7650.0]",-
3,135,output\trial_1\rois\strip_135.png,1.095,0.913242,200,219,87.452381,6,"[16.18, 25.83, 0.0, 22.79, 15.14, 2.79]","[5.532258064516128, 5.003252032520329, 4.15056...","[8.785714285714285, 12.501075268817203, 29.150...","[7650.0, 7650.0, 2167.5, 7650.0, 11602.5, 1147...","[7650.0, 7650.0, 7650.0, 7650.0, 7650.0, 3697....",-
4,161,output\trial_1\rois\strip_161.png,0.88,1.136364,500,440,108.508282,8,"[11.59, 14.05, 13.94, 14.05, 13.07, 11.7, 13.6...","[3.579192546583851, 4.223214285714285, 4.09090...","[5.997062279670974, 10.026785714285715, 9.6233...","[7650.0, 7650.0, 7650.0, 7650.0, 7012.5, 5100....","[7650.0, 7650.0, 7650.0, 7650.0, 7650.0, 7650....",-


In [5]:

# # just the membranes 

# Split the dataframes into two separate dataframes
membrane_df = grana_df.drop(columns=["lumen_width", "repeat_distance", "lumen_width_heights"])

# set the type for each dataframe
membrane_df["type"] = "membrane"

# Add an 'index' column to track the order of each item in the 'membrane_width' list
membrane_df["index"] = membrane_df.apply(lambda row: list(range(len(row["membrane_width"]))), axis=1)

# Explode the membrane_df based on 'membrane_width' and the new 'membrane_index' column
membrane_df = membrane_df.explode(["membrane_width", "membrane_width_heights","index"])

# Print the resulting dataframe
print("Membrane DataFrame with Index:")
print(membrane_df)

# add the process as a column to the dataframe
membrane_df["process"] = process

#save it to csv
print(f"Saving membrane data to {output_directory}/grana_data_membrane.csv")
membrane_df.to_csv(f"{output_directory}/grana_data_membrane.csv", index=False)


Membrane DataFrame with Index:
   strip                         image_name  px_per_nm  nm_per_px  scale  \
0    101  output\trial_1\rois\strip_101.png       0.88   1.136364    500   
0    101  output\trial_1\rois\strip_101.png       0.88   1.136364    500   
0    101  output\trial_1\rois\strip_101.png       0.88   1.136364    500   
0    101  output\trial_1\rois\strip_101.png       0.88   1.136364    500   
0    101  output\trial_1\rois\strip_101.png       0.88   1.136364    500   
..   ...                                ...        ...        ...    ...   
8    232  output\trial_1\rois\strip_232.png       0.88   1.136364    500   
8    232  output\trial_1\rois\strip_232.png       0.88   1.136364    500   
8    232  output\trial_1\rois\strip_232.png       0.88   1.136364    500   
8    232  output\trial_1\rois\strip_232.png       0.88   1.136364    500   
8    232  output\trial_1\rois\strip_232.png       0.88   1.136364    500   

    scale_pixels  grana_height  num_lumen membrane_width

In [6]:

# # just the lumens and repeat_distance

# Drop the 'membrane_width' column as not needed here
lumen_df = grana_df.copy()
lumen_df["type"] = "lumen"

# drop membrane_width
lumen_df = lumen_df.drop(columns=["membrane_width"])

# Add an 'index' column to track the order of each item in the 'lumen_width' list before exploding
lumen_df["index"] = lumen_df.apply(lambda row: list(range(len(row["lumen_width"]))), axis=1)

# Zip the lumen_width and repeat_distance columns together as a list of tuples, including the index
lumen_df["lumen_repeat_pairs"] = lumen_df.apply(lambda row: list(zip(row["lumen_width"], row["repeat_distance"], row["index"])), axis=1)

# Explode the 'lumen_repeat_pairs' column into separate rows
lumen_df = lumen_df.explode("lumen_repeat_pairs")

# After exploding, split the tuple back into three columns (lumen_width, repeat_distance, and index)
lumen_df["lumen_width"], lumen_df["repeat_distance"], lumen_df["index"] = zip(*lumen_df["lumen_repeat_pairs"])

# Drop the temporary 'lumen_repeat_pairs' column
lumen_df = lumen_df.drop(columns=["lumen_repeat_pairs"])

# Print the resulting dataframe
print("\nLumen DataFrame:")
print(lumen_df.head())

print(f"Saving lumen data to {output_directory}/grana_data_lumen.csv")
lumen_df["process"] = process
lumen_df.to_csv(f"{output_directory}/grana_data_lumen.csv", index=False)

# # create a dataframe from the grana data
# grana_df = pd.DataFrame(all_grana_data)

# # Example: Call the function with your grana DataFrame
# membrane_df, lumen_df = explode_grana_data(grana_df)

# # save the dataframes to csv
# print(f"Saving membrane data to {output_directory}/membrane_data.csv")
# membrane_df.to_csv(f"{output_directory}/membrane_data.csv", index=False)
# lumen_df.to_csv(f"{output_directory}/lumen_data.csv", index=False)

# print(lumen_df.head())

# plot the histograms of the membrane and lumen
for i, grana_data in enumerate(all_grana):
    image_dict = grana_data["image_dict"]
    membrane_data = grana_data["membrane_data"]
    lumen_data = grana_data["lumen_data"]
    image_name = image_dict["image_name"]
    strip_name = image_dict["strip_name"]
    process_name = metadata["process_name"]
    
    print(f"Processing {image_dict['image_name']}")
    
    plot_histogram(grana_data, metadata, output_directory, peak_type="membrane")
    plot_histogram(grana_data, metadata, output_directory, peak_type="lumen")
    
    # calculate the grana height
    grana_height = calculate_grana_height(membrane_data)
    
    left_ips = membrane_data["left_ips"]
    right_ips = membrane_data["right_ips"]

    print(f"Process: {process_name}")
    print(f"{strip_name} grana height: {grana_height} px")
    print(f"Left ips: {left_ips}")
    print(f"Right ips: {right_ips}")
    



Lumen DataFrame:
  strip                         image_name  px_per_nm  nm_per_px  scale  \
0   101  output\trial_1\rois\strip_101.png       0.88   1.136364    500   
0   101  output\trial_1\rois\strip_101.png       0.88   1.136364    500   
0   101  output\trial_1\rois\strip_101.png       0.88   1.136364    500   
0   101  output\trial_1\rois\strip_101.png       0.88   1.136364    500   
0   101  output\trial_1\rois\strip_101.png       0.88   1.136364    500   

   scale_pixels  grana_height  num_lumen  repeat_distance  lumen_width  \
0           440     73.481481          5            12.04     3.814286   
0           440     73.481481          5            14.29     3.704464   
0           440     73.481481          5            14.17     4.022523   
0           440     73.481481          5            14.80     4.447254   
0           440     73.481481          5            12.42     4.096618   

                        lumen_width_heights  \
0  [7650.0, 7650.0, 7650.0, 7650.0, 765

KeyError: 'image_dict'