In [38]:
from pathlib import Path
import shutil
import os
import pandas as pd

In [39]:
# Root output directory: the folder that holds the results produced by 1_image_analysis
PARENT_FOLDER = Path("./output")

# Name of the user-specific subfolder located inside the output directory
USERNAME = "Andrew"

# Full path to this user's results, i.e. <PARENT_FOLDER>/<USERNAME>
results_directory = PARENT_FOLDER.joinpath(USERNAME)

In [40]:
def copy_csv_results(results_directory):
    """Collect the .csv files of every plate folder into one ``per_organoid_stats`` folder.

    Each immediate subfolder of ``results_directory`` is scanned (non-recursively)
    for ``*.csv`` files and every match is copied, metadata included, into
    ``results_directory/per_organoid_stats``. The destination folder is created
    when missing and is never treated as a source, so the function can be re-run
    on the same directory.

    Parameters
    ----------
    results_directory : str or pathlib.Path
        Folder whose subfolders contain the .csv files to collect.

    Returns
    -------
    None

    Raises
    ------
    FileNotFoundError
        If ``results_directory`` does not exist.
    """
    results_directory = Path(results_directory)
    csv_results_path = results_directory / "per_organoid_stats"

    # Gather the source folders, leaving out the destination itself: on a re-run it
    # already exists and copying its files onto themselves raises shutil.SameFileError.
    # Sorting makes the copy order (and thus which file wins on a name clash) deterministic.
    plate_folders = sorted(
        entry
        for entry in results_directory.iterdir()
        if entry.is_dir() and entry.name != csv_results_path.name
    )

    # Create the destination folder; an existing one is reused and only reported
    try:
        csv_results_path.mkdir(parents=True)
    except FileExistsError:
        print(f"Directory already exists: {csv_results_path}")

    # Copy every .csv file found directly inside each source folder
    for plate_folder in plate_folders:
        for file_path in plate_folder.glob("*.csv"):
            # glob can also match a directory named "*.csv"; copy2 only handles files
            if file_path.is_file():
                shutil.copy2(file_path, csv_results_path)

In [41]:
def extract_summary_stats(csv_path):
    """Summarise a per-organoid .csv results file per well and class.

    The file is read with pandas and must provide the columns ``well_id``,
    ``Class Name``, ``Area``, ``Area_filled``, ``Perimeter``, ``Circularity``,
    ``Eccentricity`` and ``Solidity``.

    Parameters
    ----------
    csv_path : str or pathlib.Path
        Path to the .csv file; its stem is reported as ``plate_name``.

    Returns
    -------
    pandas.DataFrame
        One row per (``well_id``, ``Class Name``) combination present in the file with:
        ``plate_name``, the mean of each morphology column, the per-well object
        counts ``nr_dead``, ``nr_organoids`` (class ``differentiated``) and
        ``nr_spheroids`` (class ``undifferentiated``), and the ratios
        ``dead_ratio``, ``organoid_ratio`` and ``spheroid_ratio``.
        A class that never occurs in the file is counted as 0. The organoid and
        spheroid ratios are NaN for a well without any differentiated or
        undifferentiated object (0 / 0).
    """
    # Classes the count columns and the ratios are derived from
    expected_classes = ['dead', 'differentiated', 'undifferentiated']

    # Read the .csv into a pandas DataFrame
    df = pd.read_csv(csv_path)

    # Count the objects per well and class: one row per well_id, one column per class
    grouped_counts = df.groupby(['well_id', 'Class Name']).size().unstack(fill_value=0)

    # A class absent from the whole file yields no column at all, which would make
    # the per-class lookups below fail with a KeyError; represent it as a zero count.
    for class_name in expected_classes:
        if class_name not in grouped_counts.columns:
            grouped_counts[class_name] = 0
    grouped_counts = grouped_counts.sort_index(axis=1)

    # Columns to calculate the average for
    columns_to_average = ['Area', 'Area_filled', 'Perimeter', 'Circularity', 'Eccentricity', 'Solidity']

    # Mean of each morphology column per well and class
    average_values = df.groupby(['well_id', 'Class Name'])[columns_to_average].mean().reset_index()

    # Attach the per-well class counts to every (well_id, class) row
    df_merged = average_values.merge(grouped_counts, on='well_id', how='left')

    # Keep each class count only on the rows of that class, zero elsewhere
    for class_name in grouped_counts.columns:
        df_merged[class_name] = df_merged[class_name].where(df_merged['Class Name'] == class_name, 0)

    # For the expected classes, broadcast the count back to every row of the well
    # (the maximum per well_id is the count itself, since all other rows hold 0)
    for column in expected_classes:
        df_merged[column] = df_merged.groupby('well_id')[column].transform('max')

    # Rename the count columns
    df_merged = df_merged.rename(columns={
        'dead': 'nr_dead',
        'differentiated': 'nr_organoids',
        'undifferentiated': 'nr_spheroids'
    })

    # Number of organoids plus spheroids per well, shared by two of the ratios below
    nr_organoids_and_spheroids = df_merged["nr_organoids"] + df_merged["nr_spheroids"]

    # Fraction of dead objects among all counted objects
    df_merged["dead_ratio"] = df_merged["nr_dead"] / (df_merged["nr_dead"] + nr_organoids_and_spheroids)

    # Fraction of organoids among organoids + spheroids
    df_merged["organoid_ratio"] = df_merged["nr_organoids"] / nr_organoids_and_spheroids

    # Fraction of spheroids among organoids + spheroids
    df_merged["spheroid_ratio"] = df_merged["nr_spheroids"] / nr_organoids_and_spheroids

    # The plate name is the file name without its final extension
    plate_name = Path(csv_path).stem

    # Add 'plate_name' as the left-most column
    df_merged.insert(0, 'plate_name', plate_name)

    return df_merged

In [42]:
# Gather the per-organoid .csv results of every plate into results_directory/per_organoid_stats
copy_csv_results(results_directory=results_directory)

In [43]:
# Summarise a single plate. The path is built from results_directory so that it
# follows the PARENT_FOLDER / USERNAME settings instead of duplicating them.
csv_path = results_directory / "per_organoid_stats" / "P1.2023-09-29-11-56-39.csv"
summary_df = extract_summary_stats(csv_path)
summary_df

Unnamed: 0,plate_name,well_id,Class Name,Area,Area_filled,Perimeter,Circularity,Eccentricity,Solidity,nr_dead,nr_organoids,nr_spheroids,dead_ratio,organoid_ratio,spheroid_ratio
0,P1.2023-09-29-11-56-39,A01,dead,150.885246,150.918033,44.425950,0.932545,0.618440,0.970952,61,8,5,0.824324,0.615385,0.384615
1,P1.2023-09-29-11-56-39,A01,differentiated,171.250000,171.250000,49.802670,0.861291,0.749284,0.950065,61,8,5,0.824324,0.615385,0.384615
2,P1.2023-09-29-11-56-39,A01,undifferentiated,159.400000,159.400000,44.824978,0.937261,0.632854,0.962320,61,8,5,0.824324,0.615385,0.384615
3,P1.2023-09-29-11-56-39,A02,dead,131.236842,131.236842,41.108018,0.928146,0.613568,0.922080,38,19,10,0.567164,0.655172,0.344828
4,P1.2023-09-29-11-56-39,A02,differentiated,253.947368,253.947368,61.565755,0.841735,0.686854,0.942884,38,19,10,0.567164,0.655172,0.344828
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
63,P1.2023-09-29-11-56-39,D05,differentiated,188.000000,188.000000,49.656854,0.958097,0.661720,1.000000,10,1,17,0.357143,0.055556,0.944444
64,P1.2023-09-29-11-56-39,D05,undifferentiated,203.705882,203.705882,49.310245,0.948121,0.522036,0.980393,10,1,17,0.357143,0.055556,0.944444
65,P1.2023-09-29-11-56-39,D06,dead,124.090909,124.090909,39.131890,0.965005,0.561212,0.961805,11,1,17,0.379310,0.055556,0.944444
66,P1.2023-09-29-11-56-39,D06,differentiated,202.000000,202.000000,52.384776,0.925020,0.807886,0.990196,11,1,17,0.379310,0.055556,0.944444
