In [None]:
from pathlib import Path
import re
import tifffile
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


In [None]:
# Name of the trained model; it selects which prediction folder is explored below
model_name = 'model_gfp_dapi_bcat_lsd1_epoch_100'

# Directories that are searched for .czi input images
train_img_directory = Path("./train_validation_data/train_data")
test_img_directory = Path("./test_data")

# Directory (kept as a plain string) holding the predicted class .tiffs of this model
prediction_tiff_directory = f"prediction_tiffs/{model_name}"

# All .czi input images: the sorted training files first, followed by the sorted test files
f_images = [
    czi_path
    for directory in (train_img_directory, test_img_directory)
    for czi_path in sorted(directory.glob("*.czi"))
]

# All predicted class .tiffs of the selected model, in sorted order
f_classes = list(Path(prediction_tiff_directory).glob("*.tiff"))
f_classes.sort()

In [None]:
# Build a lookup: sample_id -> {'class': predicted class .tiff, 'image': input .czi}.
# The sample_id is the part of the file name that precedes the first underscore.
sample_id_to_files = {}

# Class predictions are registered before input images, so the key order of the
# dictionary follows f_classes first and then any additional ids found in f_images.
for slot, paths in (('class', f_classes), ('image', f_images)):
    for path in paths:
        key = path.name.split("_")[0]
        # Create the entry with both slots empty the first time an id is seen
        entry = sample_id_to_files.setdefault(key, {'class': None, 'image': None})
        entry[slot] = path

sample_ids = [*sample_id_to_files]
print(f"The following sample_ids will be analyzed: {sample_ids}")

In [None]:
def compute_class_percentages(predicted_classes, tumor_label=2, healthy_label=1):
    """Return the tumor and healthy share of the tissue area in a class map.

    Tissue is defined as every pixel labelled either tumor or healthy; pixels
    with any other label are ignored.

    Parameters
    ----------
    predicted_classes : array-like
        Class map that is compared element-wise against the two label values.
    tumor_label : int, default 2
        Label value counted as tumor.
    healthy_label : int, default 1
        Label value counted as healthy tissue.

    Returns
    -------
    tuple of float
        ``(tumor_percentage, healthy_percentage)``, each relative to the tissue
        pixel count. Both are NaN when the map contains no tissue pixels.
    """
    tumor_pixels = np.count_nonzero(predicted_classes == tumor_label)
    healthy_pixels = np.count_nonzero(predicted_classes == healthy_label)
    tissue_pixels = tumor_pixels + healthy_pixels

    # Without tissue pixels the ratio is undefined; report NaN explicitly
    # instead of triggering a 0/0 division.
    if tissue_pixels == 0:
        return np.nan, np.nan

    tumor_percentage = (tumor_pixels / tissue_pixels) * 100
    healthy_percentage = (healthy_pixels / tissue_pixels) * 100
    return tumor_percentage, healthy_percentage


stats = []

for sample_id, filepaths in sample_id_to_files.items():

    class_pred_filepath = filepaths['class']

    # An input image without a matching prediction has 'class' set to None;
    # there is nothing to quantify for it, so it is reported and skipped.
    if class_pred_filepath is None:
        print(f"Skipping sample_id {sample_id}: no class prediction .tiff found")
        continue

    # The mouse_id is the first run of digits inside the sample_id
    id_match = re.search(r'\d+', sample_id)
    if id_match is None:
        raise ValueError(
            f"Cannot derive a mouse_id from sample_id {sample_id!r}: it contains no digits"
        )
    mouse_id = int(id_match.group())

    # Read the predicted class map and quantify the tissue composition
    predicted_classes = tifffile.imread(class_pred_filepath)
    tumor_percentage, healthy_percentage = compute_class_percentages(predicted_classes)

    # One record per sample
    stats.append({
        "sample_id": sample_id,
        "mouse_id": mouse_id,
        "tumor_perc": tumor_percentage,
        "healthy_perc": healthy_percentage
    })

# Naming the columns keeps the frame well-formed even when no sample was processed
df = pd.DataFrame(stats, columns=["sample_id", "mouse_id", "tumor_perc", "healthy_perc"])

df

In [None]:
# Location of the metadata table that is keyed by mouse_id
mouse_id_csv_path = "./mouse_ids.csv"

# Load the comma-separated, UTF-8 encoded metadata table
df_mouse_id = pd.read_csv(mouse_id_csv_path, sep=",", encoding="UTF-8")

# Join the per-sample statistics in df with the metadata on the shared mouse_id
# column (pandas' default inner join: only mouse_ids present in both frames are kept)
merged_df = df.merge(df_mouse_id, on="mouse_id")

merged_df

#TODO: Save merged_df to .csv file after extracting Lgr5-GFP, LSD1 and Bcat intensity stats

In [None]:
# Plot colour assigned to each genotype; the colour is stored on merged_df itself
colors = {'Apcf Lsd1WT': 'blue', 'Apcf Lsd1f': 'red'}
merged_df['color'] = merged_df['genotype'].map(colors)

# Tumor vs. healthy percentage, drawn as one scatter series per genotype
fig, ax = plt.subplots(figsize=(10, 6))
for genotype in merged_df['genotype'].unique():
    is_genotype = merged_df['genotype'] == genotype
    subset = merged_df.loc[is_genotype]
    ax.scatter(
        subset['tumor_perc'],
        subset['healthy_perc'],
        c=subset['color'],
        label=genotype,
        alpha=0.6,
    )

# Axis labels, title, legend and grid
ax.set_xlabel('Tumor Percentage')
ax.set_ylabel('Healthy Percentage')
ax.set_title('Scatter Plot of Tumor and Healthy Percentages by Genotype')
ax.legend(title='Genotype')
ax.grid(True)
plt.show()

In [None]:
# Keep the identifier and metadata columns together with tumor_perc (healthy_perc is dropped).
# The explicit .copy() makes tumor_df an independent frame, so adding the colour
# column below does not raise pandas' SettingWithCopyWarning.
tumor_df = merged_df[['sample_id', 'mouse_id', 'tumor_perc', 'genotype', 'sex', 'days_post_TAM']].copy()

# Mapping genotypes to colors
colors = {'Apcf Lsd1WT': 'blue', 'Apcf Lsd1f': 'red'}
tumor_df['color'] = tumor_df['genotype'].map(colors)

# Creating the scatter plot: one series per genotype, tumor percentage per sample
plt.figure(figsize=(10, 6))
for genotype in tumor_df['genotype'].unique():
    subset = tumor_df[tumor_df['genotype'] == genotype]
    plt.scatter(subset['sample_id'], subset['tumor_perc'], c=subset['color'], label=genotype, alpha=0.6)

# Adding labels to the plot
plt.xlabel('Sample ID')
plt.ylabel('% of SR affected by tumor')
plt.title('Tumor burden by Genotype')

# Creating a legend from the colour mapping (one round marker per genotype in colors),
# which replaces the automatic per-series legend entries
handles = [plt.Line2D([0], [0], marker='o', color='w', markerfacecolor=colors[genotype], markersize=10) for genotype in colors]
labels = list(colors.keys())
plt.legend(handles, labels, title='Genotype')

# Displaying the plot
plt.grid(True)
plt.show()

In [None]:
# Keep the identifier and metadata columns together with healthy_perc (tumor_perc is dropped).
# The explicit .copy() makes healthy_df an independent frame, so adding the colour
# column below does not raise pandas' SettingWithCopyWarning.
healthy_df = merged_df[['sample_id', 'mouse_id', 'healthy_perc', 'genotype', 'sex', 'days_post_TAM']].copy()

# Mapping genotypes to colors
colors = {'Apcf Lsd1WT': 'blue', 'Apcf Lsd1f': 'red'}
healthy_df['color'] = healthy_df['genotype'].map(colors)

# Creating the scatter plot: one series per genotype, healthy percentage per sample
plt.figure(figsize=(10, 6))
for genotype in healthy_df['genotype'].unique():
    subset = healthy_df[healthy_df['genotype'] == genotype]
    plt.scatter(subset['sample_id'], subset['healthy_perc'], c=subset['color'], label=genotype, alpha=0.6)

# Adding labels to the plot
plt.xlabel('Sample ID')
plt.ylabel('Healthy Percentage')
plt.title('Scatter Plot of Healthy Percentages by Genotype')

# Creating a legend from the colour mapping (one round marker per genotype in colors),
# which replaces the automatic per-series legend entries
handles = [plt.Line2D([0], [0], marker='o', color='w', markerfacecolor=colors[genotype], markersize=10) for genotype in colors]
labels = list(colors.keys())
plt.legend(handles, labels, title='Genotype')

# Displaying the plot
plt.grid(True)
plt.show()