In [9]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import glob
import datetime

In [10]:
import tkinter as tk
from tkinter import filedialog, messagebox

## Select folders

In [11]:
# A Tk root window must exist before any dialog can be shown; it is hidden
# immediately so that only the dialogs themselves appear.
root = tk.Tk()
root.withdraw()
messagebox.showinfo("Folder Selection", "Press OK and then select the master folder (raw images)")

photos_dir_master = filedialog.askdirectory(title="Select the master folder")
# The dialog gives back an empty value when it is cancelled. Stop here with a
# clear message instead of failing later with a confusing path error.
if not photos_dir_master:
    raise ValueError("No master folder selected; re-run this cell and choose a folder.")
photos_dir_master

'/Users/semenzalab/Desktop/master folder'

In [12]:
# Folder containing the csv files with results from ImageJ.
messagebox.showinfo("Folder Selection", "Press OK and then select the results folder (containing all csv files to be analyzed)")
results_dir = filedialog.askdirectory(title="Select the folder containing resulting csv files from ImageJ")
# The dialog gives back an empty value when it is cancelled. Without this check
# the output folder would silently be created relative to the working directory.
if not results_dir:
    raise ValueError("No results folder selected; re-run this cell and choose a folder.")
results_dir

'/Users/semenzalab/Desktop/master folder__bw_output/Results'

# Run cells below

In [13]:
# Folder the processed results are written to. It is created next to the
# results folder; if it already exists, a time-stamped sibling is used instead
# so that earlier output is not overwritten.
output_parent = os.path.dirname(results_dir)
output_dir = os.path.join(output_parent, "Processed Results_cellnum")
try:
    os.makedirs(output_dir)
except FileExistsError:
    # Zero-padded HHMMSS keeps the suffix unambiguous (e.g. 01:23:04 -> "012304").
    ctime = datetime.datetime.now()
    output_dir = os.path.join(output_parent,
                              f"Processed Results_cellnum_{ctime.strftime('%H%M%S')}")
    # The fallback folder has to be created as well, otherwise writing the
    # Excel files into it fails later on.
    os.makedirs(output_dir, exist_ok=True)

In [14]:
# Empty list; nothing is stored in it within this cell.
images_per_figure = []

# Every sub-directory of the master folder is collected as one figure name.
figures = []
for entry in os.listdir(photos_dir_master):
    if os.path.isdir(os.path.join(photos_dir_master, entry)):
        figures.append(entry)

In [15]:
# Paths of all csv files located directly inside the results folder.
csv_pattern = os.path.join(results_dir, "*.csv")
results = glob.glob(csv_pattern)

In [16]:
# An object is treated as a clump of several cells when its area is at least
# this many times the median object area of its image.
CLUMP_AREA_FACTOR = 1.8

# Per figure: {image name: clump-corrected cell count}
mod_cellnum_dct_sq = {}
# Per figure: {image name: ["<row index>-<estimated cells in clump>", ...]}
clump_dct_sq = {}

for figure in figures:
    # One csv per figure folder, named after the folder.
    csv_filename = os.path.join(results_dir, figure + ".csv")
    cellnums = pd.read_csv(csv_filename, index_col=0)

    image_paths = glob.glob(os.path.join(photos_dir_master, figure, "**", "*.tif"), recursive=True)
    # basename() instead of split("/") so the file name is extracted correctly
    # regardless of the path separator glob returns on the current platform.
    image_names_i = [os.path.basename(path) for path in image_paths]

    mod_cellnum_dct = {}
    clump_dct = {}
    for image in image_names_i:
        # Rows of this image are identified by their "outliers_<file name>" label.
        image_data = cellnums.loc[cellnums["Label"] == f"outliers_{image}", :]

        # Start from the raw object count. Re-initialising here also means a file
        # name found twice by the recursive search is not corrected twice.
        mod_cellnum_dct[image] = len(image_data)
        clump_dct[image] = []
        if image_data.empty:
            # No objects for this image: nothing to correct, and it avoids
            # computing the mean/median of an empty array.
            continue

        sizes = image_data["Area"].values
        image_mean_area = np.mean(sizes)
        image_median_area = np.median(sizes)
        clump_threshold = CLUMP_AREA_FACTOR * image_median_area

        for cell, area in image_data["Area"].items():
            if area >= clump_threshold:
                # Estimated cells in the clump = area // mean area; one of them is
                # already counted. Clamp at 0: when the mean is pulled above the
                # threshold by very large objects, area // mean can be 0 and the
                # correction would otherwise *reduce* the count.
                num_extra = max(int(area // image_mean_area) - 1, 0)
                mod_cellnum_dct[image] += num_extra
                clump_dct[image].append(f"{cell}-{num_extra + 1}")

    mod_cellnum_dct_sq[figure] = mod_cellnum_dct
    clump_dct_sq[figure] = clump_dct

In [17]:
# Replicate layout used for the re-formatted sheet: each group has replicates
# 1-3, each with images 1-3, and every image is named
# "<group><replicate>-<image>.tif" with single-digit numbers.
names = [f"{i}-{j}" for i in range(1,4) for j in range(1,4)]
names_br = list(range(1,4))
# Length of the trailing "<replicate>-<image>.tif" part that is stripped to get the group name.
REPLICATE_SUFFIX_LEN = len("1-1.tif")

for folder in mod_cellnum_dct_sq:
    # Built from (name, count) pairs so a figure without images yields an empty
    # two-column table instead of failing on the column rename.
    cellnum_df = pd.DataFrame(list(mod_cellnum_dct_sq[folder].items()),
                              columns=["image_name", "cell_nums"])

    clump_df = pd.DataFrame.from_dict(clump_dct_sq[folder], orient="index").reset_index()

    # Work on a lower-cased, name-indexed copy; cellnum_df itself is exported unchanged.
    df = cellnum_df.set_index("image_name")
    reform_idx = [j.lower() for j in df.index]
    df.index = reform_idx
    print(df)

    bio_groups = np.unique([j[:-REPLICATE_SUFFIX_LEN] for j in reform_idx])
    new_idx = [f"{bio_group}{name}.tif" for bio_group in bio_groups for name in names]

    # A missing image used to raise a bare KeyError and abort the export of all
    # remaining figures. Report it and leave the value empty (NaN) instead.
    missing = [new_id for new_id in new_idx if new_id not in df.index]
    if missing:
        print(f"Warning: {folder}: no counts found for {missing}; left empty in the re-formatted sheet")

    # One row per replicate, one column per image of that replicate.
    new_vals = df["cell_nums"].reindex(new_idx).to_numpy().reshape(-1,3)
    new_df = pd.DataFrame(new_vals, columns=["TR1", "TR2", "TR3"],
                          index=[f"{bio_group}{k}" for bio_group in bio_groups for k in names_br]).\
                            reset_index()
    new_df["average"] = new_df.loc[:, ["TR1", "TR2", "TR3"]].mean(axis=1)

    with pd.ExcelWriter(os.path.join(output_dir, f"{folder}_processed.xlsx")) as writer:
        cellnum_df.to_excel(writer, sheet_name="cell_nums")
        clump_df.to_excel(writer, sheet_name="clump_data")
        new_df.to_excel(writer, sheet_name="re-formatted")

        

                               cell_nums
fig.s3d-invasion-ev-re2-1.tif         94
fig.s3d-invasion-ev-re2-2.tif        107
fig.s3d-invasion-ev-re2-3.tif        105
fig.s3d-invasion-ev-re3-2.tif         69
fig.s3d-invasion-ev-re3-3.tif         57
fig.s3d-invasion-ev-re1-1.tif        147
fig.s3d-invasion-ev-re1-3.tif        132
fig.s3d-invasion-ev-re3-1.tif        102
fig.s3d-invasion-ev-re1-2.tif        144
fig.s3d-invasion-oe-re1-1.tif        345
fig.s3d-invasion-oe-re3-3.tif        190
fig.s3d-invasion-oe-re3-2.tif        236
fig.s3d-invasion-oe-re1-2.tif        305
fig.s3d-invasion-oe-re3-1.tif        226
fig.s3d-invasion-oe-re1-3.tif        278
fig.s3d-invasion-oe-re2-1.tif        253
fig.s3d-invasion-oe-re2-3.tif        233
fig.s3d-invasion-oe-re2-2.tif        360
                                          cell_nums
fig.3g-invasion-shhif1a-20%-re1-3.tif           100
fig.3g-invasion-shhif1a-20%-re3-1.tif            84
fig.3g-invasion-shhif1a-20%-re1-2.tif            92
fig.3g-invasi