Update LM generalist training (with new decoder setup) #379

Merged (11 commits, Feb 8, 2024)
2 changes: 1 addition & 1 deletion finetuning/evaluation/.gitignore
@@ -1 +1 @@
-figures/*
+*.png
173 changes: 173 additions & 0 deletions finetuning/evaluation/experiments/run_updated_unetr_evaluations.py
@@ -0,0 +1,173 @@
import os
import re
import subprocess
from glob import glob
from pathlib import Path

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt


CMD = "python submit_all_evaluation.py "
CHECKPOINT_ROOT = "/scratch/usr/nimanwai/experiments/micro-sam/unetr-decoder-updates/"
EXPERIMENT_ROOT = "/scratch/projects/nim00007/sam/experiments/new_models/test/unetr-decoder-updates"


def run_eval_process(cmd):
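    """Run one evaluation command in a subprocess and terminate it if it exceeds the timeout."""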
    proc = subprocess.Popen(cmd)
    try:
        outs, errs = proc.communicate(timeout=60)
    except subprocess.TimeoutExpired:
        proc.terminate()
        outs, errs = proc.communicate()


def run_specific_experiment(dataset_name, model_type, setup):
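    """Submit the evaluation for all checkpoints of the given decoder setup, for one dataset-model combination."""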
    all_checkpoint_dirs = sorted(glob(os.path.join(CHECKPOINT_ROOT, f"{setup}-*")))
    for checkpoint_dir in all_checkpoint_dirs:
        checkpoint_path = os.path.join(checkpoint_dir, "checkpoints", model_type, "lm_generalist_sam", "best.pt")

        experiment_name = checkpoint_dir.split("/")[-1]
        experiment_folder = os.path.join(EXPERIMENT_ROOT, experiment_name, dataset_name, model_type)

        cmd = CMD + f"-d {dataset_name} " + f"-m {model_type} " + "-e generalist "
        cmd += f"--checkpoint_path {checkpoint_path} "
        cmd += f"--experiment_path {experiment_folder}"
        print(f"Running the command: {cmd} \n")
        _cmd = re.split(r"\s", cmd)
        run_eval_process(_cmd)


def _get_plots(dataset_name, model_type):
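    """Collect the quantitative results (mean segmentation accuracy) for one dataset-model combination
    and plot them as barplots, with one subplot per experiment found under EXPERIMENT_ROOT.
    """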
    experiment_dirs = sorted(glob(os.path.join(EXPERIMENT_ROOT, "*")))

    # assign a fixed color to each setting, for consistent legends across all subplots
    palette = {"amg": "C0", "ais": "C1", "box": "C2", "i_b": "C3", "point": "C4", "i_p": "C5"}

    fig, ax = plt.subplots(1, len(experiment_dirs), figsize=(20, 10), sharex="col", sharey="row")

    for idx, _experiment_dir in enumerate(experiment_dirs):
        all_result_paths = sorted(glob(os.path.join(_experiment_dir, dataset_name, model_type, "results", "*")))
        res_list_per_experiment = []
        for i, result_path in enumerate(all_result_paths):
            # avoid using the grid-search parameters' files
            _tmp_check = os.path.split(result_path)[-1]
            if _tmp_check.startswith("grid_search_"):
                continue

            res = pd.read_csv(result_path)
            setting_name = Path(result_path).stem
            if setting_name == "amg" or setting_name.startswith("instance"):  # saving results from amg or ais
                res_df = pd.DataFrame(
                    {
                        "name": model_type,
                        "type": "amg" if setting_name == "amg" else "ais",
                        "results": res.iloc[0]["msa"]
                    }, index=[i]
                )
            else:  # saving results from iterative prompting (first and last iteration)
                prompt_name = Path(result_path).stem.split("_")[-1]
                res_df = pd.concat(
                    [
                        pd.DataFrame(
                            {
                                "name": model_type,
                                "type": prompt_name,
                                "results": res.iloc[0]["msa"]
                            }, index=[i]
                        ),
                        pd.DataFrame(
                            {
                                "name": model_type,
                                "type": f"i_{prompt_name[0]}",
                                "results": res.iloc[-1]["msa"]
                            }, index=[i]
                        )
                    ]
                )
            res_list_per_experiment.append(res_df)

        res_df_per_experiment = pd.concat(res_list_per_experiment, ignore_index=True)

        container = sns.barplot(
            x="name", y="results", hue="type", data=res_df_per_experiment, ax=ax[idx], palette=palette
        )
        ax[idx].set(xlabel="Experiments", ylabel="Segmentation Quality")
        ax[idx].legend(title="Settings", bbox_to_anchor=(1, 1))

        # add the scores on top of the bars
        for j in container.containers:
            container.bar_label(j, fmt='%.2f')

        # title for each subplot
        ax[idx].title.set_text(_experiment_dir.split("/")[-1])

    # remove the per-subplot legends and keep one common legend for the whole figure
    # (the loop variable is named `axis` so that it does not shadow the `ax` array above)
    all_lines, all_labels = [], []
    for axis in fig.axes:
        lines, labels = axis.get_legend_handles_labels()
        for line, label in zip(lines, labels):
            if label not in all_labels:
                all_lines.append(line)
                all_labels.append(label)
        axis.get_legend().remove()

    fig.legend(all_lines, all_labels)

    # apply the layout tweaks before the figure is shown and saved
    plt.tight_layout()
    plt.subplots_adjust(top=0.90, right=0.95)
    fig.suptitle(dataset_name, fontsize=20)
    plt.show()

    save_path = f"figures/{dataset_name}/{model_type}.png"

    try:
        plt.savefig(save_path)
    except FileNotFoundError:  # create the figure directory on the first run
        os.makedirs(os.path.split(save_path)[0], exist_ok=True)
        plt.savefig(save_path)

    plt.close()
    print(f"Plot saved at {save_path}")


def run_one_setup(all_dataset_list, all_model_list, setup):
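    """Run the evaluation for all dataset-model combinations of one decoder setup."""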
    for dataset_name in all_dataset_list:
        for model_type in all_model_list:
            run_specific_experiment(dataset_name=dataset_name, model_type=model_type, setup=setup)
        breakpoint()  # deliberate pause after each dataset, e.g. to check the submitted jobs before continuing


def for_all_lm(setup):
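    """Run the in-domain LM evaluations for the given decoder setup ("conv-transpose" or "bilinear")."""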
    assert setup in ["conv-transpose", "bilinear"]

    # let's run for in-domain
    run_one_setup(
        all_dataset_list=["tissuenet", "deepbacs", "plantseg/root", "livecell", "neurips-cell-seg"],
        all_model_list=["vit_t", "vit_b", "vit_l", "vit_h"],
        setup=setup
    )


def _run_evaluations():
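    """Switch to the parent `evaluation` folder (where `submit_all_evaluation.py` lives) and run the evaluations."""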
os.chdir("../")
# for_all_lm("conv-transpose")
for_all_lm("bilinear")


def _get_all_plots():
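    """Create the result plots for all dataset-model combinations."""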
    all_datasets = ["tissuenet", "deepbacs", "plantseg/root", "livecell", "neurips-cell-seg"]
    all_models = ["vit_t", "vit_b", "vit_l", "vit_h"]

    for dataset_name in all_datasets:
        for model_type in all_models:
            _get_plots(dataset_name, model_type)


def main():
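    """Entry point: run the evaluations and / or create the plots (toggle via the comments below)."""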
    # _run_evaluations()
    _get_all_plots()


if __name__ == "__main__":
    main()
6 changes: 3 additions & 3 deletions finetuning/evaluation/preprocess_datasets.py
@@ -752,8 +752,8 @@ def neurips_raw_trafo(raw):
 def for_deepbacs(save_dir):
     "Move the datasets from the internal split (provided by default in deepbacs) to our `slices` logic"
     for split in ["val", "test"]:
-        image_paths = os.path.join(ROOT, "deepbacs", "mixed", split, "source", "*")
-        label_paths = os.path.join(ROOT, "deepbacs", "mixed", split, "target", "*")
+        image_paths = sorted(glob(os.path.join(ROOT, "deepbacs", "mixed", split, "source", "*")))
+        label_paths = sorted(glob(os.path.join(ROOT, "deepbacs", "mixed", split, "target", "*")))

         os.makedirs(os.path.join(save_dir, split, "raw"), exist_ok=True)
         os.makedirs(os.path.join(save_dir, split, "labels"), exist_ok=True)
@@ -801,7 +801,7 @@ def main():
     # let's ensure all the data is downloaded
     download_all_datasets(ROOT)

-    # now let's save the slices as tif
+    # now let's save the slices as tif
     preprocess_lm_datasets()
     preprocess_em_datasets()

4 changes: 2 additions & 2 deletions finetuning/evaluation/run_all_evaluations.py
@@ -31,15 +31,15 @@ def run_one_setup(all_dataset_list, all_model_list, all_experiment_set_list, roi
 def for_all_lm():
     # let's run for in-domain
     run_one_setup(
-        all_dataset_list=["tissuenet", "deepbacs", "plantseg_root", "livecell"],
+        all_dataset_list=["tissuenet", "deepbacs", "plantseg/root", "livecell"],
         all_model_list=["vit_b", "vit_h"],
         all_experiment_set_list=["vanilla", "generalist", "specialist"],
         roi="lm"
     )

     # next, let's run for out-of-domain
     run_one_setup(
-        all_dataset_list=["covid_if", "plantseg_ovules", "hpa", "lizard", "mouse-embryo", "ctc", "neurips-cell-seg"],
+        all_dataset_list=["covid_if", "plantseg/ovules", "hpa", "lizard", "mouse-embryo", "ctc", "neurips-cell-seg"],
         all_model_list=["vit_b", "vit_h"],
         all_experiment_set_list=["vanilla", "generalist"],
         roi="lm"
10 changes: 9 additions & 1 deletion finetuning/evaluation/submit_all_evaluation.py
@@ -17,6 +17,7 @@ def write_batch_script(
 #SBATCH -p grete:shared
 #SBATCH -G A100:1
 #SBATCH -A gzz0001
+#SBATCH --constraint=80gb
 #SBATCH --job-name={inference_setup}

 source ~/.bashrc
@@ -130,9 +131,16 @@ def submit_slurm(args):
all_setups = ["precompute_embeddings", "evaluate_amg", "iterative_prompting"]
else:
all_setups = ["precompute_embeddings", "evaluate_amg", "evaluate_instance_segmentation", "iterative_prompting"]

# env name
if model_type == "vit_t":
env_name = "mobilesam"
else:
env_name = "sam"

for current_setup in all_setups:
write_batch_script(
env_name="sam",
env_name=env_name,
out_path=get_batch_script_names(tmp_folder),
inference_setup=current_setup,
checkpoint=checkpoint,
5 changes: 3 additions & 2 deletions finetuning/evaluation/util.py
@@ -86,10 +86,11 @@ def get_model(model_type, ckpt):


 def get_paths(dataset_name, split):
-    assert dataset_name in DATASETS
+    assert dataset_name in DATASETS, dataset_name

     if dataset_name == "livecell":
-        return _get_livecell_paths(input_folder=os.path.join(ROOT, "livecell"), split=split)
+        image_paths, gt_paths = _get_livecell_paths(input_folder=os.path.join(ROOT, "livecell"), split=split)
+        return sorted(image_paths), sorted(gt_paths)

     image_dir, gt_dir = get_dataset_paths(dataset_name, split)
     image_paths = sorted(glob(os.path.join(image_dir)))
@@ -44,9 +44,8 @@ def get_concat_lm_datasets(input_path, patch_shape, split_choice):
             n_samples=1000 if split_choice == "train" else 100
         ),
         datasets.get_livecell_dataset(
-            path=os.path.join(input_path, "livecell"), split=split_choice, patch_shape=patch_shape,
-            label_transform=label_transform, sampler=sampler, label_dtype=label_dtype, raw_transform=identity,
-            n_samples=1000 if split_choice == "train" else 100, download=True
+            path=os.path.join(input_path, "livecell"), split=split_choice, patch_shape=patch_shape, download=True,
+            label_transform=label_transform, sampler=sampler, label_dtype=label_dtype, raw_transform=identity
         ),
         datasets.get_deepbacs_dataset(
             path=os.path.join(input_path, "deepbacs"), split=split_choice, patch_shape=patch_shape,
@@ -41,7 +41,8 @@ def finetune_lm_generalist(args):
         use_sam_stats=True,
         final_activation="Sigmoid",
         use_skip_connection=False,
-        resize_input=True
+        resize_input=True,
+        use_conv_transpose=True
     )
     unetr.to(device)
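
Note: `use_conv_transpose=True` presumably switches the UNETR decoder to transposed-convolution upsampling, i.e. the "conv-transpose" setup that the evaluation script above compares against "bilinear".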

Expand Down