# ABBA cell count analysis

This notebook is the last step in the ABBA whole-brain cell counting analysis.  
It assumes you have done the following steps:
- Alignment of brain slices in ABBA, exported to a QuPath project.
- Detection of the cells of interest in QuPath. The detections should be exported to ```.csv``` files (one per slice) in a folder called ```results```. 
- If there are regions to exclude, you should have drawn them and exported to ```.txt``` files (one per slice) in a folder called ```regions_to_exclude```.

Run this notebook to load the cell counts and do analysis on them. 

## Before we start ...
### Set parameters

In [None]:
import os

# Name of the TOML configuration file; it is looked up inside DATA_ROOT (see `config_file` below in the notebook).
CONFIG_FILE_NAME = "braian_config.toml"                     # assumes the file is in DATA_ROOT directory

# Pick ONE experiment by leaving exactly one of the following assignments uncommented.
#   EXPERIMENT_DIRECTORY -> sub-directory identifying the experiment
#   USE_REMOTE_DATA      -> if True, it tries to read the data on the laboratory's server:
#                           DATA_ROOT and PLOTS_ROOT defined below are then replaced by
#                           the directories returned by BraiAn.remote_dirs(...)
# EXPERIMENT_DIRECTORY, USE_REMOTE_DATA = "p6", False
# EXPERIMENT_DIRECTORY, USE_REMOTE_DATA = "experiment", False
# EXPERIMENT_DIRECTORY, USE_REMOTE_DATA = "proof", False
# EXPERIMENT_DIRECTORY, USE_REMOTE_DATA = "rebased_on_mjd", False
# EXPERIMENT_DIRECTORY, USE_REMOTE_DATA = "ieg", False
# EXPERIMENT_DIRECTORY, USE_REMOTE_DATA = "cohort4iba1", False
# EXPERIMENT_DIRECTORY, USE_REMOTE_DATA = "p17iba1", False
# EXPERIMENT_DIRECTORY, USE_REMOTE_DATA = "cohort4gaba", False
# EXPERIMENT_DIRECTORY, USE_REMOTE_DATA = "p17gaba", False
# EXPERIMENT_DIRECTORY, USE_REMOTE_DATA = "APP_PNS/Cohort 4/GABA_cFos", True
# EXPERIMENT_DIRECTORY, USE_REMOTE_DATA = "APP_PNS/P17/cFos_Iba1", True
EXPERIMENT_DIRECTORY, USE_REMOTE_DATA = "Cariplo_NRe/IEGs Experiment", True

# ###################################### REMOTE DIRECTORIES #####################################
# Both values are only forwarded to BraiAn.remote_dirs(...) when USE_REMOTE_DATA is True.
IS_COLLABORATION_PROJ = False
COLLABORATION_DIRECTORY = os.path.join("Mathias Schmidt", "soumnya")

# ###################################### LOCAL DIRECTORIES ######################################
# Relative paths used when USE_REMOTE_DATA is False
# (presumably resolved against the notebook's working directory - TODO confirm).
# DATA_ROOT  = f"../data/experiments/sowmya/{EXPERIMENT_DIRECTORY}"
DATA_ROOT  = f"../data/experiments/{EXPERIMENT_DIRECTORY}"
PLOTS_ROOT = f"../plots/{EXPERIMENT_DIRECTORY}"

In [None]:
# ###################################### FILTER OPTIONS ######################################
# If True, every slice drops the regions whose non-overlapping markers are all equal to 1
# or whose area is not greater than 0.001.
REMOVE_SMALL_REGIONS_FROM_SLICES = False
# If True, every animal drops the regions for which at least one non-overlapping marker
# has a coefficient of variation (across slices) greater than CVAR_THRESHOLD.
REMOVE_HIGH_CV_REGIONS = False
CVAR_THRESHOLD = 1

# ###################################### PLOT OPTIONS ######################################
# Show the Allen Brain hierarchy as a graph (AllenBrain.plot_plotly_graph).
PLOT_ALLENBRAIN_HIERARCHY = False
# Show the density of the "root" region of the loaded animals (BraiAn.plot_region_density).
PLOT_ANIMALS_ROOTS = True
# Show BraiAn.plot_cv_above_threshold, using the threshold below as its `cv_threshold`.
PLOT_COEFFICIENT_OF_VARIATION = True
PLOT_COEFFICIENT_OF_VARIATION_THRESHOLD = 1

# ###################################### SAVE OPTIONS ######################################
# Write each aggregated animal brain to a .csv file in the output directory.
SAVE_ANIMALS = True
# Write each group's density table to "cell_counts_<group name>_density.csv".
SAVE_GROUPS = True

# Script's code

In [None]:
import os
import sys
from typing import List

project_path = os.path.dirname(os.path.abspath(os.getcwd()))
sys.path.append(project_path)
import BraiAn

In [None]:
# When reading remote data, BraiAn provides both the data root and the plots root.
if USE_REMOTE_DATA:
    DATA_ROOT, PLOTS_ROOT = BraiAn.remote_dirs(
        EXPERIMENT_DIRECTORY,
        IS_COLLABORATION_PROJ,
        COLLABORATION_DIRECTORY,
    )

# Everything the notebook reads or writes lives under DATA_ROOT.
config_file = os.path.join(DATA_ROOT, CONFIG_FILE_NAME)
data_input_path = os.path.join(DATA_ROOT, "QuPath_output")
data_output_path = os.path.join(DATA_ROOT, "BraiAn_output")

# Make sure the output locations exist before any later cell writes to them.
output_dir_is_missing = not os.path.exists(data_output_path)
if output_dir_is_missing:
    os.makedirs(data_output_path, exist_ok=True)
os.makedirs(PLOTS_ROOT, exist_ok=True)

In [None]:
import tomllib

# ######################################### LOAD CONFIG #########################################
# tomllib only accepts files opened in binary mode.
with open(config_file, "rb") as f:
    config = tomllib.load(f)

EXPERIMENT_NAME = config["experiment"]["name"]

ATLAS_VERSION = config["atlas"]["version"]
BRANCHES_TO_EXCLUDE = config["atlas"]["excluded-branches"]

BRAINS_AREA_KEY = config["brains"]["area-column"]
BRAINS_TRACER_KEYS = config["brains"]["tracer-columns"]
# One list of marker indices per "overlapping" comparison found in the [comparison] table.
# The positions written in the config are decremented by one to be used as list indices.
# The metric is matched case-insensitively, consistently with the comparison cell further
# down, and only after checking that the entry is a table that declares its markers.
BRAINS_OVERLAPPING_TRACERS = [[i_marker-1 for i_marker in comp["markers"]]
                              for comp in config["comparison"].values()
                              if isinstance(comp, dict)
                              and "markers" in comp
                              and str(comp.get("metric", "")).lower() == "overlapping"]
BRAINS_MARKERS = config["brains"]["markers"]
BRAINS_AGGREGATION_MODE = config["brains"]["slices-aggregation-mode"]   # available options are: 'sum', 'mean'/'avg', 'std', 'variation'/'cvar'

# Available options for the regions to plot are:
#   - "summary structures"
#   - "major divisions"
#   - "depth <n>"
#   - "structural level <n>"
#   - "smallest"
# where <n> is an integer of the depth/level desired
REGIONS_TO_PLOT_SELECTION_METHOD = config["comparison"]["regions-to-plot"]

from collections import namedtuple

# Lightweight record describing one experimental group declared in the config file.
GroupDirectory = namedtuple("GroupDirectory", ["id", "name", "dirs"])

# Every "group<digits>" entry of the [experiment] table becomes one GroupDirectory;
# its id is the number in the key decremented by one.
groups = [
    GroupDirectory(
        id=int(key.removeprefix("group")) - 1,
        name=section["name"],
        dirs=section["dirs"],
    )
    for key, section in config["experiment"].items()
    if key.startswith("group") and key.removeprefix("group").isdigit()
]

## The Allen Brain Atlas

We start by importing the mouse Allen Brain Atlas, in which we find information about all brain regions (their parent region and children regions in the brain hierarchy, for example).

In [None]:
# The ontology is the Allen "structure graph" with id=1, described at
# https://help.brain-map.org/display/api/Downloading+an+Ontology%27s+Structure+Graph
# StructureGraph id=1
path_to_allen_json = os.path.join(project_path, "data", "AllenMouseBrainOntology.json")
# NOTE(review): BraiAn.cache presumably downloads the URL to the given path only when the
# file is not there yet - confirm against its implementation.
BraiAn.cache(path_to_allen_json, "http://api.brain-map.org/api/v2/structure_graph_download/1.json")
# Build the hierarchy from the local JSON file, passing the branches to exclude and the
# atlas version read from the config file.
AllenBrain = BraiAn.AllenBrainHierarchy(path_to_allen_json, BRANCHES_TO_EXCLUDE, version=ATLAS_VERSION)

# Handy attributes/methods of the hierarchy, left here for interactive exploration:
#parent_region = AllenBrain.parent_region
#direct_subregions = AllenBrain.direct_subregions
#full_name = AllenBrain.full_name
#regions = AllenBrain.list_all_subregions("root", mode="depth")

In [None]:
match REGIONS_TO_PLOT_SELECTION_METHOD:
    case "summary structures":
        # selects the Summary Strucutures
        path_to_summary_structures = os.path.join(project_path, "data", "AllenSummaryStructures.csv")
        AllenBrain.select_from_csv(path_to_summary_structures)
    case "major divisions":
        AllenBrain.select_regions(BraiAn.MAJOR_DIVISIONS)
    case "smallest":
        AllenBrain.select_leaves()
    case s if s.startswith("depth"):
        n = REGIONS_TO_PLOT_SELECTION_METHOD.split(" ")[-1]
        try:
            depth = int(n)
        except Exception:
            raise Exception("Could not retrieve the <n> parameter of the 'depth' method for 'REGIONS_TO_PLOT_SELECTION_METHOD'")
        AllenBrain.select_at_depth(depth)
    case s if s.startswith("structural level"):
        n = REGIONS_TO_PLOT_SELECTION_METHOD.split(" ")[-1]
        try:
            level = int(n)
        except Exception:
            raise Exception("Could not retrieve the <n> parameter of the 'structural level' method for 'REGIONS_TO_PLOT_SELECTION_METHOD'")
        AllenBrain.select_at_structural_level(level)
    case _:
        raise Exception(f"Invalid value '{REGIONS_TO_PLOT_SELECTION_METHOD}' for REGIONS_TO_PLOT_SELECTION_METHOD")
selected_regions = AllenBrain.get_selected_regions()
print(f"You selected {len(selected_regions)} regions to plot.")

We can also visualize the hierarchy of brain regions as a network (a tree). **Note that running the cell below may take a few minutes**.

In [None]:
## Plot brain region hierarchy
## If you want to plot it, install PyDot (pydot)
# The figure is only built and shown when PLOT_ALLENBRAIN_HIERARCHY is enabled.
if PLOT_ALLENBRAIN_HIERARCHY:
    fig = AllenBrain.plot_plotly_graph()
    fig.show()

Based on the graph above, you might want to specify the regions on which you want to do further PLS analysis:  
*Note: to see more information about the regions, hover over them with your mouse.*

- Specify a level. Analysis can only be done on one level (slice) in the brain region.

- To exclude brain regions that belong to a certain branch, add the *abbreviated* nodes at the beginning of the branches to the list above.  
Example:  
```branches_to_exclude = ["retina", "VS"]```  
means that **all the subregions that belong to the retina and the ventricular systems** are excluded from the PLS analysis.

## Load data

Now, we're ready to read the ```.csv``` files with the cell counts, and also the exclusion files (if there were regions to exclude).  
Below, you have to specify:
- ```animals_root```: Absolute path to the folder that contains the animal folders.
- ```group_1_dirs```: A list of names of the folders corresponding to animals in **Group 1** (e.g., Control group). Indeed, it is necessary to store the results in individual folders for each animal.
- ```group_2_dirs```: A list of names of the folders corresponding to animals in **Group 2** (e.g., Stress group).
- ```group_1_name```: A meaningful string for Group 1.
- ```group_2_name```: A meaningful string for Group 2.
- ```area_key```: A string of the column in the ```.csv``` files that refers to the size of a brain area
- ```tracer_key```: A string of the column in the ```.csv``` files that refers to the tracer number used to highlight the marker
- ```marker```: A string of the marker we would like to highlight (e.g. CFos)

**TODO**: try changing this so that densities are obtained in mm² (converting from µm²).

Now, we load the Control and Stress results separately in two pandas dataframes, and save the results.

**Note**: regions to exclude are automatically excluded.

In [None]:
import re

def fix_overlap_detection_if_old_qpscript(sliced_brain: "BraiAn.SlicedBrain"):
    """Repair, in place, brains whose data was collected with an old QuPath script.

    With the old script the detections of the first marker are stored in a column named
    like "GABA-(cFos+GABA)" and are ~wrong: they must be summed with the overlaps between
    marker1 and marker2. For every marker with such a name, in each slice, the overlap
    column ("<old name>+<marker2>") is added to the marker column; then both columns,
    and the matching entries of `sliced_brain.markers`, are renamed to "<marker1>" and
    "<marker1>+<marker2>". Markers with any other name are left untouched.

    Args:
        sliced_brain: object exposing `markers` (a mutable sequence of column names)
            and `slices`, each slice having a pandas DataFrame in `data`.

    Raises:
        KeyError: if a slice lacks the overlap column of an old-style marker.
        StopIteration: if the overlap marker is not listed in `sliced_brain.markers`.
    """
    # Raw string: "\w", "\(" and "\+" are invalid escapes in a normal string literal.
    # see https://regex101.com/r/LLwGIl/1
    old_header = re.compile(r"(?P<m1>\w+)-\((?:(\w+)\+(?P=m1)|(?P=m1)\+(\w+))\)")
    for i in range(len(sliced_brain.markers)): # e.g. header="GABA-(cFos+GABA)"
        marker1_diff = sliced_brain.markers[i]
        found = old_header.search(marker1_diff)
        if found is None:
            continue
        marker1 = found.group("m1")                 # e.g. marker1="GABA"
        # only one of the two alternatives of the pattern captures the other marker
        marker2 = found.group(2) or found.group(3)  # e.g. marker2="cFos"
        old_overlap = f"{marker1_diff}+{marker2}"
        new_overlap = f"{marker1}+{marker2}"
        for brain_slice in sliced_brain.slices:
            brain_slice.data[marker1_diff] += brain_slice.data[old_overlap]
            brain_slice.data.rename(columns={marker1_diff: marker1, old_overlap: new_overlap}, inplace=True)
        sliced_brain.markers[i] = marker1
        overlap_i = next(j for j in range(len(sliced_brain.markers)) if sliced_brain.markers[j] == old_overlap)
        sliced_brain.markers[overlap_i] = new_overlap
        

In [None]:
# groups_slices[k] holds the sliced brains of the animals of groups[k] that were found on disk.
groups_slices: List[List[BraiAn.SlicedBrain]] = []

for group in groups:
    group_slices = []
    for animal_dir in group.dirs:
        animal_path = os.path.join(data_input_path, animal_dir)
        if not os.path.isdir(animal_path):
            print(f"WARNING: could not find the directory '{animal_dir}' in '{EXPERIMENT_DIRECTORY}'. Skipping this animal.")
            continue
        sliced_brain = BraiAn.SlicedBrain(animal_dir,
                                            animal_path,
                                            AllenBrain,
                                            BRAINS_AREA_KEY,
                                            BRAINS_TRACER_KEYS,
                                            BRAINS_MARKERS,
                                            *BRAINS_OVERLAPPING_TRACERS,
                                            area_units="µm2")
        fix_overlap_detection_if_old_qpscript(sliced_brain)
        group_slices.append(sliced_brain)
    # an (eventually empty) list is appended for every group, so that the positions
    # in groups_slices keep matching the positions in groups
    groups_slices.append(group_slices)
    # report the animals actually loaded, not the ones listed in the config file:
    # the two numbers differ whenever a directory was skipped above
    print(f"Imported all brain slices from {len(group_slices)} animals of {group.name} group.")

In [None]:
if REMOVE_SMALL_REGIONS_FROM_SLICES:
    for group_ in groups_slices:
        for animal_ in group_:
            # TODO: currently there is no differentiation between real markers and overlapping markers.
            # This bad workaround excludes all those markers having a '+' in the name.
            real_markers = [m for m in animal_.markers if "+" not in m]
            for s in animal_.slices:
                s._data = s.data
                # a region is kept when at least one real marker differs from 1 ...
                any_marker_not_one = (s.data[real_markers] != 1).any(axis=1)
                # ... and its area is greater than 0.001
                area_above_minimum = s.data.area > 0.001
                s.data = s.data[any_marker_not_one & area_above_minimum].copy(deep=True)

In [None]:
# Optional check: plot, for the animals of every group, the density of the whole-brain
# "root" region as computed by BraiAn.plot_region_density.
if PLOT_ANIMALS_ROOTS:
    region_name = "root"
    # each element of groups_slices (one list of sliced brains per group) is passed as a separate argument
    root_plot = BraiAn.plot_region_density(region_name, *groups_slices, width=1000, height=500)
    root_plot.show()

In [None]:
# Leftover debugging snippets; `cv_threshold` and `cvar_brains` are not defined in this notebook.
# print("N regions above threshold:", sum([(brain.data > cv_threshold).sum() for brain in cvar_brains]))
# print("N regions below threshold:", sum([(brain.data <= cv_threshold).sum() for brain in cvar_brains]))
# Optional check: show BraiAn.plot_cv_above_threshold for all the groups, using
# PLOT_COEFFICIENT_OF_VARIATION_THRESHOLD as the coefficient-of-variation threshold.
if PLOT_COEFFICIENT_OF_VARIATION:
    cvar_plot = BraiAn.plot_cv_above_threshold(AllenBrain, *groups_slices, cv_threshold=PLOT_COEFFICIENT_OF_VARIATION_THRESHOLD, width=1000, height=500)
    cvar_plot.show()

In [None]:
# Region acronym and animal name inspected by this cell and by the next one.
r = "ENTl6a"
a = "224.3CM"
# m = "CFos"
def check_animal_region(animal_name: str, region_acronym: str, marker=None):
    """Print per-marker statistics, across slices, of one region of one animal.

    The animal is searched by name in `groups_slices`; its hemispheres are merged and,
    for each requested marker, the number of slices containing the region, the mean,
    the standard deviation and the coefficient of variation (S.D. / mean) are printed.

    Args:
        animal_name: name of the animal, as stored in the `name` of its sliced brain.
        region_acronym: acronym of the brain region to summarise.
        marker: if given, only this marker is reported; otherwise all the animal's markers.

    Returns:
        None. If the animal or the region cannot be found, a message is printed instead.
    """
    try:
        sliced_brain = next(animal for group in groups_slices for animal in group if animal_name == animal.name)
    except StopIteration:
        print(f"Can't find animal '{animal_name}'")
        return
    sliced_brain = BraiAn.merge_sliced_hemispheres(sliced_brain)
    all_slices_df = sliced_brain.concat_slices()
    # the number of rows sharing the same index value is the number of slices with that region
    slices_per_area = all_slices_df.groupby(all_slices_df.index).count().iloc[:,0]
    if region_acronym not in slices_per_area.index:
        print(f"Can't find region '{region_acronym}' for animal '{animal_name}'")
        return
    markers = sliced_brain.markers if marker is None else [marker]
    brain_avg = BraiAn.AnimalBrain.from_slices(sliced_brain, mode="avg", hemisphere_distinction=False)
    brain_std = BraiAn.AnimalBrain.from_slices(sliced_brain, mode="std", hemisphere_distinction=False)
    for m in markers:
        region_avg = brain_avg[m][region_acronym]
        region_std = brain_std[m][region_acronym]
        # the coefficient of variation is undefined when the mean is zero
        region_cvar = region_std / region_avg if region_avg != 0 else float("nan")
        print(f"""Summary for brain region '{region_acronym}' of marker '{m}':
            - N slices: {slices_per_area[region_acronym]}
            - Mean: {region_avg:.2f} {m}/mm²,
            - S.D.: {region_std:.2f} {m}/mm²,
            - Coefficient of Variation: {region_cvar:.2f}
        """)
# for a in groups[-1].dirs:
check_animal_region(a, r) #, m)

In [None]:
# if you want to see the slices in specific, run this
import pandas as pd
# Collect, for animal `a`, the marker densities of region `r` in every slice that contains it.
slices = []
try:
    sliced_brain = next(animal for group in groups_slices for animal in group if a == animal.name)
except StopIteration:
    # next() found no animal with that name in any group
    print(f"Can't find animal '{a}'")
else:
    sliced_brain = BraiAn.merge_sliced_hemispheres(sliced_brain)
    # not named `slice`: at notebook level it would shadow the builtin for all the following cells
    for brain_slice in sliced_brain.slices:
        if r not in brain_slice.markers_density.index:
            continue
        region_densities = brain_slice.markers_density.loc[r].copy()
        region_densities.index += " density"
        region_densities.name = brain_slice.name
        slices.append(region_densities)
    if len(slices) == 0:
        print(f"Can't find region '{r}' for animal '{a}'")
# one column per slice; nothing is displayed when no slice contains the region
pd.concat(slices, axis=1) if len(slices) != 0 else None

In [None]:
# NOTE: brains are aggregated, and hence written, with hemisphere_distinction=False.
# If you desire to keep the Left/Right discrimination, change that argument
# (presumably to True - verify against AnimalBrain.from_slices).

# One AnimalBrain per sliced brain, grouped exactly like groups_slices.
groups_sum_brains: List[List[BraiAn.AnimalBrain]] = [
    [
        BraiAn.AnimalBrain.from_slices(
            sliced_brain,
            mode=BRAINS_AGGREGATION_MODE,
            hemisphere_distinction=False,
        )
        for sliced_brain in sliced_brain_list
    ]
    for sliced_brain_list in groups_slices
]
if SAVE_ANIMALS:
    for group_brains in groups_sum_brains:
        for animal in group_brains:
            animal.to_csv(data_output_path)

In [None]:
if REMOVE_HIGH_CV_REGIONS:
    for group_brains, group_slices in zip(groups_sum_brains, groups_slices):
        for animal_brain, animal_slices in zip(group_brains, group_slices):
            # coefficient of variation of each region across the slices of this animal
            cvars = BraiAn.AnimalBrain.from_slices(
                animal_slices,
                mode="cvar",
                hemisphere_distinction=animal_brain.is_split,
                min_slices=0,
            )
            # TODO: currently there is no differentiation between real markers and overlapping markers.
            # This bad workaround excludes all those markers having a '+' in the name.
            real_markers = [m for m in cvars.markers if "+" not in m]
            cvars_data = cvars.to_pandas()
            # a region is dispersive as soon as one real marker exceeds the threshold
            above_threshold = (cvars_data > CVAR_THRESHOLD)[real_markers]
            disperse_regions = cvars_data.index[above_threshold.any(axis=1)]
            print(f"removing {len(disperse_regions)}/{len(cvars_data)} dispersive regions from '{animal_slices.name}'")
            animal_brain.remove_region(*disperse_regions)

In [None]:
# One density AnimalGroup per experimental group, built from that group's aggregated brains.
groups_density: List[BraiAn.AnimalGroup] = [
    BraiAn.AnimalGroup(
        group.name,
        group_brains,
        metric="density",
        brain_onthology=AllenBrain,
        merge_hemispheres=True,
    )
    for group, group_brains in zip(groups, groups_sum_brains)
]

In [None]:
# Optionally write every density group to "cell_counts_<group name>_density.csv" inside
# the output directory; overwrite=True is passed, presumably to replace existing files.
if SAVE_GROUPS:
    for dgroup in groups_density:
        dgroup.to_csv(data_output_path, f"cell_counts_{dgroup.name}_density.csv", overwrite=True)

In [None]:
import copy

def make_filename(*ss: str):
    """Build a file name by joining the given parts with underscores.

    Empty parts are dropped and the spaces inside each remaining part are replaced
    by underscores.
    """
    parts = []
    for s in ss:
        if s == "":
            continue
        parts.append(s.replace(" ", "_"))
    return "_".join(parts)

# Produce one set of plots for every table found in the [comparison] section of the config:
#   - a table with two "groups" compares the same marker between two groups;
#   - a table with two "markers" compares two markers within each group.
if "comparison" in config:
    for comp in config["comparison"].values():
        if not isinstance(comp, dict):
            # scalar options of the section (e.g. "regions-to-plot") are not comparisons
            continue
        plots_output_dir = os.path.join(PLOTS_ROOT, comp["dir"])
        os.makedirs(plots_output_dir, exist_ok=True)
        if "groups" in comp and len(comp["groups"]) == 2:
            # group comparison
            metric = comp["metric"]
            # the checks below do not depend on the marker: they are done (and reported) once,
            # and before building groups for a metric that cannot be compared
            if metric.lower() == "overlapping":
                print("WARNING: comparison between groups for metric='Overlapping' not implemented yet!") # needs to specify which markers to overlap
                continue
            # groups are numbered from 1 in the config file
            right_i = comp["groups"][0]-1
            left_i = comp["groups"][1]-1
            right_g = BraiAn.AnimalGroup(groups[right_i].name, groups_sum_brains[right_i], metric=metric, brain_onthology=AllenBrain, merge_hemispheres=True)
            left_g = BraiAn.AnimalGroup(groups[left_i].name, groups_sum_brains[left_i], metric=metric, brain_onthology=AllenBrain, merge_hemispheres=True)
            if not right_g.is_comparable(left_g):
                print(f"WARNING: '{right_g}' is not comparable with '{left_g}'!")
                continue
            for marker in right_g.markers:
                if "+" in marker:
                    # TODO: currently there is no differentiation between real markers and overlapping markers.
                    # This bad workaround excludes all those markers having a '+' in the name.
                    continue
                # shallow copies, as in the marker comparison below
                right_data = copy.copy(right_g.mean[marker])
                left_data = copy.copy(left_g.mean[marker])
                filename = make_filename(metric.lower(), marker, f"{right_data.data_name}+{left_data.data_name}")
                right_data.plot(selected_regions,
                            plots_output_dir, filename, other=left_data, n=12,
                            cmin=None, cmax=None, cmap="magma_r",
                            orientation="frontal", show_text=False)
        elif "markers" in comp and len(comp["markers"]) == 2:
            # marker comparison
            metric = comp["metric"]
            for group, animals_sum in zip(groups, groups_sum_brains):
                group_name = group.name
                if len(animals_sum) == 0:
                    # every animal directory of the group was missing: there is nothing to plot
                    print(f"WARNING: no animals were loaded for group '{group_name}'. Skipping its markers comparison.")
                    continue
                # markers are numbered from 1 in the config file
                right_m, left_m = (animals_sum[0].markers[marker_i-1] for marker_i in comp["markers"])
                if metric.lower() == "overlapping":
                    animals = [brain.overlap_markers(right_m, left_m) for brain in animals_sum]
                else:
                    animals = animals_sum
                animal_group = BraiAn.AnimalGroup(group_name, animals, metric=metric, brain_onthology=AllenBrain, merge_hemispheres=True)
                # the copies are renamed, so that the group's own data keeps its name
                right_data = copy.copy(animal_group.mean[right_m])
                right_data.data_name = right_m
                left_data = copy.copy(animal_group.mean[left_m])
                left_data.data_name = left_m
                filename = make_filename(metric.lower(), group_name, f"{right_m}+{left_m}")
                # the title names the metric actually plotted, like the file name does
                right_data.plot(selected_regions,
                            plots_output_dir, filename, other=left_data, n=12,
                            cmin=None, cmax=None, cmap="magma_r",
                            orientation="frontal", show_text=False, title=f"{animal_group.name} {metric.lower()}")

In [None]:
import importlib
# Development helper: re-import every already loaded BraiAn module, so that edits to the
# package are picked up without restarting the kernel.
# A snapshot of sys.modules is iterated because reloading may change the original mapping.
__imported_modules = sys.modules.copy()
for module_name, module in __imported_modules.items():
    if module_name.startswith("BraiAn"): # or module_name.startswith("bgheatmaps"):
        try:
            # print("reaload:", module_name)
            importlib.reload(module)
        except ModuleNotFoundError:
            # modules that cannot be found again are simply left as they are
            pass