## Set parameters

In [None]:
# Experiment sub-directory to analyse.
# One of: "experiment", "proof", "rebased_on_ss" or "rebased_on_mjd".
EXPERIMENT_DIRECTORY = "rebased_on_mjd"

# If False, the notebook tries to read the data on the laboratory's server.
USE_LOCAL_DATA = False

# ###################################### LOCAL DIRECTORIES ######################################
# Local defaults; they are replaced by server paths further down when USE_LOCAL_DATA is False.
DATA_ROOT = "../data/experiments/soumnya/" + EXPERIMENT_DIRECTORY
PLOTS_ROOT = "../plots/soumnya/" + EXPERIMENT_DIRECTORY + "/"

In [None]:
# User-tunable settings read by the cells below: region selection, PLS parameters and per-plot options.
# ####################################### GENERAL OPTIONS #######################################
BRANCHES_TO_EXCLUDE = ["retina", "VS", "grv", "fiber tracts", "CB"]
NORMALIZATION = "Density"                           # call get_normalization_methods() on an AnimalGroup object to know its available normalization methods
MIN_AREA = 0.0                                      # area in mm². If a region of one animal is smaller, that same region won't be considered in the PLS
                                                    # That is because the PLS only considers the brain regions that appear in every animal of the groups
REGIONS_TO_PLOT_SELECTION_METHOD = "summary structures"         # Available options are: "summary structures", "major divisions", "depth <n>", "structural level <n>"
                                                    # where <n> is an integer of the depth/level desired
SAVED_PLOT_EXTENSION = ".html"                      # '.html' for interactive plot
                                                    # '.svg' for vectorized image
                                                    # '.png'/'.jpg'/... for rasterized image

# ######################################### PLS OPTIONS #########################################
PLS_SALIENCE_THRESHOLD = 1.5                        # Only brain regions with a salience higher than this threshold are shown. 2 is the significance threshold.
PLS_RANK = 1
PLS_NUM_BOOTSTRAP = 5000
PLS_NUM_PERMUTATIONS = 5000
PLOT_DISTRIBUTION_OF_SINGULAR_VALUES = True

# ##################################### SALIENCE SCORE PLOT #####################################
SHOW_SALIENCE_SCORES_PLOT = True
SAVE_SALIENCE_SCORES_PLOT = True
SALIENCE_TITLE_TEXT_SIZE = 40
SALIENCE_AXIS_TEXT_SIZE = 22
SALIENCE_USE_ACRONYMS = True
SALIENCE_USE_ACRONYMS_IN_MJD = False
SALIENCE_MJD_BG_OPACITY = 0.3
SALIENCE_WIDTH = 1000
SALIENCE_BARHEIGHT = 30

# ########################################## PIE CHART ##########################################
PIE_SAVE_PLOT = True
PIE_SHOW_PLOT = False
PIE_USE_ACRONYMS = False
PIE_HOLE = 0.4                                          # a value between 0 (no hole) and 1 (just a hole, no plot)
PIE_TEXT_SIZE = 25

# ########################################## BAR PLOT ###########################################
BAR_SAVE_PLOT = True
BAR_SHOW_PLOT = False
BAR_ANIMAL_SIZE = 8
BAR_TITLE_TEXT_SIZE = 40
BAR_AXIS_TEXT_SIZE = 22
BAR_HEIGHT = 30
BAR_WIDTH = 1_500
BAR_TITLE = ""
BAR_USE_ACRONYMS = True

# ###################################### CORRELATION MATRIX #####################################
MATRIX_SAVE_PLOT = False
MATRIX_SHOW_PLOT = False
MATRIX_CELL_HEIGHT = 5
MATRIX_CELL_RATIO = 1 # 3/2
MATRIX_MIN_PLOT_HEIGHT = 500

# ######################################## CORR. NETWORK ########################################
NETWORK_P_CUTOFF = 0.05                                 # 1 if you don't want to filter by p-value
NETWORK_R_CUTOFF = 0.8
NETWORK_USE_NEGATIVE_LINKS = True
NETWORK_USE_ISOLATED_VERTICES = True

# ######################################## CHORD DIAGRAM ########################################
CHORD_SAVE_PLOT = False
CHORD_SHOW_PLOT = True
CHORD_PLOT_SIZE = 1200
CHORD_NO_BACKGROUND = False
CHORD_REGIONS_SIZE = 15
CHORD_REGIONS_FONT_SIZE = 10
CHORD_MAX_EDGE_WIDTH = 5
CHORD_USE_WEIGHTED_EDGE_WIDTHS = True
CHORD_USE_COLORSCALE_EDGES = True
CHORD_COLORSCALE = "RdBu_r"                             # see https://plotly.com/python/builtin-colorscales/
CHORD_COLORSCALE_MIN = -1
CHORD_BOTTOM_ANNOTATIONS = dict(
    annotation1 = "Dark grey nodes are regions with insufficient data to compute cross correlation",
    annotation2 = "Light grey nodes are regions with no correlation with others above the threshold",
    annotation3 = "This is the third annotation",
    # add as many annotations as desired, following the format of the entries above:
    # annotation<k> = "<annotation>"
)
# ###############################################################################################
from plotly.colors import DEFAULT_PLOTLY_COLORS
from collections import namedtuple
# Small record pairing a group's name with the colour used for it in the plots.
GroupInfo = namedtuple("GroupInfo", ["name", "colour"])

In [None]:
# SOUMNYA FEMALES+MALES - 2 Groups {Stress|Control}
# SHILA - 2 Groups {Control|Stress+Resilient}
# NOTE: every group-selection cell assigns these same three names, so the cell run last decides the comparison.
group_folder = "C-S"
group_1_info = GroupInfo("Control", DEFAULT_PLOTLY_COLORS[4])
group_2_info = GroupInfo("Stress", DEFAULT_PLOTLY_COLORS[5])

In [None]:
# Females only: Control vs. Stress.
# NOTE: every group-selection cell assigns these same three names, so the cell run last decides the comparison.
group_folder = "CF-SF"
group_1_info = GroupInfo("Control (Females)", DEFAULT_PLOTLY_COLORS[0])
group_2_info = GroupInfo("Stress (Females)", DEFAULT_PLOTLY_COLORS[1])

In [None]:
# Males only: Control vs. Stress.
# NOTE: every group-selection cell assigns these same three names, so the cell run last decides the comparison.
group_folder = "CM-SM"
group_1_info = GroupInfo("Control (Males)", DEFAULT_PLOTLY_COLORS[2])
group_2_info = GroupInfo("Stress (Males)", DEFAULT_PLOTLY_COLORS[3])

## Script's code

In [None]:
import os
import pandas as pd
import sys

# Parent folder of the current working directory (presumably the project root, since the
# notebook lives one level below it); it is added to sys.path so the package import that follows can be resolved.
project_path = os.path.split(os.path.abspath(os.getcwd()))[0]
sys.path.append(project_path)
import BraiAn

In [None]:
if not USE_LOCAL_DATA:
    match sys.platform:
        case "darwin":
            mnt_point = "/Volumes/Ricerca/"
            
        case "linux":
            mnt_point = "/run/user/1000/gvfs/smb-share:server=ich.techosp.it,share=ricerca/"
        case "win32":
            mnt_point = "\\\\ich.techosp.it\\Ricerca\\"
        case _:
            raise Exception(f"Can't find the 'Ricerca' folder in the server for '{sys.platform}' operative system. Please report the developer (Carlo)!")
    if not os.path.isdir(mnt_point):
        raise Exception(f"Could not read '{mnt_point}'. Please be sure you are connected to the server.")
    DATA_ROOT  = os.path.join(mnt_point, "Lab Matteoli", "Silva", "collaborations", "Mathias Schmidt", "soumnya", "data", EXPERIMENT_DIRECTORY)
    PLOTS_ROOT = os.path.join(mnt_point, "Lab Matteoli", "Silva", "collaborations", "Mathias Schmidt", "soumnya", "results", EXPERIMENT_DIRECTORY, "plots")

data_input_path = os.path.join(DATA_ROOT, "BraiAn_output")
data_output_path = os.path.join(data_input_path, group_folder)
plots_output_path = os.path.join(PLOTS_ROOT, group_folder)

if not(os.path.exists(data_output_path)):
    os.makedirs(data_output_path, exist_ok=True)

if not(os.path.exists(plots_output_path)):
    os.makedirs(plots_output_path, exist_ok=True)

In [None]:
# Ontology file source: https://help.brain-map.org/display/api/Downloading+an+Ontology%27s+Structure+Graph
# (StructureGraph id=1)
allen_data_dir = os.path.join(project_path, "data")
path_to_allen_json = os.path.join(allen_data_dir, "AllenMouseBrainOntology.json")
# Build the hierarchy from the ontology file, passing the branches listed in BRANCHES_TO_EXCLUDE.
AllenBrain = BraiAn.AllenBrainHierarchy(path_to_allen_json, BRANCHES_TO_EXCLUDE)

In [None]:
match REGIONS_TO_PLOT_SELECTION_METHOD:
    case "summary structures":
        # selects the Summary Strucutures
        path_to_summary_structures = os.path.join(project_path, "data", "AllenSummaryStructures.csv")
        AllenBrain.select_from_csv(path_to_summary_structures)
    case "major divisions":
        AllenBrain.select_regions(BraiAn.MAJOR_DIVISIONS)
    case s if s.startswith("depth"):
        n = REGIONS_TO_PLOT_SELECTION_METHOD.split(" ")[-1]
        try:
            depth = int(n)
        except Exception:
            raise Exception("Could not retrieve the <n> parameter of the 'depth' method for 'REGIONS_TO_PLOT_SELECTION_METHOD'")
        AllenBrain.select_at_depth(depth)
    case s if s.startswith("structural level"):
        n = REGIONS_TO_PLOT_SELECTION_METHOD.split(" ")[-1]
        try:
            level = int(n)
        except Exception:
            raise Exception("Could not retrieve the <n> parameter of the 'structural level' method for 'REGIONS_TO_PLOT_SELECTION_METHOD'")
        AllenBrain.select_at_structural_level(level)
    case _:
        raise Exception(f"Invalid value '{REGIONS_TO_PLOT_SELECTION_METHOD}' for REGIONS_TO_PLOT_SELECTION_METHOD")
selected_regions = AllenBrain.get_selected_regions()
print(f"You selected {len(selected_regions)} regions to do PLS analysis over.")

In [None]:
# Load each group from its "cell_counts_<group name>.csv" file in data_input_path, then call
# remove_smaller_subregions() on it with the MIN_AREA threshold.
group_1_csv = "cell_counts_{}.csv".format(group_1_info.name)
group_1 = BraiAn.AnimalGroup.from_csv(group_1_info.name, data_input_path, group_1_csv)
group_1.remove_smaller_subregions(MIN_AREA, selected_regions, AllenBrain)

group_2_csv = "cell_counts_{}.csv".format(group_2_info.name)
group_2 = BraiAn.AnimalGroup.from_csv(group_2_info.name, data_input_path, group_2_csv)
group_2.remove_smaller_subregions(MIN_AREA, selected_regions, AllenBrain)

# The two groups must be comparable before running the PLS on them.
if not group_1.is_comparable(group_2):
    raise ImportError(
        "Group 1 and Group 2 are not comparable!\n"
        "Please check that you're reading two groups that normalized on the same brain regions and on the same marker"
    )

The data are stored in ```group_1.data``` and ```group_2.data```:

In [None]:
# Show the data of the first group as the cell's output.
group_1.data

# Partial Least Squares  

The analysis done below is taken from the tutorial written by [Krishnan et al.](https://www.sciencedirect.com/science/article/pii/S1053811910010074).  
Run the 2 cells below to get started.

In [None]:
# Create a PLS object from the two groups, the selected regions and the normalization method
pls = BraiAn.PLS(group_1, group_2, selected_regions, NORMALIZATION)

# Show the matrix X (as the cell's output)
pls.X

In [None]:
# Show the matrix Y: one indicator column per distinct value of pls.y, relabelled with the group names.
# NOTE(review): the mapping assumes pls.y encodes group 2 as 0 and group 1 as 1 — confirm against BraiAn.PLS.
pd.get_dummies(pls.y).rename(columns={0: group_2.name, 1: group_1.name})

The two matrices printed above (X and Y) illustrate the data on which the PLS is done.  
- ```X:``` The rows in this matrix are the mice. The columns in the matrix are the regions selected for analysis. The values in the matrix are the **normalized values of marked cells in that region, relative to the whole brain.** 
The normalization methods are either:
  + Density
  + Percentage (on the total number of detected marked cells outside of excluded regions)
  + RelativeDensity
- ```Y:``` The rows in this matrix are the mice. The columns in the matrix are the 2 groups. **A value in this matrix is 1 if the mouse belongs to the specified group**.

In brief, PLS analyzes the relationship (correlation) between the columns of ```X``` and ```Y```. In our specific case, there will be 2 important outputs:
- **Salience scores**: Each brain region has a salience score. A high salience score means that the brain region explains much of the correlation between ```X``` and ```Y```.  
- **Singular values**: These are the singular values of the correlation matrix $R = Y^TX$ (i.e. the square roots of the eigenvalues of $R^TR$).

## Random permutations to see whether we can differentiate signal from noise. 
Here, we randomly shuffle the group to which a mouse belongs, and calculate the singular values of the permuted dataset.  
From [Krishnan et al.](https://www.sciencedirect.com/science/article/pii/S1053811910010074):  
> The set of all the (permuted) singular values provides a sampling distribution of the singular values under the null hypothesis and, therefore can be used as a null hypothesis test.

*Note: running the cell below will take a few minutes.*

In [None]:
# Permutation test: compute the singular values of PLS_NUM_PERMUTATIONS permuted datasets.
print("Randomly permuting singular values {} times...".format(PLS_NUM_PERMUTATIONS))
permutation_result = pls.randomly_permute_singular_values(PLS_NUM_PERMUTATIONS)
s, singular_values = permutation_result

# Optionally plot the permuted singular values against the first singular value of the PLS object.
if PLOT_DISTRIBUTION_OF_SINGULAR_VALUES:
    observed_singular_value = pls.s[0]
    fig = BraiAn.plot_permutation(observed_singular_value, singular_values, PLS_NUM_PERMUTATIONS)
    fig.show()

In [None]:
# p-value under H0: fraction of permutations whose first singular value is strictly greater than s[0]
exceeding_permutations = (singular_values[:,0] > s[0]).sum()
p = exceeding_permutations / PLS_NUM_PERMUTATIONS
print(f"p-value = {p!s}")

## Bootstrap to identify stable salience scores

Here, we use [bootstrapping](https://en.wikipedia.org/wiki/Bootstrapping_(statistics)) (= sampling of the mice in the dataset, with replacement) to get an estimate of which salience scores are stable.

From [Krishnan et al.](https://www.sciencedirect.com/science/article/pii/S1053811910010074):  
> When a vector of saliences is considered generalizable and is kept for further analysis, we need to identify its elements that are stable through resampling. In practice, the stability of an element is evaluated by dividing it by its standard error. [...] To estimate the standard errors, we create bootstrap samples which are obtained by sampling with replacement the observations in $X$ and $Y$ (Efron and Tibshirani, 1986). A salience standard error is then estimated as the standard error of the saliences from a large number of these bootstrap samples (say 1000 or 10000). **The ratios are akin to a Z-score, therefore when they are larger than 2 the corresponding saliences are considered significantly stable.**

*Note: Running the cell below will take a few minutes.*

In [None]:
# Bootstrap the salience scores (PLS_NUM_BOOTSTRAP resamples) and keep both returned results.
print("Bootstrapping salience scores {} times...".format(PLS_NUM_BOOTSTRAP))
bootstrap_result = pls.bootstrap_salience_scores(PLS_RANK, PLS_NUM_BOOTSTRAP)
u_salience_scores, v_salience_scores = bootstrap_result

In [None]:
# Regions above the salience threshold, as a table with 'acronym' and 'salience_score' columns
regions_above_threshold = pls.above_threshold(PLS_SALIENCE_THRESHOLD)
salient_regions = regions_above_threshold.reset_index().rename(
    columns={"index": "acronym", 0: "salience_score"}
)

# File-name friendly version of the threshold (e.g. 1.5 -> "1_5"); reused by later cells
pls_salience_threshold_str = str(PLS_SALIENCE_THRESHOLD).replace(".", "_")

# 1) CSV with the salient regions only
salient_regions_file = (
    f"PLS_{group_1.marker}_{NORMALIZATION}"
    f"_salient_regions_above_{pls_salience_threshold_str}.csv"
)
BraiAn.save_csv(salient_regions, data_output_path, salient_regions_file.lower(), overwrite=True)

# 2) CSV with the salience score of ALL the regions
pls_filename = f"PLS_{group_1.marker}_{NORMALIZATION}" "_salience_scores.csv"
all_salience_scores = v_salience_scores.rename(columns={0: "salience_score"})
BraiAn.save_csv(all_salience_scores, data_output_path, pls_filename.lower(), overwrite=True)

In [None]:
# Plot PLS salience scores
fig = BraiAn.plot_salient_regions(salient_regions, AllenBrain,
                                    title=f"Salient regions (|score| >= {PLS_SALIENCE_THRESHOLD})",
                                    title_size=SALIENCE_TITLE_TEXT_SIZE, axis_size=SALIENCE_AXIS_TEXT_SIZE,
                                    use_acronyms=SALIENCE_USE_ACRONYMS, use_acronyms_in_mjd=SALIENCE_USE_ACRONYMS_IN_MJD,
                                    mjd_opacity=SALIENCE_MJD_BG_OPACITY,
                                    width=SALIENCE_WIDTH, barheight=SALIENCE_BARHEIGHT)

if SAVE_SALIENCE_SCORES_PLOT:
    if not(os.path.exists(plots_output_path)):
        os.mkdir(plots_output_path)
    plot_filename = f"PLS_{group_1.marker}_{NORMALIZATION}{SAVED_PLOT_EXTENSION}".lower()
    plot_filepath = os.path.join(plots_output_path, plot_filename)
    match SAVED_PLOT_EXTENSION.lower():
        case ".html":
            fig.write_html(plot_filepath, config=dict(toImageButtonOptions=dict(format="svg")))
        case _:
            fig.write_image(plot_filepath)

if SHOW_SALIENCE_SCORES_PLOT:
    fig.show()

In [None]:
# Export the data of both groups, restricted to the salient regions, as a comma-separated CSV.
salient_acronyms = salient_regions.acronym.array
prism_data = BraiAn.as_prism_data(NORMALIZATION, group_1, group_2, AllenBrain).loc[salient_acronyms]
prism_file = "prism_{}_{}_{}_above_{}.csv".format(
    group_folder, group_1.marker, NORMALIZATION, pls_salience_threshold_str
)
BraiAn.save_csv(prism_data.swaplevel(), data_output_path, prism_file.lower(), sep=",", overwrite=True)

# Plotting

In [None]:
# Pie chart of ALL the regions that have a salience score.
# The selection method with underscores instead of spaces is reused in the plot file names.
regions_to_plot_selection_method_str = REGIONS_TO_PLOT_SELECTION_METHOD.replace(" ", "_")
all_scored_regions = v_salience_scores.index.values
fig = BraiAn.plot_pie(
    all_scored_regions,
    AllenBrain,
    use_acronyms=PIE_USE_ACRONYMS,
    hole=PIE_HOLE,
    line_width=1,
    text_size=PIE_TEXT_SIZE,
)

if PIE_SAVE_PLOT:
    plot_filename = (
        f"pls_all_regions_piechart_{group_folder}_{group_1.marker}_{NORMALIZATION}"
        f"_{regions_to_plot_selection_method_str}{SAVED_PLOT_EXTENSION}"
    ).lower()
    plot_filepath = os.path.join(plots_output_path, plot_filename)
    if SAVED_PLOT_EXTENSION.lower() == ".html":
        # interactive page; its image-export button is configured for SVG
        fig.write_html(plot_filepath, config=dict(toImageButtonOptions=dict(format="svg")))
    else:
        fig.write_image(plot_filepath)
if PIE_SHOW_PLOT:
    fig.show()

In [None]:
# Pie chart of the salient regions only (those above PLS_SALIENCE_THRESHOLD).
regions_to_plot_selection_method_str = REGIONS_TO_PLOT_SELECTION_METHOD.replace(" ", "_")
salient_acronyms = salient_regions.acronym.array
fig = BraiAn.plot_pie(
    salient_acronyms,
    AllenBrain,
    use_acronyms=PIE_USE_ACRONYMS,
    hole=PIE_HOLE,
    line_width=1,
    text_size=PIE_TEXT_SIZE,
)

if PIE_SAVE_PLOT:
    plot_filename = (
        f"pls_{pls_salience_threshold_str}_piechart_{group_folder}_{group_1.marker}_{NORMALIZATION}"
        f"_{regions_to_plot_selection_method_str}{SAVED_PLOT_EXTENSION}"
    ).lower()
    plot_filepath = os.path.join(plots_output_path, plot_filename)
    if SAVED_PLOT_EXTENSION.lower() == ".html":
        # interactive page; its image-export button is configured for SVG
        fig.write_html(plot_filepath, config=dict(toImageButtonOptions=dict(format="svg")))
    else:
        fig.write_image(plot_filepath)
if PIE_SHOW_PLOT:
    fig.show()

In [None]:
# Bar plot comparing the two groups on the salient regions, coloured with each group's colour.
fig = BraiAn.plot_groups(
    NORMALIZATION, AllenBrain, group_1, group_2,
    selected_regions=salient_regions.acronym.array,
    plot_title=BAR_TITLE,
    title_size=BAR_TITLE_TEXT_SIZE,
    axis_size=BAR_AXIS_TEXT_SIZE,
    animal_size=BAR_ANIMAL_SIZE,
    use_acronyms=BAR_USE_ACRONYMS,
    # NOTE(review): this is a one-shot generator; fine as long as plot_groups iterates it only once
    colors=(group.colour for group in (group_1_info, group_2_info)),
    width=BAR_WIDTH,
    barheight=BAR_HEIGHT,
    bargap=0.3,
    bargroupgap=0.0,
)

if BAR_SAVE_PLOT:
    # relies on regions_to_plot_selection_method_str set by the pie-chart cells
    plot_filename = (
        f"pls_{pls_salience_threshold_str}_barplot_{group_folder}_{group_1.marker}_{NORMALIZATION}"
        f"_{regions_to_plot_selection_method_str}{SAVED_PLOT_EXTENSION}"
    ).lower()
    plot_filepath = os.path.join(plots_output_path, plot_filename)
    if SAVED_PLOT_EXTENSION.lower() == ".html":
        fig.write_html(plot_filepath, config=dict(toImageButtonOptions=dict(format="svg")))
    else:
        fig.write_image(plot_filepath)
if BAR_SHOW_PLOT:
    fig.show()

In [None]:
# The cross correlations are only needed by the matrix and chord plots, so skip them otherwise.
if MATRIX_SAVE_PLOT or MATRIX_SHOW_PLOT or CHORD_SAVE_PLOT or CHORD_SHOW_PLOT:
    # min_animals=None because it doesn't matter. PLS already removes every region with NaNs.
    groups_cross_correlations = [
        BraiAn.CrossCorrelation(animal_group, salient_regions.acronym.array, AllenBrain, NORMALIZATION, min_animals=None)
        for animal_group in (group_1, group_2)
    ]

In [None]:
# One cross-correlation matrix plot per group, optionally saved and/or shown.
if MATRIX_SAVE_PLOT or MATRIX_SHOW_PLOT:
    for animal_group, cross_corr in zip((group_1, group_2), groups_cross_correlations):
        title = f"{animal_group.name} Pearson cross correlation matrix (n = {animal_group.n})"
        fig = cross_corr.plot(
            title=title,
            cell_height=MATRIX_CELL_HEIGHT,
            min_plot_height=MATRIX_MIN_PLOT_HEIGHT,
            aspect_ratio=MATRIX_CELL_RATIO,
        )
        if MATRIX_SAVE_PLOT:
            plot_filename = (
                f"pls_{pls_salience_threshold_str}_correlation_matrix_filtered_{animal_group.name}_{animal_group.marker}"
                f"_{NORMALIZATION}_{regions_to_plot_selection_method_str}{SAVED_PLOT_EXTENSION}"
            ).lower()
            plot_filepath = os.path.join(plots_output_path, plot_filename)
            if SAVED_PLOT_EXTENSION.lower() == ".html":
                fig.write_html(plot_filepath, config=dict(toImageButtonOptions=dict(format="svg")))
            else:
                fig.write_image(plot_filepath)
        if MATRIX_SHOW_PLOT:
            fig.show()

In [None]:
# One chord diagram per group, built from a functional connectome of its cross correlations.
if CHORD_SAVE_PLOT or CHORD_SHOW_PLOT:
    for animal_group, cross_corr in zip((group_1, group_2), groups_cross_correlations):
        # Build the connectome with the p-value and r cut-offs set in the CORR. NETWORK options
        connectome = BraiAn.FunctionalConnectome(
            cross_corr,
            p_cutoff=NETWORK_P_CUTOFF,
            r_cutoff=NETWORK_R_CUTOFF,
            negatives=NETWORK_USE_NEGATIVE_LINKS,
            isolated_vertices=NETWORK_USE_ISOLATED_VERTICES,
            weighted=True,
        )

        # the title reports |r| when negative links are used, plain r otherwise
        r_label = '|r|' if NETWORK_USE_NEGATIVE_LINKS else 'r'
        title = (
            f"{animal_group.name} connectomics graph from Pearson correlation "
            f"(n = {animal_group.n}, {r_label} >= {NETWORK_R_CUTOFF}, p <= {NETWORK_P_CUTOFF})"
        )
        # empty subtitle plus the user-defined bottom annotations
        group_annotations = dict(subtitle="", **CHORD_BOTTOM_ANNOTATIONS)
        chord_options = dict(
            AllenBrain=AllenBrain,
            ideograms_arc_index=50,
            title=title,
            size=CHORD_PLOT_SIZE,
            no_background=CHORD_NO_BACKGROUND,
            regions_size=CHORD_REGIONS_SIZE,
            regions_font_size=CHORD_REGIONS_FONT_SIZE,
            max_edge_width=CHORD_MAX_EDGE_WIDTH,
            use_weighted_edge_widths=CHORD_USE_WEIGHTED_EDGE_WIDTHS,
            colorscale_edges=CHORD_USE_COLORSCALE_EDGES,
            colorscale=CHORD_COLORSCALE,
            colorscale_min=CHORD_COLORSCALE_MIN,
        )
        fig = BraiAn.draw_chord_plot(connectome, **chord_options, **group_annotations)

        if CHORD_SAVE_PLOT:
            # the cut-offs are part of the file name, with dots replaced by underscores
            p_str = str(NETWORK_P_CUTOFF).replace(".", "_")
            r_str = str(NETWORK_R_CUTOFF).replace(".", "_")
            plot_filename = (
                f"pls_{pls_salience_threshold_str}_chord_plot_filtered_p{p_str}_r{r_str}"
                f"_{animal_group.name}_{animal_group.marker}_{NORMALIZATION}"
                f"_{regions_to_plot_selection_method_str}{SAVED_PLOT_EXTENSION}"
            ).lower()
            plot_filepath = os.path.join(plots_output_path, plot_filename)
            if SAVED_PLOT_EXTENSION.lower() == ".html":
                fig.write_html(plot_filepath, config=dict(toImageButtonOptions=dict(format="svg")))
            else:
                fig.write_image(plot_filepath)
        if CHORD_SHOW_PLOT:
            fig.show()

In [None]:
import importlib
# Reload BraiAn's submodules first and the package itself last
# (presumably to pick up code edits without restarting the kernel).
for submodule in (BraiAn.statistics, BraiAn.plot, BraiAn.plot_chord):
    importlib.reload(submodule)
importlib.reload(BraiAn)