## Set parameters

In [None]:
# ======================================= GENERAL OPTIONS =======================================
# NOTE: both roots are absolute paths on a mounted share; adapt them to your own machine.
data_root = "/run/user/1000/gvfs/smb-share:server=ich.techosp.it,share=ricerca/Lab Matteoli/Silva/collaborations/Mathias/soumnya/data/experiment/"
plots_root = "/run/user/1000/gvfs/smb-share:server=ich.techosp.it,share=ricerca/Lab Matteoli/Silva/collaborations/Mathias/soumnya/results/experiment/plots/"

# Passed to BraiAn.AllenBrainHierarchy when the ontology is loaded.
branches_to_exclude = ["retina", "VS", "grv", "fiber tracts", "CB"]

# Available options are: "summary structures", "major divisions", "depth <n>", "structural level <n>"
# where <n> is an integer of the depth/level desired.
region_to_plot_selection_method = "depth 5"

# Call get_normalization_methods() on an AnimalGroup object to know its available normalization methods.
normalization = "Density"

# '.html' for an interactive plot
# '.svg' for a vectorized image
# '.png'/'.jpg'/... for a rasterized image
saved_plot_extension = ".html"

# ========================================= PLS OPTIONS =========================================
# Only brain regions with a salience higher than this threshold are shown. 2 is the significance threshold.
pls_salience_threshold = 1.2
pls_rank = 1
pls_num_bootstrap = 5000
pls_num_permutations = 5000
plot_distribution_of_singular_values = True
plot_salience_scores = True

# ========================================== BAR PLOT ===========================================
bar_height = 10_000
bar_use_acronyms = False
bar_save_plot = True
bar_show_plot = False

# ====================================== CORRELATION MATRIX =====================================
matrix_cell_height = 5
matrix_cell_ratio = 3 / 2
matrix_min_plot_height = 500
matrix_save_plot = True
matrix_show_plot = True

# ======================================== CHORD DIAGRAM ========================================
# Set to 1 if you don't want to filter by p-value (a typical filtering value would be 0.05).
chord_p_cutoff = 1
chord_r_cutoff = 0.8
chord_plot_size = 1200
chord_no_background = False
chord_regions_size = 15
chord_regions_font_size = 10
chord_max_edge_width = 5
chord_use_weighted_edge_widths = True
chord_use_colorscale_edges = True
chord_save_plot = True
chord_show_plot = True
# Add as many annotations as desired, each with the following format:
#     "annotation<k>": "<annotation>"
chord_bottom_annotations = {
    "annotation1": "Dark grey nodes are regions with insufficient data to compute cross correlation",
    "annotation2": "Light grey nodes are regions with no correlation with others above the threshold",
    "annotation3": "This is the third annotation",
}

In [None]:
# Comparison preset: "Control" vs "Stress".
# NOTE: several cells of this notebook assign these same four variables; the one executed last wins.
experiment = "soumnya"
group_1_name, group_2_name = "Control", "Stress"
output_folder = "C-S"

In [None]:
# Comparison preset: "Control (Females)" vs "Stress (Females)".
# NOTE: several cells of this notebook assign these same four variables; the one executed last wins.
experiment = "soumnya"
group_1_name, group_2_name = "Control (Females)", "Stress (Females)"
output_folder = "CF-SF"

In [None]:
# Comparison preset: "Control (Males)" vs "Stress (Males)".
# NOTE: several cells of this notebook assign these same four variables; the one executed last wins.
experiment = "soumnya"
group_1_name, group_2_name = "Control (Males)", "Stress (Males)"
output_folder = "CM-SM"

## Script's code

In [None]:
import BraiAn
import pandas as pd
import os

In [None]:
# Derive the input and output locations from the configured roots and the chosen output folder.
data_input_path = os.path.join(data_root, "BraiAn_output")
data_output_path = os.path.join(data_input_path, output_folder)
plots_output_path = os.path.join(plots_root, output_folder)

# Create each output folder that does not exist yet (data first, then plots).
for output_dir in (data_output_path, plots_output_path):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir, exist_ok=True)

In [None]:
def _load_group(group_name):
    """Build an AnimalGroup through `from_csv`, passing the group name, `data_input_path`
    and the file name "cell_counts_<group_name>.csv"."""
    return BraiAn.AnimalGroup.from_csv(group_name, data_input_path, f"cell_counts_{group_name}.csv")


group_1 = _load_group(group_1_name)
group_2 = _load_group(group_2_name)

# Stop right away when the two groups cannot be compared with each other.
if not group_1.is_comparable(group_2):
    raise ImportError(
        "Group 1 and Group 2 are not comparable!\n"
        "Please check that you're reading two groups that normalized on the same brain regions and on the same marker"
    )

The data are stored in ```group_1.data``` and ```group_2.data```:

In [None]:
# Display the data loaded for the first group (a cell's last expression is rendered as its output).
group_1.data

In [None]:
# Ontology file taken from https://help.brain-map.org/display/api/Downloading+an+Ontology%27s+Structure+Graph
# (StructureGraph id=1).
# NOTE(review): the path is relative, so it presumably resolves against the kernel's working directory — confirm.
path_to_allen_json = "./data/AllenMouseBrainOntology.json"
# Build the hierarchy object from the ontology file and the configured `branches_to_exclude`.
AllenBrain = BraiAn.AllenBrainHierarchy(path_to_allen_json, branches_to_exclude)

In [None]:
def _parse_trailing_int(selection_method, method_name):
    """Return the integer <n> that ends a "<method_name> <n>" selection string.

    Args:
        selection_method: the full value of `region_to_plot_selection_method`.
        method_name: the method prefix ("depth" or "structural level"), used in the error message.

    Raises:
        ValueError: if the last space-separated token is not an integer.
    """
    token = selection_method.split(" ")[-1]
    try:
        return int(token)
    except ValueError as err:
        # Chain the original error so the offending token is visible in the traceback.
        raise ValueError(f"Could not retrieve the <n> parameter of the '{method_name}' method for 'region_to_plot_selection_method'") from err


# Apply the selection requested by `region_to_plot_selection_method` on the Allen hierarchy.
if region_to_plot_selection_method == "summary structures":
    # selects the Summary Structures
    AllenBrain.select_from_csv("./data/AllenSummaryStructures.csv")
elif region_to_plot_selection_method == "major divisions":
    AllenBrain.select_regions(BraiAn.MAJOR_DIVISIONS)
elif region_to_plot_selection_method.startswith("depth"):
    depth = _parse_trailing_int(region_to_plot_selection_method, "depth")
    AllenBrain.select_at_depth(depth)
elif region_to_plot_selection_method.startswith("structural level"):
    level = _parse_trailing_int(region_to_plot_selection_method, "structural level")
    AllenBrain.select_at_structural_level(level)
else:
    # Fail loudly instead of silently continuing without having applied any selection.
    raise ValueError(
        f"Unknown 'region_to_plot_selection_method': {region_to_plot_selection_method!r}. "
        "Available options are: \"summary structures\", \"major divisions\", \"depth <n>\", \"structural level <n>\""
    )

# Whatever the method, the selected regions are read back from the hierarchy itself.
regions_to_plot = AllenBrain.get_selected_regions()
print(f"You selected {len(regions_to_plot)} regions to plot.")

# Partial Least Squares  

The analysis done below is taken from the tutorial written by [Krishnan et al.](https://www.sciencedirect.com/science/article/pii/S1053811910010074).  
Run the 2 cells below to get started.

In [None]:
# Create a PLS object from the two groups, the selected regions and the configured normalization
pls = BraiAn.PLS(group_1, group_2, regions_to_plot, normalization)

# Show the matrix X (displayed as the cell's output)
pls.X

In [None]:
# Show the matrix Y as indicator columns, one per value found in pls.y.
# NOTE(review): the renaming assumes pls.y encodes group 2 as 0 and group 1 as 1 — confirm against BraiAn.PLS.
y_column_names = {0: group_2_name, 1: group_1_name}
y_indicators = pd.get_dummies(pls.y)
y_indicators.rename(columns=y_column_names)

The two matrices printed above (X and Y) illustrate the data on which the PLS is done.  
- ```X:``` The rows in this matrix are the mice. The columns in the matrix are the regions selected for analysis. The values in the matrix are the **normalized values of marked cells in that region, relative to the whole brain.**
The normalization methods are either:
  + Density
  + Percentage (on the total number of detected marked cells outside of excluded regions)
  + RelativeDensity
- ```Y:``` The rows in this matrix are the mice. The columns in the matrix are the 2 groups. **A value in this matrix is 1 if the mouse belongs to the specified group**.

In brief, PLS analyzes the relationship (correlation) between the columns of ```X``` and ```Y```. In our specific case, there will be 2 important outputs:
- **Salience scores**: Each brain region has a salience score. A high salience score means that the brain region explains much of the correlation between ```X``` and ```Y```.  
- **Singular values**: These are the singular values of the correlation matrix $R = Y^TX$, obtained from its singular value decomposition $R = U \Delta V^T$.

## Random permutations to see whether we can differentiate signal from noise. 
Here, we randomly shuffle the group to which a mouse belongs, and calculate the singular values of the permuted dataset.  
From [Krishnan et al.](https://www.sciencedirect.com/science/article/pii/S1053811910010074):  
> The set of all the (permuted) singular values provides a sampling distribution of the singular values under the null hypothesis and, therefore can be used as a null hypothesis test.

*Note: running the cell below will take a few minutes.*

In [None]:
# Compute the singular values of `pls_num_permutations` randomly permuted datasets.
# NOTE(review): no random seed is set in the visible notebook, so results may differ between runs —
# confirm how BraiAn draws its permutations.
print(f"Randomly permuting singular values {pls_num_permutations} times...")
s,singular_values = pls.randomly_permute_singular_values(pls_num_permutations)
# Plot distribution of singular values (only when enabled in the parameters)
if plot_distribution_of_singular_values:
    fig = BraiAn.plot_permutation(pls.s[0], singular_values, pls_num_permutations)
    fig.show()

In [None]:
# Calculate p-value = Probability(experiment | H0): the fraction of permutations whose first
# singular value is strictly greater than s[0].
# NOTE(review): the comparison is strict (>), so p can be exactly 0; and it uses `s` returned by the
# permutation call, whereas the plot uses pls.s — confirm both are intended.
n_greater = (singular_values[:, 0] > s[0]).sum()
p = n_greater / pls_num_permutations
print("p-value = " + str(p))

## Bootstrap to identify stable salience scores

Here, we use [bootstrapping](https://en.wikipedia.org/wiki/Bootstrapping_(statistics)) (= sampling of the mice in the dataset, with replacement) to get an estimate of which salience scores are stable.

From [Krishnan et al.](https://www.sciencedirect.com/science/article/pii/S1053811910010074):  
> When a vector of saliences is considered generalizable and is kept for further analysis, we need to identify its elements that are stable through resampling. In practice, the stability of an element is evaluated by dividing it by its standard error. [...] To estimate the standard errors, we create bootstrap samples which are obtained by sampling with replacement the observations in $X$ and $Y$ (Efron and Tibshirani, 1986). A salience standard error is then estimated as the standard error of the saliences from a large number of these bootstrap samples (say 1000 or 10000). **The ratios are akin to a Z-score, therefore when they are larger than 2 the corresponding saliences are considered significantly stable.**

*Note: Running the cell below will take a few minutes.*

In [None]:
# Bootstrap the salience scores, passing the configured `pls_rank` and `pls_num_bootstrap`.
print(f"Bootstrapping salience scores {pls_num_bootstrap} times...")
# The call returns two results; `v_salience_scores` is the one written to CSV later in the notebook.
u_salience_scores,v_salience_scores = pls.bootstrap_salience_scores(pls_rank, pls_num_bootstrap)

In [None]:
# Plot PLS salience scores (only when enabled in the parameters)
if plot_salience_scores:
    # NOTE: the file name always ends in ".svg", independently of `saved_plot_extension`.
    file_title = "PLS_{}_{}.svg".format(group_1.marker, normalization).lower()
    tp, salient_regions = pls.plot_salience_scores(
        pls_salience_threshold,
        plots_output_path,
        file_title,
        fig_width=1000,
        fig_height=2000,
    )

In [None]:
# salient_regions = salient_regions.reset_index()
# salient_regions.columns = ["region", "salience"]
# salient_regions["salience"] = salient_regions["salience"].abs()
# salient_regions = salient_regions.sort_values(by="salience")
# salient_regions.to_csv(os.path.join(data_output_path, "salient_regions.csv"), sep=";", index=False)
# salient_regions

In [None]:
# Label the column named 0 as "salience score", then write the table to the data output folder.
v_salience_scores = v_salience_scores.rename(columns={0: "salience score"})
pls_filename = "PLS_{}_{}_salience_scores.csv".format(group_1.marker, normalization).lower()
BraiAn.save_csv(v_salience_scores, data_output_path, pls_filename, overwrite=True)

# Plotting

In [None]:
# Bar plot of the two groups over the selected regions.
fig = BraiAn.plot_groups(
    normalization, AllenBrain, group_1, group_2,
    selected_regions=regions_to_plot,
    use_acronyms=bar_use_acronyms,
    height=bar_height,
)

if bar_save_plot:
    plot_filename = f"pls_barplot_{output_folder}_{normalization}_{group_1.marker}{saved_plot_extension}".lower()
    plot_filepath = os.path.join(plots_output_path, plot_filename)
    # ".html" is written with write_html; every other extension goes through write_image.
    if saved_plot_extension.lower() == ".html":
        fig.write_html(plot_filepath)
    else:
        fig.write_image(plot_filepath)
if bar_show_plot:
    fig.show()

In [None]:
def _cross_correlate(group):
    """Return the (r, p) pair given by `group.cross_correlation` on the selected regions."""
    # min_animals=None because it doesn't matter. PLS already removes every region with NaNs.
    r, p = group.cross_correlation(normalization, regions_to_plot, min_animals=None)
    return r, p


# One (r, p) tuple per group, in the order (group_1, group_2).
groups_cross_correlations = [_cross_correlate(group) for group in (group_1, group_2)]

In [None]:
# Draw one cross-correlation matrix per group; nothing is drawn when it would be neither saved nor shown.
if matrix_save_plot or matrix_show_plot:
    for group, (r, p) in zip((group_1, group_2), groups_cross_correlations):
        title = f"{group.name} Pearson cross correlation matrix (n = {group.n})"
        fig = BraiAn.plot_cross_correlation(
            r=r,
            p=p,
            title=title,
            cell_height=matrix_cell_height,
            min_plot_height=matrix_min_plot_height,
            aspect_ratio=matrix_cell_ratio,
        )
        if matrix_save_plot:
            plot_filename = f"pls_correlation_matrix_filtered_{group.name}_{normalization}_{group.marker}{saved_plot_extension}".lower()
            plot_filepath = os.path.join(plots_output_path, plot_filename)
            # ".html" is written with write_html; every other extension goes through write_image.
            if saved_plot_extension.lower() == ".html":
                fig.write_html(plot_filepath)
            else:
                fig.write_image(plot_filepath)
        if matrix_show_plot:
            fig.show()

In [None]:
# The annotations are the same for both groups: an empty subtitle plus the configured bottom annotations.
group_annotations = dict(subtitle="", **chord_bottom_annotations)

# Draw one chord plot per group from its (r, p) cross-correlation results.
for group, (r, p) in zip((group_1, group_2), groups_cross_correlations):
    fig = BraiAn.draw_chord_plot(
        r=r,
        p=p,
        r_cutoff=chord_r_cutoff,
        p_cutoff=chord_p_cutoff,
        AllenBrain=AllenBrain,
        ideograms_a=50,
        title=f"{group.name} connectomics graph from Pearson correlation (n = {group.n}, |r| >= {chord_r_cutoff}, p <= {chord_p_cutoff})",
        size=chord_plot_size,
        no_background=chord_no_background,
        regions_size=chord_regions_size,
        regions_font_size=chord_regions_font_size,
        max_edge_width=chord_max_edge_width,
        use_weighted_edge_widths=chord_use_weighted_edge_widths,
        colorscale_edges=chord_use_colorscale_edges,
        **group_annotations,
    )
    if chord_save_plot:
        plot_filename = f"pls_chord_plot_filtered_{group.name}_{normalization}_{group.marker}{saved_plot_extension}".lower()
        plot_filepath = os.path.join(plots_output_path, plot_filename)
        # ".html" is written with write_html; every other extension goes through write_image.
        if saved_plot_extension.lower() == ".html":
            fig.write_html(plot_filepath)
        else:
            fig.write_image(plot_filepath)
    if chord_show_plot:
        fig.show()

In [None]:
# Reload the already-imported BraiAn modules in the running kernel.
# NOTE(review): presumably a development helper for picking up edits to BraiAn's source without
# restarting the kernel; it is not needed for the analysis itself — confirm before keeping it.
import importlib
# The plot_chord submodule is reloaded first, then the BraiAn package itself.
importlib.reload(BraiAn.plot_chord)
importlib.reload(BraiAn)