## Notebook containing code used for manuscript supplementary figure 5

### Note that most paths will need to be changed based on where the files were saved to your local folder

In [None]:
import anndata
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import commot as ct
import spateo as st
import ncem
from ncem.data import get_data_custom, customLoader

import os
import seaborn as sns
import sys
from tqdm import tqdm

from scipy.spatial import KDTree

In [None]:
# Seed NumPy's global random number generator so random draws are repeatable between runs.
np.random.seed(888)

In [None]:
# IPython magic: pass dpi=300 to the inline backend when it renders figures.
%config InlineBackend.print_figure_kwargs={'dpi': 300.0}

In [None]:
# For viewability purposes, process all "COL" elements to "Collagens": 
def replace_col_with_collagens(string):
    """Collapse all collagen entries before the first colon into a single entry.

    The text before the first ``:`` is split on ``/``. The first element that
    starts with ``COL`` or ``b_COL`` (or already equals ``Collagens`` /
    ``b_Collagens``) is replaced by ``Collagens`` or ``b_Collagens``
    respectively; every later collagen-like element is dropped. The text from
    the first ``:`` onwards is returned unchanged.

    Args:
        string: Label of the form ``"A/B/C"`` or ``"A/B/C:rest"``.

    Returns:
        The label with its collagen elements merged into one entry.
    """
    # partition() keeps the complete remainder, so a label containing more
    # than one colon is not truncated after its second field.
    head, sep, tail = string.partition(':')

    kept = []
    seen_collagen = False
    for element in head.split('/'):
        bound = element.startswith("b_COL") or element == "b_Collagens"
        free = element.startswith("COL") or element == "Collagens"
        if not (bound or free):
            kept.append(element)
        elif not seen_collagen:
            # Only the first collagen-like element survives, in generic form.
            kept.append("b_Collagens" if bound else "Collagens")
            seen_collagen = True

    return '/'.join(kept) + sep + tail

## Resources used for the CosMx sample can be found: https://www.dropbox.com/scl/fo/z3bvppoq96vg442lma0rs/ACIXLqp-FXjuYQ2ZeAeHFEA?rlkey=84h21aoigdxrpfz9yrbsyepwg&st=55ozincu&dl=0
## Resources used for the MERFISH sample can be found: https://www.dropbox.com/scl/fo/s7mjpdgbk4f2mj1rndooo/AAkL3b4W3JazjGDn3pSKKrk?rlkey=3acd5da9bcl743x0byrrm8jxp&st=9g9b9o47&dl=0 
## Database files used here can be found: https://www.dropbox.com/scl/fo/dcd95so9zhkb8lnjkkxep/ANwmkFeb-sgtS89leHQezlU?rlkey=saiul4j5rr1vt6lwjl4hirmwh&st=brpjqw2c&dl=0

### Make sure to change each file path to the relevant local folder

In [None]:
# Set the Spateo database directory here:
database_dir = "/mnt/d/SCData/CCI_database"

## Load MERFISH brain sample FOVs

In [None]:
# Two FOVs are included- use this to change which is selected (options are 153 or 162)
fov_number = 153

In [None]:
# Replace with wherever this file is stored locally
# NOTE: despite its name, this variable holds the MERFISH FOV path in this section;
# it is reassigned to the CosMx file further down.
path_to_cosmx = f"/mnt/d/SCData/Spateo_data/MERFISH_mouse_cortex/MERFISH_mouse_brain_mouse1_fov{fov_number}.h5ad"
# Replace with wherever the L:R database is stored locally
lr_db = pd.read_csv("/mnt/c/Users/danie/Desktop/Github/Github/spateo-release-main/spateo/tools/database/lr_db_mouse.csv", index_col=0)

In [None]:
# Read the selected FOV into memory
merfish_fov = anndata.read_h5ad(path_to_cosmx)
# Tag the data type in .uns — presumably read by Spateo downstream; TODO confirm
merfish_fov.uns["__type"] = "UMI"

### Figure S5l- spatially-resolved cell types plot

In [None]:
# Per-cell spatial coordinates (first two columns of obsm['spatial']) and cell-type labels
spatial_coords = merfish_fov.obsm['spatial']
x_coords = spatial_coords[:, 0]
y_coords = spatial_coords[:, 1]
cell_types = merfish_fov.obs['general_cell_type']

In [None]:
# Distinct labels present in this FOV; the plot below draws one scatter series per label
unique_cell_types = np.unique(cell_types)
# Hex colour assigned to each cell-type label (a label missing from this dict raises KeyError when plotting)
color_map = {
    'Astro': '#d70000',
    'Endo': '#00fdcf',
    'L23_IT': '#eeb9b9',
    'L45_IT': '#00af8a',
    'L56_NP': '#d38c8f',
    'L5_ET': '#c59f72',
    'L5_IT': '#00d6d5',
    'L6_CT': '#a9001f',
    'L6_IT': '#bfd57c',
    'L45_IT_SSp': '#f46200',
    'L6b': '#d2b75b',
    'Lamp5': '#ad94ec',
    'Micro': '#213400',
    'OPC': '#fb7cff',
    'Oligo': '#91a2ea',
    'PVM': '#ad3b30',
    'Peri': '#734abc',
    'Pvalb': '#602541',
    'SMC': '#e2b392',
    'Sncg': '#bc94d2',
    'Sst': '#1726ff',
    'VLMC': '#8a1323',
    'Vip': '#2f3ea8',
    'striatum': '#ffa500'
}

In [None]:
# Scatter plot
# These rcParams changes are global and stay in effect for later figures as well.
plt.rcParams['xtick.labelsize'] = 16
plt.rcParams['ytick.labelsize'] = 16

fig, axes = plt.subplots(1, 1, figsize=(6, 5))
fig.suptitle(f'MERFISH mouse brain- FOV {fov_number}', fontsize=20)
    
# One scatter series per cell type so that each type gets its own colour and legend entry
for cell_type in unique_cell_types:
    idx = cell_types == cell_type
    axes.scatter(x_coords[idx], y_coords[idx], color=color_map[cell_type], label=cell_type, s=5)

axes.set_ylim(axes.get_ylim())  # Sync y-limits with the scatter plot
# Remove plot borders
for spine in axes.spines.values():
    spine.set_visible(False)

# Remove tick marks and labels
axes.tick_params(left=False, bottom=False, labelleft=False, labelbottom=False)
# Anchor the legend to the right of the axes
axes.legend(title='Cell Type', bbox_to_anchor=(1.15, 1), loc='upper left', title_fontsize='12', fontsize='10')

plt.tight_layout(rect=[0, 0, 0.95, 1])  # Adjust the layout to fit everything nicely
plt.show()

In [None]:
# Set to the folders that the inputs (only the targets list for this data) are contained in and that the outputs (model results) will save to:
cci_input_directory = "/mnt/d/SCAnalysis/Spateo_MERFISH_benchmark/CCI_inputs"
cci_output_directory = "/mnt/d/SCAnalysis/Spateo_MERFISH_benchmark/CCI_outputs"
# Output file passed to the model as output_path, named after the selected FOV
cci_output_id = os.path.join(cci_output_directory, f"fov_{fov_number}_target_genes.csv")
# Text file with the target genes for the selected FOV
cci_targets_file = os.path.join(cci_input_directory, f"target_genes_slice{fov_number}.txt")

#### Initialize CCI model (can skip if predictions .csv file was created locally or downloaded from the folder)

In [None]:
# Hard-coded lower / upper distance bounds for each of the two FOVs
if fov_number == 153:
    lb = 50.0
    ub = 139.6
else:
    lb = 48.9
    ub = 136.4

In [None]:
# For clarity, this is how the distance bounds are determined
# (running this cell and the next one overwrites the hard-coded lb / ub set above)
# Lower bound: bandwidth search targeting 9 neighbors
lb = st.tl.find_neighbors.find_bw_for_n_neighbors(
    merfish_fov,
    coords_key="spatial",
    n_anchors=2000,
    target_n_neighbors=9,
    initial_bw=200,
    exclude_self=True
)

In [None]:
# Upper bound: same search targeting 70 neighbors
ub = st.tl.find_neighbors.find_bw_for_n_neighbors(
    merfish_fov,
    coords_key="spatial",
    n_anchors=2000,
    target_n_neighbors=70,
    initial_bw=200,
    exclude_self=True
)

In [None]:
# Inputs for the Spateo model on the selected MERFISH FOV
adata_path = path_to_cosmx
output_path = cci_output_id
target_path = cci_targets_file
cci_dir_path = database_dir
mod_type = "niche"
distr = "poisson"
species = "mouse"
group_key = "general_cell_type"
coords_key = "spatial"
# lb / ub are the distance bounds set in the preceding cells
distance_membrane_bound = lb
distance_secreted = ub
minbw = lb
maxbw = ub * 1.5

# exist_ok=True makes the cell safe to re-run and removes the separate existence check
os.makedirs(os.path.dirname(output_path), exist_ok=True)

In [None]:
# Build the argument parser / argument list that configure the Spateo model from the inputs defined above
parser, args_list = st.tl.define_spateo_argparse(
    adata_path=adata_path,
    targets_path=target_path,
    cci_dir=cci_dir_path,
    mod_type=mod_type,
    distr=distr,
    species=species,
    group_key=group_key,
    coords_key=coords_key,
    distance_membrane_bound=distance_membrane_bound,
    distance_secreted=distance_secreted,
    minbw=minbw,
    maxbw=maxbw,
    output_path=output_path,
)

In [None]:
import time

# Wall-clock start of the run
t1 = time.time()

# Set up, fit, then predict and save — in that order
swr_model = st.tl.MuSIC(parser, args_list)
swr_model._set_up_model()
swr_model.fit()
swr_model.predict_and_save()

t_last = time.time()

print("Total Time Elapsed:", np.round(t_last - t1, 2), "seconds")
print("-" * 60)

In [None]:
# Note that the predictions.csv file is also provided in the Dropbox

#### Run NCEM model for the comparison (can also skip to the next section- the predictions file is included in the Dropbox folder)

In [None]:
# These are the contents of the "target_genes" txt files, spelled out in the form of a list.
# The FOV is chosen through `fov_number`; the name `sample_id` that this cell used to test
# is never assigned in the notebook and raised a NameError.
if fov_number == 153:
    target_genes = ["Flt1", "Aqp4", "Parm1", "Rorb", "Syt6", "Calb1", "Prdm8", "Rspo1", "Lypd1", "Adamts4", "Vtn", "Lamp5"]
else:
    target_genes = ["Flt1", "Aqp4", "Parm1", "Rorb", "Syt6", "Calb1", "Prdm8", "Rspo1", "Ptpru", "Adamts4", "Vtn", "Lamp5"]

In [None]:
# 6779 is prime, need to randomly drop 1 cell for NCEM- this is saved as a separate file in the MERFISH dropbox
# Replace the file path with wherever this file is stored locally
mouse_brain_ncem = anndata.read_h5ad(f"/mnt/d/SCData/Spateo_data/MERFISH_mouse_cortex/MERFISH_mouse_brain_mouse1_fov{fov_number}_NCEM_processed.h5ad")

In [None]:
# Requirement for initializing interpreter
# (only a placeholder value is stored under the "spatial" key)
mouse_brain_ncem.uns["spatial"] = "Hello, world"

In [None]:
# Use the upper distance bound as the distance parameter for NCEM
dist = ub

In [None]:
interpreter = ncem.interpretation.interpreter.InterpreterInteraction()

In [None]:
# Attach the AnnData object to the interpreter through NCEM's custom loader
interpreter.data = customLoader(
    adata=mouse_brain_ncem, cluster='general_cell_type', patient='Batch', library_id='Batch', radius=dist,
)
get_data_custom(interpreter=interpreter)

In [None]:
interpreter.n_eval_nodes_per_graph = 2

In [None]:
interpreter.get_sender_receiver_effects()

In [None]:
# Backsolve to get the design matrix:
img_keys = interpreter.img_keys_all
nodes_idx = interpreter.nodes_idx_all

In [None]:
# `target` and `interactions` are concatenated column-wise into the regression design matrix;
# `y` is the response passed to ols_fit below
(target, interactions, _, _, _), y = interpreter._get_np_data(image_keys=img_keys, nodes_idx=nodes_idx)
x_design = np.concatenate([target, interactions], axis=1)
x_design

In [None]:
def ols_fit(x_, y_):
    """Fit least-squares coefficients separately for every column of ``y_``.

    Uses beta = pinv(X^T X) X^T y, with the pseudo-inverse standing in for
    the inverse.

    Args:
        x_: Design matrix of shape (n_obs, n_features).
        y_: Response matrix of shape (n_obs, n_responses).

    Returns:
        Array of shape (n_responses, n_features, 1) with one coefficient
        column vector per response column.
    """
    gram = x_.T @ x_
    # The projector is shared by every response column, so build it once
    projector = np.linalg.pinv(gram) @ x_.T

    per_response = []
    for j in range(y_.shape[1]):
        # y_[:, [j]] keeps the column two-dimensional
        per_response.append(projector @ y_[:, [j]])
    return np.array(per_response)

# Fit the coefficients and drop the trailing singleton axis returned by ols_fit
ols = ols_fit(x_=x_design, y_=y)
params = ols.squeeze()

params.shape

In [None]:
# Predicted values: design matrix times the transposed coefficient matrix
reconst = np.matmul(x_design, params.T)
reconst

In [None]:
# Label rows with cell names and columns with the target genes
# (requires reconst to have exactly one column per entry of target_genes)
reconst_df = pd.DataFrame(reconst, index=mouse_brain_ncem.obs_names, columns=target_genes)

In [None]:
# Change to an appropriate location on the local system
save_path = f"/mnt/d/SCAnalysis/Spateo_MERFISH_benchmark/NCEM_predictions_fov{fov_number}.csv"
reconst_df.to_csv(save_path)

### Figure S5m- barplots comparing performance of Spateo vs. NCEM

In [None]:
# Change to the location on the local system where the NCEM predictions were saved
ncem_save_path = f"/mnt/d/SCAnalysis/Spateo_MERFISH_benchmark/NCEM_predictions_fov{fov_number}.csv"
ncem_reconst_df = pd.read_csv(ncem_save_path, index_col=0)

In [None]:
# Spateo predictions are read from "predictions.csv" in the CCI output directory
# NOTE(review): this file name does not include the FOV number — confirm it belongs to the selected FOV
spateo_save_path = os.path.join(cci_output_directory, "predictions.csv")
spateo_reconst_df = pd.read_csv(spateo_save_path, index_col=0)

In [None]:
# In case the previous section was skipped over
# The FOV is chosen through `fov_number`; the name `sample_id` that this cell used to test
# is never assigned in the notebook and raised a NameError.
if fov_number == 153:
    target_genes = ["Flt1", "Aqp4", "Parm1", "Rorb", "Syt6", "Calb1", "Prdm8", "Rspo1", "Lypd1", "Adamts4", "Vtn", "Lamp5"]
else:
    target_genes = ["Flt1", "Aqp4", "Parm1", "Rorb", "Syt6", "Calb1", "Prdm8", "Rspo1", "Ptpru", "Adamts4", "Vtn", "Lamp5"]

# 6779 is prime, need to randomly drop 1 cell for NCEM- this is saved as a separate file in the MERFISH dropbox
# Replace the file path with wherever this file is stored locally
mouse_brain_ncem = anndata.read_h5ad(f"/mnt/d/SCData/Spateo_data/MERFISH_mouse_cortex/MERFISH_mouse_brain_mouse1_fov{fov_number}_NCEM_processed.h5ad")

#### Bootstrap resampling- can skip over this section as well if these files have already been generated or the result files were downloaded from the resource folder

In [None]:
# Note that for the figure, only the R-squared comparison is included, but this computes additional metrics that can also be compared w/ modifications to code below:
def compute_metrics(y_true, y_pred):
    """Score predictions against observations with five agreement metrics.

    Args:
        y_true: 1-D array of observed values.
        y_pred: 1-D array of predicted values, same length as ``y_true``.

    Returns:
        dict with the keys ``'Pearson r'``, ``'Spearman r'``, ``'R-squared'``,
        ``'RMSE'`` and ``'Jaccard index'``. The Jaccard index compares which
        entries are non-zero in the two arrays and is 1.0 when neither array
        has a non-zero entry.
    """
    # Imported here because the notebook does not import these names before
    # this function is first called.
    from scipy.stats import pearsonr, spearmanr

    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    rp, _ = pearsonr(y_true, y_pred)
    r, _ = spearmanr(y_true, y_pred)

    # R-squared and MSE are written out with NumPy: the r2_score /
    # mean_squared_error helpers used before were never imported. Casting to
    # float avoids wrap-around when the observations are unsigned counts.
    observed = y_true.astype(float)
    errors = observed - y_pred.astype(float)
    ss_res = np.sum(errors ** 2)
    ss_tot = np.sum((observed - observed.mean()) ** 2)
    if ss_tot != 0:
        r2 = 1.0 - ss_res / ss_tot
    else:
        # Constant observations: perfect predictions score 1, anything else 0
        # (the finite-value convention of scikit-learn's r2_score).
        r2 = 1.0 if ss_res == 0 else 0.0
    rmse = np.sqrt(np.mean(errors ** 2))

    # Jaccard index of the "expressed" (non-zero) masks
    binary_y_true = (y_true != 0).astype(int)
    binary_y_pred = (y_pred != 0).astype(int)
    intersection = np.sum(binary_y_true * binary_y_pred)
    union = np.sum(np.maximum(binary_y_true, binary_y_pred))
    ji = intersection / union if union != 0 else 1.0

    return {
        'Pearson r': rp,
        'Spearman r': r,
        'R-squared': r2,
        'RMSE': rmse,
        'Jaccard index': ji,
    }

In [None]:
# Re-seed so the resampling below is repeatable
np.random.seed(888)

In [None]:
# Define the number of bootstrap samples
n_bootstrap_samples = 1000
# Number of cells drawn for each replicate
n_samples_to_pick = 200
# Central interval reported as "CI Lower" / "CI Upper"
confidence_level = 0.95

# Initialize empty DataFrames
total_rs_df = pd.DataFrame()  # One row per (gene, model) with R-squared and its interval
bootstrap_df = pd.DataFrame()  # To store all bootstrap results

In [None]:
# Bootstrap for the Spateo results.
# Iterates over `target_genes` (the genes the models predicted); the iterable used
# before, `all_genes`, is never defined in this notebook. Passing the list straight
# to tqdm (no enumerate) also lets the progress bar show a total.
for gene in tqdm(target_genes, desc="Computing metrics for all genes..."):
    # Observed expression and Spateo prediction for this gene
    # (assumes the rows of spateo_reconst_df are in the same cell order as merfish_fov — TODO confirm)
    y = merfish_fov[:, gene].X.toarray().reshape(-1)
    spateo_results_target = spateo_reconst_df[gene].values.reshape(-1)

    if np.isnan(y).any() or np.isnan(spateo_results_target).any():
        print(f"Array contains NaN values for gene {gene}")
        continue

    # Metrics are evaluated only on cells with non-zero observed expression
    non_zero_indices = np.nonzero(y)[0]
    y_non_zero = y[non_zero_indices]
    spateo_results_target_non_zero = spateo_results_target[non_zero_indices]

    # Resampling: every replicate scores a random subset drawn without replacement.
    # NOTE: np.random.choice raises ValueError if fewer than n_samples_to_pick cells are non-zero.
    bootstrap_metrics = []
    for _ in range(n_bootstrap_samples):
        indices = np.random.choice(len(y_non_zero), size=n_samples_to_pick, replace=False)
        bootstrap_metrics.append(
            compute_metrics(y_non_zero[indices], spateo_results_target_non_zero[indices])
        )

    # Convert bootstrap metrics to DataFrame
    spateo_bootstrap_df = pd.DataFrame(bootstrap_metrics)
    spateo_bootstrap_df.columns = [f"Spateo {col} {gene}" for col in spateo_bootstrap_df.columns]
    bootstrap_df = pd.concat([bootstrap_df, spateo_bootstrap_df], axis=1)

    # Compute confidence intervals as quantiles of the replicate metrics
    ci_lower = spateo_bootstrap_df.quantile((1 - confidence_level) / 2)
    ci_upper = spateo_bootstrap_df.quantile(1 - (1 - confidence_level) / 2)

    # Original metrics for the gene (all non-zero cells)
    original_metrics = compute_metrics(y_non_zero, spateo_results_target_non_zero)

    # Append metric and confidence intervals to DataFrame
    new_rs = pd.DataFrame([{'Gene names': gene, 'Model type': "Spateo", 'R-squared': original_metrics['R-squared'],
                            'CI Lower': ci_lower[f'Spateo R-squared {gene}'], 'CI Upper': ci_upper[f'Spateo R-squared {gene}']}])
    total_rs_df = pd.concat([total_rs_df, new_rs])

In [None]:
# Bootstrap for the NCEM results.
# Iterates over `target_genes` (the columns of the NCEM predictions); the iterable
# used before, `all_genes`, is never defined in this notebook. Passing the list
# straight to tqdm (no enumerate) also lets the progress bar show a total.
for gene in tqdm(target_genes, desc="Computing NCEM metrics for all genes..."):
    # Observed expression (NCEM-processed object) and NCEM prediction for this gene
    y = mouse_brain_ncem[:, gene].X.toarray().reshape(-1)
    NCEM_results_target = ncem_reconst_df[gene].values.reshape(-1)

    if np.isnan(y).any() or np.isnan(NCEM_results_target).any():
        print(f"Array contains NaN values for gene {gene}")
        continue

    # Metrics are evaluated only on cells with non-zero observed expression
    non_zero_indices = np.nonzero(y)[0]
    y_non_zero = y[non_zero_indices]
    NCEM_results_target_non_zero = NCEM_results_target[non_zero_indices]

    # Resampling: every replicate scores a random subset drawn without replacement.
    # NOTE: np.random.choice raises ValueError if fewer than n_samples_to_pick cells are non-zero.
    bootstrap_metrics = []
    for _ in range(n_bootstrap_samples):
        indices = np.random.choice(len(y_non_zero), size=n_samples_to_pick, replace=False)
        bootstrap_metrics.append(
            compute_metrics(y_non_zero[indices], NCEM_results_target_non_zero[indices])
        )

    # Convert bootstrap metrics to DataFrame
    ncem_bootstrap_df = pd.DataFrame(bootstrap_metrics)
    ncem_bootstrap_df.columns = [f"NCEM {col} {gene}" for col in ncem_bootstrap_df.columns]
    bootstrap_df = pd.concat([bootstrap_df, ncem_bootstrap_df], axis=1)

    # Compute confidence intervals as quantiles of the replicate metrics
    ci_lower = ncem_bootstrap_df.quantile((1 - confidence_level) / 2)
    ci_upper = ncem_bootstrap_df.quantile(1 - (1 - confidence_level) / 2)

    # Original metrics for the gene (all non-zero cells)
    original_metrics = compute_metrics(y_non_zero, NCEM_results_target_non_zero)

    # Append metrics and confidence intervals to DataFrames
    new_rs = pd.DataFrame([{'Gene names': gene, 'Model type': "NCEM", 'R-squared': original_metrics['R-squared'],
                            'CI Lower': ci_lower[f'NCEM R-squared {gene}'], 'CI Upper': ci_upper[f'NCEM R-squared {gene}']}])
    total_rs_df = pd.concat([total_rs_df, new_rs])

In [None]:
# Change the path to an appropriate local directory
save_folder = f"/mnt/d/SCAnalysis/Spateo_MERFISH_benchmark"
# Replace the repeated index left by the row-wise concatenation with 0..n-1 before saving
total_rs_df.index = np.arange(len(total_rs_df))
total_rs_df.to_csv(os.path.join(save_folder, f"fov{fov_number}_benchmark_R-sq_results.csv"))

#### Barplot

In [None]:
def fisher_z_test_with_ci(r1, ci_lower1, ci_upper1, r2, ci_lower2, ci_upper2):
    """Two-sided z-test for the difference between two Fisher-transformed values.

    Each value is mapped through z = 0.5 * log((1 + r) / (1 - r)). Its
    variance is approximated from the width of the supplied interval,
    treated as a +/- 1.96 standard-error band.

    Args:
        r1, r2: The two values being compared.
        ci_lower1, ci_upper1: Interval bounds belonging to ``r1``.
        ci_lower2, ci_upper2: Interval bounds belonging to ``r2``.

    Returns:
        Two-sided p-value under the standard normal distribution.
    """
    # Imported here because the notebook never imports `norm` at file level.
    from scipy.stats import norm

    z1 = 0.5 * np.log((1 + r1) / (1 - r1))
    z2 = 0.5 * np.log((1 + r2) / (1 - r2))
    var_z1 = ((ci_upper1 - ci_lower1) / (2 * 1.96)) ** 2
    var_z2 = ((ci_upper2 - ci_lower2) / (2 * 1.96)) ** 2
    z_diff = (z1 - z2) / np.sqrt(var_z1 + var_z2)
    # sf() is the upper tail; unlike 1 - cdf() it does not round to 0 for large |z|
    p_value = 2 * norm.sf(abs(z_diff))
    return p_value

In [None]:
# Change the path to the directory these were saved to
save_folder = f"/mnt/d/SCAnalysis/Spateo_MERFISH_benchmark"
# Per-gene, per-model R-squared values and interval bounds for the selected FOV
metric_results = pd.read_csv(os.path.join(save_folder, f"fov{fov_number}_benchmark_R-sq_results.csv"), index_col=0)

In [None]:
from scipy.stats import ttest_ind
from statsmodels.stats.multitest import multipletests

In [None]:
results = []

# Compare Spateo against every other model, gene by gene
for gene in metric_results["Gene names"].unique():
    gene_rows = metric_results[metric_results["Gene names"] == gene]
    is_spateo = gene_rows["Model type"] == "Spateo"
    spateo_row = gene_rows[is_spateo].iloc[0]

    for _, row in gene_rows[~is_spateo].iterrows():
        p_value = fisher_z_test_with_ci(
            spateo_row["R-squared"], spateo_row["CI Lower"], spateo_row["CI Upper"],
            row["R-squared"], row["CI Lower"], row["CI Upper"],
        )
        results.append([gene, row["Model type"], p_value])

# Convert results to a dataframe
results_df = pd.DataFrame(results, columns=["Gene", "Model", "p-value"])

# Adjust p-values for multiple comparisons using Benjamini-Hochberg correction.
# The procedure ranks the p-values and enforces monotone q-values; dividing by the
# row position of an unsorted table (as was done before) does neither, so the
# already-imported statsmodels implementation is used instead. Undefined p-values
# are left out of the correction and keep a NaN q-value.
finite = results_df["p-value"].notna()
results_df["q-value"] = np.nan
if finite.any():
    results_df.loc[finite, "q-value"] = multipletests(results_df.loc[finite, "p-value"], method="fdr_bh")[1]
results_df

In [None]:
pastel_colors = sns.color_palette("pastel")
# Convert the colors to hex codes (one as_hex() call converts the whole palette;
# the per-colour comprehension rebuilt the same list once for every colour)
colors_hex = pastel_colors.as_hex()
# Swap the first two colours
colors_hex[0], colors_hex[1] = colors_hex[1], colors_hex[0]
colors_hex

In [None]:
# Column of `metric_results` drawn as bar height. The plotting code reads `col`,
# which was never assigned anywhere in the notebook.
col = "R-squared"

# Category orders, by first appearance in `metric_results`
gene_order = list(metric_results["Gene names"].unique())
model_order = list(metric_results["Model type"].unique())
# Width of a single bar: the 0.8 total width per gene shared between the models
bar_width = 0.8 / len(model_order)


def _metric_value(gene, model, column):
    """Return `column` from the first `metric_results` row matching (gene, model)."""
    mask = (metric_results["Gene names"] == gene) & (metric_results["Model type"] == model)
    return metric_results[mask][column].values[0]


def _bar_center(gene, model):
    """Return the x coordinate of the centre of the (gene, model) bar."""
    return gene_order.index(gene) - bar_width / 2 * (len(model_order) - 1) + bar_width * model_order.index(model)


plt.figure(figsize=(12, 5))

# Use seaborn's barplot function with hue parameter for condition
ax = sns.barplot(data=metric_results, x="Gene names", y=col, hue="Model type", palette=colors_hex, edgecolor='black', dodge=True, ci=None)

# Add error bars running from "CI Lower" to "CI Upper" on every bar
for gene, model in zip(metric_results["Gene names"], metric_results["Model type"]):
    y = _metric_value(gene, model, col)
    ci_lower = _metric_value(gene, model, "CI Lower")
    ci_upper = _metric_value(gene, model, "CI Upper")
    ax.errorbar(_bar_center(gene, model), y, yerr=[[y - ci_lower], [ci_upper - y]], fmt='none', c='black', elinewidth=3, capsize=4.0, capthick=2.5)

# Add significance annotations
# Track how many brackets each gene already has so that later ones are stacked higher
annotation_count = {gene: 0 for gene in gene_order}
# (exclusive upper q-value bound, label), checked from most to least significant
significance_levels = [(0.00005, "****"), (0.0005, "***"), (0.005, "**"), (0.05, "*")]

for _, row in results_df.iterrows():
    gene = row["Gene"]
    model = row["Model"]
    q_value = row["q-value"]

    # Determine the asterisk symbol based on q-value
    symbol = next((label for bound, label in significance_levels if q_value < bound), None)
    if symbol is None:
        continue  # Skip if not significant

    # Bar heights and centres of the compared model and of Spateo
    y = _metric_value(gene, model, col)
    x = _bar_center(gene, model)
    y_spateo = _metric_value(gene, "Spateo", col)
    x_spateo = _bar_center(gene, "Spateo")

    # Calculate the vertical position for the annotation
    annotation_offset = annotation_count[gene] * 0.15 + 0.1
    y_max = max(y, y_spateo) + annotation_offset

    # Plot the line between the Spateo bar and the other model bar
    ax.plot([x, x, x_spateo, x_spateo], [y_max, y_max + 0.02, y_max + 0.02, y_max], lw=1.5, c='black')

    # Add the asterisk annotation above the line
    ax.text((x + x_spateo) / 2, y_max + 0.02, symbol, ha='center', va='bottom', color='black', fontsize=18)

    # Update the annotation count for the gene
    annotation_count[gene] += 1

# For better readability, place the legend outside of the plot
plt.legend(bbox_to_anchor=(1.01, 1), loc='upper left', fontsize=28)

plt.ylabel(r'Variance explained ($R^2$)', fontsize=24)
plt.xlabel('Target gene', fontsize=36)
plt.xticks(fontsize=30, rotation=90)
plt.yticks(fontsize=28)
plt.ylim(0.1, 1.2)

plt.show()

## Load FOV 4 of the CosMx lung cancer sample

In [None]:
# Replace with wherever this file is stored locally
# (reassigns path_to_cosmx, which held the MERFISH path in the previous section)
path_to_cosmx = "/mnt/d/SCData/Spateo_data/CosMx/fov_4.h5ad"
# Replace with wherever the L:R database is stored locally
# NOTE(review): this loads the *mouse* ligand:receptor table although the model below is
# configured with species="human" — confirm whether the human table was intended
lr_db = pd.read_csv("/mnt/c/Users/danie/Desktop/Github/Github/spateo-release-main/spateo/tools/database/lr_db_mouse.csv", index_col=0)

In [None]:
# Read the CosMx FOV into memory
lung_fov4 = anndata.read_h5ad(path_to_cosmx)
# Tag the data type in .uns — presumably read by Spateo downstream; TODO confirm
lung_fov4.uns["__type"] = "UMI"

### Figure S5o- spatially-resolved cell types plot

In [None]:
# Plot the cells coloured by predicted cell type, using the colours stored in .uns
st.pl.geo(
    lung_fov4, 
    color=["predicted_celltypes"], 
    show_legend='upper left', 
    save_show_or_return='show', 
    figsize=(5, 3), 
    color_key=lung_fov4.uns["celltype_colors"]
)

### Benchmark w/ the COMMOT CCI array

In [None]:
# Hard-coded lower / upper distance bounds for this FOV
lb = 120.0
ub = 336.1

#### Run Spateo CCI model (can skip if predictions .csv file was created locally or downloaded from the folder)

In [None]:
# Set to the folders that the inputs (ligands list, receptors list, targets list) are contained in and that the outputs (model results) will save to:
cci_input_directory = "/mnt/d/SCAnalysis/Spateo_CosMx_benchmark/CCI_inputs"
cci_output_directory = "/mnt/d/SCAnalysis/Spateo_CosMx_benchmark/CCI_outputs"
# Output file name for this FOV
cci_output_id = os.path.join(cci_output_directory, "lung_fov4_target_genes.csv")
# Text files listing the ligands, receptors and target genes
cci_ligands_file = os.path.join(cci_input_directory, "ligands.txt")
cci_receptors_file = os.path.join(cci_input_directory, "receptors.txt")
cci_targets_file = os.path.join(cci_input_directory, "targets.txt")

In [None]:
# For clarity, this is how the distance bounds are determined
# (running this cell and the next one overwrites the hard-coded lb / ub set earlier)
# Lower bound: bandwidth search targeting 9 neighbors
lb = st.tl.find_neighbors.find_bw_for_n_neighbors(
    lung_fov4,
    coords_key="spatial",
    target_n_neighbors=9,
    initial_bw=100,
    exclude_self=True
)

In [None]:
# Upper bound: same search targeting 70 neighbors
ub = st.tl.find_neighbors.find_bw_for_n_neighbors(
    lung_fov4,
    coords_key="spatial",
    target_n_neighbors=70,
    initial_bw=100,
    exclude_self=True
)

In [None]:
# Define inputs:
adata_path = path_to_cosmx
# Point at the output *file* (as in the MERFISH section). With the bare output
# directory here, os.path.dirname() below resolved to its parent, so the
# CCI output folder itself was never created and `cci_output_id` went unused.
output_path = cci_output_id
# Use the ligand/receptor paths from the model fitting:
ligand_path = cci_ligands_file
receptor_path = cci_receptors_file
target_path = cci_targets_file
cci_dir_path = database_dir
mod_type = "lr"
species = "human"
distr = "poisson"

# Key storing cell type information
group_key = "predicted_celltypes"

# Key storing your spatial coordinates
coords_key = "spatial"
# lb / ub are the distance bounds set in the preceding cells
distance_membrane_bound = lb
distance_secreted = ub
minbw = lb * 1.5
maxbw = ub

# exist_ok=True makes the cell safe to re-run and removes the separate existence check
os.makedirs(os.path.dirname(output_path), exist_ok=True)

In [None]:
# Build the argument parser / argument list for the Spateo model; unlike the MERFISH run,
# custom ligand and receptor lists are supplied here
parser, args_list = st.tl.define_spateo_argparse(
    adata_path=adata_path,
    custom_lig_path=ligand_path,
    custom_rec_path=receptor_path,
    targets_path=target_path,
    cci_dir=cci_dir_path,
    mod_type=mod_type,
    distr=distr,
    species=species,
    group_key=group_key,
    coords_key=coords_key,
    distance_membrane_bound=distance_membrane_bound,
    distance_secreted=distance_secreted,
    minbw=minbw,
    maxbw=maxbw,
    output_path=output_path,
)

In [None]:
import time

# Wall-clock start of the run
t1 = time.time()

# Set up, fit, then predict and save — in that order
swr_model = st.tl.MuSIC(parser, args_list)
swr_model._set_up_model()
swr_model.fit()
swr_model.predict_and_save()

t_last = time.time()

print("Total Time Elapsed:", np.round(t_last - t1, 2), "seconds")
print("-" * 60)

In [None]:
# Note that the predictions.csv file is also provided in the Dropbox

#### Run COMMOT for the comparison (can skip if the AnnData object was already created locally or downloaded from the folder)

In [None]:
# Seed NumPy's global RNG before the COMMOT runs
np.random.seed(42)

In [None]:
# Processing for secreted signaling (will add fields to the AnnData object)
# Load the CellChat secreted-signaling pairs, then filter them against this FOV (min_cell_pct=0.05)
df_cellchat = ct.pp.ligand_receptor_database(species='human', signaling_type='Secreted Signaling', database='CellChat')
df_cellchat_filtered = ct.pp.filter_lr_database(df_cellchat, lung_fov4, min_cell_pct=0.05)

# The upper distance bound is used as the distance threshold
ct.tl.spatial_communication(
    lung_fov4,
    database_name='cellchat', 
    df_ligrec=df_cellchat_filtered, 
    dis_thr=ub, 
    heteromeric=True, 
    pathway_sum=True
)

In [None]:
# Processing for ECM signaling
df_cellchat = ct.pp.ligand_receptor_database(species='human', signaling_type='ECM-Receptor', database='CellChat')
df_cellchat_filtered = ct.pp.filter_lr_database(df_cellchat, lung_fov4, min_cell_pct=0.05)

# Both models operate w/ the assumption that ECM components diffuse about as far as other extracellular factors
# NOTE(review): this call reuses database_name='cellchat' — confirm that it does not
# overwrite entries written by the secreted-signaling call above
ct.tl.spatial_communication(
    lung_fov4,
    database_name='cellchat', 
    df_ligrec=df_cellchat_filtered, 
    dis_thr=ub, 
    heteromeric=True, 
    pathway_sum=True
)

In [None]:
# Processing for membrane-bound signaling
df_cellchat = ct.pp.ligand_receptor_database(species='human', signaling_type='Cell-Cell Contact', database='CellChat')
df_cellchat_filtered = ct.pp.filter_lr_database(df_cellchat, lung_fov4, min_cell_pct=0.05)

# Run on `lung_fov4`, the object loaded above and used for the other signaling types;
# the name `lung5_fov4` used here before is never defined in the notebook.
# Contact-dependent signaling uses the lower distance bound as its threshold.
ct.tl.spatial_communication(
    lung_fov4,
    database_name='cellchat',
    df_ligrec=df_cellchat_filtered,
    dis_thr=lb,
    heteromeric=True,
    pathway_sum=True
)

In [None]:
# Save AnnData object with COMMOT info- this will also be uploaded to the Dropbox
path_to_cosmx_commot = "/mnt/d/SCData/Spateo_data/CosMx/fov_4_COMMOT.h5ad"
# `lung_fov4` is the object the COMMOT calls were run on; `lung5_fov4` is never defined
lung_fov4.write_h5ad(path_to_cosmx_commot)

In [None]:
# Compute signal received for each cell predicted by COMMOT:
# every matrix stored in .obsp is summed down its rows (axis=0), giving one total per cell.
# Reads `lung_fov4` (the object the COMMOT calls were run on; `lung5_fov4` is never defined).
# Building the frame in one step also avoids writing float sums into a table that was
# pre-filled with integer zeros.
commot_signal_received = pd.DataFrame(
    {
        key: np.asarray(lung_fov4.obsp[key].sum(axis=0)).reshape(-1)
        for key in lung_fov4.obsp.keys()
    },
    index=lung_fov4.obs_names,
)

In [None]:
# Write the per-cell COMMOT signal table to disk
save_path = "/mnt/d/SCData/Spateo_data/CosMx/fov_4_COMMOT_signal_received.csv"
commot_signal_received.to_csv(save_path)

### Figure S5p- comparison of COMMOT signal to Spateo signal

In [None]:
# Reload the COMMOT signal table (lets this section run without repeating the COMMOT cells)
path_to_commot_signal_received = "/mnt/d/SCData/Spateo_data/CosMx/fov_4_COMMOT_signal_received.csv"
commot_signal_received = pd.read_csv(path_to_commot_signal_received, index_col=0)
# Fresh copy of the CosMx FOV read from the original file
lung5_fov4_spateo = anndata.read_h5ad(path_to_cosmx)

In [None]:
# Path to Spateo model design matrix
spateo_dm_path = "/mnt/d/SCData/Spateo_data/CosMx/design_matrix_full.csv"
spateo_dm = pd.read_csv(spateo_dm_path, index_col=0)

In [None]:
# We define the presence or absence of a received signal in each cell using the Jaccard index, applied to both the Spateo signal array and the COMMOT signal array
def jaccard(x, y):
    """Return the Jaccard index of two vectors after casting them to bool.

    Both arguments must provide ``.astype`` and ``.values`` (for example
    pandas Series) and must have the same length.
    """
    from scipy.spatial.distance import cdist

    # cdist works on 2-D inputs, so each vector becomes a single-row matrix
    row_x = x.astype(bool).values.reshape(1, -1)
    row_y = y.astype(bool).values.reshape(1, -1)

    # cdist returns the Jaccard *distance*; the index is its complement
    distance = cdist(row_x, row_y, metric='jaccard')[0][0]
    return 1 - distance

In [None]:
# This is to match the interactions between the Spateo array and the COMMOT array
def match_columns(commot_columns, design_columns):
    """Pair COMMOT column names with the design-matrix columns they refer to.

    A COMMOT name is considered only if it contains exactly three dashes. Its
    last two dash-separated fields are joined as ``"<third>:<fourth>"`` and
    the pair is kept when that name is found in ``design_columns``.

    Returns:
        dict mapping COMMOT column name -> design-matrix column name.
    """
    pairs = {}
    for name in commot_columns:
        fields = name.split("-")
        # Exactly three dashes means exactly four dash-separated fields
        if len(fields) != 4:
            continue

        candidate = f"{fields[2]}:{fields[3]}"
        if candidate in design_columns:
            pairs[name] = candidate
    return pairs

def compute_jaccard(matched_columns, commot_df, design_df):
    """Compute the Jaccard index for every matched pair of columns.

    Each column is reduced to a "value > 0" mask before the comparison.

    Args:
        matched_columns: dict mapping a column of ``commot_df`` to a column of ``design_df``.
        commot_df: Table holding the first column of every pair.
        design_df: Table holding the second column of every pair.

    Returns:
        List of ``(commot_col, design_col, jaccard_index)`` tuples, in the
        iteration order of ``matched_columns``.
    """
    return [
        (commot_col, design_col, jaccard(commot_df[commot_col] > 0, design_df[design_col] > 0))
        for commot_col, design_col in matched_columns.items()
    ]

In [None]:
# Pair each COMMOT column with its counterpart in the Spateo design matrix
matched_columns = match_columns(
    commot_signal_received.columns,
    spateo_dm.columns
)

In [None]:
# Jaccard index of the "signal > 0" masks for every matched pair
jaccard_indices = compute_jaccard(
    matched_columns,
    commot_signal_received,
    spateo_dm
)

In [None]:
# One row per matched pair
jaccard_df = pd.DataFrame(
    jaccard_indices,
    columns=['Commot Column', 'Design Column', 'Jaccard Index']
)
jaccard_df

In [None]:
# Number of "active signals" for each cell:
# restrict both tables to the matched columns, in matching order
commot_features = jaccard_df["Commot Column"]
spateo_features = jaccard_df["Design Column"]

commot_sub = commot_signal_received[commot_features]
spateo_dm_sub = spateo_dm[spateo_features]

In [None]:
# Number of "active signals" for each cell:
# mark every non-zero entry with 1. The vectorised comparison yields the same 0/1 table
# as applying a lambda to each element, without the per-element Python calls.
commot_sub_nz = (commot_sub != 0).astype(int)
spateo_dm_sub_nz = (spateo_dm_sub != 0).astype(int)

# Row sums = number of matched interactions with a non-zero signal in each cell
active_signals_commot = commot_sub_nz.sum(axis=1)
active_signals_spateo = spateo_dm_sub_nz.sum(axis=1)

In [None]:
from scipy.stats import pearsonr, spearmanr

# Agreement between the per-cell active-signal counts of the two methods
# (the variable names carry a "nonzero" suffix, but every cell is included)
r_nonzero, _ = spearmanr(active_signals_commot, active_signals_spateo)
rp_nonzero, _ = pearsonr(active_signals_commot, active_signals_spateo)

# One point per cell: COMMOT count on x, Spateo count on y
plt.scatter(
    active_signals_commot,
    active_signals_spateo,
    s=50,
    facecolors='darkorange',
    edgecolors='black',
    linewidths=0.75,
)

# Title (reporting both correlations) and axis labels
plt.title(f"Active signals for COMMOT and Spateo\nSpearman r = {r_nonzero:.3f}, Pearson r = {rp_nonzero:.3f}", fontsize=16)
plt.xlabel("Predicted signals per cell- COMMOT", fontsize=16)
plt.ylabel("Predicted signals per cell- Spateo", fontsize=16)

# Tick label sizes
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)

plt.show()

### Figure S5q- Spearman comparison

In [None]:
# Find the local path where target genes were expressed
cci_input_directory = "/mnt/d/SCAnalysis/Spateo_CosMx_benchmark/CCI_inputs"
cci_targets_file = os.path.join(cci_input_directory, "targets.txt")

In [None]:
with open(cci_targets_file, "r") as file:
    lines = file.readlines()

targets = [line.strip() for line in lines]
targets

#### Use COMMOT signal to predict gene expression (skip to the next section if the Spearman correlations file was already saved)

In [None]:
lung5_fov4_spateo = anndata.read_h5ad(path_to_cosmx)

In [None]:
path_to_commot_signal_received = "/mnt/d/SCData/Spateo_data/CosMx/fov_4_COMMOT_signal_received.csv"
commot_signal_received = pd.read_csv(path_to_commot_signal_received, index_col=0)

In [None]:
adata_targets = lung5_fov4_spateo[:, targets].copy()

In [None]:
targets_df = pd.DataFrame(adata_targets.X.toarray(), columns=targets, index=adata_targets.obs_names)
targets_df

In [None]:
# Iteratively perform Poisson regression on each target gene, using the COMMOT
# received-signal table as the design matrix. For every gene the Pearson and Spearman
# correlations between observed and predicted values are stored, both over all cells
# and over the subset of cells in which the gene is nonzero.
models = {}
pearson_correlations_COMMOT = {}
spearman_correlations_COMMOT = {}
pearson_correlations_nz_subset_COMMOT = {}
spearman_correlations_nz_subset_COMMOT = {}
predictions = pd.DataFrame(0, columns=targets, index=adata_targets.obs_names)
not_modeled = []

# The design matrix is the same for every gene
X = commot_signal_received

for col in targets_df.columns:
    print(f"Performing Poisson regression on {col}")
    y = targets_df[col].values
    nonzero_names = targets_df[col][targets_df[col] != 0].index.tolist()
    y_nz = targets_df.loc[nonzero_names, col].values

    try:
        model = sm.GLM(y, X, family=sm.families.Poisson()).fit()
        models[col] = model
        y_pred = model.predict(X).values
        predictions[col] = y_pred
        y_pred_nz = predictions.loc[nonzero_names, col].values

        rp, _ = stats.pearsonr(y, y_pred)
        rs, _ = stats.spearmanr(y, y_pred)
        pearson_correlations_COMMOT[col] = rp
        spearman_correlations_COMMOT[col] = rs

        print(f"Pearson correlation coefficient for {col}: {rp}")
        print(f"Spearman correlation coefficient for {col}: {rs}")

        rp, _ = stats.pearsonr(y_nz, y_pred_nz)
        rs, _ = stats.spearmanr(y_nz, y_pred_nz)
        pearson_correlations_nz_subset_COMMOT[col] = rp
        spearman_correlations_nz_subset_COMMOT[col] = rs

        print(f"Pearson correlation coefficient for nonzero {col}: {rp}")
        print(f"Spearman correlation coefficient for nonzero {col}: {rs}")
    except Exception as err:
        # Best effort: a gene that fails is recorded and the loop moves on. Catching
        # `Exception` (not a bare `except`) lets KeyboardInterrupt through, and printing
        # the error keeps notebook-level mistakes (e.g. a missing import) visible instead
        # of silently marking every gene as not modeled.
        print(f"Could not model {col}: {type(err).__name__}: {err}")
        not_modeled.append(col)
        # The failure may have happened after some correlations were already stored;
        # drop them so that a gene never appears both in the saved correlations and in
        # the not-modeled list.
        for partial_results in (
            pearson_correlations_COMMOT,
            spearman_correlations_COMMOT,
            pearson_correlations_nz_subset_COMMOT,
            spearman_correlations_nz_subset_COMMOT,
        ):
            partial_results.pop(col, None)

In [None]:
# Only the Spearman correlations appear in the figure panel, so those are what get written out
save_dir = "/mnt/d/SCData/Spateo_data/CosMx"
pd.DataFrame.from_dict(
    spearman_correlations_COMMOT,
    orient='index',
    columns=['spearman'],
).to_csv(os.path.join(save_dir, "spearman_correlations.csv"))

# Genes whose model could not be fit, one name per line
with open(os.path.join(save_dir, "COMMOT_not_modeled.txt"), "w") as file:
    file.writelines(f"{g}\n" for g in not_modeled)

In [None]:
# Also save the models, one pickle per target gene
import pickle

model_dir = "/mnt/d/SCData/Spateo_data/CosMx/COMMOT_models"
# Create the directory that is actually written to
os.makedirs(model_dir, exist_ok=True)

for model_name, model in models.items():
    # Written as "<model_dir>/<gene>.pkl", the location the plotting cells load from
    with open(os.path.join(model_dir, f"{model_name}.pkl"), "wb") as file:
        pickle.dump(model, file)

#### Plot

In [None]:
lung5_fov4_spateo = anndata.read_h5ad(path_to_cosmx)
adata_targets = lung5_fov4_spateo[:, targets].copy()

In [None]:
targets_df = pd.DataFrame(adata_targets.X.toarray(), columns=targets, index=adata_targets.obs_names)
targets_df

In [None]:
# Load Spateo predictions to compute metrics for those as well
# (the first CSV column is used as the index):
cci_output_directory = "/mnt/d/SCAnalysis/Spateo_CosMx_benchmark/CCI_outputs"
spateo_save_path = os.path.join(cci_output_directory, "predictions.csv")
spateo_reconst_df = pd.read_csv(spateo_save_path, index_col=0)

In [None]:
# Load the COMMOT Spearman correlations written by the model-fitting section.
# NOTE: this rebinds `spearman_correlations_COMMOT` from a dict to a DataFrame indexed by gene.
save_dir = "/mnt/d/SCData/Spateo_data/CosMx"
commot_correlations_path = os.path.join(save_dir, "spearman_correlations.csv")
spearman_correlations_COMMOT = pd.read_csv(commot_correlations_path, index_col=0)

In [None]:
# Model fitting may have errored for some of these genes for the COMMOT model;
# their names were saved one per line in "COMMOT_not_modeled.txt" under `save_dir`.
# NOTE(review): `save_path` is not used in this cell; it is kept because other cells may reference it.
save_path = "/mnt/d/SCData/Spateo_data/CosMx/fov_4_COMMOT_fit_results.csv"

with open(os.path.join(save_dir, "COMMOT_not_modeled.txt")) as file:
    # One gene name per line
    not_modeled = file.read().splitlines()

not_modeled

In [None]:
# Per-gene agreement between observed values and the Spateo predictions, over all cells
# and over the cells in which the gene is nonzero (`*_nz`)
pearson_correlations_spateo = {}
spearman_correlations_spateo = {}
pearson_correlations_spateo_nz = {}
spearman_correlations_spateo_nz = {}

for col in spateo_reconst_df.columns:
    # Genes listed as not modeled for COMMOT are left out here as well
    if col in not_modeled:
        continue

    # Observed and predicted values for every cell
    y = targets_df[col].values.reshape(-1)
    y_pred = spateo_reconst_df[col].values.reshape(-1)

    # The same two vectors, restricted to cells with a nonzero observed value
    nonzero_names = targets_df[col][targets_df[col] != 0].index.tolist()
    y_nz = targets_df.loc[nonzero_names, col].values
    y_pred_nz = spateo_reconst_df.loc[nonzero_names, col].values

    rp, _ = stats.pearsonr(y, y_pred)
    rs, _ = stats.spearmanr(y, y_pred)
    pearson_correlations_spateo[col] = rp
    spearman_correlations_spateo[col] = rs

    print(f"Pearson correlation coefficient for {col}: {rp}")
    print(f"Spearman correlation coefficient for {col}: {rs}")

    rp, _ = stats.pearsonr(y_nz, y_pred_nz)
    rs, _ = stats.spearmanr(y_nz, y_pred_nz)
    pearson_correlations_spateo_nz[col] = rp
    spearman_correlations_spateo_nz[col] = rs

    print(f"Pearson correlation coefficient for {col}, nonzero subset: {rp}")
    print(f"Spearman correlation coefficient for {col}, nonzero subset: {rs}")

In [None]:
# Comparative barplot:
# Build a long-format table with one row per (gene, model). The Spateo correlations are
# looked up by gene name, so each COMMOT value is paired with the Spateo value of the same
# gene regardless of the order in which the two sets were computed. A gene without a
# Spateo value gets NaN.
spearman_df = pd.DataFrame({
    'Labels': list(spearman_correlations_COMMOT.index),
    'COMMOT-derived': spearman_correlations_COMMOT.values.reshape(-1),
    'Spateo': pd.Series(spearman_correlations_spateo, dtype=float).reindex(spearman_correlations_COMMOT.index).values,
}).melt('Labels', var_name='Model', value_name='Correlation')

In [None]:
# Seaborn's "pastel" palette; the bare expression below shows its first color
pastel_colors = sns.color_palette("pastel")
pastel_colors[0]

In [None]:
# Grouped barplot of the per-gene Spearman correlations, one bar per model.
# NOTE(review): `colors` is not assigned in this cell nor in the visible cells of this
# section that precede it, while `pastel_colors` above is not used in the visible cells --
# confirm which palette is intended here.
fig, ax = plt.subplots(1, 1, figsize=(30, 8))
sns.barplot(x='Labels', y='Correlation', hue='Model', data=spearman_df, palette=colors, edgecolor='black')
ax.set_title('Spearman correlations for modeled genes', fontsize=48)
ax.set_xlabel('Genes', fontsize=36)
ax.set_ylabel(r'Spearman ${r}$', fontsize=36)
ax.set_xticklabels(ax.get_xticklabels(), rotation=90, fontsize=32)  # Rotate x-axis labels
ax.set_ylim(0, 1)
# Get current y-tick labels and convert them to string with desired format
# (done after set_ylim so that the ticks reflect the final axis range)
y_tick_labels = [f'{label:.2f}' for label in ax.get_yticks()]
# Set new y-tick labels
ax.set_yticklabels(y_tick_labels, fontsize=32)

# Place the legend outside the axes, to the right
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5), fontsize=30)

plt.tight_layout()
plt.show()

### Figure S5r, s- examples of predicted effects for specific "signaling families"

In [None]:
# Reload the CosMx AnnData object (`path_to_cosmx` is not assigned in this cell)
lung5_fov4_spateo = anndata.read_h5ad(path_to_cosmx)

In [None]:
# Directory from which the pickled per-gene COMMOT models are loaded below
model_dir = "/mnt/d/SCData/Spateo_data/CosMx/COMMOT_models"

In [None]:
# Gene whose predicted effects are plotted; the cells below branch on "KRT19" and "DDR1"
target_gene = "KRT19"

#### Plots for the COMMOT models

In [None]:
with open(os.path.join(model_dir, f"{target_gene}.pkl"), "rb") as file:
    model = pickle.load(file)

In [None]:
sm_coeffs_data = model.summary().tables[1].data
sm_coeffs_df = pd.DataFrame(sm_coeffs_data[1:], columns=sm_coeffs_data[0])
sm_coeffs_df = sm_coeffs_df.set_index(sm_coeffs_df.columns[0])
sm_coeffs_df = sm_coeffs_df.astype(float)
sm_coeffs_df

In [None]:
# Choose which interactions to show for the selected target gene.
# NOTE(review): here KRT19 is paired with collagen-integrin interactions and DDR1 with
# WNT5A-FZD, whereas the Spateo plotting cell further down pairs DDR1 with the COL/FN1
# columns and KRT19 with WNT5A-FZD -- confirm that this difference is intended.
if target_gene == "KRT19":
    interactions = ["COL6A3-ITGA3_ITGB1", "COL4A2-ITGA2_ITGB1", "COL4A2-ITGA3_ITGB1", "COL6A1-ITGA2_ITGB1", "COL6A1-ITGA3_ITGB1", "COL9A2-ITGA3_ITGB1", "COL9A2-ITGA2_ITGB1"]
elif target_gene == "DDR1":
    interactions = [col for col in sm_coeffs_df.index if "WNT5A" in col and "FZD" in col]
else:
    # Fail loudly instead of continuing with an undefined (or stale) `interactions` list
    raise ValueError(f"No interactions defined for target gene {target_gene!r}; expected 'KRT19' or 'DDR1'")

# Keep the coefficient rows whose label contains any of the chosen interaction names
matching_columns = [col for col in sm_coeffs_df.index if any(interaction in col for interaction in interactions)]
filtered_df = sm_coeffs_df.loc[matching_columns]
filtered_df

In [None]:
# Drop the "commot-cellchat-" prefix from the row labels
filtered_df.index = [label.replace('commot-cellchat-', '') for label in filtered_df.index]
# Largest coefficient first
filtered_df = filtered_df.sort_values('coef', ascending=False)
# Keep the labels as a regular column too (used as the x variable of the barplot)
filtered_df['Interaction'] = filtered_df.index
filtered_df

In [None]:
# Coerce `coef` to numeric (entries that cannot be parsed become NaN) and sort again
filtered_df['coef'] = pd.to_numeric(filtered_df['coef'], errors='coerce')
filtered_df = filtered_df.sort_values('coef', ascending=False)

In [None]:
import matplotlib.colors as mcolors

# One color per bar: each coefficient is passed through a normalization set up with
# vmin=-1 and vmax=1 and then looked up in the "seismic" colormap
colormap = plt.get_cmap('seismic')
norm = mcolors.Normalize(vmin=-1, vmax=1)
colors = [colormap(norm(value)) for value in filtered_df['coef']]

# Barplot of the model coefficients of the selected interactions
fig, ax = plt.subplots(figsize=(3.5, 2))
sns.barplot(
    x='Interaction',
    y='coef',
    data=filtered_df,
    ax=ax,
    palette=colors,
    edgecolor='black',
)
ax.set_title(f'COMMOT-derived model \n predicted effects on {target_gene}', fontsize=14)
ax.set_xlabel('L:R interactions', fontsize=18)
ax.set_ylabel('Effect size', fontsize=18)
plt.xticks(rotation=90, fontsize=14)
plt.yticks(fontsize=10)
plt.show()

#### Plots for the Spateo models

In [None]:
# Row positions of the cells in which the target gene has a positive value, and the
# AnnData subset made of those cells
target_cells = np.where(lung5_fov4_spateo[:, target_gene].X.toarray() > 0)[0]
adata_target = lung5_fov4_spateo[target_cells].copy()

In [None]:
cci_output_directory = "/mnt/d/SCAnalysis/Spateo_CosMx_benchmark/CCI_outputs"
# If this directory/file does not exist (i.e. a different path was specified during model training), change the path below:
cci_output_id = os.path.join(cci_output_directory, f"lung_fov4_target_genes_{target_gene}.csv")
target_coeffs = pd.read_csv(cci_output_id, index_col=0)
# Keep only the columns whose name contains "b_"
target_coeffs = target_coeffs[[c for c in target_coeffs.columns if "b_" in c]]
target_coeffs

In [None]:
# NOTE(review): `target_cells` holds integer row positions (from np.where), but `.loc`
# selects by index label. This picks the intended rows only if the CSV index is the
# matching 0..n-1 integer range -- confirm; otherwise `.iloc` is needed.
target_coeffs = target_coeffs.loc[target_cells]
target_coeffs

In [None]:
import matplotlib.colors as mcolors

# Step 1: pick the coefficient columns to summarise for this target gene, and the
# signaling-family tag that is appended to the plot title.
# NOTE(review): the COMMOT plotting cell above pairs KRT19 with collagen-integrin
# interactions and DDR1 with WNT5A-FZD, the reverse of the mapping used here -- confirm.
if target_gene == "DDR1":
    signaling_family = "ECM"
    # Columns naming ITGB or CD44 together with COL or FN1
    target_coeffs_sub = target_coeffs.loc[:, [
        col for col in target_coeffs.columns
        if ("ITGB" in col or "CD44" in col) and ("COL" in col or "FN1" in col)
    ]]
    # For viewability, collapse the individual "COL..." elements into "Collagens"
    target_coeffs_sub.columns = [replace_col_with_collagens(c) for c in target_coeffs_sub.columns]
elif target_gene == "KRT19":
    signaling_family = "WNT"
    target_coeffs_sub = target_coeffs.loc[:, [col for col in target_coeffs.columns if "WNT5A" in col and "FZD" in col]]
else:
    # No plot is defined for other target genes
    signaling_family = None

# Step 2: summarise and plot (shared by both target genes)
if signaling_family is not None:
    # Per column: mean of the strictly positive coefficients over the selected cells
    means = target_coeffs_sub.apply(lambda x: x[x > 0].mean())
    means = pd.DataFrame(means, columns=["coeff"])
    means["Interaction"] = [idx.replace("b_", "") for idx in means.index]
    means = means.sort_values('coeff', ascending=False)

    # Use the seismic colormap, with a color scale symmetric around zero and spanning
    # +/- the largest mean coefficient
    colormap = plt.get_cmap('seismic')
    norm = mcolors.Normalize(vmin=-means['coeff'].max(), vmax=means['coeff'].max())
    colors = [colormap(norm(value)) for value in means['coeff']]

    fig, ax = plt.subplots(figsize=(3.5, 2))
    sns.barplot(x='Interaction', y='coeff', data=means, ax=ax, palette=colors, edgecolor='black')
    ax.set_title(f'Spateo model predicted \n effects on {target_gene}- {signaling_family}', fontsize=18)
    ax.set_xlabel('L:R interactions', fontsize=14)
    # NOTE(review): the label says "Normalized", but no normalization is applied in this
    # cell -- presumably the coefficients were normalized upstream; confirm.
    ax.set_ylabel('Normalized \n mean effect size', fontsize=14)
    plt.xticks(rotation=90, fontsize=14)
    plt.yticks(fontsize=10)
    plt.show()