## Notebook containing code used for manuscript figure 6

In [None]:
import anndata
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import dynamo as dyn
import spateo as st
import os
import seaborn as sns
import sys
from tqdm import tqdm

from scipy.spatial import KDTree

In [None]:
%config InlineBackend.print_figure_kwargs={'dpi': 300.0}

## Resources used here can be found: https://www.dropbox.com/scl/fo/7wqqasdedsz4vocytbfl1/AO9H5ArH1ArQvwL9hy3CUDg?rlkey=mr77qbw1apxzdm5mlt7wf7f0j&st=qz9yk66j&dl=0
## Database files used here can be found: https://www.dropbox.com/scl/fo/dcd95so9zhkb8lnjkkxep/ANwmkFeb-sgtS89leHQezlU?rlkey=saiul4j5rr1vt6lwjl4hirmwh&st=brpjqw2c&dl=0

### Make sure to change each file path to the relevant local folder

### Load AnnData for ZLI flanking subset and diencephalic ring subset

In [None]:
# Replace with wherever the object is stored locally
path_to_zli = "/mnt/d/SCData/Spateo_data/Spateo_E11.5_embryo_ZLI/E11.5_ZLI_final.h5ad"
path_to_dien_ring = "/mnt/d/SCData/Spateo_data/Spateo_E11.5_embryo_ZLI/E11.5_diencephalic_ring_final.h5ad"
# Replace with wherever the L:R database is stored locally
lr_db = pd.read_csv("/mnt/c/Users/danie/Desktop/Github/Github/spateo-release-main/spateo/tools/database/lr_db_mouse.csv", index_col=0)

In [None]:
e115_zli = anndata.read_h5ad(path_to_zli)
e115_zli.uns["__type"] = "UMI"

In [None]:
e115_zli.write_h5ad(path_to_zli)

In [None]:
e115_dien_ring = anndata.read_h5ad(path_to_dien_ring)
e115_dien_ring.uns["__type"] = "UMI"

### Figure 6a ZLI and others plot

In [None]:
# Build a two-level label: ZLI cells keep their cell type, every other cell becomes "Other".
# The copy is cast to object dtype so that "Other" can be assigned even when "mapped_celltype"
# is categorical (a categorical column rejects values that are not existing categories).
e115_zli.obs["zli_and_others"] = e115_zli.obs["mapped_celltype"].astype(object)
mask = e115_zli.obs["mapped_celltype"] != "Zona limitans intrathalamica"
# Row/column assignment on a DataFrame has to go through .loc; obs[mask, col] is not valid indexing.
e115_zli.obs.loc[mask, "zli_and_others"] = "Other"

In [None]:
# Build a two-level label: ZLI cells keep their cell type, every other cell becomes "Other".
# The copy is cast to object dtype so that "Other" can be assigned even when "mapped_celltype"
# is categorical (a categorical column rejects values that are not existing categories).
e115_dien_ring.obs["zli_and_others"] = e115_dien_ring.obs["mapped_celltype"].astype(object)
mask = e115_dien_ring.obs["mapped_celltype"] != "Zona limitans intrathalamica"
# Row/column assignment on a DataFrame has to go through .loc; obs[mask, col] is not valid indexing.
e115_dien_ring.obs.loc[mask, "zli_and_others"] = "Other"

### Figure 6b

In [None]:
# Note: the digitization axes are named "rc_coord" and "dv_coord" for the ZLI flanking region, and "heat" and "heat_orth" for the diencephalic ring

### Figure 6d ZLI spatial enrichment heatmap

In [None]:
# Fraction of cells in which each gene has a nonzero value.
# Nonzeros are counted on the matrix itself instead of through the `.A` shortcut, which is
# deprecated in recent SciPy releases and would densify the whole matrix; the result is
# flattened to a 1-D array so it can be used directly as a gene mask.
pos_rate = np.asarray((e115_zli.X != 0).sum(axis=0)).ravel() / e115_zli.n_obs

In [None]:
e115_zli = e115_zli[:, pos_rate > 0.01].copy()
e115_zli

In [None]:
e115_zli.uns['pp'] = {}
e115_zli.X = e115_zli.layers['count'].copy()
dyn.pp.normalize_cell_expr_by_size_factors(e115_zli)

In [None]:
# Change this to change the save path:
save_path = "/mnt/d/SCAnalysis/Spateo_E11.5_embryo_ZLI/zli_glm_deg_calibratedRC.csv"

In [None]:
dyn.tl.glm_degs(e115_zli, fullModelFormulaStr='~rc_coord')
e115_zli.uns['glm_degs'].to_csv(save_path)

In [None]:
e115_zli.uns['glm_degs'] = pd.read_csv(save_path, index_col=0)

In [None]:
df = e115_zli.uns['glm_degs'].copy()
df = df.sort_values('pval',ascending=True)
df['gene'] = df.index.to_list()
df['pval'] = df['pval'].astype(np.float32)

glm_genes = df.loc[df['pval'] < 0.01,:].index.to_list()
len(glm_genes)

In [None]:
exp_mtx = e115_zli.X.todense()
exp_mtx = pd.DataFrame(
    exp_mtx,
    index=e115_zli.obs.index,
    columns=e115_zli.var.index
)

In [None]:
# Shh is expressed along the floor plate as well as the ZLI- we would like only the portion that clusters with the ZLI along the R-C axis
exp_mtx.loc[e115_zli.obs["mapped_celltype"] != "Zona limitans intrathalamica", "Shh"] = 0
exp_mtx.loc[:, "Shh"].sum()

In [None]:
exp_mtx["digital_column"] = e115_zli.obs["rc_coord"].astype(int).to_list()
agg_exp_column = exp_mtx.groupby(["digital_column"]).mean()
agg_exp_column = agg_exp_column.transpose().sort_index(axis=1)

In [None]:
from scipy.ndimage import gaussian_filter1d

# Work on a copy that skips the first 16 digitized columns (positional slice).
agg_exp_column_tmp = agg_exp_column.iloc[:,16:].copy()


# Smooth each row with a 1-D Gaussian filter (sigma=3); each row comes back as a plain list.
agg_exp_column_tmp = agg_exp_column_tmp.apply(
    lambda x: gaussian_filter1d(x, 3).tolist(),
    axis=1,
)
# Rebuild a DataFrame from the per-row lists, restoring the row index and the retained column labels.
agg_exp_column_tmp = pd.DataFrame(
    agg_exp_column_tmp.to_list(),
    index=agg_exp_column.index,
    columns=agg_exp_column.columns[16:],
)

# Keep only the rows listed in glm_genes, then order the rows by the column label at which each row peaks.
agg_exp_column_tmp = agg_exp_column_tmp.loc[glm_genes]
agg_exp_column_tmp = agg_exp_column_tmp.loc[agg_exp_column_tmp.idxmax(axis=1).sort_values().index]

#### Cluster expression along R-C

In [None]:
from scipy.cluster.hierarchy import linkage, fcluster
from scipy.spatial.distance import pdist

In [None]:
# Min-max scale each row (gene) of the smoothed matrix to [0, 1] so that the clustering compares
# the shape of the profiles rather than their magnitude.
profiles = agg_exp_column_tmp.T
agg_exp_column_tmp_stand = ((profiles - profiles.min()) / (profiles.max() - profiles.min())).T

# Gene-by-gene correlation of the scaled profiles (computed once and reused below).
correlations = agg_exp_column_tmp_stand.T.corr()
correlations_array = np.asarray(correlations)

# Average-linkage hierarchical clustering on the pairwise distances between rows of the correlation
# matrix. `pdist` is imported by name from scipy.spatial.distance; `distance` itself is never imported.
row_linkage = linkage(pdist(correlations_array), method='average')

# Cut the tree into at most 7 clusters and store the label next to the correlations.
correlations['clust'] = fcluster(row_linkage, criterion="maxclust", t=7)

# One color per cluster; the palette is shuffled so neighbouring clusters do not get similar hues.
cluster_ids = sorted(set(correlations['clust']))
pal = sns.hls_palette(len(cluster_ids), l=0.5, s=0.8)
np.random.shuffle(pal)
lut = dict(zip(cluster_ids, pal))
row_colors = correlations['clust'].map(lut)


# Drop the trailing 'clust' column before plotting; rows and columns share the same linkage.
g = sns.clustermap(correlations.iloc[:,:-1],
                   row_linkage=row_linkage, col_linkage=row_linkage,
                   row_colors=row_colors, col_colors=row_colors,
                   method="average",figsize=(5, 5),
                   cmap="RdBu_r",
                   rasterized=True,xticklabels=False,
                  )


# Gene names in dendrogram order, used to place tick labels for a few selected genes.
reordered_labels = agg_exp_column_tmp.index[g.dendrogram_col.reordered_ind].tolist()
# Change these to any genes based on preference
use_labels = ["Shh", "Ptn","Tmem132c", "Fgf8","Ezr",
              "Irx3", "Dcx","Eml5","Stmn2",
              "Six3", "Wnt7b",
             ]
# +0.5 centers each tick on its heatmap cell.
use_ticks = [reordered_labels.index(label) + .5 for label in use_labels]
g.ax_heatmap.set(yticks=use_ticks, yticklabels=use_labels)
# plt.show() takes no figure argument; it renders the open figures.
plt.show()

### Figure 6e- polarity plots (for gene expression)

In [None]:
def polarity(
    test_df,
    column_dict: dict,
    region_key: str,
    palette: list,
    mode: str = "density",
    itv_rpt: int = 1,
    width: float = 5,
    height: float = 3.5,
    region_lower_bound=None,  # Optional parameters for region bounds
    region_upper_bound=None,
):
    """Visualize how DataFrame columns vary along a digitized region axis.

    Args:
        test_df (DataFrame): Data to be visualized; must contain ``region_key`` and every column
            named in ``column_dict``.
        column_dict (dict): Maps an annotation string to a list of column names. Each column is
            labeled ``"<column> <annotation>"`` in the plot.
        region_key (str): Column name in test_df indicating regions.
        palette (list): Curve colors for "density" mode, expected to hold one entry per plotted
            column. Not used in "exp" mode.
        mode (str, optional): "exp" draws the raw values per region with ``sns.relplot``;
            "density" draws, per column, a KDE over the regions weighted by the per-region mean,
            with a band derived from the 90% t-interval of that mean. Defaults to "density".
        itv_rpt (int, optional): Number of times each value is repeated before the interval is
            computed ("density" mode only). Defaults to 1.
        width (float, optional): Figure width in inches ("density" mode only).
        height (float, optional): Figure height in inches ("density" mode only).
        region_lower_bound (int, optional): Lower bound of the region to highlight.
        region_upper_bound (int, optional): Upper bound of the region to highlight. The highlight
            is drawn only in "density" mode and only when both bounds are given.

    Returns:
        The object returned by ``sns.relplot`` in "exp" mode, or the matplotlib Axes holding the
        curves in "density" mode.

    Raises:
        ValueError: If ``mode`` is neither "exp" nor "density".
    """
    # Local imports keep the function usable regardless of which notebook cells ran before it.
    import scipy.stats as stat
    from matplotlib.patches import Rectangle

    if mode not in ("exp", "density"):
        raise ValueError(f"mode must be 'exp' or 'density', got {mode!r}")

    # (column, plot label) pairs in the order given by column_dict.
    labelled_columns = [
        (column, column + " " + anno)
        for anno in column_dict
        for column in column_dict[anno]
    ]
    region_values = np.unique(test_df[region_key])

    if mode == "exp":
        regions, labels, values = [], [], []
        for region in region_values:
            df_tmp = test_df[test_df[region_key] == region]
            for column, label in labelled_columns:
                regions.extend([region] * len(df_tmp))
                labels.extend([label] * len(df_tmp))
                values.extend(df_tmp[column].to_numpy())
        df_plt = pd.DataFrame({region_key: regions, "Column": labels, "Mean value": values})
        return sns.relplot(data=df_plt, x=region_key, y="Mean value", hue="Column")

    # mode == "density": per region and column, the mean and the bounds of its 90% t-interval.
    regions, labels, means, lows, highs = [], [], [], [], []
    for region in region_values:
        df_tmp = test_df[test_df[region_key] == region]
        for column, label in labelled_columns:
            regions.append(region)
            labels.append(label)
            means.append(np.mean(df_tmp[column]))
            data = np.repeat(df_tmp[column].to_numpy(), itv_rpt)
            # The confidence level is passed positionally: its keyword was renamed from `alpha`
            # to `confidence` across SciPy versions, so the positional form works on both.
            low, high = stat.t.interval(0.90, df=len(data) - 1, loc=np.mean(data), scale=stat.sem(data))
            # The interval is undefined (NaN) e.g. for a single observation; fall back to the mean.
            if np.isnan(low):
                low = np.mean(data)
            if np.isnan(high):
                high = np.mean(data)
            lows.append(max(0, low))
            highs.append(high)

    digi_region = np.asarray(regions)
    column_list = np.asarray(labels)
    column_mean = np.asarray(means, dtype=float)
    # Small offset so that the KDE weights of a column are never all exactly zero.
    column_mean_low = np.asarray(lows, dtype=float) + 1e-10
    column_mean_high = np.asarray(highs, dtype=float) + 1e-10

    def _kde_curves(weights):
        """Return the (x, y) data of the weighted KDE lines seaborn draws on a throwaway figure."""
        frame = pd.DataFrame({region_key: digi_region, "Interaction": column_list, "Avg. effect": weights})
        plt.figure()
        kde_ax = sns.kdeplot(
            data=frame, x=region_key, common_norm=False, weights="Avg. effect", hue="Interaction", linewidth=3
        )
        curves = [line.get_data() for line in kde_ax.get_lines()]
        plt.close()
        return curves

    # KDE weights must be non-negative, so negative means are clipped to zero for the main curve.
    mean_curves = _kde_curves(np.clip(column_mean, 0, None))
    low_curves = _kde_curves(column_mean_low)
    high_curves = _kde_curves(column_mean_high)

    fig, ax = plt.subplots()
    fig.set_size_inches((width, height))
    uq_cl = np.unique(column_list)
    for k in range(len(uq_cl)):
        # NOTE(review): line k is paired with the k-th label/color counted from the end, i.e. this
        # assumes seaborn draws the hue levels in reverse order — confirm for the seaborn version in use.
        color = palette[len(palette) - k - 1]
        selected = column_list == uq_cl[-(k + 1)]
        total_mean = np.sum(column_mean[selected])
        x_vals, y_vals = mean_curves[k]
        ax.plot(x_vals, y_vals, '-', color=color)
        # Each KDE integrates to one, so the bound curves are rescaled by the ratio of total
        # bound weight to total mean weight to make them comparable with the main curve.
        ax.fill_between(
            x_vals,
            low_curves[k][1] * np.sum(column_mean_low[selected]) / total_mean,
            high_curves[k][1] * np.sum(column_mean_high[selected]) / total_mean,
            color=color,
            alpha=0.2,
        )

    if region_lower_bound is not None and region_upper_bound is not None:
        # Shade the requested region over the full current height of the axes.
        y_min, y_max = ax.get_ylim()
        region_box = Rectangle(
            (region_lower_bound, y_min),
            region_upper_bound - region_lower_bound,
            y_max - y_min,
            linewidth=1,
            edgecolor="#1CE6FF",
            facecolor="#1CE6FF",
            alpha=0.1,
        )
        ax.add_patch(region_box)
    return ax

In [None]:
# Wnt genes
genes = ["Wnt7b", "Wnt8b", "Wnt9a", "Wnt3a", "Wnt5a", "Wnt3"]

In [None]:
temp_df = pd.DataFrame(0, index=e115_zli.obs_names, columns=genes)
for gene in genes:
    temp_df[gene] = e115_zli[:, gene].X.toarray()

In [None]:
temp_df["rc_coord"] = e115_zli.obs["rc_coord"]

In [None]:
gene_dict = {
    "1": ["Wnt7b"],
    "2": ["Wnt8b"],
    "3": ["Wnt9a"],
    "4": ["Wnt3a"],
    "5": ["Wnt5a"],
    "6": ["Wnt3"],
}

In [None]:
import matplotlib as mpl
palette = [mpl.colors.to_hex(i) for i in sns.color_palette("tab10", n_colors=len(gene_dict))]
palette

In [None]:
ax = polarity(temp_df, gene_dict, region_key="rc_coord",
              palette=palette,
              itv_rpt=1,
             )

ax.tick_params(direction="out")
ax.set_yticks([])
# Remove the tick marks but keep the labels for the x-axis
ax.tick_params(axis='x', length=0, labelsize=24)

plt.xlim([-20, 120])
for spine in ax.spines.values():
    spine.set_linewidth(1.5)

In [None]:
# Replace path with appropriate local path for you
# Save through the figure that owns `ax`: once the plotting cell has finished, pyplot's "current figure"
# may no longer be the plot. `width`/`height` are not savefig options; set the size with
# polarity(..., width=..., height=...) instead.
ax.figure.savefig("/mnt/d/SCAnalysis/Spateo_E11.5_embryo_ZLI/KDE_expression_Wnts.pdf", bbox_inches="tight", transparent=True)

In [None]:
# Lhx genes
genes = ["Lhx2", "Lhx1", "Gbx2", "Lhx5", "Lhx1os", "Lhx9"]

In [None]:
temp_df = pd.DataFrame(0, index=e115_zli.obs_names, columns=genes)
for gene in genes:
    temp_df[gene] = e115_zli[:, gene].X.toarray()

In [None]:
temp_df["rc_coord"] = e115_zli.obs["rc_coord"]

In [None]:
gene_dict = {
    "1": ["Lhx2"],
    "2": ["Lhx1"],
    "3": ["Gbx2"],
    "4": ["Lhx5"],
    "5": ["Lhx1os"],
    "6": ["Lhx9"],
}

In [None]:
palette = [mpl.colors.to_hex(i) for i in sns.color_palette("tab10" ,n_colors=len(gene_dict))]
palette

In [None]:
ax = polarity(temp_df, gene_dict, region_key="rc_coord",
              palette=palette,
              itv_rpt=1,
              region_lower_bound=45,
              region_upper_bound=80,
             )

ax.tick_params(direction="out")
ax.set_yticks([])
# Remove the tick marks but keep the labels for the x-axis
ax.tick_params(axis='x', length=0, labelsize=24)

plt.xlim([-20, 120])
for spine in ax.spines.values():
    spine.set_linewidth(1.5)

In [None]:
# Replace path with appropriate local path for you
# Save through the figure that owns `ax`: once the plotting cell has finished, pyplot's "current figure"
# may no longer be the plot. `width`/`height` are not savefig options; set the size with
# polarity(..., width=..., height=...) instead.
ax.figure.savefig("/mnt/d/SCAnalysis/Spateo_E11.5_embryo_ZLI/KDE_expression_Lhx.pdf", bbox_inches="tight", transparent=True)

### For figure 6e-g: target genes for modeling

In [None]:
# Reload AnnData object (in case): 
e115_zli = anndata.read_h5ad(path_to_zli)
e115_zli.uns["__type"] = "UMI"

#### Ligands and receptors

In [None]:
# Change path to the local path that stores Spateo's database
lr_db = pd.read_csv("/mnt/c/Users/danie/Desktop/Github/Github/spateo-release-main/spateo/tools/database/lr_db_mouse.csv", index_col=0)

In [None]:
def _split_subunits(names):
    """Return the set of individual gene names, splitting multi-subunit entries on '_'."""
    return {part for name in names for part in name.split('_')}


# Ligands come from the 'from' column of the database, receptors from the 'to' column.
ligand_entries = lr_db['from'].unique().tolist()
receptor_entries = lr_db['to'].unique().tolist()

# Every gene that occurs as a ligand or receptor (complexes contribute each of their subunits).
combined = _split_subunits(ligand_entries + receptor_entries)
print(len(combined))

unique_l = _split_subunits(ligand_entries)
print(len(unique_l))
unique_r = _split_subunits(receptor_entries)
print(len(unique_r))

In [None]:
combined_sub = [g for g in combined if g in e115_zli.var_names]
print(len(combined_sub))

unique_l = [g for g in unique_l if g in e115_zli.var_names]
print(len(unique_l))

unique_r = [g for g in unique_r if g in e115_zli.var_names]
print(len(unique_r))

In [None]:
df_expression = pd.DataFrame(e115_zli[:, combined_sub].X.toarray(), index=e115_zli.obs_names, columns=combined_sub)

In [None]:
# Keep the ligands and receptors that are detected (value > 0) in more than n cells:
n = 2000
gene_counts = df_expression.gt(0).sum()

# Restrict the per-gene cell counts to each gene list, then apply the threshold within that subset
ligand_counts = gene_counts[unique_l]
receptor_counts = gene_counts[unique_r]
expressed_ligands = ligand_counts[ligand_counts > n].index.tolist()
expressed_receptors = receptor_counts[receptor_counts > n].index.tolist()

len(expressed_ligands), len(expressed_receptors)

In [None]:
with open("/mnt/d/SCAnalysis/Spateo_E11.5_embryo_ZLI/CCI_inputs/zli_ligands.txt", "w") as file:
    for item in expressed_ligands:
        file.write("%s\n" % item)

In [None]:
with open("/mnt/d/SCAnalysis/Spateo_E11.5_embryo_ZLI/CCI_inputs/zli_receptors.txt", "w") as file:
    for item in expressed_receptors:
        file.write("%s\n" % item)

#### Target genes

In [None]:
target_genes = [
    "Amot", "Arl4a", "Axin2", "Cdh8", "Celf4", "Chl1", "Cited2", "Clic6", "Colec12", "Cntn5",
    "Dcx", "Dlx2", "Eda", "Edil3", "Egfem1", "Eml5", "Ezr", "Ext1", "Flrt2", "Foxd1",
    "Gap43", "Gng3", "Igfbp5", "Id1", "Id4", "Ina", "Kcnip4", "Kcnq3", "Lrrc4c", "Magi1",
    "Mapk1", "Mdga2", "Meg3", "Mllt11", "Mycn", "Nkd1", "Nlgn1", "Nrxn3", "Ntn1", "Otx1",
    "Otx2os1", "Pcdh9", "Peg10", "Pdia6", "Plcl1", "Pten", "Ptk2", "Ptprn2", "Ptch1",
    "Rspo3", "Rtn1", "Sema5b", "Sfrp2", "Sgms1", "Slc1a2", "Slc22a23", "Slc6a15", "Spon1",
    "Stmn2", "Sulf1", "Sulf2", "Tead1", "Tenm4", "Th", "Thsd7a", "Tle1", "Trpm3", "Tmem132c",
    "Ybx1", "Zcchc18", "Zfp804b"
]

In [None]:
with open("/mnt/d/SCAnalysis/Spateo_E11.5_embryo_ZLI/CCI_inputs/zli_targets.txt", "w") as file:
    for item in target_genes:
        file.write("%s\n" % item)

In [None]:
# Set to the folders to save the inputs (ligands list, receptors list, targets list) and outputs (model results) to:
cci_input_directory = "/mnt/d/SCAnalysis/Spateo_E11.5_embryo_ZLI/CCI_inputs"
cci_output_directory = "/mnt/d/SCAnalysis/Spateo_E11.5_embryo_ZLI/CCI_outputs"
cci_output_id = os.path.join(cci_output_directory, "zli_target_genes.csv")

#### Initialize CCI model

In [None]:
e115_zli_lb = st.tl.find_neighbors.find_bw_for_n_neighbors(
    e115_zli,
    coords_key="z_correction",
    n_anchors=2000,
    target_n_neighbors=27,
    initial_bw=20,
    exclude_self=True
)
e115_zli_lb

In [None]:
e115_zli_ub = st.tl.find_neighbors.find_bw_for_n_neighbors(
    e115_zli,
    coords_key="z_correction",
    n_anchors=2000,
    target_n_neighbors=250,
    initial_bw=20,
    exclude_self=True
)
e115_zli_ub

In [None]:
# Change all paths in below block to the appropriate locations on your local machine

In [None]:
adata_path = path_to_zli
output_path = cci_output_id
# Use the ligand/receptor paths from the model fitting:
ligand_path = "/mnt/d/SCAnalysis/Spateo_E11.5_embryo_ZLI/CCI_inputs/zli_ligands.txt"
# Note that even though receptor path is given, this is not actually used for figure 6, which uses only the ligands because the data is very sparse- but if "mod_type" is "lr", this will be used.
receptor_path = "/mnt/d/SCAnalysis/Spateo_E11.5_embryo_ZLI/CCI_inputs/zli_receptors.txt"  
target_path = "/mnt/d/SCAnalysis/Spateo_E11.5_embryo_ZLI/CCI_inputs/zli_targets.txt"
cci_dir_path = "/mnt/c/Users/danie/Desktop/Github/Github/spateo-release-main/spateo/tools/database"
mod_type = "ligand"
species = "mouse"
group_key = "mapped_celltype"
coords_key = "z_correction"
distance_membrane_bound = 6.6
n_neighbors_membrane_bound = 27
distance_secreted = 16.5
# Effective radius for each cell, assuming a cell size of 10um, a signaling range of 250um in diameter, and cells that are not tightly packed,
# with a distance of 30um between neighbors (erring on the larger side)
n_neighbors_secreted = 250
minbw = 10.0
maxbw = 23.0
spatial_subsample = True

In [None]:
parser, args_list = st.tl.define_spateo_argparse(
    adata_path=adata_path,
    custom_lig_path=ligand_path,
    custom_rec_path=receptor_path,
    targets_path=target_path,
    cci_dir=cci_dir_path,
    mod_type=mod_type,
    species=species,
    group_key=group_key,
    coords_key=coords_key,
    distance_membrane_bound=distance_membrane_bound,
    n_neighbors_membrane_bound=n_neighbors_membrane_bound,
    distance_secreted=distance_secreted,
    n_neighbors_secreted=n_neighbors_secreted,
    minbw=minbw,
    maxbw=maxbw,
    spatial_subsample=spatial_subsample,
    output_path=output_path,
)

In [None]:
model = st.tl.MuSIC(parser, args_list)
model._set_up_model()
model.fit()
model.predict_and_save(adjust_for_subsampling=True)

#### Downstream model initialization

In [None]:
downstream_model = st.tl.MuSIC_Interpreter(parser, args_list)

##### Figure 6e (polarity plots for predicted CCI effects)

In [None]:
pairs = ["Vegfa-Dcx", "Agrn-Th", "Slit2-Ina", "Bmp6-Ext1", "Bmp7-Id1", "Ptn-Gap43", "Agrn-Stmn2", "Bmp7-Eml5"]

In [None]:
temp_df = pd.DataFrame(0, index=e115_zli.obs_names, columns=pairs) 

for pair in pairs:
    file_path = f"{cci_output_id[:-4]}_{pair.split('-')[1]}.csv"
    coeffs_df = pd.read_csv(file_path, index_col=0)
    temp_df[pair] = coeffs_df[f"b_{pair.split('-')[0]}"]

In [None]:
temp_df["rc_coord"] = e115_zli.obs["rc_coord"]

In [None]:
column_dict = {
    "1": ["Vegfa-Dcx"],
    "2": ["Agrn-Th"],
    "3": ["Slit2-Ina"],
    "4": ["Bmp6-Ext1"],
    "5": ["Bmp7-Id1"],
    "6": ["Ptn-Gap43"],
    "7": ["Agrn-Stmn2"],
    "8": ["Bmp7-Eml5"],
}

In [None]:
import matplotlib as mpl
palette = [mpl.colors.to_hex(i) for i in sns.color_palette("tab10" ,n_colors=len(column_dict))]
palette

In [None]:
ax = polarity(temp_df, column_dict, region_key="rc_coord",
              palette=palette,
              itv_rpt=1,
              region_lower_bound=45,
              region_upper_bound=80,
             )
ax.tick_params(direction="out")
ax.set_yticks([])
ax.tick_params(axis='x', length=0, labelsize=24)
plt.xlim([0, 110])
plt.ylim([0.00, 0.075])

for spine in ax.spines.values():
    spine.set_linewidth(1.5)

In [None]:
# Replace path with appropriate local path for you
# Save through the figure that owns `ax`: once the plotting cell has finished, pyplot's "current figure"
# may no longer be the plot. `width`/`height` are not savefig options; set the size with
# polarity(..., width=..., height=...) instead.
ax.figure.savefig("/mnt/d/SCAnalysis/Spateo_E11.5_embryo_ZLI/KDE_CCI_effects.pdf", bbox_inches="tight", transparent=True)

##### Run downstream TF-target model for figure 6f insights

In [None]:
# Note: "use_targets" and "use_ligands" can be toggled between True and False to fit the TF-target model on the downstream target genes of the CCI model or on the
# ligands of the CCI model, respectively. Only one of them should be True at a time. For convenience, the blocks are repeated here with the appropriate adjustment.

In [None]:
downstream_model.CCI_deg_detection_setup(
    group_key="mapped_celltype",
    use_ligands=False,
    use_receptors=False,
    use_targets=True
)

In [None]:
downstream_model.CCI_deg_detection(
    group_key="mapped_celltype",
    cci_dir_path=cci_dir_path,
    use_ligands=False,
    use_receptors=False,
    use_targets=True,
    use_dim_reduction=False,
    distr="poisson"
)

In [None]:
downstream_model.CCI_deg_detection_setup(
    group_key="mapped_celltype",
    use_ligands=True,
    use_receptors=False,
    use_targets=False
)

In [None]:
downstream_model.CCI_deg_detection(
    group_key="mapped_celltype",
    cci_dir_path=cci_dir_path,
    use_ligands=True,
    use_receptors=False,
    use_targets=False,
    use_dim_reduction=False,
    distr="poisson"
)

##### Visualize predicted effect of ligand on target

In [None]:
affected_gene = "Id1"
ligand = "Bmp6"

In [None]:
# Change the save path to appropriate local path:
save_path = f"/mnt/d/SCAnalysis/Spateo_E11.5_embryo_ZLI/ZLI_{affected_gene}_{ligand}_effect.html"

In [None]:
downstream_model.plot_interaction_effect_3D(
    target=affected_gene,
    interaction=ligand,
    save_path=save_path,
    pcutoff=99.9,
    size=3.5,
)

##### Visualize predicted effect of TF on ligand, target, etc.

In [None]:
affected_gene = "Id1"
tf = "Smad4"

In [None]:
# Change the save path to appropriate local path:
save_path = f"/mnt/d/SCAnalysis/Spateo_E11.5_embryo_ZLI/ZLI_{affected_gene}_{tf}_effect.html"

In [None]:
downstream_model.plot_tf_effect_3D(
    target=affected_gene,
    tf=tf,
    save_path=save_path,
    ligand_targets=True,
    target_gene_targets=False,
    size=3.5
)

##### Summarize interaction effects (resource used for figure 6g network construction)

In [None]:
interaction_effects_df = downstream_model.summarize_interaction_effects()
interaction_effects_df

In [None]:
# Change to appropriate path
save_path = f"/mnt/d/SCAnalysis/Spateo_E11.5_embryo_ZLI/ZLI_interaction_effect_summary.csv"

In [None]:
interaction_effects_df.to_csv(save_path)

In [None]:
interaction_effects_df = pd.read_csv(save_path, index_col=0)

In [None]:
# Top n ligands for each target gene
n = 10
threshold = 0.0

In [None]:
# Optional: remove particular rows that are very common, as well as some entries that are not intercellular ligands:
remove = ["Agrn", "Cdh2", "Ptn", "Igf2", "Mdk", "Mif", "Lipa"]

In [None]:
# Drop the rows named in `remove` when that optional list has been defined; otherwise keep every row,
# so that `main_targets_interaction_effects_summary` always exists for the cells that follow.
rows_to_drop = remove if "remove" in locals() else []
main_targets_interaction_effects_summary = interaction_effects_df.loc[
    ~interaction_effects_df.index.isin(rows_to_drop), :
]

In [None]:
# Get the most enriched effectors (ligands) for each target
main_targets_interaction_effects_summary = (main_targets_interaction_effects_summary - main_targets_interaction_effects_summary.min()) / (main_targets_interaction_effects_summary.max() - main_targets_interaction_effects_summary.min())

In [None]:
def _top_effectors(row):
    """Return the labels of the (at most) n largest entries of `row` that exceed `threshold`."""
    above_threshold = row[row > threshold]
    return above_threshold.nlargest(n).index.tolist()


top_n_columns_per_row = main_targets_interaction_effects_summary.apply(_top_effectors, axis=1)

# This was used to identify, e.g. target genes specific to a particular ligand/ligand family
for row_name, top_columns in top_n_columns_per_row.items():
    print(f"Row {row_name}: Top columns: {top_columns}")

### Figure 6b (CCI model result for the diencephalic ring)

#### Ligands and receptors

In [None]:
# Change path to the local path that stores Spateo's database
lr_db = pd.read_csv("/mnt/c/Users/danie/Desktop/Github/Github/spateo-release-main/spateo/tools/database/lr_db_mouse.csv", index_col=0)

In [None]:
def _split_subunits(names):
    """Return the set of individual gene names, splitting multi-subunit entries on '_'."""
    return {part for name in names for part in name.split('_')}


# Ligands come from the 'from' column of the database, receptors from the 'to' column.
ligand_entries = lr_db['from'].unique().tolist()
receptor_entries = lr_db['to'].unique().tolist()

# Every gene that occurs as a ligand or receptor (complexes contribute each of their subunits).
combined = _split_subunits(ligand_entries + receptor_entries)
print(len(combined))

unique_l = _split_subunits(ligand_entries)
print(len(unique_l))
unique_r = _split_subunits(receptor_entries)
print(len(unique_r))
In [None]:
combined_sub = [g for g in combined if g in e115_dien_ring.var_names]
print(len(combined_sub))

unique_l = [g for g in unique_l if g in e115_dien_ring.var_names]
print(len(unique_l))

unique_r = [g for g in unique_r if g in e115_dien_ring.var_names]
print(len(unique_r))

In [None]:
# Subset with `combined_sub` (the L:R genes present in this AnnData, as an ordered list), not the full
# `combined` set: the set is unordered and contains genes that are absent from var_names.
df_expression = pd.DataFrame(
    e115_dien_ring[:, combined_sub].X.toarray(),
    index=e115_dien_ring.obs_names,
    columns=combined_sub,
)

In [None]:
# Save ligands and receptors expressed in over n cells:
n = int(0.05 * e115_dien_ring.n_obs)
gene_counts = (df_expression > 0).sum()

# Filter ligands and receptors
expressed_ligands = gene_counts[unique_l][gene_counts > n].index.tolist()
expressed_receptors = gene_counts[unique_r][gene_counts > n].index.tolist()

len(expressed_ligands), len(expressed_receptors)

In [None]:
with open("/mnt/d/SCAnalysis/Spateo_E11.5_embryo_ZLI/CCI_inputs/diencephalic_ring_ligands.txt", "w") as file:
    for item in expressed_ligands:
        file.write("%s\n" % item)

In [None]:
with open("/mnt/d/SCAnalysis/Spateo_E11.5_embryo_ZLI/CCI_inputs/diencephalic_ring_receptors.txt", "w") as file:
    for item in expressed_receptors:
        file.write("%s\n" % item)

#### Target genes

In [None]:
# Target genes for the diencephalic ring model. Each gene is listed exactly once (first-occurrence
# order kept) so that the targets file written from this list contains no repeated entries.
target_genes = [
    "Ckb", "Neurog2", "Cenpw", "Zic1", "Rmst", "Gm29478", "Tubb2b", "Cpe", "Tuba1a", "Cdkn1c",
    "Igfbp2", "Zic4", "Hes5", "Tubb3", "Sox9", "Miat", "Ext1", "Clybl", "Nes", "Emb", "Serpinh1",
    "Elavl4", "Igdcc3", "Gli3", "Mecom", "Mest", "Stmn1", "Nxn", "Ptprd", "Nfia", "Id3", "Rgma",
    "Tmeff1", "Zfp503", "Zic5", "Gja1", "Zic3", "Tmsb4x", "Nrarp", "Calm1", "Map2", "Itm2c", "Sox11",
    "Tpm1", "Map1b", "Mllt3", "Ppp1r1a", "Tle4", "Ezr", "Rspo1", "Spry1", "Glis3", "Dusp6",
    "Sufu",
]

In [None]:
with open("/mnt/d/SCAnalysis/Spateo_E11.5_embryo_ZLI/CCI_inputs/diencephalic_ring_targets.txt", "w") as file:
    for item in target_genes:
        file.write("%s\n" % item)

#### Initialize CCI model

In [None]:
cci_lower_bound = st.tl.find_neighbors.find_bw_for_n_neighbors(
    e115_dien_ring,
    coords_key="z_correction",
    n_anchors=2000,
    target_n_neighbors=27,
    initial_bw=200,
    exclude_self=True
)
cci_lower_bound

In [None]:
# Bandwidth at which a cell has about 250 neighbors. The coordinates are taken from "z_correction",
# the same key used for the lower bound and passed to the model as `coords_key`, so that both
# bounds are measured in the same coordinate system.
cci_upper_bound = st.tl.find_neighbors.find_bw_for_n_neighbors(
    e115_dien_ring,
    coords_key="z_correction",
    n_anchors=2000,
    target_n_neighbors=250,
    initial_bw=200,
    exclude_self=True
)
cci_upper_bound

In [None]:
# Change all paths in below blocks to the appropriate locations on your local machine

In [None]:
cci_input_directory = "/mnt/d/SCAnalysis/Spateo_E11.5_embryo_ZLI/CCI_inputs"
cci_output_directory = "/mnt/d/SCAnalysis/Spateo_E11.5_embryo_ZLI/CCI_outputs/dien_ring_outputs"
cci_output_id = os.path.join(cci_output_directory, "dien_ring_target_genes.csv")

In [None]:
adata_path = path_to_dien_ring
output_path = cci_output_id
# Use the ligand/receptor paths from the model fitting:
ligand_path = "/mnt/d/SCAnalysis/Spateo_E11.5_embryo_ZLI/CCI_inputs/diencephalic_ring_ligands.txt"
# Note that even though receptor path is given, this is not actually used for figure 6, which uses only the ligands because the data is very sparse- but if "mod_type" is "lr", this will be used.
receptor_path = "/mnt/d/SCAnalysis/Spateo_E11.5_embryo_ZLI/CCI_inputs/diencephalic_ring_receptors.txt"  
target_path = "/mnt/d/SCAnalysis/Spateo_E11.5_embryo_ZLI/CCI_inputs/diencephalic_ring_targets.txt"
cci_dir_path = "/mnt/c/Users/danie/Desktop/Github/Github/spateo-release-main/spateo/tools/database"
mod_type = "ligand"
species = "mouse"
group_key = "mapped_celltype"
coords_key = "z_correction"
distance_membrane_bound = 6.37
n_neighbors_membrane_bound = 27
distance_secreted = 16.17
# Effective radius for each cell, assuming a cell size of 10um, a signaling range of 250um in diameter, and cells that are not tightly packed,
# with a distance of 30um between neighbors (erring on the larger side)
n_neighbors_secreted = 250
minbw = 9.55
maxbw = 24.25
spatial_subsample = True

In [None]:
parser, args_list = st.tl.define_spateo_argparse(
    adata_path=adata_path,
    custom_lig_path=ligand_path,
    custom_rec_path=receptor_path,
    targets_path=target_path,
    cci_dir=cci_dir_path,
    mod_type=mod_type,
    species=species,
    group_key=group_key,
    coords_key=coords_key,
    distance_membrane_bound=distance_membrane_bound,
    n_neighbors_membrane_bound=n_neighbors_membrane_bound,
    distance_secreted=distance_secreted,
    n_neighbors_secreted=n_neighbors_secreted,
    minbw=minbw,
    maxbw=maxbw,
    spatial_subsample=spatial_subsample,
    output_path=output_path,
)

In [None]:
downstream_model = st.tl.MuSIC_Interpreter(parser, args_list)

In [None]:
# Fgf8 effect on Sufu
target = "Sufu"
ligand = "Fgf8"

In [None]:
save_path = f"/mnt/d/SCAnalysis/Spateo_E11.5_embryo_ZLI/diencephalic_ring_Fgf8_Sufu_effect.html"

In [None]:
# Plot the predicted effect of the ligand on the target. The interaction is the `ligand` defined
# above; no variable named `interaction` exists in this notebook.
downstream_model.plot_interaction_effect_3D(
    target=target,
    interaction=ligand,
    size=3.5,
    save_path=save_path
)