In [1]:
import os, warnings
from pathlib import Path

import numpy as np
import dynamo as dyn
import spateo as st
import scanpy as sc
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
warnings.filterwarnings('ignore')

2023-09-19 23:04:20.595148: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-09-19 23:04:20.728986: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
from typing import Optional, Tuple, Union

import matplotlib as mpl
import numpy as np
from pyvista import PolyData, UniformGrid, UnstructuredGrid

try:
    from typing import Literal
except ImportError:
    from typing_extensions import Literal


def add_model_labels(
        model: Union[PolyData, UnstructuredGrid, UniformGrid],
        labels: np.ndarray,
        key_added: str = "groups",
        where: Literal["point_data", "cell_data"] = "cell_data",
        colormap: Union[str, list, dict, np.ndarray] = "rainbow",
        alphamap: Union[float, list, dict, np.ndarray] = 1.0,
        mask_color: Optional[str] = "gainsboro",
        mask_alpha: Optional[float] = 0.0,
        inplace: bool = False,
) -> Tuple[Optional[PolyData or UnstructuredGrid], Optional[Union[str]]]:
    model = model.copy() if not inplace else model
    labels = np.asarray(labels).flatten()

    cu_arr = np.sort(np.unique(labels), axis=0).astype(object)
    raw_labels_hex = labels.copy().astype(object)
    raw_labels_alpha = labels.copy().astype(object)
    raw_labels_hex[raw_labels_hex == "mask"] = mpl.colors.to_hex(mask_color)
    raw_labels_alpha[raw_labels_alpha == "mask"] = mask_alpha

    # Set raw hex.
    if isinstance(colormap, str):
        if colormap in list(mpl.colormaps()):
            lscmap = mpl.cm.get_cmap(colormap)
            raw_hex_list = [mpl.colors.to_hex(lscmap(i)) for i in np.linspace(0, 1, len(cu_arr))]
            for label, color in zip(cu_arr, raw_hex_list):
                raw_labels_hex[raw_labels_hex == label] = color
        else:
            raw_labels_hex[raw_labels_hex != "mask"] = mpl.colors.to_hex(colormap)
    elif isinstance(colormap, dict):
        for label, color in colormap.items():
            raw_labels_hex[raw_labels_hex == label] = mpl.colors.to_hex(color)
    elif isinstance(colormap, list) or isinstance(colormap, np.ndarray):
        raw_hex_list = np.array([mpl.colors.to_hex(color) for color in colormap]).astype(object)
        for label, color in zip(cu_arr, raw_hex_list):
            raw_labels_hex[raw_labels_hex == label] = color
    else:
        raise ValueError("`colormap` value is wrong." "\nAvailable `colormap` types are: `str`, `list` and `dict`.")

    # Set raw alpha.
    if isinstance(alphamap, float) or isinstance(alphamap, int):
        raw_labels_alpha[raw_labels_alpha != "mask"] = alphamap
    elif isinstance(alphamap, dict):
        for label, alpha in alphamap.items():
            raw_labels_alpha[raw_labels_alpha == label] = alpha
    elif isinstance(alphamap, list) or isinstance(alphamap, np.ndarray):
        raw_labels_alpha = np.asarray(alphamap).astype(object)
    else:
        raise ValueError(
            "`alphamap` value is wrong." "\nAvailable `alphamap` types are: `float`, `list` and `dict`."
        )

    # Set rgba.
    labels_rgba = [mpl.colors.to_rgba(c, alpha=a) for c, a in zip(raw_labels_hex, raw_labels_alpha)]
    labels_rgba = np.array(labels_rgba).astype(np.float32)

    # Added rgba of the labels.
    if where == "point_data":
        model.point_data[f"{key_added}_rgba"] = labels_rgba
    else:
        model.cell_data[f"{key_added}_rgba"] = labels_rgba

    plot_cmap = None

    # Added labels.
    if where == "point_data":
        model.point_data[key_added] = labels
    else:
        model.cell_data[key_added] = labels

    return model if not inplace else None, plot_cmap

## Load the data

In [3]:
sample_id = "E7_8h"
os.chdir(f"/media/pc001/Yao/Projects/Project_drosophila/Data_v4/migration-hotspot/drosophila_{sample_id}_germ_layer_new")
cpo = [(41, 1209, 57), (13, 8, -3), (0, 0, 1)]

out_image_path = f"image/germband_morphofield/GO"
Path(out_image_path).mkdir(parents=True, exist_ok=True)

In [4]:
germ_adata = st.read_h5ad(f"h5ad/{sample_id}_germband_v4_acceleration.h5ad")
germ_adata.X= germ_adata.layers["X_counts"].copy()
sc.pp.normalize_total(germ_adata)
sc.pp.log1p(germ_adata)

germ_pc = st.tdr.read_model(f"morpho_models/germband_morphometrics_pc_model.vtk")
germ_mesh = st.tdr.read_model(f"morpho_models/germband_morphometrics_mesh_model.vtk")
trajectory_model = st.tdr.read_model(f"morpho_models/germband_morphofield_trajectory_model.vtk")
germ_adata



AnnData object with n_obs × n_vars = 12585 × 8120
    obs: 'area', 'slices', 'anno_cell_type', 'anno_tissue', 'anno_germ_layer', 'actual_stage', 'scc', 'anno_tissue_new', 'V_z', 'anno_germ_layer_new', 'backbone_nodes', 'backbone_scc', 'segments', 'morpho_acceleration', 'morpho_curvature', 'morpho_curl', 'morpho_torsion'
    uns: 'VecFld_morpho', 'glm_degs', 'log1p', 'pp'
    obsm: '2d_realign_spatial', '3d_align_spatial', 'V_cells_mapping', 'X_cells_mapping', 'X_pca', 'X_umap', 'morpho_acceleration', 'morpho_curl', 'morpho_curvature', 'morpho_velocity', 'raw_spatial', 'tdr_spatial'
    layers: 'X_counts', 'spliced', 'unspliced'

In [5]:
glm_data = germ_adata.uns["glm_degs"]["glm_result"]
glm_data= glm_data.loc[glm_data["log-likelihood"] >= -3000, :]
glm_data= glm_data.loc[glm_data["pval"] <= 1e-5, :]
selected_genes = list(glm_data.index)
glm_data

Unnamed: 0,status,family,log-likelihood,pval,qval
CG13159,ok,NB2,-2724.717041,0.000000e+00,0.000000e+00
pip,ok,NB2,-1099.084595,0.000000e+00,0.000000e+00
Abd-B,ok,NB2,-758.048340,1.760276e-39,2.382240e-36
CG34190,ok,NB2,-681.737549,1.196937e-35,1.079903e-32
Papss,ok,NB2,-672.873108,2.431555e-33,1.974422e-30
...,...,...,...,...,...
Antp,ok,NB2,-912.598938,8.563871e-06,4.373499e-04
CG8353,ok,NB2,-1302.346924,8.702624e-06,4.416582e-04
E(spl)m7-HLH,ok,NB2,-2032.674194,9.543007e-06,4.791381e-04
TER94,ok,NB2,-2238.718262,9.559159e-06,4.791381e-04


In [6]:
import gseapy as gp
gp.get_library_name(organism="fly")

['Allele_LoF_Phenotypes_from_FlyBase_2017',
 'Allele_Phenotypes_from_FlyBase_2017',
 'Anatomy_AutoRIF',
 'Anatomy_AutoRIF_Predicted_zscore',
 'Anatomy_GeneRIF',
 'Anatomy_GeneRIF_Predicted_zscore',
 'Coexpression_Predicted_GO_Biological_Process_2018',
 'Coexpression_Predicted_GO_Cellular_Component_2018',
 'Coexpression_Predicted_GO_Molecular_Function_2018',
 'GO_Biological_Process_2018',
 'GO_Biological_Process_AutoRIF',
 'GO_Biological_Process_AutoRIF_Predicted_zscore',
 'GO_Biological_Process_GeneRIF',
 'GO_Biological_Process_GeneRIF_Predicted_zscore',
 'GO_Cellular_Component_2018',
 'GO_Cellular_Component_AutoRIF',
 'GO_Cellular_Component_AutoRIF_Predicted_zscore',
 'GO_Cellular_Component_GeneRIF',
 'GO_Cellular_Component_GeneRIF_Predicted_zscore',
 'GO_Molecular_Function_2018',
 'GO_Molecular_Function_AutoRIF',
 'GO_Molecular_Function_AutoRIF_Predicted_zscore',
 'GO_Molecular_Function_GeneRIF',
 'GO_Molecular_Function_GeneRIF_Predicted_zscore',
 'Human_Disease_from_FlyBase_2017',
 

In [7]:
acceleration_genes = [
    "Gmap", "otp","CG32425","DNaseII","SpdS","Pdp1","CG45116","CG8036","CG2930","CG33099",
    "CG42565","cad","CG32267","GstE5","CG8468","CG44286","peb","Glo1","CG43051","mRpL27","clu",
]

curl_genes = [
    "Acbp5", "Abd-B", "fkh", "nyo", "Dph5", "CG6910", "CG7841", "CG7519", "tai", "Rab32", "Gart", "CG31463", "tai", "CG7519",
    "CG33993", "CG33099", "CG2930", "RhoGEF64C", "CG44286", "neo", "FoxL1", "CG1273", "Tep4", "bbg", "Doc3", "CG4069",
    "CG32425", "Npc2g", "Pdp1", "fok", "DNaseII", "CG6937", "CG8468", "clu", "CG11563", "Galt", "Surf6", "ppl", "CG12522", "mRpL23", "Chchd2", "ppan", "peb"
]

In [8]:
morpho_genes = acceleration_genes + curl_genes
morpho_genes= list(set(morpho_genes))
morpho_genes

['peb',
 'clu',
 'ppl',
 'Galt',
 'Chchd2',
 'DNaseII',
 'ppan',
 'CG42565',
 'Abd-B',
 'GstE5',
 'CG6910',
 'RhoGEF64C',
 'CG32267',
 'mRpL27',
 'Pdp1',
 'FoxL1',
 'CG11563',
 'CG8468',
 'Tep4',
 'fkh',
 'CG45116',
 'CG8036',
 'Doc3',
 'CG33099',
 'CG31463',
 'SpdS',
 'CG6937',
 'CG44286',
 'Npc2g',
 'CG4069',
 'neo',
 'Gart',
 'Dph5',
 'CG33993',
 'CG12522',
 'CG43051',
 'otp',
 'Gmap',
 'mRpL23',
 'CG32425',
 'CG2930',
 'CG7519',
 'CG7841',
 'tai',
 'Surf6',
 'cad',
 'CG1273',
 'nyo',
 'Acbp5',
 'Glo1',
 'bbg',
 'fok',
 'Rab32']

In [9]:
# GO-biologial process
go_folder = os.path.join(out_image_path, f"morpho_acceleration_GO_all")
Path(go_folder).mkdir(parents=True, exist_ok=True)

go_bp = gp.enrichr(
        gene_list=morpho_genes,
        gene_sets="GO_Biological_Process_2018",
        organism="fly",
        outdir=go_folder,
        no_plot=True,
        verbose=True
)
go_bp_results = pd.read_csv(os.path.join(go_folder, f"GO_Biological_Process_2018.fly.enrichr.reports.txt"), sep="\t")
gp.barplot(go_bp_results, column='Combined Score', title='GO_Biological_Process_2018', cutoff=0.05, top_term=50, figsize=(5, 20), color="black",
            ofname=os.path.join(go_folder, f"{sample_id}_GO_Biological_Process_2018_selected_barplot.pdf"))
gp.dotplot(go_bp_results, column='Combined Score', title='GO_Biological_Process_2018', cmap='Spectral', cutoff=0.05, top_term=50, figsize=(5, 20),
            ofname=os.path.join(go_folder, f"{sample_id}_GO_Biological_Process_2018_selected_dotplot.pdf"))

2023-09-19 23:04:48,100 [INFO] Run: GO_Biological_Process_2018 
2023-09-19 23:04:53,261 [INFO] Save enrichment results for GO_Biological_Process_2018 
2023-09-19 23:04:53,264 [INFO] Done.


In [10]:
go_folder = os.path.join(out_image_path, f"morpho_acceleration_GO_all")
Path(go_folder).mkdir(parents=True, exist_ok=True)

go_cc = gp.enrichr(
        gene_list=morpho_genes,
        gene_sets="GO_Cellular_Component_2018",
        organism="fly",
        outdir=go_folder,
        no_plot=True,
        verbose=True
)
go_cc_results = pd.read_csv(os.path.join(go_folder, f"GO_Cellular_Component_2018.fly.enrichr.reports.txt"), sep="\t")
gp.barplot(go_cc_results, column='Combined Score', title='GO_Cellular_Component_2018', cutoff=0.05, top_term=50, figsize=(5, 20), color="black",
            ofname=os.path.join(go_folder, f"{sample_id}_GO_Cellular_Component_2018_selected_barplot.pdf"))
gp.dotplot(go_cc_results, column='Combined Score', title='GO_Cellular_Component_2018', cmap='Spectral', cutoff=0.05, top_term=50, figsize=(5, 20),
            ofname=os.path.join(go_folder, f"{sample_id}_GO_Cellular_Component_2018_selected_dotplot.pdf"))

2023-09-19 23:04:55,782 [INFO] Run: GO_Cellular_Component_2018 
2023-09-19 23:05:01,692 [INFO] Save enrichment results for GO_Cellular_Component_2018 
2023-09-19 23:05:01,693 [INFO] Done.


In [11]:
go_folder = os.path.join(out_image_path, f"morpho_acceleration_GO_all")
Path(go_folder).mkdir(parents=True, exist_ok=True)

go_mf = gp.enrichr(
        gene_list=morpho_genes,
        gene_sets="GO_Molecular_Function_2018",
        organism="fly",
        outdir=go_folder,
        no_plot=True,
        verbose=True
)
go_mf_results = pd.read_csv(os.path.join(go_folder, f"GO_Molecular_Function_2018.fly.enrichr.reports.txt"), sep="\t")
gp.barplot(go_mf_results, column='Combined Score', title='GO_Molecular_Function_2018', cutoff=0.05, top_term=50, figsize=(5, 20), color="black",
            ofname=os.path.join(go_folder, f"{sample_id}_GO_Molecular_Function_2018_selected_barplot.pdf"))
gp.dotplot(go_mf_results, column='Combined Score', title='GO_Molecular_Function_2018', cmap='Spectral', cutoff=0.05, top_term=50, figsize=(5, 20),
            ofname=os.path.join(go_folder, f"{sample_id}_GO_Molecular_Function_2018_selected_dotplot.pdf"))

2023-09-19 23:05:05,999 [INFO] Run: GO_Molecular_Function_2018 
2023-09-19 23:05:11,002 [INFO] Save enrichment results for GO_Molecular_Function_2018 
2023-09-19 23:05:11,005 [INFO] Done.


## Selected GO terms

In [12]:
go_bp_results = pd.read_csv(os.path.join(go_folder, f"GO_Biological_Process_2018.fly.enrichr.reports.txt"), sep="\t")
go_bp_results.to_csv(os.path.join(go_folder, f"GO_Biological_Process_2018.fly.enrichr.reports_selected.txt"), sep="\t")

In [13]:
go_bp_results = pd.read_csv(os.path.join(go_folder, f"GO_Biological_Process_2018.fly.enrichr.reports_selected.txt"), sep="\t")
gp.barplot(go_bp_results, column='Combined Score', title='GO_Biological_Process_2018', cutoff=0.05, top_term=50, figsize=(5, 20), color="black",
            ofname=os.path.join(go_folder, f"{sample_id}_GO_Biological_Process_2018_barplot_final.pdf"))
gp.dotplot(go_bp_results, column='Combined Score', title='GO_Biological_Process_2018', cmap='viridis_r', cutoff=0.05, top_term=50, figsize=(5, 20),
            ofname=os.path.join(go_folder, f"{sample_id}_GO_Biological_Process_2018_dotplot_final.pdf"))

In [14]:
go_bp_results

Unnamed: 0.1,Unnamed: 0,Gene_set,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Z-score,Combined Score,Genes
0,7,GO_Biological_Process_2018,endoderm development (GO:0007492),1/7,0.018406,0.093523,0.027996,0.137288,-4.320488,17.26071,fkh
1,8,GO_Biological_Process_2018,ectodermal digestive tract development (GO:000...,1/6,0.015797,0.093523,0.024539,0.137288,-4.15405,17.23073,fkh
2,10,GO_Biological_Process_2018,amnioserosa maintenance (GO:0046665),1/7,0.018406,0.093523,0.027996,0.137288,-4.138053,16.53187,peb
3,11,GO_Biological_Process_2018,apical constriction (GO:0003383),2/25,0.001988,0.026091,0.00402,0.060471,-2.643654,16.4453,nyo;neo
4,15,GO_Biological_Process_2018,regulation of striated muscle tissue developme...,1/8,0.021008,0.093523,0.031442,0.137288,-3.793862,14.65512,Abd-B
5,16,GO_Biological_Process_2018,epithelial cell morphogenesis (GO:0003382),2/23,0.001682,0.025233,0.003451,0.060471,-2.282581,14.58037,nyo;neo
6,22,GO_Biological_Process_2018,regulation of cytoplasmic translational elonga...,1/7,0.018406,0.093523,0.027996,0.137288,-3.422631,13.6737,Dph5
7,24,GO_Biological_Process_2018,germ-band extension (GO:0007377),1/11,0.028774,0.094414,0.04171,0.137288,-3.838995,13.62185,cad
8,34,GO_Biological_Process_2018,regulation of embryonic cell shape (GO:0016476),2/32,0.003248,0.037893,0.006327,0.073816,-2.013621,11.53751,nyo;neo
9,36,GO_Biological_Process_2018,mesenchyme development (GO:0060485),1/9,0.023603,0.093523,0.034877,0.137288,-3.034415,11.36802,Abd-B


In [15]:
selected_genes = []
for i in go_bp_results["Genes"]:
    selected_genes.extend([j for j in str(i).split(";")])
selected_genes = list(set(selected_genes))
print(selected_genes)

['mRpL23', 'peb', 'clu', 'tai', 'mRpL27', 'Npc2g', 'cad', 'neo', 'Abd-B', 'nyo', 'Dph5', 'fkh', 'RhoGEF64C', 'bbg', 'Rab32']


In [16]:
for gn in selected_genes:
    _gn = str(gn).replace(":", "_") if ":" in gn else gn
    
    out_image_genes = os.path.join(out_image_path, "morpho_GO_genes")
    Path(out_image_genes).mkdir(parents=True, exist_ok=True)
    
    pc_index=germ_pc.point_data["obs_index"].tolist()
    exp = germ_adata[pc_index, gn].X.A.flatten()
    exp[exp <= 0] = 0
    
    amap = exp.copy()
    amap = amap / np.max(amap)
    amap[amap <= 0.2] = 0.2
    add_model_labels(model=germ_pc, labels=exp, key_added=gn, alphamap=amap, where="point_data",inplace=True, colormap="afmhot_r")
    
    st.pl.three_d_plot(
        model=germ_pc,
        key=gn,
        model_style="points",
        model_size=14,
        opacity=1,
        colormap=None,
        show_legend=True,
        jupyter=False,
        off_screen=True,
        cpo=cpo,
        window_size=(2560, 2048),
        text=gn,
        filename=os.path.join(out_image_genes, f"{sample_id}_germband_GO_model_{_gn}.pdf")
    )