In [None]:
import os

import anndata
import networkx as nx
import numpy as np
import pandas as pd
import scanpy as sc
from matplotlib import rcParams

import scglue

In [None]:
# Apply scglue's publication plotting parameters first, then override the
# default figure size with a 4x4 canvas.
scglue.plot.set_publication_params()
rcParams.update({"figure.figsize": (4, 4)})

# Root output directory for this notebook; created up front so that it
# exists before anything is written beneath it.
PATH = "s02_glue"
os.makedirs(PATH, exist_ok=True)

In [None]:
# Run parameters. Both can be overridden through environment variables;
# the defaults are prior "d" and seed 0.
PRIOR = os.environ.get("PRIOR", "d")
SEED = int(os.environ.get("SEED", "0"))

# The figure, h5ad and CSV outputs further down are all written under
# "{PATH}/prior:{PRIOR}/seed:{SEED}/". No other cell creates that directory
# (glue.fit is given a different one), so the first savefig would fail on a
# fresh run. Create it here, as soon as PRIOR and SEED are known.
os.makedirs(f"{PATH}/prior:{PRIOR}/seed:{SEED}", exist_ok=True)

# Read data

In [None]:
# Load the RNA and ATAC AnnData objects from the s01_preprocessing directory
# (RNA first, then ATAC).
rna, atac = (
    anndata.read_h5ad(h5ad_path)
    for h5ad_path in (
        "s01_preprocessing/rna.h5ad",
        "s01_preprocessing/atac.h5ad",
    )
)

In [None]:
# The prior graph file is selected by the PRIOR run parameter.
prior_graphml = f"s01_preprocessing/{PRIOR}_prior.graphml.gz"
prior = nx.read_graphml(prior_graphml)

# Assign highly variable genes

In [None]:
# Copy the prior-specific selection column into the generic "highly_variable"
# column for both modalities.
hv_column = f"{PRIOR}_highly_variable"
for modality in (rna, atac):
    modality.var["highly_variable"] = modality.var[hv_column]

# Display how many features are flagged in (RNA, ATAC).
tuple(modality.var["highly_variable"].sum() for modality in (rna, atac))

# Train model

In [None]:
# Both modalities share the same configuration ("NB", highly variable
# features only); they differ only in the obsm representation passed as
# use_rep.
for modality, rep_key in ((rna, "X_pca"), (atac, "X_lsi")):
    scglue.models.configure_dataset(
        modality, "NB", use_highly_variable=True, use_rep=rep_key
    )

In [None]:
# Build the model from the two configured datasets and the sorted node list
# of the prior graph, seeded with the SEED run parameter.
model_inputs = {"rna": rna, "atac": atac}
graph_vertices = sorted(prior.nodes)
glue = scglue.models.SCGLUEModel(model_inputs, graph_vertices, random_seed=SEED)

In [None]:
# Directory passed to glue.fit; the final model file is saved there as well.
# NOTE(review): this is "{PRIOR}_glue/seed_{SEED}", while the later output
# cells write under "prior:{PRIOR}/seed:{SEED}" -- confirm the two layouts
# are meant to differ.
training_dir = f"{PATH}/{PRIOR}_glue/seed_{SEED}"

glue.compile()
glue.fit(
    {"rna": rna, "atac": atac},
    prior,
    edge_weight="weight",
    edge_sign="sign",
    directory=training_dir,
)
glue.save(f"{training_dir}/final.dill")

# Embeddings

## Cell embeddings

In [None]:
# Encode each modality with the trained model and store the result in
# obsm["X_glue"] of the corresponding AnnData object.
for domain_key, modality in (("rna", rna), ("atac", atac)):
    modality.obsm["X_glue"] = glue.encode_data(domain_key, modality)

In [None]:
# Stack the two modalities (RNA rows first, then ATAC) into one AnnData that
# carries only cell metadata and the embedding.
# join="inner" keeps just the obs columns present in both modalities.
combined_obs = pd.concat([rna.obs, atac.obs], join="inner")
combined_embedding = np.concatenate([rna.obsm["X_glue"], atac.obsm["X_glue"]])
combined = anndata.AnnData(obs=combined_obs, obsm={"X_glue": combined_embedding})

In [None]:
# Neighbor graph on the embedding using the cosine metric, followed by UMAP
# on that graph.
sc.pp.neighbors(
    combined,
    use_rep="X_glue",
    n_pcs=50,
    metric="cosine",
)
sc.tl.umap(combined)

In [None]:
# UMAP of the combined embedding colored by cell type, saved as a PDF.
cell_type_pdf = f"{PATH}/prior:{PRIOR}/seed:{SEED}/combined_glue_ct.pdf"
fig = sc.pl.umap(
    combined,
    color="cell_type",
    title="Cell type",
    return_fig=True,
)
fig.savefig(cell_type_pdf)

In [None]:
# UMAP of the combined embedding colored by the "domain" column, saved as a
# PDF.
domain_pdf = f"{PATH}/prior:{PRIOR}/seed:{SEED}/combined_glue_domain.pdf"
fig = sc.pl.umap(
    combined,
    color="domain",
    title="Omics layer",
    return_fig=True,
)
fig.savefig(domain_pdf)

In [None]:
# Write the per-modality and combined AnnData objects as gzip-compressed
# h5ad files, in the order rna, atac, combined.
for dataset, h5ad_path in (
    (rna, f"{PATH}/prior:{PRIOR}/seed:{SEED}/rna_glue.h5ad"),
    (atac, f"{PATH}/prior:{PRIOR}/seed:{SEED}/atac_glue.h5ad"),
    (combined, f"{PATH}/prior:{PRIOR}/seed:{SEED}/combined_glue.h5ad"),
):
    dataset.write(h5ad_path, compression="gzip")

## Feature embeddings

In [None]:
# Encode the prior graph with the trained model and index the resulting rows
# by the model's vertex list.
graph_embedding = glue.encode_graph(prior, "weight", "sign")
feature_embeddings = pd.DataFrame(graph_embedding, index=glue.vertices)

# Preview the top-left 5x5 corner only.
feature_embeddings.iloc[:5, :5]

In [None]:
# Save the feature embeddings as CSV: the index is kept as the first column
# and no header row is written.
feature_csv = f"{PATH}/prior:{PRIOR}/seed:{SEED}/feature_embeddings.csv"
feature_embeddings.to_csv(feature_csv, index=True, header=False)