In [None]:
import os

import anndata
import networkx as nx
from networkx.algorithms.bipartite import biadjacency_matrix

import scglue

In [None]:
# Output directory for the files written by the save step of this notebook.
PATH = "e01_preprocessing"
# exist_ok=True keeps a re-run from failing when the directory already exists.
os.makedirs(PATH, exist_ok=True)

# Read data

In [None]:
# Full (unsubset) datasets: `rna` from Cao-2020 and `atac` from Domcke-2020.
# These are the objects that get annotated and aggregated further down.
rna = anndata.read_h5ad("../../data/dataset/Cao-2020.h5ad")
atac = anndata.read_h5ad("../../data/dataset/Domcke-2020.h5ad")

In [None]:
# Preprocessed counterparts stored under s01_preprocessing, opened read-only in
# backed mode (backed="r"). Only their annotations (var_names, obs, obsm) are
# used by the cells below; their X matrices are never accessed here.
rna_pp = anndata.read_h5ad("s01_preprocessing/rna.h5ad", backed="r")
atac_pp = anndata.read_h5ad("s01_preprocessing/atac.h5ad", backed="r")

In [None]:
# Graph stored under s01_preprocessing; used further down to build the
# biadjacency matrix between ATAC features (rows) and RNA features (columns).
graph = nx.read_graphml("s01_preprocessing/full.graphml.gz")

# Update metadata

In [None]:
# Flag every feature of the full datasets that is also present in the
# preprocessed data. Index.isin does the membership test in one vectorized
# call and returns a boolean array aligned with var_names, replacing the
# Python-level loop over each feature name.
rna.var["highly_variable"] = rna.var_names.isin(rna_pp.var_names)
atac.var["highly_variable"] = atac.var_names.isin(atac_pp.var_names)

In [None]:
# Copy the per-cell annotations computed during preprocessing onto the full
# datasets, then add a unit cell counter so that summing it during
# aggregation yields the number of cells per pseudocell.
for adata, adata_pp in ((rna, rna_pp), (atac, atac_pp)):
    for column in ("pseudocell", "organ_balancing"):
        adata.obs[column] = adata_pp.obs[column]
    adata.obs["n_cells"] = 1

In [None]:
# Array entries in obsm carry no cell labels and are assigned by position, so
# the preprocessed cells must be in exactly the same order as the full
# datasets. Without this check a reordered preprocessed file would attach
# embeddings to the wrong cells without raising any error.
assert rna.obs_names.equals(rna_pp.obs_names), \
    "rna and rna_pp must contain the same cells in the same order"
assert atac.obs_names.equals(atac_pp.obs_names), \
    "atac and atac_pp must contain the same cells in the same order"

# Carry the embeddings computed during preprocessing over to the full datasets.
rna.obsm["X_pca"] = rna_pp.obsm["X_pca"]
rna.obsm["X_umap"] = rna_pp.obsm["X_umap"]
atac.obsm["X_lsi"] = atac_pp.obsm["X_lsi"]
atac.obsm["X_umap"] = atac_pp.obsm["X_umap"]

# Aggregation

In [None]:
# Aggregate RNA cells by pseudocell: X is summed, the label columns take the
# majority value, the numeric obs columns are summed, and the embeddings are
# averaged.
rna_agg = scglue.data.aggregate_obs(
    rna,
    by="pseudocell",
    X_agg="sum",
    obs_agg={
        **dict.fromkeys(("cell_type", "Organ", "domain"), "majority"),
        **dict.fromkeys(("n_cells", "organ_balancing"), "sum"),
    },
    obsm_agg=dict.fromkeys(("X_pca", "X_umap"), "mean"),
)
rna_agg

In [None]:
# Aggregate ATAC cells by pseudocell: X is summed, the label columns take the
# majority value, the numeric obs columns are summed, and the embeddings are
# averaged.
atac_agg = scglue.data.aggregate_obs(
    atac,
    by="pseudocell",
    X_agg="sum",
    obs_agg={
        **dict.fromkeys(("cell_type", "tissue", "domain"), "majority"),
        **dict.fromkeys(("n_cells", "organ_balancing"), "sum"),
    },
    obsm_agg=dict.fromkeys(("X_lsi", "X_umap"), "mean"),
)
atac_agg

# Convert ATAC data to the RNA feature space

In [None]:
# Biadjacency matrix of the graph with ATAC features as rows and RNA features
# as columns; multiplying the aggregated ATAC matrix by it projects the ATAC
# data onto the RNA features.
atac2rna_weights = biadjacency_matrix(
    graph, atac_agg.var_names, rna_agg.var_names
)
atac2rna_agg = anndata.AnnData(
    X=atac_agg.X @ atac2rna_weights,
    obs=atac_agg.obs,
    var=rna_agg.var,
)

# Save data

In [None]:
# Write each aggregated dataset to the output directory as a gzip-compressed
# h5ad file.
outputs = (
    (rna_agg, f"{PATH}/rna_agg.h5ad"),
    (atac_agg, f"{PATH}/atac_agg.h5ad"),
    (atac2rna_agg, f"{PATH}/atac2rna_agg.h5ad"),
)
for adata, target in outputs:
    adata.write(target, compression="gzip")