In [None]:
import os

import anndata
import networkx as nx
from networkx.algorithms.bipartite import biadjacency_matrix

In [None]:
# Output directory for the .h5ad files saved by this notebook. It is created
# up front; exist_ok=True makes the call a no-op if it already exists.
PATH = "t01_preprocessing"
os.makedirs(PATH, exist_ok=True)

# Read data

In [None]:
# Full (un-preprocessed) datasets, loaded entirely into memory.
rna_source = "../../data/dataset/Cao-2020.h5ad"
atac_source = "../../data/dataset/Domcke-2020.h5ad"

rna = anndata.read_h5ad(rna_source)
atac = anndata.read_h5ad(atac_source)

In [None]:
# Preprocessed counterparts of the two datasets. They are opened in backed
# read-only mode because the cells below only consult their `var_names` and
# `obs["mask"]`, so the data matrices can stay on disk.
rna_pp_source = "s01_preprocessing/rna.h5ad"
atac_pp_source = "s01_preprocessing/atac.h5ad"

rna_pp = anndata.read_h5ad(rna_pp_source, backed="r")
atac_pp = anndata.read_h5ad(atac_pp_source, backed="r")

In [None]:
# Load the gzip-compressed GraphML graph stored in the s01_preprocessing
# directory. Further down it is turned into an (ATAC feature x RNA feature)
# biadjacency matrix, so its nodes are expected to be named after the entries
# of `atac.var_names` / `rna.var_names` -- TODO confirm against the producer.
graph = nx.read_graphml("s01_preprocessing/full.graphml.gz")

# Update metadata

In [None]:
# Mark every feature of the full datasets that also appears in the
# corresponding preprocessed object. (Presumably the preprocessed objects were
# restricted to highly variable features, hence the column name -- confirm
# against the s01_preprocessing step.)
#
# `Index.isin` does the membership test in one vectorized call instead of a
# Python-level loop, and always yields a boolean array, even when there are
# no features.
rna.var["highly_variable"] = rna.var_names.isin(rna_pp.var_names)
atac.var["highly_variable"] = atac.var_names.isin(atac_pp.var_names)

# Subsample

In [None]:
# Keep only the cells flagged by the `mask` column of the preprocessed objects.
# NOTE(review): the mask appears to be applied positionally, so the rows of
# `rna_pp` / `atac_pp` must be in the same order as those of `rna` / `atac`
# -- confirm.
rna_keep = rna_pp.obs["mask"]
atac_keep = atac_pp.obs["mask"]

# `rna` and `atac` are rebound to their subsets here; reload the data before
# re-running this cell.
rna = rna[rna_keep, :]
atac = atac[atac_keep, :]

# Convert data

In [None]:
# Project the ATAC data into RNA feature space.
# The biadjacency matrix has one row per ATAC feature and one column per RNA
# feature; its entries come from networkx's default edge attribute
# (`weight`, per the networkx documentation).
atac_to_rna_map = biadjacency_matrix(graph, atac.var_names, rna.var_names)

# (cells x ATAC features) @ (ATAC features x RNA features)
converted_counts = atac.X @ atac_to_rna_map

# Cell annotations are taken from the ATAC data, feature annotations from RNA.
atac2rna = anndata.AnnData(X=converted_counts, obs=atac.obs, var=rna.var)

# Save data

In [None]:
# Write the three objects into the output directory as gzip-compressed h5ad.
outputs = [
    (rna, f"{PATH}/rna.h5ad"),
    (atac, f"{PATH}/atac.h5ad"),
    (atac2rna, f"{PATH}/atac2rna.h5ad"),
]
for adata, destination in outputs:
    adata.write(destination, compression="gzip")