In [None]:
import os

import anndata
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import umap
from matplotlib import patches, rcParams

import scglue

In [None]:
# Call scglue's plotting setup first, then override the default figure size,
# so the (7, 7) setting below is applied after whatever the setup call changes.
scglue.plot.set_publication_params()
rcParams["figure.figsize"] = (7, 7)

# Directory that receives every figure and CSV written by this notebook.
PATH = "e03_visualize"
os.makedirs(name=PATH, exist_ok=True)

# Read aggregated data

In [None]:
# Open both aggregated datasets with backed="r" (read-only, file-backed).
rna_agg_file = "e01_preprocessing/rna_agg.h5ad"
atac_agg_file = "e01_preprocessing/atac_agg.h5ad"
rna = anndata.read_h5ad(rna_agg_file, backed="r")
atac = anndata.read_h5ad(atac_agg_file, backed="r")

# Seurat v3

## Read latent

In [None]:
# The latent CSVs have no header row; their first column is used as the index.
# Rows are re-ordered with .loc so they line up with each dataset's obs_names
# before being stored as a plain array in .obsm["X_latent"].
rna_latent = pd.read_csv("e02_cca_anchor/rna_latent.csv", header=None, index_col=0)
rna.obsm["X_latent"] = rna_latent.loc[rna.obs_names].to_numpy()

atac_latent = pd.read_csv("e02_cca_anchor/atac_latent.csv", header=None, index_col=0)
atac.obsm["X_latent"] = atac_latent.loc[atac.obs_names].to_numpy()

In [None]:
# Stack RNA rows followed by ATAC rows. join="inner" keeps only the obs
# columns present in both tables; the latent arrays are stacked in the
# same RNA-then-ATAC order so rows stay aligned with the metadata.
combined_obs = pd.concat([rna.obs, atac.obs], join="inner")
combined_latent = np.concatenate([rna.obsm["X_latent"], atac.obsm["X_latent"]])
combined = anndata.AnnData(obs=combined_obs, obsm={"X_latent": combined_latent})

## Plotting

In [None]:
# Build the neighbor graph on the joint latent space (cosine metric), passing
# the full latent width as n_pcs, then compute the UMAP layout from it.
latent_dim = combined.obsm["X_latent"].shape[1]
sc.pp.neighbors(combined, n_pcs=latent_dim, use_rep="X_latent", metric="cosine")
sc.tl.umap(combined)

In [None]:
# UMAP colored by cell type. The legend entries are captured for the separate
# legend figure, then the legend is stripped from this plot before saving.
fig = sc.pl.umap(combined, color="cell_type", title="Cell type", return_fig=True)
ct_ax = fig.axes[0]
ct_handles, ct_labels = ct_ax.get_legend_handles_labels()
ct_ax.get_legend().remove()
fig.savefig(f"{PATH}/cca_combined_agg_ct.pdf")

In [None]:
# UMAP colored by the "domain" column (titled "Omics layer"). The legend
# entries are captured for the separate legend figure, then the legend is
# stripped from this plot before saving.
fig = sc.pl.umap(combined, color="domain", title="Omics layer", return_fig=True)
domain_ax = fig.axes[0]
domain_handles, domain_labels = domain_ax.get_legend_handles_labels()
domain_ax.get_legend().remove()
fig.savefig(f"{PATH}/cca_combined_agg_domain.pdf")

In [None]:
# Standalone legend figure: the axes are hidden so only the legend is drawn.
fig, ax = plt.subplots()
ax.set_visible(False)

# An invisible rectangle is used as the handle for the two group headings
# ("Omics layer", "Cell type") and for the blank spacer entry between groups.
placeholder = patches.Rectangle((0, 0), 1, 1, visible=False)
handles = [placeholder] + list(domain_handles) + [placeholder, placeholder] + list(ct_handles)
labels = ["Omics layer"] + list(domain_labels) + ["", "Cell type"] + list(ct_labels)

fig.legend(handles, labels, ncol=5, frameon=False)
fig.savefig(f"{PATH}/cca_combined_agg_legend.pdf")

In [None]:
# Export the UMAP coordinates, indexed by observation name, as a headerless CSV.
umap_coords = combined.obsm["X_umap"]
combined_umap = pd.DataFrame(data=umap_coords, index=combined.obs_names)
combined_umap.to_csv(f"{PATH}/cca_combined_agg_umap.csv", index=True, header=False)

# iNMF (aggregated data)

In [None]:
# The latent CSVs have no header row; their first column is used as the index.
# Rows are re-ordered with .loc so they line up with each dataset's obs_names
# before being stored as a plain array in .obsm["X_latent"] (replacing the
# previously stored latent).
rna_latent = pd.read_csv("e02_inmf.agg/rna_latent.csv", header=None, index_col=0)
rna.obsm["X_latent"] = rna_latent.loc[rna.obs_names].to_numpy()

atac_latent = pd.read_csv("e02_inmf.agg/atac_latent.csv", header=None, index_col=0)
atac.obsm["X_latent"] = atac_latent.loc[atac.obs_names].to_numpy()

In [None]:
# Stack RNA rows followed by ATAC rows. join="inner" keeps only the obs
# columns present in both tables; the latent arrays are stacked in the
# same RNA-then-ATAC order so rows stay aligned with the metadata.
combined_obs = pd.concat([rna.obs, atac.obs], join="inner")
combined_latent = np.concatenate([rna.obsm["X_latent"], atac.obsm["X_latent"]])
combined = anndata.AnnData(obs=combined_obs, obsm={"X_latent": combined_latent})

## Plotting

In [None]:
# Add a slight amount of noise to avoid UMAP segfault.
#
# The noisy latent is rebuilt from the per-modality latents (RNA rows, then
# ATAC rows -- the same order used to construct `combined`) instead of being
# accumulated in place with `+=`. That keeps this cell idempotent: re-running
# it yields the same perturbed latent rather than stacking the (identically
# seeded) noise on top of an already perturbed array.
clean_latent = np.concatenate([rna.obsm["X_latent"], atac.obsm["X_latent"]])
latent_noise = np.random.RandomState(0).randn(*clean_latent.shape) * 2e-5
combined.obsm["X_latent"] = clean_latent + latent_noise

In [None]:
# Build the neighbor graph on the joint latent space (cosine metric), passing
# the full latent width as n_pcs, then compute the UMAP layout from it.
latent_dim = combined.obsm["X_latent"].shape[1]
sc.pp.neighbors(combined, n_pcs=latent_dim, use_rep="X_latent", metric="cosine")
sc.tl.umap(combined)

In [None]:
# UMAP colored by cell type. The legend entries are captured for the separate
# legend figure, then the legend is stripped from this plot before saving.
fig = sc.pl.umap(combined, color="cell_type", title="Cell type", return_fig=True)
ct_ax = fig.axes[0]
ct_handles, ct_labels = ct_ax.get_legend_handles_labels()
ct_ax.get_legend().remove()
fig.savefig(f"{PATH}/inmf_combined_agg_ct.pdf")

In [None]:
# UMAP colored by the "domain" column (titled "Omics layer"). The legend
# entries are captured for the separate legend figure, then the legend is
# stripped from this plot before saving.
fig = sc.pl.umap(combined, color="domain", title="Omics layer", return_fig=True)
domain_ax = fig.axes[0]
domain_handles, domain_labels = domain_ax.get_legend_handles_labels()
domain_ax.get_legend().remove()
fig.savefig(f"{PATH}/inmf_combined_agg_domain.pdf")

In [None]:
# Standalone legend figure: the axes are hidden so only the legend is drawn.
fig, ax = plt.subplots()
ax.set_visible(False)

# An invisible rectangle is used as the handle for the two group headings
# ("Omics layer", "Cell type") and for the blank spacer entry between groups.
placeholder = patches.Rectangle((0, 0), 1, 1, visible=False)
handles = [placeholder] + list(domain_handles) + [placeholder, placeholder] + list(ct_handles)
labels = ["Omics layer"] + list(domain_labels) + ["", "Cell type"] + list(ct_labels)

fig.legend(handles, labels, ncol=5, frameon=False)
fig.savefig(f"{PATH}/inmf_combined_agg_legend.pdf")

In [None]:
# Export the UMAP coordinates, indexed by observation name, as a headerless CSV.
umap_coords = combined.obsm["X_umap"]
combined_umap = pd.DataFrame(data=umap_coords, index=combined.obs_names)
combined_umap.to_csv(f"{PATH}/inmf_combined_agg_umap.csv", index=True, header=False)

# Read full data

In [None]:
# Rebind `rna` / `atac` to the full (non-aggregated) datasets, again opened
# with backed="r" (read-only, file-backed).
rna_full_file = "e01_preprocessing/rna.h5ad"
atac_full_file = "e01_preprocessing/atac.h5ad"
rna = anndata.read_h5ad(rna_full_file, backed="r")
atac = anndata.read_h5ad(atac_full_file, backed="r")

# iNMF (full data)

In [None]:
# The latent CSVs have no header row; their first column is used as the index.
# Rows are re-ordered with .loc so they line up with each dataset's obs_names
# before being stored as a plain array in .obsm["X_latent"].
rna_latent = pd.read_csv("e02_inmf/rna_latent.csv", header=None, index_col=0)
rna.obsm["X_latent"] = rna_latent.loc[rna.obs_names].to_numpy()

atac_latent = pd.read_csv("e02_inmf/atac_latent.csv", header=None, index_col=0)
atac.obsm["X_latent"] = atac_latent.loc[atac.obs_names].to_numpy()

In [None]:
# Stack RNA rows followed by ATAC rows. join="inner" keeps only the obs
# columns present in both tables; the latent arrays are stacked in the
# same RNA-then-ATAC order so rows stay aligned with the metadata.
combined_obs = pd.concat([rna.obs, atac.obs], join="inner")
combined_latent = np.concatenate([rna.obsm["X_latent"], atac.obsm["X_latent"]])
combined = anndata.AnnData(obs=combined_obs, obsm={"X_latent": combined_latent})

## Plotting

In [None]:
# Add a slight amount of noise to avoid UMAP segfault.
#
# The noisy latent is rebuilt from the per-modality latents (RNA rows, then
# ATAC rows -- the same order used to construct `combined`) instead of being
# accumulated in place with `+=`. That keeps this cell idempotent: re-running
# it yields the same perturbed latent rather than stacking the (identically
# seeded) noise on top of an already perturbed array.
clean_latent = np.concatenate([rna.obsm["X_latent"], atac.obsm["X_latent"]])
latent_noise = np.random.RandomState(0).randn(*clean_latent.shape) * 2e-4
combined.obsm["X_latent"] = clean_latent + latent_noise

In [None]:
# Build the neighbor graph on the joint latent space (cosine metric), passing
# the full latent width as n_pcs, then compute the UMAP layout from it.
latent_dim = combined.obsm["X_latent"].shape[1]
sc.pp.neighbors(combined, n_pcs=latent_dim, use_rep="X_latent", metric="cosine")
sc.tl.umap(combined)

In [None]:
# UMAP colored by cell type. The legend entries are captured for the separate
# legend figure, then the legend is stripped from this plot before saving.
fig = sc.pl.umap(combined, color="cell_type", title="Cell type", return_fig=True)
ct_ax = fig.axes[0]
ct_handles, ct_labels = ct_ax.get_legend_handles_labels()
ct_ax.get_legend().remove()
fig.savefig(f"{PATH}/inmf_combined_ct.pdf")

In [None]:
# UMAP colored by the "domain" column (titled "Omics layer"). The legend
# entries are captured for the separate legend figure, then the legend is
# stripped from this plot before saving.
fig = sc.pl.umap(combined, color="domain", title="Omics layer", return_fig=True)
domain_ax = fig.axes[0]
domain_handles, domain_labels = domain_ax.get_legend_handles_labels()
domain_ax.get_legend().remove()
fig.savefig(f"{PATH}/inmf_combined_domain.pdf")

In [None]:
# Standalone legend figure: the axes are hidden so only the legend is drawn.
fig, ax = plt.subplots()
ax.set_visible(False)

# An invisible rectangle is used as the handle for the two group headings
# ("Omics layer", "Cell type") and for the blank spacer entry between groups.
placeholder = patches.Rectangle((0, 0), 1, 1, visible=False)
handles = [placeholder] + list(domain_handles) + [placeholder, placeholder] + list(ct_handles)
labels = ["Omics layer"] + list(domain_labels) + ["", "Cell type"] + list(ct_labels)

fig.legend(handles, labels, ncol=5, frameon=False)
fig.savefig(f"{PATH}/inmf_combined_legend.pdf")

In [None]:
# Export the UMAP coordinates, indexed by observation name, as a headerless CSV.
umap_coords = combined.obsm["X_umap"]
combined_umap = pd.DataFrame(data=umap_coords, index=combined.obs_names)
combined_umap.to_csv(f"{PATH}/inmf_combined_umap.csv", index=True, header=False)