kernel: pertpy

In [None]:
!which R

In [None]:
!Rscript -e 'library(edgeR)'

In [None]:
import rpy2.robjects as robjects
print(robjects.r('R.home()'))

# Set up

In [None]:
import os, sys
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib
import matplotlib.backends.backend_pdf as mpdf
from matplotlib.pyplot import rc_context

import anndata as ad
import scanpy as sc
import muon as mu
import pertpy as pt
milo = pt.tl.Milo()

In [None]:
pt.__version__

In [None]:
import warnings
from numba.core.errors import NumbaDeprecationWarning

# Default policy: report each warning once.
warnings.filterwarnings(action='once')
warnings.simplefilter(action='once')

# Then silence the deprecation-style categories entirely. Each call puts its
# filter ahead of the ones registered before it, so the order is kept as-is.
for silenced_category in (
    NumbaDeprecationWarning,
    FutureWarning,
    DeprecationWarning,
    PendingDeprecationWarning,
):
    warnings.simplefilter(action="ignore", category=silenced_category)

In [None]:
sc.settings.verbosity = 3  # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.settings.set_figure_params(dpi=100, frameon=False, figsize=(8, 7), facecolor="white")
sc.logging.print_versions()

# Load data

mdata for pertpy

In [None]:
work_dir = '/work/DevM_analysis/02.abundance/Milo_FL_PCW250401'
dataset = "FL_wnn"
new_anno = "anno_wnn_v51"

In [None]:
adata = sc.read_h5ad('data/adata_4_milo.h5ad')
mdata = milo.load(adata, feature_key='rna')
mdata

# Prepare

Random cells

In [None]:
# Seed NumPy's global RNG, then draw a shuffled ordering of the row positions
# 0 .. mdata.shape[0] - 1.
np.random.seed(0)
n_rows = mdata.shape[0]
random_indices = np.random.permutation(np.arange(n_rows))

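PCW as continuous: replace each PCW value with its integer category code so that it enters the design as a numeric covariate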

In [None]:
# Overwrite PCW in the RNA modality with the integer code of its category.
# NOTE(review): the codes are evenly spaced ranks (0, 1, 2, ...) of the
# category order, not the week values themselves; presumably that order is the
# sorted order of the original labels - confirm it matches developmental age.
rna_obs = mdata['rna'].obs
pcw_as_category = rna_obs['PCW'].astype("category")
rna_obs['PCW'] = pcw_as_category.cat.codes
rna_obs['PCW']

# Define neighbourhoods

In [None]:
p = 0.05
milo.make_nhoods(mdata["rna"], prop=p)

In [None]:
# Sum the obsm["nhoods"] matrix over axis 0 to get one size per neighbourhood,
# then show the distribution of those sizes.
nhood_membership = mdata["rna"].obsm["nhoods"]
nhood_size = np.array(nhood_membership.sum(0)).ravel()

fig, ax = plt.subplots()
ax.hist(nhood_size, bins=100)
ax.set_xlabel("# cells in nhood")
ax.set_ylabel("# nhoods");

In [None]:
np.median(nhood_size)

# Count cells in neighbourhoods

In [None]:
mdata = milo.count_nhoods(mdata, sample_col="donorID")
mdata

In [None]:
mdata["milo"]

In [None]:
# Average the milo count matrix over axis 0 (one value per neighbourhood) and
# plot it against the neighbourhood size computed earlier.
milo_counts = mdata["milo"].X.toarray()
mean_n_cells = milo_counts.mean(0)

fig, ax = plt.subplots()
ax.plot(nhood_size, mean_n_cells, ".")
ax.set_xlabel("# cells in nhood")
ax.set_ylabel("Mean # cells per donor in nhood")

# Run DA test

In [None]:
milo.da_nhoods(mdata, design="~ Sex + PCW")
mdata["milo"].obs

In [None]:
def plot_milo_diagnostics(mdata=None, alpha = 0.1):
    """Draw a 2x2 panel of diagnostic plots for the Milo DA results.

    Reads the ``PValue``, ``SpatialFDR``, ``logFC`` and ``logCPM`` columns of
    ``mdata["milo"].var`` and shows:

    1. a histogram of uncorrected P-values,
    2. uncorrected P-value against SpatialFDR,
    3. a volcano plot (logFC against -log10 SpatialFDR) with a red line at
       ``alpha``,
    4. an MA plot (logCPM against logFC) coloured by ``SpatialFDR < alpha``,
       with a grey line at 0 and a purple line at the mean logFC of the
       neighbourhoods with ``SpatialFDR >= alpha``.

    Parameters
    ----------
    mdata
        Object for which ``mdata["milo"].var`` is a DataFrame holding the
        columns listed above. Required; the ``None`` default is kept only so
        the signature stays unchanged.
    alpha
        SpatialFDR threshold used for the volcano line and the significance
        colouring.

    Raises
    ------
    TypeError
        If ``mdata`` is ``None``.

    Notes
    -----
    ``mdata`` is left untouched: the helper ``Sig`` column lives on a local
    copy of the results table only.
    """
    if mdata is None:
        raise TypeError("plot_milo_diagnostics() requires `mdata`")

    # Copy the needed columns so adding "Sig" does not write into
    # mdata["milo"].var (and from there into any file saved later).
    res = mdata["milo"].var[["PValue", "SpatialFDR", "logFC", "logCPM"]].copy()
    res["Sig"] = res["SpatialFDR"] < alpha
    emp_null = res.loc[res["SpatialFDR"] >= alpha, "logFC"].mean()

    # "g" formatting instead of int(): int() truncates float products such as
    # 0.29 * 100 == 28.999999999999996 and would label the threshold "28 %".
    pct = f"{alpha * 100:g}"

    with matplotlib.rc_context({"figure.figsize": [12, 12]}):
        fig, axes = plt.subplots(2, 2)
        ax_pval, ax_fdr, ax_volcano, ax_ma = axes.ravel()

        ## Check P-value histogram
        ax_pval.hist(res["PValue"], bins=20)
        ax_pval.set_xlabel("Uncorrected P-value")

        ## Visualize extent of multiple-testing correction
        ax_fdr.scatter(res["PValue"], res["SpatialFDR"], s=3)
        ax_fdr.set_xlabel("Uncorrected P-value")
        ax_fdr.set_ylabel("SpatialFDR")

        ## Visualize volcano plot
        ax_volcano.scatter(res["logFC"], -np.log10(res["SpatialFDR"]), s=3)
        ax_volcano.axhline(
            y=-np.log10(alpha),
            color="red",
            linewidth=1,
            label=f"{pct} % SpatialFDR",
        )
        ax_volcano.legend()
        ax_volcano.set_xlabel("log-Fold Change")
        ax_volcano.set_ylabel("- log10(SpatialFDR)")

        ## Visualize MA plot
        sns.scatterplot(data=res, x="logCPM", y="logFC", hue="Sig", ax=ax_ma)
        ax_ma.axhline(y=0, color="grey", linewidth=1)
        ax_ma.axhline(y=emp_null, color="purple", linewidth=1)
        ax_ma.legend(title=f"< {pct} % SpatialFDR")
        ax_ma.set_xlabel("Mean log-counts")
        ax_ma.set_ylabel("log-Fold Change")

        # Lay out once all four panels exist, so the MA panel is included.
        fig.tight_layout()
        plt.show()

plot_milo_diagnostics(mdata)

In [None]:
# Build the neighbourhood graph, draw it and save the figure as a PDF.
milo.build_nhood_graph(mdata)

# NOTE(review): this path is relative to the current working directory, while
# the beeswarm figure further down is saved under `work_dir` - confirm both
# are meant to end up in the same folder.
nhood_graph_pdf = f"plots/{dataset}_milo_nhood_graph.pdf"
with rc_context({"figure.figsize": [10, 9]}):
    milo.plot_nhood_graph(
        mdata,
        alpha=0.1,
        min_size=1,
        plot_edges=False,
        return_fig=True,
    )
    plt.savefig(nhood_graph_pdf, bbox_inches="tight")

# Save

Save before annotating nhoods

In [None]:
mdata

In [None]:
# Name the output after `dataset` (as the figure files are) instead of
# repeating the literal "FL_wnn" in a placeholder-less f-string.
mdata.write(f"data/{dataset}_milo.h5mu")

# Visualize result by celltype

## All

In [None]:
milo.annotate_nhoods(mdata, anno_col=new_anno)

In [None]:
plt.hist(mdata["milo"].var["nhood_annotation_frac"], bins=30)
plt.xlabel("celltype fraction")

In [None]:
# Relabel a neighbourhood as "Mixed" when the fraction of its cells carrying
# the assigned label is below 0.7. The column is cast to str first so the new
# label can be assigned.
nhood_var = mdata["milo"].var
nhood_var['nhood_annotation'] = nhood_var['nhood_annotation'].astype('str')
is_mixed = nhood_var["nhood_annotation_frac"] < 0.7
nhood_var.loc[is_mixed, "nhood_annotation"] = "Mixed"

In [None]:
mdata["milo"].var['nhood_annotation'].value_counts()

In [None]:
# Beeswarm of DA results per neighbourhood annotation, saved as a PDF under
# work_dir/plots.
beeswarm_pdf = f"{work_dir}/plots/{dataset}_milo_da_beeswarm.pdf"
with rc_context({"figure.figsize": [5, 12]}):
    milo.plot_da_beeswarm(mdata, alpha=0.1, return_fig=True)
    plt.savefig(beeswarm_pdf, bbox_inches="tight")

# Check top

In [None]:
mdata["milo"].var

In [None]:
df = mdata["milo"].var.copy()
df

In [None]:
df.shape

In [None]:
df[(df['SpatialFDR'] < 0.1)].shape

In [None]:
df[(df['SpatialFDR'] < 0.01)].shape

## HSC

In [None]:
df[(df['nhood_annotation'] == "HSC")].shape

In [None]:
df[(df['SpatialFDR'] < 0.1) & (df['nhood_annotation'] == "HSC")].shape

In [None]:
df[(df['SpatialFDR'] < 0.01) & (df['nhood_annotation'] == "HSC")].shape

Up, random

In [None]:
# Significant (SpatialFDR < 0.1) HSC neighbourhoods with positive logFC.
hsc_up = df[(df['SpatialFDR'] < 0.1) & (df['nhood_annotation'] == "HSC") & (df['logFC'] > 0)]
# Cap the sample size at the number of matches: DataFrame.sample raises
# ValueError when asked for more rows than exist (sampling without replacement).
df_up = hsc_up.sample(min(50, len(hsc_up)), axis=0, random_state=1)
pl_nhoods = df_up.index
milo.plot_nhood_counts_by_cond(mdata, test_var="PCW", subset_nhoods=pl_nhoods, log_counts=False)

In [None]:
# One panel per neighbourhood in pl_nhoods, five panels per row.
ncol = 5
# Ceiling division so a partial last row still gets a grid row (flooring made
# plt.subplot raise once i + 1 exceeded nrow * ncol); at least one row so an
# empty or short selection does not give a zero-height figure.
nrow = max(1, (len(pl_nhoods) + ncol - 1) // ncol)
with matplotlib.rc_context({"figure.figsize": [20, 3*nrow]}):
    for i, nh in enumerate(pl_nhoods):
        plt.subplot(nrow, ncol, i + 1)
        milo.plot_nhood_counts_by_cond(mdata, test_var="PCW", subset_nhoods=nh, log_counts=False, return_fig=True)
        plt.title(f"Nhood {nh}")
    plt.tight_layout()

Down, random

In [None]:
# Significant (SpatialFDR < 0.1) HSC neighbourhoods with negative logFC.
hsc_dn = df[(df['SpatialFDR'] < 0.1) & (df['nhood_annotation'] == "HSC") & (df['logFC'] < 0)]
# Cap the sample size at the number of matches: DataFrame.sample raises
# ValueError when asked for more rows than exist (sampling without replacement).
df_dn = hsc_dn.sample(min(50, len(hsc_dn)), axis=0, random_state=1)
pl_nhoods = df_dn.index
milo.plot_nhood_counts_by_cond(mdata, test_var="PCW", subset_nhoods=pl_nhoods, log_counts=False)

In [None]:
# One panel per neighbourhood in pl_nhoods, five panels per row.
ncol = 5
# Ceiling division so a partial last row still gets a grid row (flooring made
# plt.subplot raise once i + 1 exceeded nrow * ncol); at least one row so an
# empty or short selection does not give a zero-height figure.
nrow = max(1, (len(pl_nhoods) + ncol - 1) // ncol)
with matplotlib.rc_context({"figure.figsize": [20, 3*nrow]}):
    for i, nh in enumerate(pl_nhoods):
        plt.subplot(nrow, ncol, i + 1)
        milo.plot_nhood_counts_by_cond(mdata, test_var="PCW", subset_nhoods=nh, log_counts=False, return_fig=True)
        plt.title(f"Nhood {nh}")
    plt.tight_layout()

## Kupffer

In [None]:
celltype = "Kupffer"

In [None]:
df[(df['nhood_annotation'] == celltype)].shape

In [None]:
df[(df['SpatialFDR'] < 0.1) & (df['nhood_annotation'] == celltype)].shape

In [None]:
df[(df['SpatialFDR'] < 0.01) & (df['nhood_annotation'] == celltype)].shape

Up, all significant (no subsampling)

In [None]:
df_up = df[(df['SpatialFDR'] < 0.1) & (df['nhood_annotation'] == celltype) & (df['logFC'] > 0)]
pl_nhoods = df_up.index
milo.plot_nhood_counts_by_cond(mdata, test_var="PCW", subset_nhoods=pl_nhoods, log_counts=False, return_fig=True)

In [None]:
# One panel per neighbourhood in pl_nhoods, five panels per row.
ncol = 5
# The grid was fixed at a single row, so a sixth neighbourhood made
# plt.subplot raise. Keep one row as the minimum and add rows as needed.
nrow = max(1, (len(pl_nhoods) + ncol - 1) // ncol)
with matplotlib.rc_context({"figure.figsize": [20, 3*nrow]}):
    for i, nh in enumerate(pl_nhoods):
        plt.subplot(nrow, ncol, i + 1)
        milo.plot_nhood_counts_by_cond(mdata, test_var="PCW", subset_nhoods=nh, log_counts=False, return_fig=True)
        plt.title(f"Nhood {nh}")
    plt.tight_layout()

Down, all significant (no subsampling)

In [None]:
df_dn = df[(df['SpatialFDR'] < 0.1) & (df['nhood_annotation'] == celltype) & (df['logFC'] < 0)]
pl_nhoods = df_dn.index
milo.plot_nhood_counts_by_cond(mdata, test_var="PCW", subset_nhoods=pl_nhoods, log_counts=False, return_fig=True)

In [None]:
len(pl_nhoods)

In [None]:
# One panel per neighbourhood in pl_nhoods, four panels per row.
ncol = 4
# The grid was fixed at 5 rows (20 panels), so a 21st neighbourhood made
# plt.subplot raise. Keep 5 rows as the minimum (same layout as before for up
# to 20 panels) and add rows as needed.
nrow = max(5, (len(pl_nhoods) + ncol - 1) // ncol)
with matplotlib.rc_context({"figure.figsize": [20, 3*nrow]}):
    for i, nh in enumerate(pl_nhoods):
        plt.subplot(nrow, ncol, i + 1)
        milo.plot_nhood_counts_by_cond(mdata, test_var="PCW", subset_nhoods=nh, log_counts=False, return_fig=True)
        plt.title(f"Nhood {nh}")
    plt.tight_layout()

## Monocyte

In [None]:
celltype="Monocyte"

In [None]:
df[(df['nhood_annotation'] == celltype)].shape

In [None]:
df[(df['SpatialFDR'] < 0.1) & (df['nhood_annotation'] == celltype)].shape

In [None]:
df[(df['SpatialFDR'] < 0.01) & (df['nhood_annotation'] == celltype)].shape

Up, random

In [None]:
# Significant (SpatialFDR < 0.1) neighbourhoods of `celltype` with positive logFC.
sig_up = df[(df['SpatialFDR'] < 0.1) & (df['nhood_annotation'] == celltype) & (df['logFC'] > 0)]
# Cap the sample size at the number of matches: DataFrame.sample raises
# ValueError when asked for more rows than exist (sampling without replacement).
df_up = sig_up.sample(min(30, len(sig_up)), axis=0, random_state=1)
pl_nhoods = df_up.index
milo.plot_nhood_counts_by_cond(mdata, test_var="PCW", subset_nhoods=pl_nhoods, log_counts=False, return_fig=True)

In [None]:
# One panel per neighbourhood in pl_nhoods, five panels per row.
ncol = 5
# Ceiling division so a partial last row still gets a grid row (flooring made
# plt.subplot raise once i + 1 exceeded nrow * ncol); at least one row so an
# empty or short selection does not give a zero-height figure.
nrow = max(1, (len(pl_nhoods) + ncol - 1) // ncol)
with matplotlib.rc_context({"figure.figsize": [20, 3*nrow]}):
    for i, nh in enumerate(pl_nhoods):
        plt.subplot(nrow, ncol, i + 1)
        milo.plot_nhood_counts_by_cond(mdata, test_var="PCW", subset_nhoods=nh, log_counts=False, return_fig=True)
        plt.title(f"Nhood {nh}")
    plt.tight_layout()

Down, random

In [None]:
# Significant (SpatialFDR < 0.1) neighbourhoods of `celltype` with negative logFC.
sig_dn = df[(df['SpatialFDR'] < 0.1) & (df['nhood_annotation'] == celltype) & (df['logFC'] < 0)]
# Cap the sample size at the number of matches: DataFrame.sample raises
# ValueError when asked for more rows than exist (sampling without replacement).
df_dn = sig_dn.sample(min(30, len(sig_dn)), axis=0, random_state=1)
pl_nhoods = df_dn.index
milo.plot_nhood_counts_by_cond(mdata, test_var="PCW", subset_nhoods=pl_nhoods, log_counts=False, return_fig=True)

In [None]:
# One panel per neighbourhood in pl_nhoods, five panels per row.
ncol = 5
# Ceiling division so a partial last row still gets a grid row (flooring made
# plt.subplot raise once i + 1 exceeded nrow * ncol); at least one row so an
# empty or short selection does not give a zero-height figure.
nrow = max(1, (len(pl_nhoods) + ncol - 1) // ncol)
with matplotlib.rc_context({"figure.figsize": [20, 3*nrow]}):
    for i, nh in enumerate(pl_nhoods):
        plt.subplot(nrow, ncol, i + 1)
        milo.plot_nhood_counts_by_cond(mdata, test_var="PCW", subset_nhoods=nh, log_counts=False, return_fig=True)
        plt.title(f"Nhood {nh}")
    plt.tight_layout()

## IM-B

In [None]:
celltype="IM-B"

In [None]:
df[(df['nhood_annotation'] == celltype)].shape

In [None]:
df[(df['SpatialFDR'] < 0.1) & (df['nhood_annotation'] == celltype)].shape

In [None]:
df[(df['SpatialFDR'] < 0.01) & (df['nhood_annotation'] == celltype)].shape

Up, random

In [None]:
# Significant (SpatialFDR < 0.1) neighbourhoods of `celltype` with positive logFC.
sig_up = df[(df['SpatialFDR'] < 0.1) & (df['nhood_annotation'] == celltype) & (df['logFC'] > 0)]
# Cap the sample size at the number of matches: DataFrame.sample raises
# ValueError when asked for more rows than exist (sampling without replacement).
df_up = sig_up.sample(min(30, len(sig_up)), axis=0, random_state=1)
pl_nhoods = df_up.index
milo.plot_nhood_counts_by_cond(mdata, test_var="PCW", subset_nhoods=pl_nhoods, log_counts=False, return_fig=True)

In [None]:
# One panel per neighbourhood in pl_nhoods, five panels per row.
ncol = 5
# Ceiling division so a partial last row still gets a grid row (flooring made
# plt.subplot raise once i + 1 exceeded nrow * ncol); at least one row so an
# empty or short selection does not give a zero-height figure.
nrow = max(1, (len(pl_nhoods) + ncol - 1) // ncol)
with matplotlib.rc_context({"figure.figsize": [20, 3*nrow]}):
    for i, nh in enumerate(pl_nhoods):
        plt.subplot(nrow, ncol, i + 1)
        milo.plot_nhood_counts_by_cond(mdata, test_var="PCW", subset_nhoods=nh, log_counts=False, return_fig=True)
        plt.title(f"Nhood {nh}")
    plt.tight_layout()

# Library effects?

Export the count matrix by donor/sample to investigate in R

In [None]:
mdata

Counts by donor

In [None]:
# Export the milo count matrix with one column per entry of
# mdata['milo'].obs_names and one row per column of X. The row index is not
# written, so rows are identified by position only.
donor_counts = mdata['milo'].X.toarray()
count_byDonor = pd.DataFrame(donor_counts, index=mdata['milo'].obs_names).T
count_byDonor.to_csv("data/FL_wnn_milo_countByDonor.csv", index=False)

Counts by sample

In [None]:
# Re-count cells per neighbourhood grouped by sampleID and export the table
# in the same layout as the by-donor CSV.
#
# Pass the RNA AnnData rather than `mdata`: per pertpy's Milo.count_nhoods,
# a MuData argument gets its "milo" modality replaced in place and is returned
# as-is, so `mdata2` would be `mdata` and the donor-level counts, DA results
# and annotations in mdata["milo"] would be lost. With an AnnData argument a
# separate MuData is built and `mdata` keeps its donor-level results.
mdata2 = milo.count_nhoods(mdata["rna"], sample_col="sampleID")
count_bySample = pd.DataFrame(mdata2['milo'].X.toarray())
count_bySample.index = mdata2['milo'].obs_names
count_bySample = count_bySample.T
count_bySample.to_csv(f"data/FL_wnn_milo_countBySample.csv", index=False)