## Benchmark BayesTME in fine-grained, spatially dependent simulations

In [1]:
import os
import numpy as np
import pandas as pd
import scanpy as sc
import torch

In [2]:
import warnings

import matplotlib.pyplot as plt
import matplotlib.font_manager
from matplotlib import rcParams

import seaborn as sns

# Apply the seaborn style before the rcParams overrides further down so that
# the overrides are the last thing written.
sns.set_style('white')

# Gather the family name of every system font file that matplotlib can open.
font_list = set()
for font_path in matplotlib.font_manager.findSystemFonts():
    try:
        font_list.add(matplotlib.font_manager.get_font(font_path).family_name)
    except RuntimeError:
        # Font files that raise RuntimeError while loading are skipped.
        continue

# Use Helvetica when it is among the discovered families, FreeSans otherwise.
plot_font = 'Helvetica' if 'Helvetica' in font_list else 'FreeSans'

# Global figure defaults for the rest of the notebook.
rcParams['font.family'] = plot_font
rcParams.update({
    'font.size': 10,
    'figure.dpi': 300,
    'figure.figsize': (3,3),
    'savefig.dpi': 500,
})

# Suppress every Python warning from this point on.
warnings.filterwarnings('ignore')

In [3]:
from bayestme import data, deconvolution, gene_filtering

### (1). Major cell type simulation (`simu_5`)

#### Load data

In [4]:
# Load the ground-truth tables for this simulation: the per-location
# cell-type proportions and the accompanying "members" table.
data_path = '../data/simu_5/'
prop_df, mem_df = (
    pd.read_csv(os.path.join(data_path, csv_name), index_col=0)
    for csv_name in ('proportions.st_synth.csv', 'members.st_synth.csv')
)

# Preview the first rows of the proportion table.
prop_df.head()

Unnamed: 0,CAFs,Cancer_Epithelial,Myeloid,Normal_Epithelial,T-cells
location_0,0.333333,0.0,0.0,0.0,0.666667
location_1,0.5,0.0,0.0,0.0,0.5
location_2,0.545455,0.0,0.181818,0.0,0.272727
location_3,0.625,0.0,0.375,0.0,0.0
location_4,0.444444,0.0,0.555556,0.0,0.0


In [5]:
# Read the simulated ST count matrix and wrap it in an AnnData object.
adata_st = sc.AnnData(
    pd.read_csv(os.path.join(data_path, 'counts.st_synth.csv'), index_col=0)
)

# Attach spatial locations laid out on a 50 x 50 grid.
grid_axis = np.arange(50)
xx = np.repeat(grid_axis, 50)  # 0, 0, ..., 0, 1, 1, ... (slow-varying index)
yy = np.tile(grid_axis, 50)    # 0, 1, ..., 49, 0, 1, ... (fast-varying index)
adata_st.obsm["location"] = np.column_stack((yy, xx))

adata_st

AnnData object with n_obs × n_vars = 2500 × 29631
    obsm: 'location'

#### Run BayesTME

In [7]:
# Filtering genes
# Three BayesTME gene filters are chained; each consumes the previous result.
# NOTE(review): `stdata` is not defined in any cell visible here, and the
# execution counts jump from In [5] to In [7]. Confirm which cell builds it
# (presumably from `adata_st`) so the notebook survives Restart & Run All.

# Step 1: presumably keeps the 1000 genes with the largest standard deviation
# (inferred from the helper name and `n_gene=1000` -- verify against BayesTME).
stddev_filtered = gene_filtering.select_top_genes_by_standard_deviation(
    stdata, n_gene=1000)

# Step 2: spot-threshold filter with `spot_threshold=0.95`.
# NOTE(review): looks like it drops genes present in more than 95% of spots --
# confirm the exact semantics in the BayesTME documentation.
spot_threshold_filtered = gene_filtering.filter_genes_by_spot_threshold(
    stddev_filtered, spot_threshold=0.95)

# Step 3: ribosome-gene filter; its output is the dataset used for deconvolution.
stdata_filtered = gene_filtering.filter_ribosome_genes(spot_threshold_filtered)

In [8]:
# Report how many genes remain after filtering, relative to the unfiltered set.
n_selected, n_total = len(stdata_filtered.gene_names), len(stdata.gene_names)
print(f'{n_selected}/{n_total} genes selected')

788/29631 genes selected


The number of cell types is known a priori (`k=5`); the smoothing parameter is set to $\lambda=1000$.

In [9]:
# Run BayesTME deconvolution on the filtered dataset.
# NOTE(review): the recorded run of this cell was interrupted after ~3.5 hours
# with the progress bar still at 0/200 -- consider caching the result to disk.
# NOTE(review): `n_gene=1000` is larger than the 788 genes reported after
# filtering; confirm how `deconvolve` handles that.
deconvolve_kwargs = dict(
    reads=stdata_filtered.reads,
    edges=stdata_filtered.edges,
    n_gene=1000,
    n_components=5,   # number of cell types (k)
    lam2=1000,        # smoothing parameter (lambda)
    n_samples=100,
    n_burnin=100,
    n_thin=1,
    bkg=False,
    lda=False,
)
deconvolution_result = deconvolution.deconvolve(**deconvolve_kwargs)

Deconvolution:   0%|                                                                                                          | 0/200 [3:36:28<?, ?it/s]

KeyboardInterrupt



### (2). Fine-grained cell-type simulation (`simu_10`)