# LMD Demo
Preprocesses (PCA + neighbors) and computes LMD

In [1]:
import scanpy as sc
import rapids_singlecell as rsc
import numpy as np
import pandas as pd
import sys
import os
import matplotlib.pyplot as plt
import cupyx as cpx
from pylmd.lmd import lmd
import time

In [2]:
adata_cpu = sc.read_10x_h5("/mnt/d/scRNA/wtwoundfiltered_feature_bc_matrix.h5")
adata_cpu.var_names_make_unique()

  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")


In [3]:
adata_gpu = sc.read_10x_h5("/mnt/d/scRNA/wtwoundfiltered_feature_bc_matrix.h5")
adata_gpu.var_names_make_unique()
adata_gpu.X = cpx.scipy.sparse.csr_matrix(adata_gpu.X)

  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")


In [4]:
adata_cpu

AnnData object with n_obs × n_vars = 4305 × 31053
    var: 'gene_ids', 'feature_types', 'genome'

In [5]:
adata_gpu

AnnData object with n_obs × n_vars = 4305 × 31053
    var: 'gene_ids', 'feature_types', 'genome'

In [6]:
start = time.perf_counter()

sc.pp.filter_cells(adata_cpu, min_genes=100)
sc.pp.filter_genes(adata_cpu, min_cells=3)
sc.pp.normalize_total(adata_cpu)
sc.pp.log1p(adata_cpu)

sc.pp.highly_variable_genes(adata_cpu, n_top_genes=2000)
sc.pp.scale(adata_cpu,max_value=10)

sc.tl.pca(adata_cpu)
sc.pp.neighbors(adata_cpu)
sc.tl.umap(adata_cpu)

end = time.perf_counter()
print(f"Elapsed time: {end - start} seconds")

Elapsed time: 15.524061693999101 seconds


In [7]:
start = time.perf_counter()

rsc.pp.normalize_total(adata_gpu)
rsc.pp.log1p(adata_gpu)

rsc.pp.highly_variable_genes(adata_gpu, n_top_genes=2000)
rsc.pp.scale(adata_gpu,max_value=10)

rsc.tl.pca(adata_gpu)
rsc.pp.neighbors(adata_gpu)
rsc.tl.umap(adata_gpu)

end = time.perf_counter()
print(f"Elapsed time: {end - start} seconds")

Elapsed time: 3.465422897999815 seconds


In [8]:
start = time.perf_counter()

# Compute LMD
res_cpu = lmd(adata_cpu, n_neighbors=15, use_rep='X_pca', max_time_pow=6, min_cells=3, correction=False, device='cpu')

end = time.perf_counter()
print(f"Elapsed time: {end - start} seconds")

Elapsed time: 15.559674375999748 seconds


In [9]:
type(adata_gpu)

anndata._core.anndata.AnnData

In [10]:
start = time.perf_counter()

# Compute LMD
res_gpu = lmd(adata_gpu, n_neighbors=15, use_rep='X_pca', max_time_pow=6, min_cells=3, correction=False, device='gpu')

end = time.perf_counter()
print(f"Elapsed time: {end - start} seconds")

TypeError: Argument 'array' has incorrect type (expected cupy._core.core._ndarray_base, got numpy.ndarray)

In [None]:
# Show top genes
genes_cpu = res_cpu['genes'] if res_cpu['genes'] is not None else [f'gene_{i}' for i in range(n_genes)]
order_cpu = res_cpu['rank']
lmds_cpu = res_cpu['lmds']

In [None]:
plt.plot([genes_cpu[i] for i in order_cpu[::100]], [lmds_cpu[i] for i in order_cpu[::100]])

In [None]:
res_cpu['knee_index']

In [None]:
res_cpu['score_profile']

In [None]:
res_cpu['score_profile']

In [None]:
scores_over_time = []

for i in res_cpu['score_profile']:
    if i.startswith('score'):
        scores_over_time.append(res_cpu['score_profile'][i][2000])
        
plt.plot(scores_over_time)

In [None]:
scores_over_time = []

for i in res_cpu['score_profile']:
    if i.startswith('score'):
        scores_over_time.append(res_cpu['score_profile'][i][1])
        
plt.plot(scores_over_time)

In [None]:
# Show top genes
genes_gpu = res_gpu['genes'] if res_gpu['genes'] is not None else [f'gene_{i}' for i in range(n_genes)]
order_gpu = res_gpu['rank']
lmds_gpu = res_gpu['lmds']

In [None]:
top_gpu = [(genes_gpu[i], float(lmds_gpu[i])) for i in order_gpu[:10]]
top_gpu

In [None]:
genes_cpu[:10]

In [None]:
top_cpu = [(genes_cpu[i], float(lmds_cpu[i])) for i in order_cpu[:10]]
top_cpu

In [None]:
bot_gpu = [(genes_gpu[i], float(lmds_gpu[i])) for i in order_gpu[(len(order_gpu) - 10):]]
bot_gpu

In [None]:
bot_cpu = [(genes_cpu[i], float(lmds_cpu[i])) for i in order_cpu[(len(order_cpu) - 10):]]
bot_cpu

In [None]:
top_genes_gpu = [(genes_gpu[i]) for i in order_gpu[:20]]
top_genes_gpu

In [None]:
top_genes_cpu = [(genes_cpu[i]) for i in order_cpu[:20]]
top_genes_cpu

In [None]:
bot_genes_gpu = [(genes_gpu[i]) for i in order_gpu[(len(order_gpu) - 10):]]
bot_genes_gpu

In [None]:
bot_genes_cpu = [(genes_cpu[i]) for i in order_cpu[(len(order_cpu) - 10):]]
bot_genes_cpu

In [None]:
sc.pl.umap(adata_gpu, color = top_genes_gpu)

In [None]:
sc.pl.umap(adata_cpu, color = top_genes_cpu)

In [None]:
sc.pl.umap(adata_gpu, color = bot_genes_gpu)

In [None]:
sc.pl.umap(adata_cpu, color = bot_genes_cpu)

In [None]:
check_genes_cpu = [(genes_cpu[i]) for i in order_cpu[::1000]]
check_genes_cpu
sc.pl.umap(adata_cpu, color = check_genes_cpu)