In [1]:
import scanpy as sc
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import rcParams
from matplotlib import colors
import seaborn as sb
from gprofiler import GProfiler
import rpy2.rinterface_lib.callbacks
import logging
from rpy2.robjects import pandas2ri
import anndata2ri
import doubletdetection as dd
from gseapy.plot import barplot, dotplot
import os, sys
import gseapy as gp
from anndata import AnnData

import scipy.stats
import diffxpy.api as de
import anndata
from batchglm.api.models.tf1.glm_nb import Simulator

# Switch on the automatic rpy2 conversion layers, pandas first and then anndata2ri,
# so R results come back as Python objects without manual conversion.
for converter in (pandas2ri, anndata2ri):
    converter.activate()


sc.settings.verbosity = 0   # verbosity: errors (0), warnings (1), info (2), hints (3)

# set_figure_params() installs scanpy's own matplotlib defaults, including
# 'figure.figsize', so it has to run BEFORE any manual rcParams override;
# otherwise the override is silently replaced by scanpy's default size.
sc.settings.set_figure_params(dpi=200, dpi_save=300, facecolor='white')
plt.rcParams['figure.figsize'] = (8, 8)  # rescale figures (must follow set_figure_params)

# Record the software environment in the notebook output for reproducibility.
sc.logging.print_versions()
sc.logging.print_header()

# Silence R warning messages: raise the rpy2 callback logger threshold to ERROR.
# Note: comment out the next line to get more verbose R output.
rpy2.rinterface_lib.callbacks.logger.setLevel(logging.ERROR)
# Load the rpy2 IPython extension (presumably for the %R / %%R magics;
# none are used in the cells visible here - TODO confirm it is still needed).
%load_ext rpy2.ipython

%matplotlib inline

plt.rcParams.update({'font.size': 22})

import matplotlib
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42

-----
anndata     0.7.5
scanpy      1.6.1
sinfo       0.3.1
-----
Crypto                              3.10.1
OpenSSL                             20.0.1
PIL                                 8.1.0
absl                                NA
anndata                             0.7.5
anndata2ri                          1.0.6
appdirs                             1.4.4
asciitree                           NA
astunparse                          1.6.3
backcall                            0.2.0
batchglm                            v0.7.4
bcrypt                              3.2.0
beta_ufunc                          NA
binom_ufunc                         NA
bioservices                         1.7.11
boto3                               1.17.107
botocore                            1.20.107
brotli                              NA
bs4                                 4.10.0
cairo                               1.20.0
certifi                             2021.10.08
cffi                                1.14.4
chardet

In [2]:
import pickle

# Restore the previously saved `adata` object; later cells read
# adata.uns['rank_genes_r1'] and adata.obs['leiden_r1'] from it.
# NOTE: unpickling can execute arbitrary code - only load pickle files
# that were produced by a trusted run of this analysis.
with open('adata_leiden_r1_rank_genes.pkl', mode='rb') as pkl_handle:
    adata = pickle.load(pkl_handle)

In [4]:
# Flatten the per-cluster ranking results into one wide table and write it to
# rank_genes_groups.csv, the file handed to SCSA in the next step.
# Columns are named '<cluster>_<first letter of statistic>' (e.g. '0_n', '0_l',
# '0_s', '0_p'), grouped cluster by cluster.
result = adata.uns['rank_genes_r1']
groups = result['names'].dtype.names
stat_keys = ('names', 'logfoldchanges', 'scores', 'pvals')
dat = pd.DataFrame({
    '{}_{}'.format(group, key[:1]): result[key][group]
    for group in groups
    for key in stat_keys
})
dat.to_csv("rank_genes_groups.csv")

In [8]:
# Run SCSA on the exported marker table. According to the Namespace line the tool
# echoes, the flags resolve to: db='./SCSA/whole.db' (-d), input='rank_genes_groups.csv' (-i),
# source='scanpy' (-s), foldchange=1.5 (-f), pvalue=0.01 (-p), output='result' (-o),
# species='Mouse' (-g), outfmt='txt' (-m); -E presumably maps to Gensymbol=True.
# NOTE(review): the recorded output of this cell ends in a (truncated) traceback -
# confirm the run completes before trusting the 'result' file read below.
!python ./SCSA/SCSA.py -d ./SCSA/whole.db -i rank_genes_groups.csv -s scanpy -E -f 1.5 -p 0.01 -o result -g mouse -m txt

Version V1.1 [2020/07/03]
DB load: 47347 3 3 48257 37440
Namespace(Gensymbol=True, MarkerDB=None, celltype='normal', cluster='all', db='./SCSA/whole.db', foldchange=1.5, input='rank_genes_groups.csv', list_tissue=False, noprint=False, norefdb=False, outfmt='txt', output='result', pvalue=0.01, source='scanpy', species='Mouse', target='cellmarker', tissue='All', weight=100.0)
Version V1.1 [2020/07/03]
DB load: 47347 3 3 48257 37440
load markers: 45409
############################## Cluster 0 ##############################

Traceback (most recent call last):
  File "./SCSA/SCSA.py", line 1284, in <module>
    p.run_cmd(args)
  File "./SCSA/SCSA.py", line 1250, in run_cmd
    outs = anno.run_detail_cmd()
  File "./SCSA/SCSA.py", line 1178, in run_detail_cmd
    outs = self.calcu_scanpy_group(self.args.input,self.args.Gensymbol)
  File "./SCSA/SCSA.py", line 551, in calcu_scanpy_group
    h_values,colnames = self.get_cell_matrix(newexps,ltitle,fid,gcol,ccol,abs_tag)
  File "./SCSA/SCSA.py",

In [10]:
# Load the tab-separated annotation table written by SCSA (one row per
# cluster / candidate cell type, with its Z-score) and display it.
SCSA_results = pd.read_table('./result')
SCSA_results

Unnamed: 0,Cell Type,Z-score,Cluster
0,Ciliated cell,8.010131,0
1,Basal cell,4.026794,0
2,Brush cell (Tuft cell),3.490123,0
3,Epithelial cell,2.530696,0
4,Neural stem cell,1.617789,0
...,...,...,...
4565,Cardiac progenitor cell,-0.298524,9
4566,Unrestricted somatic stem cell,-0.298524,9
4567,Mesenchymal progenitor cell,-0.298524,9
4568,Osteocyte,-0.298668,9


In [12]:
# Number of categories (clusters) in the 'leiden_r1' annotation.
len(adata.obs['leiden_r1'].cat.categories)

25

In [13]:
# Current category labels of 'leiden_r1', in category order.
adata.obs['leiden_r1'].cat.categories

Index(['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12',
       '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24'],
      dtype='object')

In [None]:
# Manual cell-type annotation for the 25 'leiden_r1' clusters.
# The labels are applied by position, so entry i names the i-th existing category
# (clusters '0' ... '24' in the category listing above).
# Note: this relabels adata in place, replacing the numeric cluster IDs.
leiden_r1_labels = [
    'Basal_1',
    'Fibroblasts_1',
    'Basal_2',
    'Basal_3',
    'Luminal_1',
    'Fibroblasts_2',
    'Fibroblasts_3',
    'Macrophage_1',
    'Seminal_vesicle_1',
    'Luminal_2',
    'Luminal_3',
    'T_cells_1',
    'Macrophage_2',
    'Endothelial',
    'Fibroblasts_4',
    'Luminal_4',
    'Dendritic_cells',
    'Basal_4',
    'T_cells_2',
    'Luminal_5',
    'Fibroblasts_5',
    'Macrophage_3',
    'Nerve_cells',
    'Luminal_6',
    'Seminal_vesicle_2',
]
adata.rename_categories('leiden_r1', leiden_r1_labels)

In [18]:
# Print the first two SCSA candidate cell types for every cluster, as
# '<cluster>:<first>,<second>'.
# The clusters are taken from the result table itself rather than a hard-coded
# range(25), so the cell keeps working if the clustering resolution changes, and a
# cluster with fewer than two candidates prints what it has instead of raising
# IndexError.
# Assumes the rows of each cluster are already ordered best-first in the SCSA
# output (the table displayed above is in descending Z-score order) - TODO confirm.
for cluster_id, cluster_hits in SCSA_results.groupby('Cluster'):
    top_two = cluster_hits['Cell Type'].head(2)
    print('{}:{}'.format(cluster_id, ','.join(map(str, top_two))))

0:Ciliated cell,Basal cell
1:Endothelial cell,Ciliated cell
2:Epithelial cell,Basal cell
3:Epithelial cell,Basal cell
4:Ciliated cell,Brush cell (Tuft cell)
5:Ciliated cell,Neuroendocrine cell
6:Ciliated cell,Endothelial cell
7:Macrophage,Microglial cell
8:Ciliated cell,Brush cell (Tuft cell)
9:Ciliated cell,Brush cell (Tuft cell)
10:Ciliated cell,Brush cell (Tuft cell)
11:T cell,Natural killer cell
12:Macrophage,Brush cell (Tuft cell)
13:Endothelial cell,Hematopoietic stem cell
14:Ciliated cell,Endothelial cell
15:Epithelial cell,Brush cell (Tuft cell)
16:B cell,Brush cell (Tuft cell)
17:Brush cell (Tuft cell),Epithelial cell
18:T cell,Brush cell (Tuft cell)
19:Brush cell (Tuft cell),Epithelial cell
20:Ciliated cell,Endothelial cell
21:Macrophage,Microglial cell
22:Astrocyte,Brush cell (Tuft cell)
23:Brush cell (Tuft cell),Ciliated cell
24:Brush cell (Tuft cell),Ciliated cell


In [19]:
# Re-check the 'leiden_r1' category labels.
# NOTE(review): the recorded output still lists the numeric IDs '0'-'24', so the
# rename cell (shown with no execution count) was presumably not run in this session.
adata.obs['leiden_r1'].cat.categories

Index(['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12',
       '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24'],
      dtype='object')

In [20]:
# Number of cells in each 'leiden_r1' cluster (value_counts sorts by count, largest first).
adata.obs['leiden_r1'].value_counts()

0     6901
1     4335
2     3369
3     3285
4     3243
5     3231
6     3070
7     2410
8     2279
9     2105
10    2060
11    1932
12    1540
13    1511
14    1448
15    1432
16    1002
17     912
18     876
19     752
20     507
21     308
22     171
23     159
24      88
Name: leiden_r1, dtype: int64