In [9]:
# --- standard library ---
import copy
import os
import sys
import warnings

# --- third-party ---
import anndata as ad
import hdf5plugin
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import scipy
import seaborn as sb  # pretty plotting
from matplotlib import rcParams
from sklearn.decomposition import TruncatedSVD

# --- project-local ---
import config as general_paths

# --- notebook-wide configuration ---
# NOTE(review): this silences every warning for the whole session.
warnings.filterwarnings("ignore")
sb.set_context(context='poster')

sc.settings.verbosity = 3             # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_header()             # prints the versions of the main packages
sc.settings.set_figure_params(dpi=80)

scanpy==1.9.8 anndata==0.9.2 umap==0.5.5 numpy==1.24.4 scipy==1.10.1 pandas==1.5.3 scikit-learn==1.3.2 statsmodels==0.14.1 igraph==0.10.8 louvain==0.8.1 pynndescent==0.5.11


In [10]:
# Project directories, all taken from the shared `config` module.
data_path = general_paths.data_path
local_path = general_paths.local_path
figure_path = general_paths.figures_path
output_path = general_paths.output_path

# The input files for this notebook are read from the output directory.
input_path = output_path

## Data loading

In [11]:
# figure_path = r'C:\Users\ilosz01\OneDrive - Linköpings universitet\MarcinLab\SingleCellSequencing\scs_analysis\figuresR'
# input_path = r'C:\Users\ilosz01\OneDrive - Linköpings universitet\MarcinLab\SingleCellSequencing\scs_analysis\out'


# Locations of the two .h5ad files inside the input directory.
adata_path = os.path.join(input_path, 'campari_scs_from_R.h5ad')
adata_umap_path = os.path.join(input_path, 'control_scs_umap_from_R.h5ad')

# Presumably the number of principal components; not referenced in the
# cells visible here - TODO confirm.
NO_PCA = 20

In [12]:
# Read the AnnData object from the .h5ad file at `adata_umap_path`.
adata_umap = ad.read_h5ad(filename=adata_umap_path)

## EXPLORE

In [13]:
# List of relevant genes (order preserved).
marker_genes = [
    'Trpm3', 'Trpm2', 'Smr2', 'Sstr2', 'Bmpr1b', 'Trpm8', 'Trpv1', 'Piezo2',
    'Piezo1', 'Nppb', 'Sst', 'Pvalb', 'Prokr2', 'Mrgprd', 'Mrgpra3', 'Cd34',
    'Th', 'Trpa1', 'Ntrk3', 'Ntrk2', 'Ntrk1', 'Ret', 'Tac1', 'Calca',
    'Calcb', 'Nefh', 'S100b', 'Scn10a', 'Slc17a8', 'Atf3', 'Pou4f3', 'Calb1',
    'Calb2', 'Avil', 'Asic3', 'Asic2', 'Asic1', 'Pou6f2', 'Avpr1a', 'Pou4f2',
    'Sox10', 'Casq2', 'Chrna7', 'Chrna3', 'P2rx3', 'Gfra2', 'Ldhb', 'Necab2',
    'Spp1', 'Adm', 'Hpse', 'Adra2a',
]

In [14]:
# Marker genes per group; the group names are shortened so that they fit
# when displayed. Insertion order is kept.
marker_genes_dict = {
    'RA-LTMR': ['Ntrk2'],
    'C-LTMR': ['Cd34', 'Th'],
    'Itch': ['Nppb', 'Mrgpra3'],
    'Nonpept-Nocicept': ['Mrgprd'],
    'Pept-Nocicept': ['Sstr2'],
    'Interoceptors': ['Adra2a', 'Tac1'],
    'A-HTMR': ['Smr2', 'Bmpr1b'],
    'Propioceptors': ['Pvalb'],
}
# marker_genes_dict = {
#                     'RA-LTMR': ['Ntrk2'],
#                     'C-LTMR': ['Cd34', 'Th'],
#                     'Itch': ['Nppb','Mrgpra3'],
#                     'Nonpeptidergic-Nociceptors': ['Mrgprd'],
#                     'Peptidergic-Nociceptors': ['Sstr2'],
#                     'Interoceptors': ['Adra2a','Tac1'],
#                     'A-HTMR': ['Smr2','Bmpr1b'],
#                     'Propioceptores': ['Pvalb']
# }

## Stimuli
* 0: control
* 1: pinch
* 2: new_stroke (after December)
* 3: balloon
* 4: AG (ano-genital brush)
* 5: AD (anal distension)
* 6: heating
* 7: mock
* 8: old_stroke (first experiments with stroke)
* 9: anal pinch
* 10: mock with poop
* 11: vaginal distension
* 12: tomato
* 13: poop
* 14: Bladder
* 15: Colon-rectum distension (CRD)
* 16: AD + CFA (anal distension + inflammation)
* 17: AGB + CFA (ano-genital brush + inflammation)
* 18: Heating + CFA (heating + inflammation)
* 19: Tail Brush
* 20: TRPM8 td Tomato
* 21: AD + Pulse
* 22: AGB-CFA 1 W
* 23: AD-CFA 1W
* 24: HP
* 25: Penis
* 26: Clitoris Vibration
* 27: Max Cells
* 28: Tracing Ano Genital Skin injection
* 29: Crazy Mock
* 30: Tracing Anal injection
* 31: Tracing Vagina

In [15]:
# Stimulus names; a label's position in the list is its numeric stimulus code.
labels = [
    'Control', 'Pinch', 'New_stroke', 'Ballon', 'AG', 'AD', 'Heating', 'Mock',            # 0-7
    'Old_stroke', 'Anal_pinch', 'Mock_w_poop', 'Vaginal_distension', 'Tomatoe', 'Poop',   # 8-13
    'Bladder', 'CRD', 'AD+CFA', 'AGB+CFA', 'Heating+CFA', 'Tail Brush',                   # 14-19
    'TRPM8 td tomato', 'AD + Pulse', 'AGB-CFA 1 W', 'AD-CFA 1W', 'HP', 'Penis',           # 20-25
    'Clitoris Vibration', 'Max Cells', 'Tracing Ano Genital Skin injection',              # 26-28
    'Crazy Mock', 'Tracing Anal injection', 'Tracing Vagina',                             # 29-31
]
# String form of every stimulus code ('0' ... '31'), index-aligned with `labels`.
stim_str = [str(code) for code in range(len(labels))]

In [16]:
# Display the summary of the loaded AnnData object (dimensions and the
# obs / var / uns / obsm / varm / obsp entries it holds).
adata_umap

AnnData object with n_obs × n_vars = 2422 × 16000
    obs: 'barcode', 'stimulus', 'red', 'green', 'well_id', 'plate_number', 'batch', 'n_counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'louvain_r0.5', 'louvain_r1', 'louvain_r1.5', 'louvain_r2', 'louvain_r2.5'
    var: 'gene_id', 'gene_name', 'n_cells', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
    uns: 'hvg', 'log1p', 'louvain', 'louvain_r0.5_colors', 'louvain_r1.5_colors', 'louvain_r1_colors', 'louvain_r2.5_colors', 'louvain_r2_colors', 'neighbors', 'pca', 'umap'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs'
    obsp: 'connectivities', 'distances'

In [17]:
# Stimulus codes whose cells are left out of the gene ranking:
# 10 = mock_w_poop, 13 = poop, 14 = bladder, 15 = CRD
stim2exclude = [10, 13, 14, 15]

# Drop every excluded stimulus with a single boolean mask instead of
# re-slicing the object once per code (which stacks a view on top of a view).
keep_cells = ~adata_umap.obs['stimulus'].isin(stim2exclude)

# .copy() turns the AnnData view into a real object: later cells add columns
# to `obs`, and writing to a view triggers an implicit copy.
adata_umap = adata_umap[keep_cells].copy()

In [18]:
# # there is just one poop cell and it ruined gene ranking, so: replace 13=poop with 5=AD
# stims = list(adata_umap.obs['stimulus'].values)
# idx=stims.index(13)
# # replace 13=poop with 5=AD
# stims[idx] = 5
# adata_umap.obs['stimulus'] = stims

In [19]:
# # Correct for ranking baloon=3 should be AD=5
# stims = adata_umap.obs['stimulus'].values
# stims[stims == 3] = 5
# adata_umap.obs['stimulus'] = stims

In [20]:
# Create the categorical obs variable that is used for ranking.
# The numeric stimulus codes are turned into integer strings ('2', '11', ...)
# and the result is stored as a categorical column. The final cast has to be
# assigned: `astype` returns a new Series and does not modify the column
# in place.
adata_umap.obs['stimulus_'] = (
    adata_umap.obs['stimulus']
    .astype(int)          # e.g. 2.0 -> 2
    .astype(str)          # e.g. 2 -> '2'
    .astype('category')   # string categories
)

# Show the resulting column (dtype should read `category`).
adata_umap.obs['stimulus_']

0-sample1-sample12-sample123-sample1234-sample12345-sample123456     2
1-sample1-sample12-sample123-sample1234-sample12345-sample123456     2
2-sample1-sample12-sample123-sample1234-sample12345-sample123456     2
4-sample1-sample12-sample123-sample1234-sample12345-sample123456     1
5-sample1-sample12-sample123-sample1234-sample12345-sample123456     1
                                                                    ..
379-sample7                                                         30
380-sample7                                                         30
381-sample7                                                         28
382-sample7                                                         28
383-sample7                                                         30
Name: stimulus_, Length: 2405, dtype: category
Categories (28, object): ['0', '1', '11', '12', ..., '6', '7', '8', '9']

In [21]:
# Build the list of stimulus names, one per cell and in cell order, by
# looking up each cell's code string in `stim_str` and taking the label
# found at the same position in `labels`.
stim_labels = [
    labels[stim_str.index(code)]
    for code in adata_umap.obs['stimulus_'].values
]

In [22]:
# Overwrite the code strings in 'stimulus_' with the stimulus names built in
# `stim_labels` (one entry per cell, same order as the obs rows).
adata_umap.obs['stimulus_'] = stim_labels
# Display the column to check the assignment.
adata_umap.obs['stimulus_']

0-sample1-sample12-sample123-sample1234-sample12345-sample123456                            New_stroke
1-sample1-sample12-sample123-sample1234-sample12345-sample123456                            New_stroke
2-sample1-sample12-sample123-sample1234-sample12345-sample123456                            New_stroke
4-sample1-sample12-sample123-sample1234-sample12345-sample123456                                 Pinch
5-sample1-sample12-sample123-sample1234-sample12345-sample123456                                 Pinch
                                                                                   ...                
379-sample7                                                                     Tracing Anal injection
380-sample7                                                                     Tracing Anal injection
381-sample7                                                         Tracing Ano Genital Skin injection
382-sample7                                                         Traci

In [23]:
# sc.pp.log1p(adata_umap) # it was already logaritmized but adata.uns['log1p'] has no 'base' key
# adata_umap.uns['log1p']

In [24]:
# Rank genes for every stimulus group with the Wilcoxon method; the result is
# stored in adata_umap.uns['rank_genes_groups'].
sc.tl.rank_genes_groups(adata_umap, groupby='stimulus_', method='wilcoxon')

ranking genes
    finished: added to `.uns['rank_genes_groups']`
    'names', sorted np.recarray to be indexed by group ids
    'scores', sorted np.recarray to be indexed by group ids
    'logfoldchanges', sorted np.recarray to be indexed by group ids
    'pvals', sorted np.recarray to be indexed by group ids
    'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:05)


In [25]:
# Table of ranked gene names: one column per stimulus group, rows in rank order.
ranked_gene_names = pd.DataFrame(adata_umap.uns['rank_genes_groups']['names'])
# Show the ten top-ranked genes of every group.
ranked_gene_names.head(10)

Unnamed: 0,AD,AD + Pulse,AD+CFA,AD-CFA 1W,AG,AGB+CFA,AGB-CFA 1 W,Anal_pinch,Ballon,Clitoris Vibration,...,Old_stroke,Penis,Pinch,TRPM8 td tomatoe,Tail Brush,Tomatoe,Tracing Anal injection,Tracing Ano Genital Skin injection,Tracing Vagina,Vaginal_distension
0,mt-Nd4,Atp5g3,Gm13394,Psmc4,Mcam,Pfkm,Gm7308,Rpl10-ps5,Gm17494,Sncb,...,campari2,Gm14648,Hmgb1,AA465934,mt-Nd1,1500015A07Rik,Rps28,Gm7308,Mt1,mt-Rnr2
1,mt-Nd2,Hspb1,Gm29216,Rtl8b,Ak5,Ttc9,Agpat4,Gm7476,Gm6063,Gm14648,...,cre,Btbd19,Mbnl1,A930005H10Rik,Gm10925,Vcp-rs,Gm14648,Gm17214,Ubc,mt-Rnr1
2,Scd2,Slc25a3,Cltc,Ngfr,Map7d1,Iars,Atp5g3,Phc3,campari2,Pet100,...,Gm6063,Gm9844,Serf1,Rps3a3,mt-Cytb,Rps3a3,Gm6109,Rps28,Cebpd,CT010467.1
3,Csmd1,Eif2s3y,Scd2,Tuba4a,Tpbgl,Gm29216,Gm14648,Kctd16,Ninj1,Tubb4a,...,Gm17494,Gm7308,mt-Rnr2,Jpt1,Pdcd10,Ftl1-ps1,Rpl17-ps8,Gm14648,Zfp36,Ddx3x
4,Rian,Gm9817,Dync1h1,Tmem41a,mt-Nd4,Calr,Slc25a39,Rpl10-ps6,Gm6682,Iqsec2,...,Eif2s3y,Lrfn4,Fkbp1b,Celf4,Ramac,Gm13453,Rpl36a-ps3,Gm6745,Sdc4,Bnip3l
5,Nr4a1,Psmd13,Akap12,Mt3,Stac2,Ehd3,Mpp3,Samd12,Gm5844,Abtb1,...,Tmem176b,Rpl36a-ps3,Gstt1,Gap43,Tceal8,Gm20305,Mt1,Gm5850,Cxcl1,Synm
6,campari2,Gsto1,Kif5a,Lgmn,Slc25a33,Nsf,Gpi-ps,Prepl,Gm8399,mt-Atp8,...,Gm24447,Gm20570,Uqcc2,Rbfox3,Gpx2,Ap3s1,Rps27,Gm6109,Mt2,Ttc3
7,cre,Cox5a,Ank2,Gm9234,Myh14,Lrp11,Gm12892,Atg9a,Trappc3l,Mtfr1l,...,Rn7sk,Bcam,D130009I18Rik,Ntrk1,Sdcbp,Ptges3,Gm16335,Fabp5,Rps28,Nipbl
8,Abca8a,Sgk1,Syn1,Gm9844,Pcyt1b,Gm13341,Gm3534,Eef1a2,Tomm40,Nutf2,...,Gm6682,Tmsb10,Pou4f1,Zdhhc22,Aopep,Gm15590,Gm17214,Rpl27a-ps4,Fkbp1b,Camta1
9,Arhgef10,Hba-a1,Gm8355,Gm20570,Gm29216,Nacc2,Ubc,Tom1l2,C230057A21Rik,Cox5a,...,Ddx3y,Cdk5r2,Nrp1,Kit,Pla2g7,Rpl9-ps6,Gm4600,2410006H16Rik,Fabp5,Xist


In [26]:
# there are some gene name duplicates
def unique(list1):
    """Return the distinct elements of ``list1`` and where each first occurs.

    Parameters
    ----------
    list1 : iterable
        Sequence to de-duplicate. Its elements must be hashable (for
        example strings such as gene names).

    Returns
    -------
    unique_list : list
        The elements of ``list1`` with later duplicates dropped, in order of
        first appearance.
    unique_index : list of int
        For every entry of ``unique_list``, the position of its first
        occurrence in ``list1``.
    """
    # A set gives constant-time membership tests; scanning the growing result
    # list instead is quadratic in the number of elements.
    seen = set()
    unique_list = []
    unique_index = []

    for position, item in enumerate(list1):
        if item not in seen:
            seen.add(item)
            unique_list.append(item)
            unique_index.append(position)
    return unique_list, unique_index
        
# Gene names in var order, as a plain Python list.
gene_name = adata_umap.var['gene_name'].tolist()

# remove duplicates: keep only the first column found for every gene name
unique_genes, unique_genes_index = unique(gene_name)

# .copy() materialises the column subset as a real AnnData object, so that
# later steps which write into it do not operate on a view.
adata_umap = adata_umap[:, np.array(unique_genes_index)].copy()

# Sanity check: exactly one column is left per distinct gene name.
assert adata_umap.n_vars == len(unique_genes)

In [29]:
# show plot as pop-up
%matplotlib tk 
sc.pl.heatmap(adata_umap, marker_genes_dict, groupby='stimulus_', cmap='rainbow', use_raw=False, figsize=(14,18), 
              show=True,show_gene_labels=True,var_group_rotation=45, dendrogram = True,vmin=-3,vmax=3)

categories: AD, AD + Pulse, AD+CFA, etc.
var_group_labels: RA-LTMR, C-LTMR, Itch, etc.
