### Prepare scRNA-seq for spatial-sc model

In [1]:
import numpy as np
import pandas as pd
import scipy, sklearn
import matplotlib.pyplot as plt
import scanpy as sc
import loompy

OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


### Load scRNA-seq data

In [44]:
# The CSV has no header row; its first column lists the gene names that are
# used further down to subset the scRNA-seq data.
spatial_gene_table = pd.read_csv('../data/spat_gene_names.csv', header=None)
gene_subset = spatial_gene_table[0]
gene_subset.head()

0      Agt
1    Aldoc
2     Ano1
3     Aqp4
4    Atoh1
Name: 0, dtype: object

In [9]:
# Read the full scRNA-seq Loom file into an AnnData object.
# NOTE: this step is slow.
loom_path = "../../../2209_hybiss_deconvolution/data/l5_all.loom"
scrna_dat = sc.read_loom(loom_path)

  axis_df[k] = v
  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


In [46]:
# Keep only the genes (columns) that are also listed in the spatial gene subset
in_spatial_subset = scrna_dat.var_names.isin(gene_subset)
scrna_dat_sub = scrna_dat[:, in_spatial_subset]

In [53]:
# Duplicate gene names in the scRNA-seq data:
# 1) record every name that occurs more than once,
# 2) keep only the first column for each repeated name.
var_name_values, var_name_counts = np.unique(scrna_dat_sub.var_names, return_counts=True)
non_unique_gene_names = var_name_values[var_name_counts > 1]

is_first_occurrence = ~scrna_dat_sub.var_names.duplicated(keep='first')
scrna_dat_sub = scrna_dat_sub[:, is_first_occurrence]

In [97]:
# Sanity check: number of gene names that still occur more than once (expected: 0)
remaining_names, remaining_counts = np.unique(scrna_dat_sub.var_names, return_counts=True)
len(remaining_names[remaining_counts > 1])

0

Here is all the information about clustering we have from the scRNA-seq data:

In [63]:
# Per-cell annotation columns that describe the clustering / taxonomy
clustering_columns = [
    'Class', 'ClusterName', 'Clusters',
    'TaxonomyRank1', 'TaxonomyRank2', 'TaxonomyRank3', 'TaxonomyRank4',
    'TaxonomySymbol', 'Taxonomy_group',
]
scrna_dat_sub.obs[clustering_columns].head(3)

Unnamed: 0_level_0,Class,ClusterName,Clusters,TaxonomyRank1,TaxonomyRank2,TaxonomyRank3,TaxonomyRank4,TaxonomySymbol,Taxonomy_group
CellID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
10X82_2_TCTCTCACCAGTTA-,Neurons,ENT9,0,Neurons,PNS neurons,Enteric neurons,Enteric neurons,Enne,Enteric neurons
10X82_2_TATTATCTACCAGA-,Neurons,ENT9,0,Neurons,PNS neurons,Enteric neurons,Enteric neurons,Enne,Enteric neurons
10X82_2_TATCCCAGATGGCA-,Neurons,ENT9,0,Neurons,PNS neurons,Enteric neurons,Enteric neurons,Enne,Enteric neurons


We will rely on `Class`, `Clusters`, `TaxonomyRank1`, `TaxonomyRank2`, `TaxonomyRank3`, and `TaxonomyRank4` groupings. All are derived from 265 `Clusters`.

In [96]:
# For every grouping level, compute the mean expression of each gene over the
# cells of each group and write the resulting genes x groups table to CSV.
taxonomy_ranks = ['Class','Clusters','TaxonomyRank1','TaxonomyRank2','TaxonomyRank3','TaxonomyRank4']
for taxonomy in taxonomy_ranks:
    print(taxonomy)

    # Look the grouping column up once and derive both the group labels and
    # their count from a single np.unique call.
    labels = scrna_dat_sub.obs[taxonomy]
    cell_types = np.unique(labels)
    n_groups = len(cell_types) # number of groups at this level

    # iterate over groups and compute the mean expression over cells belonging to group 'k'
    mu_list = [] # one vector of per-gene means per group
    for k in cell_types:
        group_X = scrna_dat_sub[labels.isin([k]), :].X
        # .mean(axis=0) is 2-D (1 x n_genes) for a sparse matrix but 1-D for a
        # dense array or sparse array; ravel() yields a flat vector in all cases.
        mu_list.append(np.asarray(group_X.mean(axis=0)).ravel())

    # assemble results in pandas with group names and gene names (genes as rows)
    mu_X = pd.DataFrame(np.array(mu_list), index = cell_types, columns = scrna_dat_sub.var_names).T
    mu_X.to_csv(f"../data/scrna_muX_clust{n_groups}_{taxonomy}.csv")

Class
Clusters
TaxonomyRank1
TaxonomyRank2
TaxonomyRank3
TaxonomyRank4


Below, we show how the different taxonomy ranks relate to each other.

In [117]:
# Build one "Rank1_Rank2_Rank3_Rank4" label per cell and list the distinct combinations
rank_columns = ['TaxonomyRank1', 'TaxonomyRank2', 'TaxonomyRank3', 'TaxonomyRank4']
lineage = scrna_dat.obs[rank_columns[0]]
for rank_column in rank_columns[1:]:
    lineage = lineage + '_' + scrna_dat.obs[rank_column]
np.unique(lineage)

array(['Glia_CNS glia_Astroependymal cells_Astrocytes',
       'Glia_CNS glia_Astroependymal cells_Choroid epithelial cells',
       'Glia_CNS glia_Astroependymal cells_Dentate gyrus radial glia-like cells',
       'Glia_CNS glia_Astroependymal cells_Ependymal cells',
       'Glia_CNS glia_Astroependymal cells_Subcommissural organ hypendymal cells',
       'Glia_CNS glia_Astroependymal cells_Subventricular zone radial glia-like cells',
       'Glia_CNS glia_Oligodendrocytes_Oligodendrocytes',
       'Glia_Neural crest-like glia_Neural crest-like glia_Enteric glia',
       'Glia_Neural crest-like glia_Neural crest-like glia_Olfactory ensheathing cells',
       'Glia_Neural crest-like glia_Neural crest-like glia_Oligodendrocyte precursor cells',
       'Glia_Neural crest-like glia_Neural crest-like glia_Satellite glia',
       'Glia_Neural crest-like glia_Neural crest-like glia_Schwann cells',
       'Immune cells_Immune cells_Immune cells_Microglia',
       'Immune cells_Immune cells_Im