__Author:__ Bram Van de Sande
   
__Date:__ 9 FEB 2018

__Outline:__ Characterize the different cells in a single-cell transcriptomics experiment by the enrichment of the regulomes. Enrichment of a regulome is measured as the AUC (area under the curve) of the recovery curve of the genes that define this regulome.

In [1]:
import ast
import os
import pickle
from collections import defaultdict

import pandas as pd
import seaborn

from pyscenic.aucell import create_rankings, enrichment
from pyscenic.regulome import df2regulomes

In [2]:
# Locations of the input resources and of temporary data. Each location can be
# overridden through an environment variable so the notebook can run on another
# machine without editing this cell; when the variable is not set, the original
# author's local folder is used.
RESOURCES_FOLDER = os.environ.get(
    "PYSCENIC_RESOURCES_FOLDER",
    "/Users/bramvandesande/Projects/lcb/resources",
)
DATA_FOLDER = os.environ.get(
    "PYSCENIC_DATA_FOLDER",
    "/Users/bramvandesande/Projects/lcb/tmp",
)

Load and rank expression profiles from single-cell experiment.

In [3]:
# Tab-separated expression table: the first row is used as the header and the
# first column as the row index.
expression_path = os.path.join(RESOURCES_FOLDER, 'GSE60361_C1-3005-Expression.txt')
ex_mtx = pd.read_csv(
    expression_path,
    sep='\t',
    header=0,
    index_col=0,
)

In [4]:
# Turn the expression matrix into a ranking matrix with pyscenic's create_rankings.
# NOTE(review): the ranking is presumably computed per cell — confirm against pyscenic.aucell.
rnk_mtx = create_rankings(ex_mtx)

In [5]:
# Sanity check: dimensions of the transposed ranking matrix.
# NOTE(review): presumably (cells, genes) — confirm.
rnk_mtx.T.shape

(3005, 19972)

Load regulomes discovered in previous phase.

In [6]:
# Regulome table: the first two columns form the row index and the first two
# rows form the column header, so both axes get a two-level index.
regulomes_path = os.path.join(RESOURCES_FOLDER, "regulomes_zeisel_2015.csv")
df = pd.read_csv(
    regulomes_path,
    header=[0, 1],
    index_col=[0, 1],
    skipinitialspace=True,
)

In [7]:
# The TargetGenes column is read from the CSV as text; parse every entry back
# into a Python object. ast.literal_eval only accepts literals (lists, tuples,
# strings, numbers, ...), so unlike eval() it cannot execute code embedded in
# the file. Entries that are not strings are passed through unchanged, which
# keeps this cell safe to re-run after the column has already been parsed.
# NOTE(review): assumes every entry is a plain literal such as
# [('Gene', 105), ...] — confirm against the CSV.
target_genes_col = ('Enrichment', 'TargetGenes')
df[target_genes_col] = df[target_genes_col].apply(
    lambda entry: ast.literal_eval(entry) if isinstance(entry, str) else entry
)

In [8]:
# Preview the first rows of the regulome table.
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Enrichment,Enrichment,Enrichment,Enrichment,Enrichment,Enrichment,Enrichment
Unnamed: 0_level_1,Unnamed: 1_level_1,AUC,NES,MotifSimilarityQvalue,OrthologousIdentity,Annotation,Context,TargetGenes
TF,MotifID,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Acaa1b,transfac_pro__M01543,0.076742,4.846509,0.0,0.517986,motif similar to transfac_pro__M04671 ('F$POT1...,"frozenset({'top50', '500bp'})","[(Col7a1, 105), (Lmna, 107), (Crh, 109), (Htr1..."
Acaa1b,transfac_pro__M04671,0.07638,4.821018,0.0,0.517986,motif is annotated for orthologous gene YIL160...,"frozenset({'top50', '500bp'})","[(Col7a1, 101), (Lmna, 106), (Crh, 109), (Htr1..."
Ar,homer__CCAGGAACAG_AR-halfsite,0.050721,7.485099,0.0,0.877642,gene is orthologous to ENSG00000169083 in H. s...,"frozenset({'top50', '500bp'})","[(Bmpr1b, 17), (Kcns1, 45), (Ptpru, 119)]"
Arid3a,transfac_pro__M04727,0.033484,3.115786,0.000255,0.798669,gene is orthologous to ENSG00000116017 in H. s...,"frozenset({'top50', '500bp'})","[(Sdk2, 6)]"
Arid3a,cisbp__M4455,0.035068,3.291999,9e-06,0.798669,gene is orthologous to ENSG00000116017 in H. s...,"frozenset({'top50', '500bp'})","[(Hbegf, 30), (Tnfaip8l2, 102)]"


In [9]:
# Convert the regulome table into a collection of Regulome objects.
# NOTE(review): "MGI" appears to be the gene nomenclature (it shows up as
# nomenclature='MGI' in the Regulome repr) — confirm against pyscenic.regulome.
regulomes = df2regulomes(df, "MGI")

In [10]:
# Number of regulomes returned by df2regulomes.
len(regulomes)

423

In [11]:
# Inspect the first regulome as an example.
regulomes[0]

Regulome(name='Acaa1b', nomenclature='MGI', gene2weights=<frozendict {'Col7a1': 1.0, 'Lmna': 1.0, 'Crh': 1.0, 'Htr1f': 1.0, 'Rilpl2': 1.0, 'Phf21b': 1.0, 'Poli': 1.0, 'Mtfr1': 1.0}>, transcription_factor='Acaa1b', context=frozenset(), score=2.510423682097521)

Calculate the enrichment of the regulomes in the cells as AUC values. A normalized enrichment score (NES) is not valid here because the AUC values are not normally distributed.

Best to calculate the rankings for a subset of the regulomes, i.e. per database and regulome definition.

In [12]:
# One enrichment() result per regulome, each evaluated on the transposed
# ranking matrix. The results are concatenated and then pivoted on the
# 'Regulome' index level so that every regulome ends up in its own column.
aucs_per_regulome = [enrichment(rnk_mtx.T, rgl) for rgl in regulomes]
auc_heatmap = pd.concat(aucs_per_regulome).unstack('Regulome')

In [13]:
# Show the AUC matrix (rows: cells, columns: regulomes).
auc_heatmap

Unnamed: 0_level_0,AUC,AUC,AUC,AUC,AUC,AUC,AUC,AUC,AUC,AUC,AUC,AUC,AUC,AUC,AUC,AUC,AUC,AUC,AUC,AUC,AUC
Regulome,Acaa1b,Ahr,Ar,Arid3a,Arid5b,Arnt2,Arntl,Arx,Ascl1,Atf1,...,Zfp787,Zfp90,Zfp932,Zfp94,Zic1,Zic2,Zic3,Zkscan1,Zkscan3,Zscan29
Cell,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
1772058148_A01,0.000000,0.036427,0.000000,0.000000,0.000000,0.037643,0.000000,0.032448,0.000000,0.012818,...,0.048674,0.140474,0.000000,0.084960,0.079107,0.145474,0.354169,0.000000,0.053253,0.000000
1772058148_A03,0.000000,0.037728,0.014948,0.000000,0.097264,0.043805,0.166333,0.036309,0.032385,0.061993,...,0.113614,0.000000,0.076362,0.000000,0.047728,0.136405,0.054573,0.000000,0.192793,0.000000
1772058148_A04,0.323073,0.082962,0.164031,0.000000,0.013847,0.085080,0.000000,0.170131,0.089266,0.088338,...,0.097723,0.000000,0.134611,0.000000,0.039213,0.024844,0.000000,0.000000,0.292893,0.000000
1772058148_A05,0.000000,0.054247,0.000000,0.053171,0.009927,0.025727,0.000000,0.023907,0.000000,0.006187,...,0.000000,0.000000,0.000000,0.000000,0.027458,0.035774,0.048085,0.000000,0.000000,0.000000
1772058148_A06,0.000000,0.061429,0.000000,0.000000,0.014181,0.054170,0.111445,0.036530,0.000000,0.036425,...,0.113113,0.105772,0.057724,0.000000,0.040812,0.035841,0.000000,0.000000,0.000000,0.000000
1772058148_A07,0.000000,0.053367,0.000000,0.000000,0.076743,0.031977,0.000000,0.037271,0.056939,0.012957,...,0.000000,0.000000,0.000000,0.000000,0.129908,0.198077,0.361695,0.000000,0.000000,0.000000
1772058148_A09,0.000000,0.104423,0.021088,0.111288,0.154571,0.064974,0.000000,0.091793,0.000000,0.087657,...,0.000000,0.155489,0.105868,0.028403,0.039122,0.057406,0.048345,0.000000,0.000000,0.043877
1772058148_A10,0.000000,0.036157,0.000000,0.000000,0.067568,0.046995,0.000000,0.051727,0.000000,0.071113,...,0.079705,0.000000,0.136946,0.098849,0.227644,0.152219,0.102769,0.000000,0.000000,0.000000
1772058148_A11,0.122998,0.051244,0.021154,0.077077,0.052135,0.047437,0.000000,0.014001,0.000000,0.067637,...,0.000000,0.000000,0.098813,0.098223,0.073963,0.092764,0.234679,0.240741,0.129730,0.000000
1772058148_A12,0.149775,0.066609,0.000000,0.000000,0.062563,0.051021,0.000000,0.189371,0.057764,0.053943,...,0.087588,0.309643,0.074980,0.063313,0.026881,0.080490,0.047344,0.000000,0.000000,0.000000


In [None]:
# Draw the AUC matrix as a heatmap; the returned Axes is kept in `ax`.
ax = seaborn.heatmap(data=auc_heatmap)