For the human samples at least, we should check whether the brain samples show expression of human skin-cell marker genes, which would indicate contamination during library building.

In [1]:
import pandas
import collections

from rsemcache import RSEMCache

In [2]:
# Handle used by the report cells below to look up experiments
# (rsems.experiments) and per-gene expression (rsems.get_gene_expression).
# NOTE(review): presumably 'rsem-genes.h5' holds RSEM gene-level results; confirm.
rsems = RSEMCache('rsem-genes.h5')

In [3]:
# Open the annotation store read-only; it is only queried, never written,
# when resolving gene names to gene ids.
gencode_store = pandas.HDFStore('gencode.vV19-tRNAs-ERCC.h5', mode='r')

In [4]:
# Build an ordered mapping of gene_id -> gene_name for the marker genes
# listed below, resolving each name through the annotation store.
# Insertion order follows the order of the list.
query_type = 'gene'
contamination_genes = collections.OrderedDict()
for gene_name in ['KRT5', 'KRT14', 'KRT17', 'KRT4', 'CRNN']:
    # pandas evaluates the `where` string itself and looks up `symbol` and
    # `query_type` among the variables in scope here.  A name distinct from
    # the `gene_name` column keeps the condition from reading as a tautology.
    symbol = gene_name
    matches = gencode_store.select(
        'v19_tRNAs_ERCC',
        where='gene_name == symbol & type == query_type')['gene_id'].values
    if len(matches) == 0:
        # Same exception type that indexing an empty result would raise,
        # but naming the gene that could not be resolved.
        raise IndexError(
            'no %r record found for gene_name %r in v19_tRNAs_ERCC'
            % (query_type, gene_name))
    # If several rows match, the first one is used.
    gene_id = matches[0]
    contamination_genes[gene_id] = gene_name

In [5]:
# Show the gene_id -> gene_name mapping as a plain dict for inspection.
dict(contamination_genes)

{'ENSG00000128422.11': 'KRT17',
 'ENSG00000143536.7': 'CRNN',
 'ENSG00000170477.8': 'KRT4',
 'ENSG00000186081.7': 'KRT5',
 'ENSG00000186847.5': 'KRT14'}

# Report Contamination Genes in Human Experiments

## Hs_purkinje_poolsplit

In [6]:
# Expression of the contamination marker genes for the
# Hs_purkinje_poolsplit experiment; the bare `df` displays the table.
experiment = rsems.experiments['Hs_purkinje_poolsplit']
df = rsems.get_gene_expression(experiment, contamination_genes)
df

Unnamed: 0,13645,13646,13647,13648,13649,13650,13651,13652,13653,13654,13655,13656,13657,13658,13659,13660,13661,13662,13663,13664
ENSG00000186081.7,0.0,0.0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0,0,0,0
ENSG00000186847.5,0.0,0.0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0,0,0,0
ENSG00000128422.11,0.5,1.22,0.86,1.18,0,1.06,0,1.39,4.33,2.78,5.39,2.07,0,0,0,4.6,0,0,0,0
ENSG00000170477.8,0.0,0.0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0,0,0,0
ENSG00000143536.7,0.0,0.0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0,0,0,0


## Hs_purkinje_single

In [7]:
# Expression of the contamination marker genes for the
# Hs_purkinje_single experiment; the bare `df` displays the table.
experiment = rsems.experiments['Hs_purkinje_single']
df = rsems.get_gene_expression(experiment, contamination_genes)
df

Unnamed: 0,13625,13626,13627,13628,13629,13630,13631,13632,13633,13634,13635,13636,13637,13638,13639,13640,13641,13642,13643,13644
ENSG00000186081.7,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0
ENSG00000186847.5,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0
ENSG00000128422.11,3.39,0,0.76,6.26,5.67,4.86,1.07,1.66,0.61,1.72,0,0,3.81,0.41,4.55,1.19,0,0.17,3.31,3.05
ENSG00000170477.8,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0
ENSG00000143536.7,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0


## Hs_asp_purkinje_UMB5294_poolsplit

In [8]:
# Expression of the contamination marker genes for the
# Hs_asp_purkinje_UMB5294_poolsplit experiment; the bare `df` displays the table.
experiment = rsems.experiments['Hs_asp_purkinje_UMB5294_poolsplit']
df = rsems.get_gene_expression(experiment, contamination_genes)
df

Unnamed: 0,13843,13844,13845,13846,13847,13848,13849,13850,13851,13852,13853,13854,13855,13856,13857,13858,13859,13860
ENSG00000186081.7,0,0,0,0,0.0,0,0,0.0,0,0,0,0,0.0,0,0,0,0,0
ENSG00000186847.5,0,0,0,0,0.0,0,0,0.0,0,0,0,0,0.0,0,0,0,0,0
ENSG00000128422.11,0,0,0,0,0.3,0,0,1.09,0,0,0,0,6.48,0,0,0,0,0
ENSG00000170477.8,0,0,0,0,0.0,0,0,0.0,0,0,0,0,0.0,0,0,0,0,0
ENSG00000143536.7,0,0,0,0,0.0,0,0,0.0,0,0,0,0,0.0,0,0,0,0,0


## Hs_asp_purkinje_UMB5294_single

In [9]:
# Expression of the contamination marker genes for the
# Hs_asp_purkinje_UMB5294_single experiment; the bare `df` displays the table.
experiment = rsems.experiments['Hs_asp_purkinje_UMB5294_single']
df = rsems.get_gene_expression(experiment, contamination_genes)
df

Unnamed: 0,13824,13825,13826,13827,13828,13829,13830,13831,13832,13833,13834,13835,13836,13837,13838,13839,13840,13841,13842
ENSG00000186081.7,0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
ENSG00000186847.5,0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
ENSG00000128422.11,0,5.53,2.26,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
ENSG00000170477.8,0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
ENSG00000143536.7,0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
