__Author:__ Bram Van de Sande

__Date:__ 4 DEC 2018

__Outline:__ Example notebook that demonstrates how to create a loom file.

In [1]:
import csv
import os

import pandas as pd
from pyscenic.export import export2loom
from pyscenic.transform import df2regulons
from pyscenic.utils import load_motifs

In [2]:
# Folder that holds the input files and receives the generated loom file.
# Set the PYSCENIC_RESOURCES_FOLDER environment variable to point at your own
# copy of the data; when it is unset, the original author's local path is used.
RESOURCES_FOLDER = os.environ.get(
    "PYSCENIC_RESOURCES_FOLDER",
    "/Users/bramvandesande/Projects/lcb/resources/zeisel_et_al/",
)
# Input: expression matrix CSV (read with pandas below).
EXP_MTX_FNAME = os.path.join(RESOURCES_FOLDER, "expression_matrix.csv")
# Input: motif enrichment table CSV (read with pyscenic's load_motifs below).
MOTIFS_FNAME = os.path.join(RESOURCES_FOLDER, "zeisel_et_al.motifs.csv")
# Input: CSV that maps each cell ID to its annotation.
ANNOTATIONS_FNAME = os.path.join(RESOURCES_FOLDER, "annotations.csv")
# Output: path of the loom file written by export2loom.
LOOM_FILE = os.path.join(RESOURCES_FOLDER, "zeisel_et_al.loom")

## Load necessary files

In [3]:
# Read the expression matrix CSV, using its first column as the row index.
exp_mtx = pd.read_csv(EXP_MTX_FNAME, index_col=0)

In [4]:
# Preview the first rows of the expression matrix.
exp_mtx.head()

Unnamed: 0,Tspan12,Tshz1,Fnbp1l,Adamts15,Cldn12,Rxfp1,2310042E22Rik,Sema3c,Jam2,Apbb1ip,...,Rab9,Tceanc,Msl3,Arhgap6,Mid1,Vamp7,Tmlhe,Zf12,Kdm5d,Uty
1772071015_C02,0,3,3,0,1,0,0,11,1,0,...,7,0,0,0,0,5,0,0,0,0
1772071017_G12,0,1,1,0,1,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
1772071017_A05,0,0,6,0,1,0,2,25,1,0,...,1,0,2,0,0,3,0,0,0,7
1772071014_B06,3,2,4,0,0,0,3,1,0,0,...,3,0,1,0,0,0,0,0,0,0
1772067065_H06,0,2,1,0,0,0,0,10,0,0,...,0,0,4,0,0,3,0,0,0,0


Check whether the expression matrix has the correct format: all row and column labels must be strings, and neither axis may use a multi-level index.

In [11]:
def is_valid_exp_matrix(mtx):
    """Return True if `mtx` has the index/column layout checked below.

    The matrix is accepted only when every row label and every column label
    is a `str` and both axes have exactly one index level.
    """
    # Every row label must be a plain string.
    if not all(isinstance(label, str) for label in mtx.index):
        return False
    # Every column label must be a plain string.
    if not all(isinstance(label, str) for label in mtx.columns):
        return False
    # Neither axis may be hierarchical (multi-level).
    if mtx.index.nlevels != 1:
        return False
    return mtx.columns.nlevels == 1

In [12]:
# Validate the loaded expression matrix; the boolean result is shown as the cell output.
is_valid_exp_matrix(exp_mtx)

True

In [16]:
# Load the motif enrichment table from the motifs CSV via pyscenic's load_motifs.
motifs = load_motifs(MOTIFS_FNAME)

In [17]:
# Preview the first rows of the motif enrichment table.
motifs.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Enrichment,Enrichment,Enrichment,Enrichment,Enrichment,Enrichment,Enrichment,Enrichment
Unnamed: 0_level_1,Unnamed: 1_level_1,AUC,Annotation,Context,MotifSimilarityQvalue,NES,OrthologousIdentity,RankAtMax,TargetGenes
TF,MotifID,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
Arnt2,cisbp__M5290,0.012173,gene is orthologous to FBgn0264075 in D. melan...,"(activating, mm9-500bp-upstream-7species, weig...",0.000646,3.286012,0.408708,994,"[(Fam134a, 0.4679974726788937), (Ubap2l, 0.509..."
Arnt2,taipale__BHLHB3_full_NKCACGTGMN_repr,0.012137,motif similar to flyfactorsurvey__tgo_cyc_SANG...,"(activating, mm9-500bp-upstream-7species, weig...",3.6e-05,3.264832,0.453271,1414,"[(Ubap2l, 0.4679974726788937), (Fam134a, 0.509..."
Arnt2,hocomoco__BHE40_HUMAN.H10MO.A,0.011742,gene is orthologous to FBgn0264075 in D. melan...,"(activating, mm9-500bp-upstream-7species, weig...",0.000524,3.03106,0.408708,625,"[(Tmem55b, 0.4679974726788937), (Ubap2l, 0.509..."
Arnt2,swissregulon__hs__AHR_ARNT_ARNT2.p2,0.011815,gene is orthologous to ENSG00000172379 in H. s...,"(activating, mm9-500bp-upstream-7species, weig...",0.0,3.074206,0.977528,2493,"[(Ntrk2, 0.4679974726788937), (Rqcd1, 0.509033..."
Arnt2,swissregulon__hs__SREBF1_2.p2,0.011959,motif similar to flyfactorsurvey__tgo_cyc_SANG...,"(activating, mm9-500bp-upstream-7species, weig...",0.000575,3.159713,0.453271,126,"[(Ubap2l, 0.4679974726788937), (Ube2b, 0.50903..."


In [18]:
# Derive regulons from the motif enrichment table (an indexable, sized collection, see the cells below).
regulons = df2regulons(motifs)

In [19]:
# Number of regulons derived from the motif table.
len(regulons)

425

In [20]:
# Inspect the first regulon as an example of the structure.
regulons[0]

Regulon(name='A430033K04Rik(+)', gene2weight=<frozendict {'Zfhx4': 1.2992339182219363, 'Frem1': 1.5581168405452308, 'Zbtb9': 1.0}>, transcription_factor='A430033K04Rik', context=frozenset({'swissregulon__sacCer__AZF1.png', 'activating'}), score=0.42059077585411814)

In [21]:
# Preview the first lines of the annotations CSV using the shell's `head` (requires a Unix-like shell).
!head {ANNOTATIONS_FNAME}

"","level1class"
"1772071015_C02","interneurons"
"1772071017_G12","interneurons"
"1772071017_A05","interneurons"
"1772071014_B06","interneurons"
"1772067065_H06","interneurons"
"1772071017_E02","interneurons"
"1772067065_B07","interneurons"
"1772067060_B09","interneurons"
"1772071014_E04","interneurons"


In [22]:
# Parse the annotations CSV into a {cell ID: annotation} dict.
# The csv module handles quoted fields properly (including embedded commas or
# quotes), which manual quote-stripping followed by split(",") does not.
with open(ANNOTATIONS_FNAME, "rt", newline="") as f:
    reader = csv.reader(f)
    next(reader, None)  # skip the header row; tolerates an empty file
    # Blank lines are skipped; dict() still raises ValueError for any row that
    # does not consist of exactly two fields.
    annotations = dict(row for row in reader if row)

Check whether the annotation mapping has the correct format: all keys and all values must be strings.

In [23]:
def is_valid_annotation_mapping(m):
    """Return True if every key and every value of mapping `m` is a `str`."""
    for key in m.keys():
        if not isinstance(key, str):
            return False
    for value in m.values():
        if not isinstance(value, str):
            return False
    return True

In [24]:
# Validate the parsed annotation mapping; the boolean result is shown as the cell output.
is_valid_annotation_mapping(annotations)

True

In [12]:
# Display the parsed cell ID -> annotation mapping.
# NOTE(review): this echoes the entire dict; consider previewing only a few entries.
annotations

{'1772071015_C02': 'interneurons',
 '1772071017_G12': 'interneurons',
 '1772071017_A05': 'interneurons',
 '1772071014_B06': 'interneurons',
 '1772067065_H06': 'interneurons',
 '1772071017_E02': 'interneurons',
 '1772067065_B07': 'interneurons',
 '1772067060_B09': 'interneurons',
 '1772071014_E04': 'interneurons',
 '1772071015_D04': 'interneurons',
 '1772071015_C11': 'interneurons',
 '1772071017_D04': 'interneurons',
 '1772071017_D06': 'interneurons',
 '1772067082_D07': 'interneurons',
 '1772071017_F09': 'interneurons',
 '1772071017_A09': 'interneurons',
 '1772067094_C05': 'interneurons',
 '1772067059_B06': 'interneurons',
 '1772067096_E05': 'interneurons',
 '1772066089_C05': 'interneurons',
 '1772067094_F04': 'interneurons',
 '1772071045_A01': 'interneurons',
 '1772071015_C08': 'interneurons',
 '1772071045_D06': 'interneurons',
 '1772071017_A03': 'interneurons',
 '1772071017_F07': 'interneurons',
 '1772071017_E06': 'interneurons',
 '1772067066_C10': 'interneurons',
 '1772071017_B05': '

## Create loom file

In [13]:
# Write a loom file containing the expression matrix, the first 100 regulons
# and the cell annotations.
export2loom(
    exp_mtx,
    regulons[:100],
    annotations,
    LOOM_FILE,
    title="Zeisel et al.",
    nomenclature="MGI",
)

  return_n_iter=True)


The metadata stored in the loom file can become very large, so `export2loom` provides a `compress` option to compress it.

__Caution:__ Compression should only be used if the loom file is going to be used with SCope.

In [14]:
# Export all regulons with compress=True.
# This writes to the same LOOM_FILE path as the earlier export2loom call.
export2loom(
    exp_mtx,
    regulons,
    annotations,
    LOOM_FILE,
    title="Zeisel et al.",
    nomenclature="MGI",
    compress=True,
)