In [None]:
import pandas as pd
import numpy as np
import os, glob
import pickle

from arboreto.utils import load_tf_names
from arboreto.algo import grnboost2

from pyscenic.rnkdb import FeatherRankingDatabase as RankingDatabase
from pyscenic.utils import modules_from_adjacencies
from pyscenic.prune import prune2df, df2regulons
from pyscenic.aucell import aucell

import seaborn as sns

In [None]:
# Root folders of the SCENIC reference inputs (relative to the working directory).
DATABASE_FOLDER = "Data/SCENIC/Databases/"
RESOURCES_FOLDER = "Data/SCENIC/Resources/"

# File listing the transcription-factor names; read by load_tf_names().
MM_TFS_FNAME = os.path.join(RESOURCES_FOLDER, "mm_mgi_tfs.txt")

# Motif annotation table; handed to prune2df() during pruning.
MOTIF_ANNOTATIONS_FNAME = os.path.join(RESOURCES_FOLDER, "motifs-v9-nr.mgi-m0.001-o0.0.tbl")

# Glob pattern selecting the feather ranking databases to load.
DATABASES_GLOB = os.path.join(DATABASE_FOLDER, "mm9-*.mc9nr.feather")

# Preliminary work

### Derive the list of transcription factors (TFs) for _Mus musculus_

In [None]:
# Read the transcription-factor names from the file at MM_TFS_FNAME.
# The result is later passed to grnboost2() as its `tf_names` argument.
tf_names = load_tf_names(MM_TFS_FNAME)

### Load ranking databases

In [None]:
def name(fname):
    """Return the base name of ``fname`` without directory and final extension."""
    return os.path.splitext(os.path.basename(fname))[0]

# glob.glob() yields matches in file-system order, which can differ between
# machines and runs; sorting keeps the order of `dbs` deterministic.
db_fnames = sorted(glob.glob(DATABASES_GLOB))

# Fail here, with a clear message, rather than carrying an empty database list
# into the pruning step much later in the notebook.
if not db_fnames:
    raise FileNotFoundError(
        "No ranking databases found matching {!r}".format(DATABASES_GLOB)
    )

# One ranking database per matched feather file, named after the file.
dbs = [RankingDatabase(fname=fname, name=name(fname)) for fname in db_fnames]
dbs

# SCENIC

### B16 vs B16il33

In [None]:
# Load the CD4 expression table: space-delimited, first row used as column
# labels, first column used as row labels; the table is then transposed.
# NOTE(review): presumably genes x cells on disk, giving cells x genes after
# the transpose -- confirm against the input file.
ex_matrix = pd.read_csv(
    "Data/Cells/CD4/cd4.matrix.csv",
    sep=' ',
    header=0,
    index_col=0,
).transpose()
ex_matrix

In [None]:
# Output locations for every artefact written by this cell.
DATA_FOLDER = "Data/Cells/CD4/SCENIC/"

ADJACENCIES_FNAME = os.path.join(DATA_FOLDER, "adjacencies.tsv")
MODULES_FNAME = os.path.join(DATA_FOLDER, "modules.p")
MOTIFS_FNAME = os.path.join(DATA_FOLDER, "motifs.csv")
REGULONS_FNAME = os.path.join(DATA_FOLDER, "regulons.p")
AUC_FNAME = os.path.join(DATA_FOLDER, "auc_mtx.csv")

# Create the output folder before any computation: every step below writes
# into it, and a missing directory would otherwise only surface as an error
# after the network inference has already finished.
os.makedirs(DATA_FOLDER, exist_ok=True)

# Step 1 -- network inference: TF/target adjacencies from the expression matrix.
# NOTE(review): no seed is passed, so results may differ between runs; consider
# passing a fixed seed to grnboost2 if reproducibility matters -- confirm.
adjacencies = grnboost2(expression_data=ex_matrix, tf_names=tf_names, verbose=True)

print(adjacencies.head())

adjacencies.to_csv(ADJACENCIES_FNAME, index=False, sep='\t')
# To reuse a previously saved table instead of recomputing it, load it with:
#adjacencies = pd.read_csv(ADJACENCIES_FNAME, sep='\t')

# Step 2 -- derive modules from the adjacencies and persist them.
modules = list(modules_from_adjacencies(adjacencies, ex_matrix))

with open(MODULES_FNAME, 'wb') as f:
    pickle.dump(modules, f)

# Step 3 -- prune the modules against the ranking databases and the motif
# annotations; the resulting table is saved as CSV.
df = prune2df(dbs, modules, MOTIF_ANNOTATIONS_FNAME)

print(df.head())

df.to_csv(MOTIFS_FNAME)

# Step 4 -- convert the pruned table into regulons and persist them.
regulons = df2regulons(df)

with open(REGULONS_FNAME, 'wb') as f:
    pickle.dump(regulons, f)

# Step 5 -- score the regulons on the expression matrix with AUCell.
auc_mtx = aucell(ex_matrix, regulons)

# Write the AUC matrix before plotting so that a failure while drawing the
# figure cannot lose the final result of the pipeline.
auc_mtx.to_csv(AUC_FNAME)

# Trailing ';' keeps the ClusterGrid repr out of the cell output.
sns.clustermap(auc_mtx, figsize=(12,12));
