# Template SCENIC Analysis

**Authorship:**
Adam Klie, *02/16/2022*
***
**Description:**
Template notebook for loading SCENIC results after running the SCENICprotocol
***
**TODOs:**
 - <font color='green'> Done TODO </font>
 - <font color='orange'> WIP TODO </font>
 - <font color='red'> Queued TODO </font>
***

## Set-up

In [2]:
# The classics
import os
import glob
import pickle
import pandas as pd
import numpy as np
import scanpy as sc
import loompy as lp

from dask.diagnostics import ProgressBar

from arboreto.utils import load_tf_names
from arboreto.algo import grnboost2

from ctxcore.rnkdb import FeatherRankingDatabase as RankingDatabase
from pyscenic.utils import modules_from_adjacencies, load_motifs
from pyscenic.prune import prune2df, df2regulons
from pyscenic.aucell import aucell

import matplotlib.pyplot as plt
import seaborn as sns

DATA_FOLDER="/cellar/users/aklie/data/SCENIC/test"
RESOURCES_FOLDER="/cellar/users/aklie/data/SCENIC"
DATABASE_FOLDER = "/cellar/users/aklie/data/SCENIC/databases"
DATABASES_GLOB = os.path.join(DATABASE_FOLDER, "mm9-*.mc9nr.feather")
MOTIF_ANNOTATIONS_FNAME = os.path.join(RESOURCES_FOLDER, "motifs-v9-nr.mgi-m0.001-o0.0.tbl")
MM_TFS_FNAME = os.path.join(RESOURCES_FOLDER, 'mm_tfs.txt')

REGULONS_FNAME = os.path.join(DATA_FOLDER, "regulons.p")
MOTIFS_FNAME = os.path.join(DATA_FOLDER, "motifs.csv")

# Autoreload extension
if 'autoreload' not in get_ipython().extension_manager.loaded:
    %load_ext autoreload
%autoreload 2

# Load SCENIC results

## Visualize the AUC matrix

In [13]:
import json
import zlib
import base64

# collect SCENIC AUCell output
lf = lp.connect("out/test_pyscenic_output.loom", mode='r+', validate=False )
meta = json.loads(zlib.decompress(base64.b64decode( lf.attrs.MetaData )))
exprMat = pd.DataFrame( lf[:,:], index=lf.ra.Gene, columns=lf.ca.CellID).T
auc_mtx = pd.DataFrame( lf.ca.RegulonsAUC, index=lf.ca.CellID)


In [14]:
# create a dictionary of regulons:
regulons = {}
for i,r in pd.DataFrame(lf.ra.Regulons,index=lf.ra.Gene).iteritems():
    regulons[i] =  list(r[r==1].index.values)

In [20]:
lf.close()



In [21]:
adata = sc.read("out/test_pyscenic_output.loom", validate=False)

In [22]:
adata

AnnData object with n_obs × n_vars = 10280 × 20292
    obs: 'RegulonsAUC', 'nGene', 'nUMI'
    var: 'Regulons'

In [15]:
# cell annotations from the loom column attributes:
cellAnnot = pd.concat(
    [
        pd.DataFrame( lf.ca.Celltype_Garnett, index=lf.ca.CellID ),
        pd.DataFrame( lf.ca.ClusterID, index=lf.ca.CellID ),
        pd.DataFrame( lf.ca.Louvain_clusters_Scanpy, index=lf.ca.CellID ),
        pd.DataFrame( lf.ca.Percent_mito, index=lf.ca.CellID ),
        pd.DataFrame( lf.ca.nGene, index=lf.ca.CellID ),
        pd.DataFrame( lf.ca.nUMI, index=lf.ca.CellID ),
    ],
    axis=1
)
cellAnnot.columns = [
 'Celltype_Garnett',
 'ClusterID',
 'Louvain_clusters_Scanpy',
 'Percent_mito',
 'nGene',
 'nUMI']

AttributeError: '<class 'loompy.attribute_manager.AttributeManager'>' object has no attribute 'Celltype_Garnett'

In [6]:
import umap
from MulticoreTSNE import MulticoreTSNE as TSNE
# UMAP
runUmap = umap.UMAP(n_neighbors=10, min_dist=0.4, metric='correlation').fit_transform
dr_umap = runUmap( auc_mtx )
pd.DataFrame(dr_umap, columns=['X', 'Y'], index=auc_mtx.index).to_csv( "out/test_scenic_umap.txt", sep='\t')

In [7]:
# tSNE
tsne = TSNE( n_jobs=16 )
dr_tsne = tsne.fit_transform( auc_mtx )
pd.DataFrame(dr_tsne, columns=['X', 'Y'], index=auc_mtx.index).to_csv( "out/test_scenic_tsne.txt", sep='\t')

## Part 2
Description

# Scratch
Place for old or testing code

# References