## Notebook Setup

In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load the autoreload extension; mode 2 reloads all imported modules before
# each cell executes, so edits to the project modules are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import os
import csv

import singlecell_dash.common as common
import singlecell_dash.network_util as nutil

import numpy as np
import pandas as pd
import hermione as hm
import seaborn as sns
sns.set(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})

from tissue_analysis import diff_exp_clusters

import scipy.cluster.hierarchy as sch

In [None]:
# constants (reset for local environment)

# Root data directory: passed to common.TenX_Runs when loading the data and
# used as the base of the KNN cache file path.
DATA_FOLDER = '../maca-dash/data/'

In [None]:
# Load the 10x runs from DATA_FOLDER, restricted by the metadata filters below.
tenx = common.TenX_Runs(
    DATA_FOLDER,
    verbose=True,
    filters={'Age': [3], 'Tissue': ['Liver']},
)

# Dense expression table built from tenx.genes: rows are labelled with
# tenx.genes.rows and columns with tenx.genes.columns.
exp = pd.DataFrame(
    data=tenx.genes.matrix.todense(),
    columns=tenx.genes.columns,
    index=tenx.genes.rows,
)

# name the file based on your filters
knn_cache_file = os.path.join(DATA_FOLDER, 'coords', 'descriptive_file_name.npy')

# Make sure the cache directory exists. makedirs(exist_ok=True) also creates
# missing parent directories and does nothing when the directory is already
# there, so no separate existence check is needed.
os.makedirs(os.path.dirname(knn_cache_file), exist_ok=True)

In [None]:
# Run this cell to build a fresh KNN cache from tenx.genes.matrix (max_k=200)
# and save its knn_array to knn_cache_file.
knn_cache = nutil.KNNCache(tenx.genes.matrix, max_k=200)
np.save(file=knn_cache_file, arr=knn_cache.knn_array)

In [None]:
# ... or run this if you saved it and want to reload
# (KNNCache is constructed from the path that the cache-creation cell saved to)
knn_cache = nutil.KNNCache(knn_cache_file)

## Clustering and Layout

In [None]:
# Neighbour graph taken from the cache; change k here.
knn_graph = knn_cache.get_knn_graph(k=25)

# Layout coordinates for the rows of the expression table.
coords = nutil.network_layout(tenx.genes.rows, knn_graph)

# label_propagation returns a pair: the first value is stored as the
# 'cluster' column of coords, the second is kept as Z for the later cells.
cluster_labels, Z = nutil.label_propagation(exp, knn_graph)
coords['cluster'] = cluster_labels

In [None]:
# plot and color by cluster
# (coords and Z come from the clustering cell; points are colored by the
# 'cluster' column of coords)
nutil.plot_labelprop(coords, Z, color_by=coords['cluster'])

In [None]:
# Plot again, this time colored by the log2(x + 1) expression of one gene.
gene_log_expression = np.log2(exp['Apoa2'] + 1)
expression_with_coords = exp.join(coords)
nutil.plot_labelprop(expression_with_coords, Z, color_by=gene_log_expression)

In [None]:
# Differential expression for every split of the tree Z, using the cluster
# labels stored in coords.
cluster_column = coords['cluster']
de_dict = diff_exp_clusters(Z, exp, cluster_column)

# List the available keys, then display one entry as the cell output.
print(de_dict.keys())
de_dict[(42, 'all')]

### More Subsetting

In [None]:
# subset the knn_cache further
exp_subset = exp.loc[tenx.cell_metadata['Sex'] == 'M']

# subset using integer index only
# The cache needs the integer row positions of the selected cells within the
# full expression table (exp has the same row order as tenx.genes.matrix).
# np.where(exp_subset) would instead return the row index of every non-zero
# expression value inside the subset, which is not a list of cell positions.
# NOTE(review): assumes the row labels of exp are unique -- confirm.
subset_rows = np.flatnonzero(exp.index.isin(exp_subset.index))
knnc = knn_cache.subset_cache(subset_rows)
print(knnc) # will automatically determine new max k

subset_graph = knnc.get_knn_graph(k=50)
# NOTE(review): the clustering cell passes row labels (tenx.genes.rows) as the
# first argument of network_layout, while this call passes a DataFrame --
# confirm which one network_layout expects.
subset_coords = nutil.network_layout(exp_subset, subset_graph)
subset_coords['cluster'], subset_Z = nutil.label_propagation(exp_subset, subset_graph)

nutil.plot_labelprop(subset_coords, subset_Z)

In [None]:
# joyplots
# pyplot is imported here because the setup cell never binds `plt`; without
# this import the plt.show() call below raises NameError.
import matplotlib.pyplot as plt

# Distribution of log2(x + 1) 'Actb' expression per cluster, with the clusters
# ordered by the leaf order of the tree Z.
hm.joyplot(np.log2(exp + 1).join(coords), 'Actb', 'cluster', sch.leaves_list(Z))
plt.show()