## Notebook Setup

In [1]:
import os, glob

import numpy as np
import pandas as pd

import singlecell_dash.common as common

import seaborn as sns

from tissue_analysis import *
from singlecell_dash.network_util import plot_clustering
import singlecell_dash.network_util as nutil

In [2]:
%matplotlib inline

## Load data

In [3]:
# Root directory of the data; edit this path to match your local environment.
data_folder = '/data1/maca_deployment'

In [4]:
# Tissue to analyze; passed to load_tissue and used in the annotation path.
tissue = 'Bladder'

In [5]:
# Load the data for the selected tissue. The call returns three objects that
# the rest of the notebook uses: `tenx`, `exp` and `knn_cache`.
tenx, exp, knn_cache = load_tissue(data_folder, tissue)

Reading /data1/maca_deployment/10x_data/10X_P4_3/10X_P4_3.mus.cell-gene.npz ...
Reading /data1/maca_deployment/10x_data/10X_P4_4/10X_P4_4.mus.cell-gene.npz ...
Reading /data1/maca_deployment/10x_data/10X_P6_3/10X_P6_3.mus.cell-gene.npz ...
Reading /data1/maca_deployment/10x_data/10X_P6_4/10X_P6_4.mus.cell-gene.npz ...
Reading /data1/maca_deployment/10x_data/10X_P7_7/10X_P7_7.mus.cell-gene.npz ...


In [None]:
# Optional: run this cell only if the previously loaded cache is unusable.
# It replaces `knn_cache` with a fresh KNNCache built from tenx.genes.matrix.
knn_cache = nutil.KNNCache(
    tenx.genes.matrix,
    max_k=500,
)

## Clustering and layout

In [None]:
# Cluster the cells. `coords` carries a 'cluster' column and `Z` is handed to
# the plotting and differential-expression helpers further down.
# NOTE(review): the third argument (25) is presumably the k used for
# clustering -- confirm against cluster_tissue.
coords, Z = cluster_tissue(exp, knn_cache, 25)

In [None]:
# Draw the layout with every cell colored by its (discrete) cluster label.
plot_clustering(
    coords,
    Z,
    color_by=coords['cluster'],
    discrete_data=True,
)

In [None]:
# Draw the layout colored by a continuous value: log2(expression + 1) of Krt19.
plot_clustering(
    exp.join(coords),
    Z,
    color_by=np.log2(exp['Krt19'] + 1),
    discrete_data=False,
)

In [None]:
# Run the differential-expression comparisons across the tree for the
# clustering computed above; results are collected in `de_dict`.
de_dict = diff_exp_clusters(
    Z,
    exp,
    coords['cluster'],
    verbose=False,
)

# List every comparison that was calculated.
print('\n\nComparison keys:')
print(de_dict.keys())

## Make a joyplot

In [None]:
# Joyplot for the gene Krt19, using the full expression table and clustering.
plot_gene_joyplot(exp, coords, 'Krt19', Z)

## Compare Clusterings

In [None]:
# Load every annotation file found for this tissue and join them, column-wise,
# onto an empty table that shares its index with `exp`.
annotations = pd.DataFrame(index=exp.index)
annotation_pattern = os.path.join(data_folder, 'annotations', tissue, '*')
# glob returns matches in arbitrary, filesystem-dependent order; sorting makes
# the column order of `annotations` reproducible from run to run.
for fn in sorted(glob.glob(annotation_pattern)):
    print(fn)
    annos = pd.read_csv(fn, index_col=0)
    # Prefix each column with the file's base name (the text before the first
    # '.') so columns coming from different files stay distinguishable.
    prefix = os.path.basename(fn).split('.')[0]
    annos.columns = [prefix + '_' + name for name in annos.columns]
    annotations = annotations.join(annos)

In [None]:
# Preview the first rows of the combined annotation table.
annotations.head()

In [None]:
# Compare the released annotation clusters with the clusters computed in this
# notebook: cross-tabulate the two labelings and show log10(count + 1).
ax = sns.heatmap(
    np.log10(
        pd.crosstab(annotations['10x-release-smushed_cluster'], coords['cluster']) + 1
    )
)

## Subset and Re-analyze

In [None]:
# Filters map a column name to the list of values to keep.
filters = {'Sex': ['F']}
# Restrict both the expression table and the KNN cache to the matching cells.
exp_subset, knn_subset = subset_exp(
    tenx,
    exp,
    filters,
    knn_cache=knn_cache,
)
print('expression subset: ', exp_subset.shape)
print(knn_subset)

In [None]:
# Re-cluster the subset, trying a new k (10 here), then plot it by cluster.
subset_coords, subset_Z = cluster_tissue(exp_subset, knn_subset, 10)
plot_clustering(
    subset_coords,
    subset_Z,
    color_by=subset_coords['cluster'],
)

In [None]:
# Color the subset layout by log2(expression + 1) of the chosen gene.
gene = 'Krt19'
plot_clustering(
    subset_coords,
    subset_Z,
    color_by=np.log2(exp_subset[gene] + 1),
    discrete_data=False,
)

In [None]:
# Joyplot of the chosen gene, this time restricted to the subset.
gene = 'Krt19'
plot_gene_joyplot(
    exp_subset,
    subset_coords,
    gene,
    subset_Z,
)

In [None]:
# Select the cells whose '10x-release-smushed_cluster' annotation equals 1.
samples = annotations.index[
    annotations['10x-release-smushed_cluster'] == 1
]
# Restrict both the expression table and the KNN cache to those samples.
exp_subset, knn_subset = subset_exp(
    tenx,
    exp,
    samples=samples,
    knn_cache=knn_cache,
)
print('expression subset: ', exp_subset.shape)
print(knn_subset)

In [None]:
# Re-cluster the subset, trying a new k (20 here), then plot it by cluster.
subset_coords, subset_Z = cluster_tissue(exp_subset, knn_subset, 20)
plot_clustering(
    subset_coords,
    subset_Z,
    color_by=subset_coords['cluster'],
)

In [None]:
# Run the differential-expression comparisons across the tree, this time for
# the subset and its clustering; `de_dict` is overwritten with the new results.
de_dict = diff_exp_clusters(
    subset_Z,
    exp_subset,
    subset_coords['cluster'],
    verbose=False,
)

# List every comparison that was calculated.
print('\n\nComparison keys:')
print(de_dict.keys())