<a id=top></a>

# Multi-Channel Atlas Analysis

## Table of Contents

----

1. [Preparations](#prep)
2. [PCA & tSNE](#pca)
3. [Covariate Bi-Graph](#graph)
4. [Tissue Consensus Map](#map)
5. [Archetype Overlay](#archov)

<a id=prep></a>

## 1. Preparations

----

In [None]:
### Import modules

# External, general
from __future__ import division
import os, sys, pickle
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# External, specific
import pandas as pd
import ipywidgets as widgets
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.manifold.t_sne import TSNE
import networkx as nx
import scipy.stats as stats

# Internal
import katachi.utilities.loading as ld
import katachi.utilities.plotting as kp

In [None]:
### Load basic data

# Prep loader
loader = ld.DataLoaderIDR()
loader.find_imports(r"data/experimentA/extracted_measurements/", recurse=True, verbose=True)

# Import shape spaces
fspace_TFOR_pca, prim_IDs, fspace_idx = loader.load_dataset("shape_TFOR_pca_measured.tsv")
fspace_CFOR_pca, _, _ = loader.load_dataset("shape_CFOR_pca_measured.tsv", IDs=prim_IDs)
print "Imported TFOR shape space of shape:", fspace_TFOR_pca.shape
print "Imported CFOR shape space of shape:", fspace_CFOR_pca.shape

# Import TFOR centroid locations
centroids = loader.load_dataset("_other_measurements.tsv", IDs=prim_IDs)[0][:,3:6][:,::-1]
print "Imported TFOR centroids of shape:", centroids.shape

# Import & standardize engineered features
covar_df, _, _ = loader.load_dataset("_other_measurements.tsv", IDs=prim_IDs, force_df=True)
del covar_df['Centroids RAW X']; del covar_df['Centroids RAW Y']; del covar_df['Centroids RAW Z']
covar_names = list(covar_df.columns)
covar_df_z = (covar_df - covar_df.mean()) / covar_df.std()
print "Imported engineered features of shape:", covar_df.shape

In [None]:
### Load atlas data

# Set embedding type for atlas data
embedding_type = 'TFOR'
#embedding_type = 'CFOR'

# Specify channels to load
channels = ['CDMPRtagRFPt', 'mKate2GM130', 'tagRFPtUtrCH', 'b4galT1tagRFPt', 
            'NLStdTomato', 'mKate2rab11', 'mKate2rab5', 'lysotrackerdeepred']

# Load channel data
obs_fspace = {}
atlas_fspace = {}
for channel in channels:
    
    # Load observed channel data (and map to full space)
    obs, obs_IDs, obs_idx = loader.load_dataset(channel+'_'+embedding_type+'_'+'pca_measured.tsv', IDs=prim_IDs)
    obs_full = np.empty((fspace_TFOR_pca.shape[0], obs.shape[1]))
    obs_full[:] = np.nan
    obs_full[np.in1d(prim_IDs, obs_IDs)[fspace_idx]] = obs
    obs_fspace[channel] = obs_full
    
    # Load predicted channel data
    atl, _, _ = loader.load_dataset(channel+'_'+embedding_type+'_'+'pca_predicted.tsv', IDs=prim_IDs)
    atlas_fspace[channel] = atl
    
    # Report
    print channel+":\tobs.shape is", obs.shape, "-- obs_full.shape is", obs_full.shape, "-- atl.shape is", atl.shape

In [None]:
### Load archetype data

# Source fspace
#archetype_type = 'TFOR'
archetype_type = 'CFOR'

# Load archetype predictions
archetype_classes, _, _ = loader.load_dataset("_archetype_"+archetype_type+"_classifications.tsv", 
                                              IDs=prim_IDs)
print "Imported archetype classifications of shape:", archetype_classes.shape
archetype_probas, _, _  = loader.load_dataset("_archetype_"+archetype_type+"_probabilities.tsv", 
                                              IDs=prim_IDs)
print "Imported archetype probabilities of shape:", archetype_probas.shape

# PCA of archetype space
archetype_pca = PCA().fit_transform(archetype_probas)

# Archetype annotation and visualization
archetype_decodedict = {0 : 'unclassified', 1 : 'central', 2 : 'peri',
                        3 : 'inter', 4 : 'leader'}
archetype_encodedict = {name:key for key,name in archetype_decodedict.iteritems()}
archetype_colors = {0 : 'lightgray', 1 : 'royalblue', 2 : 'limegreen',
                    3 : 'cyan', 4 : 'orangered'}

<a id=pca></a>

## 2. PCA & tSNE

----

In [None]:
### Plot PCA

# Set interactions
from ipywidgets import interact
@interact(channel=atlas_fspace.keys(),
          PCx=(1, atlas_fspace[atlas_fspace.keys()[0]].shape[1], 1),
          PCy=(1, atlas_fspace[atlas_fspace.keys()[0]].shape[1], 1),
          highlight_obs=False)
def show_PCs(channel='mKate2GM130', PCx=1, PCy=2, highlight_obs=False):
    """Scatter two principal components of a channel's predicted embedding.

    Parameters
    ----------
    channel : str
        Key into `atlas_fspace` (and `obs_fspace` when `highlight_obs` is set).
    PCx, PCy : int
        1-based indices of the components shown on the x and y axis.
    highlight_obs : bool
        If False, points are colored by `fspace_idx` and a colorbar labelled
        'sample' is added. If True, the predicted points are drawn in gray
        and the measured points from `obs_fspace` are drawn on top.
    """

    # Prep
    plt.figure(figsize=(9,7))

    # Randomize the drawing order so that overlapping points are not
    # systematically covered by whatever comes last in the array
    rnd = np.random.permutation(np.arange(atlas_fspace[channel].shape[0]))
    pred_x = atlas_fspace[channel][rnd, PCx-1]
    pred_y = atlas_fspace[channel][rnd, PCy-1]

    # NOTE: edgecolor='none' is the documented way to suppress marker edges;
    # the empty string used previously is rejected by newer Matplotlib.
    if highlight_obs:
        plt.scatter(pred_x, pred_y,
                    c='gray', s=10, edgecolor='none', alpha=0.5, label='predicted')
        # Rows without a measurement are NaN in obs_fspace (presumably
        # skipped by Matplotlib when drawing)
        plt.scatter(obs_fspace[channel][rnd,PCx-1], obs_fspace[channel][rnd,PCy-1],
                    c='darkcyan', s=10, edgecolor='none', label='observed')
        plt.legend(frameon=False)
    else:
        plt.scatter(pred_x, pred_y,
                    c=fspace_idx[rnd], cmap=plt.cm.plasma,
                    s=10, edgecolor='none', alpha=0.5)
        cbar = plt.colorbar()
        cbar.set_label('sample', rotation=270, labelpad=15)

    # Cosmetics
    plt.xlabel("PC "+str(PCx))
    plt.ylabel("PC "+str(PCy))
    plt.title(channel)
    plt.show()

<a id=graph></a>

## 3. Covariate Bi-Graph

----

In [None]:
### Compute covariate correlations
# Correlates the columns of one dataset (channel1) with the leading PCs of an
# atlas channel (channel2) and passes the result to kp.covar_pc_bigraph.

# USER INPUT: Parameters
channel1     = 'covars'       # Upper channel: 'covars', 'TFOR', 'CFOR' or a key of atlas_fspace
channel2     = 'mKate2GM130'  # Lower channel: a key of atlas_fspace
num_PCs      = 8              # Number of PCs to include
corr_measure = 'pearsonr'     # Correlation measure to use (currently only pearsonr is available!)
threshold    = 0.30           # Threshold to include a correlation as relevant

# Select the datasets to compare
# (the covariates are already z-scored; atlas channels are standardized here;
#  the TFOR/CFOR shape spaces are used as loaded, without standardization)
if channel1 == 'covars':
    ch1_data = covar_df_z
elif channel1 == 'TFOR':
    ch1_data = pd.DataFrame(fspace_TFOR_pca[:,:num_PCs])
elif channel1 == 'CFOR':
    ch1_data = pd.DataFrame(fspace_CFOR_pca[:,:num_PCs])
else:
    ch1_data = pd.DataFrame(StandardScaler().fit_transform(atlas_fspace[channel1][:,:num_PCs]))
ch2_data = pd.DataFrame(StandardScaler().fit_transform(atlas_fspace[channel2][:,:num_PCs]))
# Labels for the channel1 columns: covariate names, or PC numbers "1".."num_PCs"
ch1_names = covar_names if channel1=='covars' else [str(num) for num in range(1, num_PCs+1)]
    
# Compute correlations
# NOTE(review): the expression below indexes the result of the pairwise
# expanding correlation with .iloc[-1, :, :], i.e. it expects a 3-D object
# (presumably a pandas Panel) and keeps only its last item -- confirm against
# the pandas version in use; get_values() is likewise version dependent.
print "Preparing correlation data..."
if corr_measure == 'pearsonr':
    dists = ch1_data.expanding(axis=1).corr(ch2_data, pairwise=True).iloc[-1, :, :].get_values()
else:
    # Any other value of corr_measure is rejected
    raise NotImplementedError()

# Generate the plot
# show=False defers display to the plt.show() call below
print "Generating bigraph...\n"
kp.covar_pc_bigraph(dists, threshold, ch1_names, 
                    height=0.6, verbose=True, show=False)

# Done
plt.show()

<a id=map></a>

## 4. Tissue Consensus Map

----

In [None]:
### Plot backmapping

# PARAMETERS
# NOTE(review): `interesting` and `save_pdf` are not referenced anywhere in
# this cell, and the module-level `PC` is shadowed by the widget parameter of
# the same name inside contour_backmap -- confirm whether they are still needed.
PC = 4
interesting = 'leaders'
save_pdf = False
### Contour plot backmapping plot for publication

# Set interactions
# NOTE(review): the PC slider range (1..20) is hard-coded; it is not derived
# from the number of columns of the selected data.
@widgets.interact(channel=channels+['TFOR','CFOR'],
                  standardized=['no','z'],
                  PC=(1, 20, 1))

# Plot
def contour_backmap(channel='mKate2GM130',
                    standardized='no', PC=1):  
    """Draw a smoothed filled-contour map of one PC over the TFOR centroids.

    Parameters
    ----------
    channel : str
        'TFOR' or 'CFOR' to plot a shape space PC, otherwise a key of
        `atlas_fspace`.
    standardized : str
        'z' to z-score the selected values before plotting; any other value
        leaves them unchanged.
    PC : int
        1-based index of the component to plot.
    """

    # Settings (axis limits of the final plot)
    xlim = (-130, 8)
    ylim = ( -19, 19)

    # Select data
    if channel=='TFOR':
        plot_values = fspace_TFOR_pca[:,PC-1]
    elif channel=='CFOR':
        plot_values = fspace_CFOR_pca[:,PC-1]
    else:
        plot_values = atlas_fspace[channel][:,PC-1]
    
    # Standardization
    if standardized=='z':
        plot_values = StandardScaler().fit_transform(plot_values[:,np.newaxis]).squeeze()

    # Tools for smoothing on scatter
    from katachi.utilities.pcl_helpers import pcl_gaussian_smooth
    from scipy.spatial.distance import pdist, squareform

    # Cut off at prim contour outline:
    # estimate the density of the centroids in the plane spanned by columns 1
    # and 2 and keep only points whose density lies more than 10% of the
    # density range above the minimum
    kernel_prim = stats.gaussian_kde(centroids[:,1:].T)
    f_prim = kernel_prim(centroids[:,1:].T)
    f_prim_mask = f_prim > f_prim.min() + (f_prim.max()-f_prim.min())*0.1
    plot_values    = plot_values[f_prim_mask]
    plot_centroids = centroids[f_prim_mask]

    # Smoothen
    # (full pairwise distance matrix of the retained centroids is handed to
    #  the smoothing helper; presumably a Gaussian-weighted average over
    #  neighbours -- see pcl_gaussian_smooth for the meaning of sg_percentile)
    pdists = squareform(pdist(plot_centroids[:,1:]))
    plot_values = pcl_gaussian_smooth(pdists, plot_values[:,np.newaxis], sg_percentile=0.5)[:,0]

    # Initialize figure
    fig, ax = plt.subplots(1, figsize=(8, 3.25))

    # Contourf plot (column 2 on the x axis, column 1 on the y axis, 20 levels)
    cfset = ax.tricontourf(plot_centroids[:,2], plot_centroids[:,1], plot_values, 20, 
                           cmap='plasma')

    # Illustrative centroids from a single prim
    # (prim_IDs.index(prim_IDs[0]) is the position of the first sample ID, so
    #  this selects the rows whose fspace_idx equals that position)
    # NOTE(review): c='' is presumably meant to draw unfilled markers; its
    # effect depends on the Matplotlib version -- confirm.
    plt.scatter(centroids[fspace_idx==prim_IDs.index(prim_IDs[0]), 2], 
                centroids[fspace_idx==prim_IDs.index(prim_IDs[0]), 1],
                c='', alpha=0.5)

    # Cosmetics
    ax.set_xlabel('TFOR x', fontsize=16)
    ax.set_ylabel('TFOR y', fontsize=16)
    plt.tick_params(axis='both', which='major', labelsize=13)
    plt.xlim(xlim); plt.ylim(ylim)
    ax.invert_yaxis()  # To match images

    # Colorbar
    cbar = plt.colorbar(cfset, ax=ax, pad=0.01)
    cbar.set_label(channel+' (PC'+str(PC)+')', rotation=270, labelpad=10, fontsize=16)
    cbar.ax.tick_params(labelsize=13)

    # Done
    plt.tight_layout()
    plt.show()

<a id=archov></a>

## 5. Archetype Overlay

----

In [None]:
### Plot archetype overlay

# Set interactions
@interact(channel=atlas_fspace.keys(),
          dim=(1, atlas_fspace[atlas_fspace.keys()[0]].shape[1], 1), 
          PCx=(1, archetype_pca.shape[1], 1),
          PCy=(1, archetype_pca.shape[1], 1),
          vmax_factor=(0.0, 1.0, 0.1),
          show_classes=False)
def show_proba_fspace_overlay(channel='mKate2GM130', dim=1, PCx=1, PCy=2, 
                              vmax_factor=1.0, show_classes=False): 
    """Scatter the archetype PCA space, colored by an atlas dimension or by class.

    Parameters
    ----------
    channel : str
        Key into `atlas_fspace` providing the overlay values.
    dim : int
        1-based column of the channel's embedding used for coloring.
    PCx, PCy : int
        1-based indices of the archetype-space components on the x and y axis.
    vmax_factor : float
        The upper end of the color range is `vmax_factor * max(overlay)`.
    show_classes : bool
        If True, ignore the overlay and draw one colored, labelled point
        cloud per archetype class instead (for reference).
    """

    # Prep plot
    plt.figure(figsize=(9,7))

    # Grab overlay data
    overlay_data = atlas_fspace[channel][:, dim-1]

    if show_classes:
        # Show the archetypes instead (for reference); classes without any
        # member are skipped so they do not show up in the legend
        for key in archetype_decodedict.keys():
            mask = archetype_classes==key
            if np.any(mask):
                scat = plt.scatter(archetype_pca[mask, PCx-1], archetype_pca[mask, PCy-1],
                                   color=archetype_colors[key], edgecolor='none',
                                   s=10, alpha=0.5, label=archetype_decodedict[key])
        plt.legend(frameon=False, fontsize=16)
    else:
        # Numeric values that are to be color-mapped must be passed as `c`;
        # `color` is meant for color specifications and newer Matplotlib
        # raises an error when it receives plain numbers. Likewise,
        # edgecolor='none' replaces the undocumented empty string.
        scat = plt.scatter(archetype_pca[:, PCx-1], archetype_pca[:, PCy-1],
                           c=overlay_data, cmap='viridis',
                           vmax=vmax_factor*np.max(overlay_data),
                           edgecolor='none', s=10, alpha=0.7)
        plt.title(channel + ', dim '+str(dim), fontsize=20)

    # Cosmetics  
    plt.xlabel("PC "+str(PCx), fontsize=18)
    plt.ylabel("PC "+str(PCy), fontsize=18)
    plt.gca().tick_params(axis='both', which='major', labelsize=16)
    plt.tight_layout()

    # Show
    plt.show()

In [None]:
### Archetype overlay panel
# One row of four panels in archetype PCA space (PC1 vs PC2): the archetype
# classes, followed by dimension 3 of three atlas channels.

# Prep
fig, ax = plt.subplots(1, 4, figsize=(12,3), sharex=True, sharey=True)
PCx = 1; PCy=2

# Plot archetypes (keys in reversed order; classes without members are skipped)
# NOTE: edgecolor='none' is the documented way to suppress marker edges; the
# empty string used previously is rejected by newer Matplotlib.
for key in archetype_decodedict.keys()[::-1]:
    mask = archetype_classes==key
    if np.any(mask):
        scat = ax[0].scatter(archetype_pca[mask, PCx-1], archetype_pca[mask, PCy-1],
                             color=archetype_colors[key], edgecolor='none', s=7, alpha=0.5,
                             label=archetype_decodedict[key].replace('Cells','').replace('Rosette',''))

# Add archetype legend, with markers enlarged relative to the plotted points
legend = ax[0].legend(frameon=False, fontsize=9)
for handle in legend.legendHandles:
    handle.set_sizes([30] * len(handle.get_sizes()))

# Plot dimension 3 of the GM130, UtrCH and NLS atlas embeddings in panels 2-4.
# Values to be color-mapped are passed as `c`; `color` is reserved for color
# specifications and newer Matplotlib raises an error for plain numbers.
dim = 3
vmax_factor = 1.0
for axx, channel in zip(ax[1:], ('mKate2GM130', 'tagRFPtUtrCH', 'NLStdTomato')):
    overlay_data = atlas_fspace[channel][:, dim-1]
    scat = axx.scatter(archetype_pca[:, PCx-1], archetype_pca[:, PCy-1],
                       c=overlay_data, cmap='viridis',
                       vmax=vmax_factor*np.max(overlay_data),
                       edgecolor='none', s=7, alpha=0.7)

# Cosmetics (identical ticks and limits on every panel)
for axx in ax.flatten():
    axx.set_xticks(np.arange(-0.5, 1.1, 0.5))
    axx.set_yticks(np.arange(-0.5, 1.1, 0.5))
    axx.set_xlim([-0.7, 0.9]); axx.set_ylim([-0.5, 1.0])
    axx.tick_params(axis='both', which='major', labelsize=14)
plt.tight_layout()

# Show
plt.show()

In [None]:
### Archetype overlay panel for publication
# 2x2 grid in archetype PCA space (PC1 vs PC2), colored by TFOR PC1, TFOR PC3,
# dimension 3 of the UtrCH atlas embedding, and CFOR PC1.
#
# NOTE: values to be color-mapped are passed as `c` (not `color`, which is
# reserved for color specifications and raises an error for plain numbers in
# newer Matplotlib); edgecolor='none' replaces the undocumented empty string.

# Prep
fig, ax = plt.subplots(2, 2, figsize=(12,12), sharex=True, sharey=True)
PCx = 1; PCy=2

# Plot TFOR-PC1
dim = 1
vmax_factor=1.0
scat = ax[0,0].scatter(archetype_pca[:, PCx-1], archetype_pca[:, PCy-1],
                       c=fspace_TFOR_pca[:, dim-1], cmap='viridis',
                       vmax=vmax_factor*np.max(fspace_TFOR_pca[:, dim-1]),
                       edgecolor='none', s=15, alpha=0.7)

# Plot TFOR-PC3
dim = 3
vmax_factor=1.0
scat = ax[0,1].scatter(archetype_pca[:, PCx-1], archetype_pca[:, PCy-1],
                       c=fspace_TFOR_pca[:, dim-1], cmap='viridis',
                       vmax=vmax_factor*np.max(fspace_TFOR_pca[:, dim-1]),
                       edgecolor='none', s=15, alpha=0.7)

# Plot UtrCH atlas embedding, dimension 3
channel = 'tagRFPtUtrCH'
dim = 3
vmax_factor=1.0
overlay_data = atlas_fspace[channel][:, dim-1]
scat = ax[1,0].scatter(archetype_pca[:, PCx-1], archetype_pca[:, PCy-1],
                       c=overlay_data, cmap='viridis',
                       vmax=vmax_factor*np.max(overlay_data),
                       edgecolor='none', s=15, alpha=0.7)

# Plot CFOR-PC1
dim = 1
vmax_factor=1.0
scat = ax[1,1].scatter(archetype_pca[:, PCx-1], archetype_pca[:, PCy-1],
                       c=fspace_CFOR_pca[:, dim-1], cmap='viridis',
                       vmax=vmax_factor*np.max(fspace_CFOR_pca[:, dim-1]),
                       edgecolor='none', s=15, alpha=0.7)

# Cosmetics (identical ticks and limits on every panel)
for axx in ax.flatten():
    axx.set_xticks(np.arange(-0.5, 1.1, 0.5))
    axx.set_yticks(np.arange(-0.5, 1.1, 0.5))
    axx.set_xlim([-0.7, 0.9]); axx.set_ylim([-0.5, 1.0])
    axx.tick_params(axis='both', which='major', labelsize=24)
plt.tight_layout()

# Show
plt.show()

----
[back to top](#top)