<a id=top></a>

# Basic Analysis of ISLA-CBE Embeddings

## Table of Contents

----

1. [Preparations](#prep)
2. [PCA Analysis](#pca)
3. [tSNE Analysis](#tsne)
4. [Tissue Consensus Map](#tcmap)

<a id=prep></a>

## 1. Preparations

----

In [None]:
### Import modules

# External, general
from __future__ import division
import os, sys
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# External, specific
from sklearn.decomposition import PCA
from mpl_toolkits.mplot3d import Axes3D
from scipy.spatial import cKDTree
from sklearn.manifold.t_sne import TSNE
import scipy.stats as stats

# Internal
import katachi.utilities.loading as ld
import katachi.utilities.plotting as kp

In [None]:
### Load data

# Prep loader
loader = ld.DataLoaderIDR()
loader.find_imports(r"data/experimentA/extracted_measurements/", recurse=True, verbose=True)

# Import feature space
dataset_suffix = "shape_TFOR_raw_measured.tsv"
#dataset_suffix = "shape_CFOR_raw_measured.tsv"
#dataset_suffix = "tagRFPtUtrCH_TFOR_raw_measured.tsv"
#dataset_suffix = "mKate2GM130_TFOR_raw_measured.tsv"
fspace, prim_IDs, fspace_idx = loader.load_dataset(dataset_suffix)
print "Imported feature space of shape:", fspace.shape

# Import TFOR centroid locations
centroids = loader.load_dataset("_other_measurements.tsv", IDs=prim_IDs)[0][:,3:6][:,::-1]
print "Imported TFOR centroids of shape:", centroids.shape

# OPTIONAL: Import TFOR landmarks for point cloud-based visualizations
# WARNING: This data is not available on the IDR, so visualizations that
#          require it are skipped so long as `use_landmarks = False`.
#          To generate the data from the images/segmentations themselves, 
#          run `RUN_Initialization.ipynb` and `RUN_FeatureEmbedding.ipynb`.
#          Note that the latter takes a long time and significant resources.
use_landmarks = False
if use_landmarks:
    loader_lms = ld.DataLoader("data/experimentA/image_data/", recurse=True, verbose=True)
    lms, _, _ = loader_lms.load_dataset("seg_LMs_TFOR_kmeansPRES.npy", IDs=prim_IDs)
    #lms, _, _ = loader_lms.load_dataset("tagRFPtUtrCH_LMs_TFOR_kmeansPRES.npy", IDs=prim_IDs)
    #lms, _, _ = loader_lms.load_dataset("mKate2GM130_LMs_TFOR_kmeansPRES.npy", IDs=prim_IDs)
    print "Imported landmarks of shape:", lms.shape

In [None]:
### Standardize feature space

# To zero mean and unit variance
print 'Before:\n  Means:', fspace.mean(axis=0)[:3], '\n  Stds: ', fspace.std(axis=0)[:3]
fspace_z = (fspace - fspace.mean(axis=0)) / fspace.std(axis=0)
print 'After:\n  Means:', fspace_z.mean(axis=0)[:3], '\n  Stds: ', fspace_z.std(axis=0)[:3]

In [None]:
### Show imported data as boxplots

# One panel per version of the feature space
fig,ax = plt.subplots(1,2,figsize=(12,3))
panels = [(ax[0], fspace,   "Boxplot of Shape Space [raw]"),           # Before standardization
          (ax[1], fspace_z, "Boxplot of Shape Space [standardized]")]  # After standardization

# Draw each panel: one box per feature (column)
for panel_ax, panel_data, panel_title in panels:
    panel_ax.boxplot(panel_data)
    panel_ax.set_title(panel_title)
    panel_ax.set_xlabel("Features")

# Done
plt.show()

<a id=pca></a>

## 2. PCA Analysis

----

#### Basics

In [None]:
### Perform PCA

# Fit & transform PCA
pca = PCA()
fspace_pca = pca.fit_transform(fspace_z)

# Report
num_PCs = len(pca.components_)
print "N samples:", fspace_z.shape[0]
print "N PCs:    ", num_PCs

# For publication: Invert PCs to make them easier to discuss
inversion_vector = np.ones(fspace_pca.shape[1])
if 'shape' in dataset_suffix and 'TFOR' in dataset_suffix:
    inversion_vector[[0,2,4,5]] = -1    
if 'shape' in dataset_suffix and 'CFOR' in dataset_suffix:
    inversion_vector[0] = -1
fspace_pca = fspace_pca * inversion_vector

In [None]:
### Plot explained variance ratio

# Data: explained variance ratio per PC, against 1-based PC numbers
expl_var = pca.explained_variance_ratio_
pc_numbers = np.arange(1, num_PCs+1)

# Make plot
plt.figure(figsize=(4,2))
plt.plot(pc_numbers, expl_var, '.-')

# Cosmetics
# Note: The x-range is fixed such that (up to) the first 20 PCs are shown.
plt.xlabel('PCs')
plt.ylabel('Explained variance ratio')
plt.xticks(np.arange(num_PCs+1))
plt.xlim([0.8,20.2])
plt.show()

In [None]:
### Interactive 2D visualization of PC space

# Set interactions
from ipywidgets import interact
@interact(show_all=True,
          prim_ID=prim_IDs,
          PCx=(1, fspace_pca.shape[1], 1),
          PCy=(1, fspace_pca.shape[1], 1))
def show_PCs(show_all=True, prim_ID=prim_IDs[0], PCx=1, PCy=2):
    """Scatter plot of two principal components against each other.

    Parameters
    ----------
    show_all : bool
        If True, the samples of all prims are plotted, colored by `fspace_idx`.
        If False, only the samples of the prim given by `prim_ID` are plotted.
    prim_ID : entry of `prim_IDs`
        The prim to show; only used if `show_all` is False.
    PCx, PCy : int
        1-based numbers of the PCs shown on the x- and y-axis.
    """

    # Prep
    plt.figure(figsize=(8,6))

    # If all should be shown...
    if show_all:

        # Plot
        # Note: 'none' is the documented way to switch off marker edges.
        plt.scatter(fspace_pca[:,PCx-1], fspace_pca[:,PCy-1],
                    c=fspace_idx, cmap=plt.cm.plasma,
                    s=10, edgecolor='none')
        title_tag = "All Prims"

    # If individual prims should be shown...
    else:

        # Look up the prim's index and its samples once
        prim_index  = prim_IDs.index(prim_ID)
        prim_mask   = fspace_idx == prim_index
        num_samples = int(np.sum(prim_mask))

        # Plot
        # Note: vmin/vmax span all prims so that the prim's color is placed
        #       on a color scale covering the full range of prim indices.
        plt.scatter(fspace_pca[prim_mask, PCx-1],
                    fspace_pca[prim_mask, PCy-1],
                    c=[prim_index] * num_samples,
                    cmap=plt.cm.plasma, s=10, edgecolor='none',
                    vmin=0, vmax=len(prim_IDs))
        title_tag = "prim "+prim_ID

    # Cosmetics (shared by both modes)
    cbar = plt.colorbar()
    cbar.set_label('prim', rotation=270, labelpad=15)
    plt.xlabel("PC "+str(PCx))
    plt.ylabel("PC "+str(PCy))
    plt.title("PCA-Transformed Shape Space ["+title_tag+"]")
    plt.show()

#### Optional: Interactive Selection of Point Cloud Display

In [None]:
### Visualization of cells in PC space: prep
if use_landmarks:

    # The pair of PCA axes (0-based column indices) to look at
    x_pca, y_pca = 0, 1

    # Spatial index over the chosen 2D projection, used for nearest-point lookups
    kdTree = cKDTree(fspace_pca[:,(x_pca, y_pca)])

    def get_close_point(xdata, ydata, min_dist=0.5):
        """Find the sample closest to a clicked position.

        Returns the index of the sample nearest to (`xdata`, `ydata`) in the
        chosen projection, or None if that sample is not closer than `min_dist`.
        """
        nearest_dist, nearest_idx = kdTree.query([xdata,ydata], k=1, eps=0)
        return nearest_idx if nearest_dist < min_dist else None

In [None]:
### Visualization of cells in PC space: interactive plot
if use_landmarks:

    # Note: Due to the switch to the pylab notebook backend, this cell may need to be
    #       executed twice for the plot to display!

    # Switch to notebook backend
    get_ipython().magic('pylab notebook')

    # Initial plot: 2D scatter of PC space (left) and an empty 3D axes (right)
    fig = plt.figure(figsize=(13,4))
    ax = [fig.add_subplot(1, 2, 1), fig.add_subplot(1, 2, 2, projection='3d')]
    scat = ax[0].scatter(fspace_pca[:,x_pca],
                         fspace_pca[:,y_pca],
                         c=fspace_idx, cmap=plt.cm.plasma,
                         s=10, edgecolor='none')
    ax[0].set_xlabel("PC "+str(x_pca+1))
    ax[0].set_ylabel("PC "+str(y_pca+1))
    cbar = plt.colorbar(scat, ax=ax[0])
    cbar.set_label('prim', rotation=270, labelpad=15)

    # HTML widgets for printing
    # Note: this is needed because jupyter would overwrite ordinary prints
    import ipywidgets as widgets
    w = widgets.HTML()
    q = widgets.HTML()

    # Click event function
    def onclick(event):
        """Highlight the sample nearest to a click and show its point cloud.

        `event` is the matplotlib mouse event passed to the callback. Clicks
        that did not land inside the 2D scatter axes are ignored. If no sample
        lies within `min_dist` of the click, only the text output is updated.
        """

        # Only handle clicks inside the 2D scatter axes
        # Note: This skips the 3D subplot and the colorbar, and also clicks
        #       outside of any axes, for which xdata/ydata are None and could
        #       neither be printed nor be used to query for a nearby point.
        if event.inaxes is not ax[0]:
            return

        # For printing click data
        w.value = 'button=%d, x=%d, y=%d, xdata=%f, ydata=%f'%(
                  event.button, event.x, event.y, event.xdata, event.ydata)

        # Get nearby point in figure
        min_dist = 0.5
        target  = get_close_point(event.xdata, event.ydata, min_dist)

        # If no nearby point has been found, report it and leave the plots as is
        # Note: The '%d' format cannot handle None, so this case must be
        #       handled before the index is printed.
        if target is None:
            q.value = 'target_idx=None'
            return
        q.value = 'target_idx=%d' % target

        # Clear and replot the shape space with the selected point in red
        ax[0].cla()
        ax[0].scatter(fspace_pca[:,x_pca],
                      fspace_pca[:,y_pca],
                      c=fspace_idx, cmap=plt.cm.plasma,
                      s=10, edgecolor='none')
        ax[0].scatter(fspace_pca[target,x_pca],
                      fspace_pca[target,y_pca],
                      c='r', edgecolor='none')
        ax[0].set_xlabel("PC "+str(x_pca+1))
        ax[0].set_ylabel("PC "+str(y_pca+1))

        # Clear and plot the corresponding cell
        # Note: No cmap is passed here; with a single named color there is
        #       nothing to be color-mapped.
        ax[1].cla()
        ax[1].scatter(lms[target,:,2], lms[target,:,1], lms[target,:,0],
                      c='b', edgecolor='none')

        # Draw the updates
        fig.canvas.draw()

    # Starting the plot 'app'
    cid = fig.canvas.mpl_connect('button_press_event', onclick)

    # Displaying the text
    display(w)
    display(q)

In [None]:
# Return to inline backend
# Note: This undoes the switch to the notebook backend made for the optional
#       interactive plot, so it is only done if the landmarks are in use.
if use_landmarks:
    get_ipython().magic('pylab inline')

#### Optional: Moving Through Dimensions

In [None]:
### Find a sensible path to move through a given target dimension
if use_landmarks:

    # Params
    target_dim    =   0   # PC (0-based) to move along
    relevant_dims =  20   # Number of leading PCs considered for the distance

    # Never index beyond the number of PCs that are actually available
    num_dims = min(relevant_dims, fspace_pca.shape[1])

    # Find points along the spectrum of the target dim
    # Note: Samples are binned between consecutive percentiles (0 to 100);
    #       each bin holds the indices of the samples that fall into it.
    target_values = fspace_pca[:,target_dim]
    percentiles = np.percentile(target_values, range(0,101))
    candidates  = [np.where(  (target_values >  p_a)
                            & (target_values <= p_b)
                           )[0] for p_a,p_b in zip(percentiles[:-1], percentiles[1:])]

    # Select the one with minimum squared distance from zero across all other dims
    # Note: Bins can be empty (few samples or tied values); these are skipped
    #       because there is no minimum to take. The path can therefore be
    #       shorter than the number of percentile bins.
    all_except_target = [d for d in range(num_dims) if d != target_dim]
    squared_distances = np.sum(fspace_pca[:,all_except_target]**2.0, axis=1)
    dim_path = [candits[np.argmin(squared_distances[candits])]
                for candits in candidates if len(candits) > 0]

In [None]:
### Register the point clouds along the path by ICP
if use_landmarks:

    # Grab ICP
    from katachi.external.icp import icp_ready as icp

    # Start from the (unchanged) first cloud on the path, then register every
    # following cloud to the result of the previous step
    # Note: This can be suboptimal if you traverse an area of round-ish cells,
    #       in which case the original alignment orientation is lost...
    registered_clouds = [lms[dim_path[0]]]
    for path_idx in dim_path[1:]:
        previous_cloud = registered_clouds[-1]
        aligned_cloud, _, _ = icp.register_by_icp(lms[path_idx], previous_cloud)
        registered_clouds.append(aligned_cloud)

    # Stack the clouds into a single array
    registered_clouds = np.array(registered_clouds)

In [None]:
### Interactive visualization of path point clouds in 3D
if use_landmarks:

    # Note: Due to the switch to the pylab notebook backend, this cell may need to be
    #       executed twice for the plot to display!

    # Switch to notebook backend
    get_ipython().magic('pylab notebook')

    # Choose whether to use registered or unregistered
    path_clouds = registered_clouds

    # Initial plot creation (first cloud on the path)
    first_cloud = path_clouds[0]
    fig, ax = kp.point_cloud_3D(first_cloud[:,2], first_cloud[:,1], first_cloud[:,0],
                                s=40, c='r',
                                figsize=(8,8), fin=False)

    from ipywidgets import interact, fixed
    @interact(step=(0,len(dim_path)-1,1),
              ax=fixed(ax), fig=fixed(fig))
    def path_plot(ax, fig, step=0):
        """Redraw the 3D plot with the point cloud at position `step` of the path.

        `ax` and `fig` are the axes and figure of the initial plot, which are
        cleared and reused; `step` is the index into `path_clouds`.
        """

        # Replace the current content of the axes with the selected cloud
        cloud = path_clouds[step]
        ax.cla()
        fig, ax = kp.point_cloud_3D(cloud[:,2], cloud[:,1], cloud[:,0],
                                    s=40, c='r',
                                    init=False, fin=False,
                                    pre_fig=fig, pre_ax=ax)

        # Axis limits (same fixed range on all three axes)
        for set_limits in (ax.set_xlim, ax.set_ylim, ax.set_zlim):
            set_limits([-8,8])

        # Labels
        for set_label, label in ((ax.set_xlabel, r'x [$\mu m$]'),
                                 (ax.set_ylabel, r'y [$\mu m$]'),
                                 (ax.set_zlabel, r'z [$\mu m$]')):
            set_label(label, fontsize=14)
        ax.set_title('PC-Dim='+str(target_dim+1)+' | Step='+str(step))
        plt.tick_params(axis='both', which='major', labelsize=12)

        # Show
        fig.canvas.draw()

In [None]:
# Return to inline backend
# Note: This undoes the switch to the notebook backend made for the optional
#       interactive path visualization, so it is only done if the landmarks
#       are in use.
if use_landmarks:
    get_ipython().magic('pylab inline')

#### PCA-Plots for Publication

In [None]:
### PC 1 vs PC2

# Params (1-based PC numbers)
PCx = 1
PCy = 2

# Prep
plt.figure(figsize=(6,6))

# Plot
# Note: The samples are drawn in random order so that no prim systematically
#       ends up on top of the others where points overlap.
# Note: 'none' is the documented way to switch off marker edges.
scramble = np.random.permutation(fspace_pca.shape[0])
plt.scatter(fspace_pca[scramble,PCx-1], fspace_pca[scramble,PCy-1],
            c=fspace_idx[scramble], cmap=plt.cm.plasma,
            s=6, edgecolor='none', alpha=0.8)

# Labels
plt.xticks(range(-10, 11,  5), fontsize=18)  # TFOR
#plt.xticks(range(-30, 21, 10), fontsize=18)  # CFOR
#plt.yticks(range(-10, 11,  5), fontsize=18)  # CFOR
plt.xlabel("PC "+str(PCx), fontsize=19)
plt.ylabel("PC "+str(PCy), fontsize=19, labelpad=-15)
plt.title("Shape Space (TFOR)", fontsize=21, y=1.015)  # TFOR
#plt.title("Shape Space (CFOR)", fontsize=21, y=1.015)  # CFOR

# Axes
plt.xlim([-12, 12]); plt.ylim([-10, 11])  # TFOR
#plt.xlim([-30, 20]); plt.ylim([-10, 11])  # CFOR

# Finalize
plt.tight_layout()
plt.show()

#### Optional: Example 3D Cell Renderings

In [None]:
### Example 3D Cell Renderings
if use_landmarks:

    # Params: index of the cell to render (enable exactly one)
    #target =  5817  # High CFOR PC 1
    #target =  6225  # Low CFOR PC 1
    #target =   844  # High CFOR PC 2
    #target =  2867  # Low CFOR PC 2
    #target =  5765  # High TFOR PC 1
    #target =  3586  # Low TFOR PC 1
    #target = 12356  # High TFOR PC 2
    target =  2668  # Low TFOR PC 2

    # Plot the landmarks of the selected cell
    cell_lms = lms[target]
    fig, ax = kp.point_cloud_3D(cell_lms[:,2], cell_lms[:,1], cell_lms[:,0],
                                figsize=(4,4), s=40, c='r',
                                fin=False)

    # Labels
    for set_label, label in ((ax.set_xlabel, r'x [$\mu m$]'),
                             (ax.set_ylabel, r'y [$\mu m$]'),
                             (ax.set_zlabel, r'z [$\mu m$]')):
        set_label(label, fontsize=14)

    # Ticks (same positions on all three axes)
    for set_ticks in (ax.set_xticks, ax.set_yticks, ax.set_zticks):
        set_ticks([-8,-4,0,4,8])
    plt.tick_params(axis='both', which='major', labelsize=12)

    # Axis limits (same range on all three axes)
    for set_limits in (ax.set_xlim, ax.set_ylim, ax.set_zlim):
        set_limits([-8,8])

    # View
    #ax.view_init(azim=330)  # For TFOR PC 1 only

    # Finalize
    #plt.tight_layout()
    plt.show()

<a id=tsne></a>

## 3. tSNE Analysis

----

<font color=orange>**Warning:**</font> Running the tSNE on the full dataset can take a while!

<a id=tcmap></a>

## 4. Tissue Consensus Map

----

In [None]:
### Centroid-based back-mapping

# Axis range (easiest to set manually)
xlim = (-175, 15)
ylim = (- 20, 20)

# Interactive choice of PC
from ipywidgets import interact
@interact(PC=(1, fspace_pca.shape[1],1))
def centroid_backmap(PC=1):
    """Scatter the cell centroids, colored by their value along one PC.

    Parameters
    ----------
    PC : int
        1-based number of the PC whose values are mapped onto the centroids.
    """

    # Init
    fig, ax = plt.subplots(1 ,figsize=(12,5))

    # Back-mapping plot
    # Note: The PC values must be passed as `c` to be mapped through `cmap`;
    #       `color` is reserved for actual color specs and is not color-mapped.
    # Note: 'none' is the documented way to switch off marker edges.
    #zord = np.argsort(-fspace_pca[:,PC-1])
    zord = np.arange(len(fspace_pca)); np.random.shuffle(zord)  # Random is better!
    scat = ax.scatter(centroids[zord,2], centroids[zord,1],
                      c=fspace_pca[zord,PC-1], cmap=plt.cm.plasma,
                      edgecolor='none', s=15, alpha=0.75)

    # Cosmetics
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)
    ax.invert_yaxis()  # To match images
    ax.set_xlabel('TFOR x')
    ax.set_ylabel('TFOR y')
    ax.set_title('Centroid Back-Mapping of PC '+str(PC))
    cbar = plt.colorbar(scat,ax=ax)
    cbar.set_label('PC '+str(PC), rotation=270, labelpad=10)

    # Done
    plt.show()

In [None]:
### Backmapping with kde
# Derived from /SO/Flabetvibes on SO/questions/30145957

# Axis range (easiest to set manually)
xlim = (-175, 15)
ylim = (- 20, 20)

# Interactive options
from ipywidgets import interact
@interact(PC=(1,fspace_pca.shape[1],1),
          p_thresh=(10,90,10))
def kde_backmap(PC=1, p_thresh=70):
    """Plot a kernel density estimate of the centroids with high PC values.

    Parameters
    ----------
    PC : int
        1-based number of the PC to look at.
    p_thresh : int
        Percentile of the PC's values; only the centroids of cells with a
        value above this percentile enter the density estimate.
    """

    # Prep data: centroids of the cells above the percentile threshold
    pc_values    = fspace_pca[:,PC-1]
    above_thresh = pc_values > np.percentile(pc_values, p_thresh)
    kde_data     = centroids[above_thresh, :]

    # Extents
    xmin, xmax = xlim
    ymin, ymax = ylim

    # Regular 100 x 100 grid of positions at which the densities are evaluated
    xx, yy = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
    positions = np.vstack([xx.ravel(), yy.ravel()])

    # Perform the kernel density estimate for the selected centroids
    kernel = stats.gaussian_kde(np.vstack([kde_data[:, 2], kde_data[:, 1]]))
    f = kernel(positions).reshape(xx.shape)

    # Density of all centroids, used to outline the prim as a whole
    kernel_prim = stats.gaussian_kde(np.vstack([centroids[:,2], centroids[:,1]]))
    f_prim = kernel_prim(positions).reshape(xx.shape)
    f_prim_lo, f_prim_hi = f_prim.min(), f_prim.max()
    levels = [f_prim_lo + (f_prim_hi-f_prim_lo)*0.1]  # Outline at 10% of the range

    # Initialize figure
    fig = plt.figure(figsize=(12,5))
    ax = fig.gca()

    # Contourf plot
    cfset = ax.contourf(xx, yy, f, 10, cmap='magma')

    # Prim contour outline
    cset_prim = ax.contour(xx, yy, f_prim, levels, colors='w', linestyles='dashed')

    # Cosmetics
    ax.set_title("KDE of PC"+str(PC)+" Centroids at Percentile Threshold "+str(p_thresh))
    ax.set_xlabel('TFOR x')
    ax.set_ylabel('TFOR y')
    ax.set_xlim(xmin, xmax)
    ax.set_ylim(ymin, ymax)
    ax.invert_yaxis()  # To match images
    cbar = plt.colorbar(cfset, ax=ax, format="%.E")
    cbar.set_label('PC '+str(PC), rotation=270, labelpad=20)

    # Done
    plt.show()

In [None]:
### Contour plot backmapping plot for publication

# Settings
PC = 1   # 1-based number of the PC to map
xlim = (-130, 8)
ylim = ( -19, 19)

# Get plot values
plot_values = fspace_pca[:,PC-1]

# Tools for smoothing on scatter
from katachi.utilities.pcl_helpers import pcl_gaussian_smooth
from scipy.spatial.distance import pdist, squareform

# Cut off at prim contour outline
# Note: The density of all centroids is evaluated at the centroids themselves;
#       only those above 10% of the density range are kept.
kernel_prim = stats.gaussian_kde(centroids[:,1:].T)
f_prim = kernel_prim(centroids[:,1:].T)
f_prim_mask = f_prim > f_prim.min() + (f_prim.max()-f_prim.min())*0.1
plot_values    = plot_values[f_prim_mask]
plot_centroids = centroids[f_prim_mask]

# Smoothen
# Note: This builds the full pairwise distance matrix of the kept centroids.
pdists = squareform(pdist(plot_centroids[:,1:]))
plot_values = pcl_gaussian_smooth(pdists, plot_values[:,np.newaxis], sg_percentile=0.5)[:,0]

# Initialize figure
fig, ax = plt.subplots(1, figsize=(8, 3.25))

# Contourf plot
cfset = ax.tricontourf(plot_centroids[:,2], plot_centroids[:,1], plot_values, 20, 
                       cmap='plasma')

# Illustrative centroids from a single prim (the first one in `prim_IDs`)
# Note: Hollow markers are requested explicitly with `facecolors='none'`; an
#       empty string is not a valid color spec.
# NOTE(review): The black outline is assumed to be the intended look (it was
#               previously left to the default edge color) - confirm.
example_prim_mask = fspace_idx == 0
plt.scatter(centroids[example_prim_mask, 2],
            centroids[example_prim_mask, 1],
            facecolors='none', edgecolors='k', alpha=0.5)

# Cosmetics
ax.set_xlabel('TFOR x', fontsize=16)
ax.set_ylabel('TFOR y', fontsize=16)
plt.tick_params(axis='both', which='major', labelsize=13)
plt.xlim(xlim); plt.ylim(ylim)
ax.invert_yaxis()  # To match images

# Colorbar
cbar = plt.colorbar(cfset, ax=ax, pad=0.01)
cbar.set_label('PC '+str(PC), rotation=270, labelpad=10, fontsize=16)
cbar.ax.tick_params(labelsize=13)

# Done
plt.tight_layout()
plt.show()

----
[back to top](#top)