In [None]:
import numpy as np
from numpy import linalg as LA
from scipy.sparse import csc_matrix,coo_matrix
from scipy.sparse.linalg import svds, eigs
from sklearn.decomposition import PCA,TruncatedSVD
import pickle
import pandas as pd
from random import randint

import umap
from sklearn.manifold import TSNE
from sklearn.cluster import DBSCAN, KMeans, AffinityPropagation, MeanShift
from sklearn.preprocessing import MinMaxScaler
import kmapper as km
from kmapper.cover import Cover

import networkx as nx
from community import best_partition # this is not part of networkx

import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
from matplotlib.colors import ListedColormap
from scipy import ndimage
import imageio
import plotly
import plotly.graph_objs as go

import os

def bbox(img):
    """Return the bounding box of the non-zero content of a 2D array.

    Parameters
    ----------
    img : array-like
        Image whose truthy (non-zero) entries define the region of interest.

    Returns
    -------
    (rmin, rmax, cmin, cmax)
        First and last row index, then first and last column index, that
        contain at least one non-zero entry. All four bounds are INCLUSIVE,
        so a slice covering the box is ``img[rmin:rmax + 1, cmin:cmax + 1]``.

    Raises
    ------
    IndexError
        If ``img`` has no non-zero entry at all.
    """
    def _first_and_last(occupied):
        # Indices (along the first axis) at which the mask is True.
        hits = np.nonzero(occupied)[0]
        return hits[0], hits[-1]

    rmin, rmax = _first_and_last(np.any(img, axis=1))
    cmin, cmax = _first_and_last(np.any(img, axis=0))
    return rmin, rmax, cmin, cmax
def embedding(data,dim):
    """Project ``data`` onto ``dim`` dimensions with UMAP via Kepler Mapper.

    Relies on the module-level ``mapper`` object, which must have been
    created before this function is called.

    Parameters
    ----------
    data : array-like
        Samples to embed, passed unchanged to ``mapper.fit_transform``.
    dim : int
        Number of UMAP components of the resulting embedding.

    Returns
    -------
    The value returned by ``mapper.fit_transform`` for the UMAP projection.
    """
    # Fixed UMAP configuration; only the output dimension varies per call.
    # Both seeds are pinned (random_state, transform_seed).
    umap_settings = dict(
        n_components=dim,
        n_neighbors=200,
        a=None,
        angular_rp_forest=False,
        b=None,
        init='spectral',
        learning_rate=1.0,
        local_connectivity=1.0,
        metric='euclidean',
        metric_kwds=None,
        min_dist=0.1,
        n_epochs=500,
        negative_sample_rate=10,
        random_state=47,
        repulsion_strength=1.0,
        set_op_mix_ratio=0.5,
        spread=0.25,
        target_metric='categorical',
        target_metric_kwds=None,
        target_n_neighbors=-1,
        target_weight=0.5,
        transform_queue_size=10.0,
        transform_seed=42,
        verbose=False,
    )
    reducer = umap.UMAP(**umap_settings)
    return mapper.fit_transform(data, projection=reducer)

In [None]:
# %matplotlib
# Load every cell image, crop it to the bounding box of its non-zero pixels
# and zero-pad all crops to one common shape.
#
# Produces, all aligned by position:
#   images   - list of 2D arrays, each of shape (Mwidths, Mheights)
#   target   - integer class label of each image (0 Cancer, 1 Immuno, 2 Other)
#   widths   - row extent of each crop (axis 0)
#   heights  - column extent of each crop (axis 1)

# Root folder holding one sub-folder of images per class.
image_root = '/home/garner1/Work/dataset/cellImages'
# (sub-folder name, integer label) for each class.
class_labels = [('Cancer', 0), ('Immuno', 1), ('Other', 2)]

widths = []
heights = []
target = []
crops = []

for class_name, label in class_labels:
    directory = os.path.join(image_root, class_name)
    # Each folder is listed once and in sorted order, so the image order (and
    # therefore the alignment between `images` and `target`) is reproducible.
    for name in sorted(os.listdir(directory)):
        filename = os.path.join(directory, name)
        img = imageio.imread(filename)
        rmin, rmax, cmin, cmax = bbox(img)
        # bbox() returns inclusive indices: slice one past rmax/cmax so the
        # last non-empty row and column are kept in the crop.
        crop = img[rmin:rmax + 1, cmin:cmax + 1]
        crops.append(crop)
        widths.append(crop.shape[0])
        heights.append(crop.shape[1])
        target.append(label)

if not crops:
    raise ValueError('no images found under ' + image_root)

# Common frame: the largest row extent and the largest column extent.
Mwidths = int(max(widths))
Mheights = int(max(heights))

# Zero-pad every crop at the bottom/right up to the common frame.
images = [
    np.pad(crop,
           ((0, Mwidths - crop.shape[0]), (0, Mheights - crop.shape[1])),
           'constant', constant_values=0)
    for crop in crops
]

print(len(images))

# Optional preview of the padded images (kept disabled):
# sns.set(style='white', context='notebook', rc={'figure.figsize':(168,120)})
# fig, ax_array = plt.subplots(30,20)
# axes = ax_array.flatten()
# for i, ax in enumerate(axes):
#     ax.imshow(images[i], cmap='gray_r')
# plt.setp(axes, xticks=[], yticks=[], frame_on=False)
# plt.tight_layout(h_pad=0.5, w_pad=0.01)

In [None]:
# Feature matrix: one row per image, one column per pixel of the padded frame.
data = np.asarray([image.flatten() for image in images], dtype=float)

# Initialize the Kepler Mapper instance (embedding() uses this global).
mapper = km.KeplerMapper(verbose=1)

# UMAP projection, applied in successive stages: 5 -> 4 -> 3 dimensions.
projected_data = embedding(data,dim=5)
projected_data = embedding(projected_data,dim=4)
projected_data = embedding(projected_data,dim=3)

# 3D visualization of the UMAP projection.
# Configure Plotly to be rendered inline in the notebook.
plotly.offline.init_notebook_mode()
# Configure the trace: one marker per image, coloured by class label.
trace = go.Scatter3d(
    x=projected_data[:,0],
    y=projected_data[:,1],
    z=projected_data[:,2],
    mode='markers',
    marker=dict(color=np.asarray(target),size=5, opacity=1)
)
# Configure the layout.
layout = go.Layout(
    margin={'l': 0, 'r': 0, 'b': 0, 't': 0}
)
# The trace list is passed directly instead of being stored in `data`:
# `data` must keep holding the feature matrix, because the Mapper cell
# further down passes it to mapper.map().
plot_figure = go.Figure(data=[trace], layout=layout)
# Render the plot.
plotly.offline.iplot(plot_figure)

# Final UMAP stage: 3 -> 2 dimensions. From here on projected_data is 2D.
projected_data = embedding(projected_data,dim=2)

# 2D visualization of the embedding, coloured by class label.
sns.set(style='white', context='notebook', rc={'figure.figsize':(20,10)})
plt.scatter(projected_data[:, 0], projected_data[:, 1], c=np.asarray(target), cmap='Spectral', s=50)
plt.gca().set_aspect('equal', 'datalim')
plt.colorbar(boundaries=np.arange(4)-0.5).set_ticks(np.arange(3))
plt.title('UMAP projection of the cell dataset', fontsize=24);

In [None]:
# Create the dictionary called 'graph' with nodes, edges and meta-information.
n_cubes = 10
# A single-cluster KMeans is applied inside every cover element.
cube_clusterer = KMeans(n_clusters=1,random_state=3471)
cube_cover = Cover(n_cubes=n_cubes, perc_overlap=0.3)
# Alternative clusterers kept for reference:
# DBSCAN(eps=0.9, min_samples=3)
# KMeans(n_clusters=2,random_state=3471)
# MeanShift(bandwidth=None, seeds=None, bin_seeding=False,min_bin_freq=1, cluster_all=True, n_jobs=-1),
graph = mapper.map(projected_data, data,
                   clusterer=cube_clusterer,
                   cover=cube_cover)

# Visualize the mapper graph (left as the last expression of the cell so the
# notebook displays its return value).
mapper.visualize(graph)

In [None]:
# 2D scatter of the current projection, one point per image coloured by class.
class_colors = np.asarray(target)
x_coords = projected_data[:, 0]
y_coords = projected_data[:, 1]

sns.set(style='white', context='notebook', rc={'figure.figsize':(20,10)})
plt.scatter(x_coords, y_coords, c=class_colors, cmap='Spectral', s=50)
plt.gca().set_aspect('equal', 'datalim')
# One colour band per class label (0, 1, 2), with a tick centred on each.
class_bar = plt.colorbar(boundaries=np.arange(4)-0.5)
class_bar.set_ticks(np.arange(3))
plt.title('UMAP projection of the cell dataset', fontsize=24);

In [None]:
# Configure Plotly to be rendered inline in the notebook.
plotly.offline.init_notebook_mode()

# projected_data has only two columns once the final 2D UMAP stage has run,
# so indexing column 2 unconditionally raises IndexError. Use the third
# component when it exists and fall back to a flat z = 0 plane otherwise.
coords = np.asarray(projected_data)
if coords.shape[1] > 2:
    z_values = coords[:, 2]
else:
    z_values = np.zeros(coords.shape[0])

# Configure the trace: one marker per image, coloured by class label.
trace = go.Scatter3d(
    x=coords[:,0],
    y=coords[:,1],
    z=z_values,
    mode='markers',
    marker=dict(color=np.asarray(target),size=5, opacity=1)
)

# Configure the layout.
layout = go.Layout(
    margin={'l': 0, 'r': 0, 'b': 0, 't': 0}
)

# Pass the trace list directly rather than rebinding `data`, which holds the
# feature matrix used by mapper.map() and must survive a re-run of that cell.
plot_figure = go.Figure(data=[trace], layout=layout)

# Render the plot.
plotly.offline.iplot(plot_figure)

In [None]:
# Build the network: convert the Mapper graph into a NetworkX graph.
net = km.to_networkx(graph)
# Print a short summary of the graph. print() is called as a function (the
# Python 2 statement form is a syntax error in Python 3). nx.info() was
# removed in NetworkX 3.0, where printing the graph gives the summary instead.
graph_info = getattr(nx, 'info', None)
print(graph_info(net) if graph_info is not None else net)

In [None]:
# Create the network layout reused by the visualizations below.
spring_pos = nx.spring_layout(net)

# Draw the bare graph: small unlabeled nodes, no axes.
plt.figure()
plt.axis("off")
layout_style = dict(pos=spring_pos, with_labels=False, node_size=5)
nx.draw_networkx(net, **layout_style)

In [None]:
# Partition the network: `parts` maps each node to the id of its module.
parts = best_partition(net)
# Module id of every node, in the graph's node order, used as node colour.
values = list(map(parts.get, net.nodes()))

plt.figure()
plt.axis("off")
module_style = dict(
    pos=spring_pos,
    cmap=plt.get_cmap("jet"),
    node_color=values,
    node_size=10,
    with_labels=False,
)
nx.draw_networkx(net, **module_style)

In [None]:
# Get all the nodes in a module: `groups` is a list with one entry per module,
# each entry being the list of nodes assigned to that module.
nodes_by_module = {}
for node, module in parts.items():
    # Nodes are appended in the iteration order of `parts`.
    nodes_by_module.setdefault(module, []).append(node)

# One list per distinct module id, in the iteration order of the id set.
groups = [nodes_by_module[module] for module in set(parts.values())]

In [None]:
# Plot the spatial projection of the modules: for every module, draw a grid
# with the images of all its members and save it as '<module index>.png'.
sns.set(style='white', context='notebook', rc={'figure.figsize':(80,60)})
n_cols = 11
min_rows = 8
for group, module_nodes in enumerate(groups):
    print(group)
    # graph['nodes'][node] holds the members of a Mapper node (used below as
    # indices into `images`). Nodes overlap, so members are de-duplicated;
    # sorting makes the layout of the grid reproducible.
    members = sorted({item for node in module_nodes for item in graph['nodes'][node]})

    # Keep the 8 x 11 grid for small modules, but add rows when a module has
    # more members than the grid can hold so that no image is silently dropped.
    n_rows = max(min_rows, -(-len(members) // n_cols))
    fig, ax_array = plt.subplots(n_rows, n_cols)
    axes = ax_array.flatten()
    # Fill the grid in order; surplus axes stay empty.
    for ax, member in zip(axes, members):
        ax.imshow(images[member], cmap='gray_r')
    plt.setp(axes, xticks=[], yticks=[], frame_on=True)
    plt.tight_layout(h_pad=0.5, w_pad=0.01)
    fig.savefig(str(group)+'.png')
    # Show the figure, then release it: these figures are very large and
    # would otherwise accumulate in memory, one per module.
    plt.show()
    plt.close(fig)