In [10]:
import os, sys
import argparse
import numpy as np
from pathlib import Path

import matplotlib.pyplot as plt
import seaborn as sns
import time
import torch
from scipy.stats import mode

from tensorboardX import SummaryWriter
import imageio
import cv2
from sklearn.decomposition import PCA, FastICA
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import Birch

### Args:
features_file = "/Users/andreachatrian/Documents/Repositories/ProstateCancer/Results/feats_epoch_.411_loss_0.20396_acc_0.95000_dice_0.93000_lr_0.0000103566.txt"
thumbnails_file = "/Users/andreachatrian/Documents/Repositories/ProstateCancer/Results/thumbnails_epoch_.411_loss_0.20396_acc_0.95000_dice_0.93000_lr_0.0000103566.npy"
sprite_file = "/Users/andreachatrian/Documents/Repositories/ProstateCancer/Results/sprite_epoch_.411_loss_0.20396_acc_0.95000_dice_0.93000_lr_0.0000103566.png"
thumbnail_size = 64
pca_reduce = 2/3
###

def create_sprite_image(images):
    """Returns a sprite image consisting of images passed as argument. Images should be count x width x height"""
    if isinstance(images, list):
        images = np.array(images)
    img_h = images.shape[1]
    img_w = images.shape[2]
    n_plots = int(np.ceil(np.sqrt(images.shape[0])))

    spriteimage = np.ones((img_h * n_plots, img_w * n_plots, 3))

    for i in range(n_plots):
        for j in range(n_plots):
            this_filter = i * n_plots + j
            if this_filter < images.shape[0]:
                this_img = images[this_filter]
                spriteimage[i * img_h:(i + 1) * img_h,
                j * img_w:(j + 1) * img_w] = this_img

    return spriteimage

# Preprocess

In [None]:
### Preprocessing and visualization ###
run_time = time.time()
with open(features_file, 'r') as feats_file:
    X = np.loadtxt(feats_file, skiprows=1)  # need to skip header row (?)
    #X = X[0:-(170 * 4 + 1), ...] # test glands are repeated
run_time = time.time() - run_time
print("X's shape is ", X.shape)
print("Run in {}s".format(run_time))

In [None]:
print(X[0:3, -4:]) # check colour and size

# Embedding

In [None]:
# Check X:
good = np.any(np.isfinite(X))
print(good)

In [None]:
# Preprocessing
X = StandardScaler().fit_transform(X)

In [7]:
# For t-SNE, PCA is recommended as a first step to reduce dimensionality
run_time = time.time()
pca = PCA(n_components=int(X.shape[1]*pca_reduce), whiten=True)  # reduce number of features
#dec = FastICA(n_components=int(X.shape[1]*pca_reduce), whiten=True, max_iter=200)
# !!! it is possible to generative negative eigenvalue and make the algorithm fail with large matrix that has determinant close to zero
# for ICA, should reduce the number of comoponents to number (rule of thumb, as many as 99% variance explanation in PCA)
pca = pca.fit(X)
X_new = pca.transform(X)
run_time = time.time() - run_time
print("X_new's shape is ", X_new.shape)
print("Run in {}s".format(run_time))

KeyboardInterrupt: 

In [None]:
sns.set(style="darkgrid")
plt.plot(pca.explained_variance_ratio_)
print(pca.explained_variance_ratio_)

cum_var = np.cumsum(pca.explained_variance_ratio_)
plt.plot(cum_var)
ninetynine = np.where(cum_var > 0.99)[0][0]
print(ninetynine)

In [23]:
# For t-SNE, PCA is recommended as a first step to reduce dimensionality
run_time = time.time()
dec = FastICA(n_components=ninetynine, whiten=True, max_iter=400)
# !!! it is possible to generative negative eigenvalue and make the algorithm fail with large matrix that has determinant close to zero
# for ICA, should reduce the number of comoponents to number (rule of thumb, as many as 99% variance explanation in PCA)
dec = dec.fit(X)
X_new = dec.transform(X)
run_time = time.time() - run_time
print("X_new's shape is ", X_new.shape)
print("Run in {}s".format(run_time))

X_new's shape is  (5267, 1373)
Run in 596.3380219936371s


In [24]:
# Open thumbnails file
with open(thumbnails_file, 'rb') as tnbs_file:
    thumbnails = np.load(tnbs_file)
    
sprite = create_sprite_image(thumbnails.astype(np.uint8).clip(0,255))

In [25]:
# t-SNE with tensorboard
log_dir = Path(features_file).parent/"projector"
try:
    os.mkdir(log_dir)
except FileExistsError:
    pass
writer = SummaryWriter(log_dir=str(log_dir))

In [26]:
X_t = torch.from_numpy(X_new)
thumbnails_t = torch.from_numpy(thumbnails).permute(0,3,1,2).type(torch.float32)  # turn to NCHW for tensorboardX embedding
print(X_t.shape)
print(thumbnails_t.shape)
writer.add_embedding(X_t, label_img=thumbnails_t)

torch.Size([5267, 1373])
torch.Size([5267, 3, 100, 100])


In [None]:
print(thumbnails.shape)
print(np.unique(thumbnails))


In [None]:
"""
import tensorflow as tf
from tensorflow.contrib.tensorboard.plugins import projector


tf_X = tf.Variable(X_new, trainable=False)
saver = tf.train.Saver([tf_X])

# based on https://www.easy-tensorflow.com/tf-tutorials/tensorboard/tb-embedding-visualization
# http://www.pinchofintelligence.com/simple-introduction-to-tensorboard-embedding-visualisation/

th_size = (100,) * 2
with tf.Session() as sess:

    sess.run(tf_X.initializer)

    writer = tf.summary.FileWriter(str(log_dir /"projector"), sess.graph)

    config = projector.ProjectorConfig()
    embedding = config.embeddings.add()
    embedding.tensor_name = 'embeddingding:0'
    embedding.sprite.image_path = str(log_dir/"projector"/sprite_file)
    projector.visualize_embeddings(summary_writer=writer, config=config)
    
    dir(embedding.sprite)
import inspect
inspect.getmembers(embedding.sprite)

"""

# Clustering

In [None]:
### Clustering
birch = Birch(threshold=0.5, branching_factor=50, n_clusters=3)
birch.fit(X_new)
labels = birch.predict(X_new)


In [None]:
import sys
sys.executable