In [1]:
import pandas as pd
import matplotlib
matplotlib.use('Qt4Agg')

import matplotlib.pyplot as plt
import numpy as np
from utils import utils

### Merge labels, coordinates and images into the same DataFrame

In [2]:
# Build the label table from the files under data/perfiles_CATA/clases/.
# NOTE(review): judging by the captured output, the helper prints each class
# with its specimen ids and returns a frame with `id` and `class` columns;
# confirm against utils.create_df_from_files.
df_labels = utils.create_df_from_files(path='data/perfiles_CATA/clases/')
df_labels

1
['AL_08C\n', 'AL_13B\n', 'AL_14B\n', 'BA115_4\n', 'BA172_3\n', 'BA191_2\n', 'BA191_3\n', 'BA191_4\n', 'BA208_2\n', 'BA208_3\n', 'BA42_2\n', 'BA43_3\n', 'BA44_1\n', 'BA45_3\n', 'BA45_4\n', 'CCCATA_9_09\n', 'CCCATA_9_13\n', 'CCCATA_9_14\n', 'GI_anf01\n', 'LP17_1\n', 'MB_01\n', 'MB_03\n', 'TU110_1\n', 'TU110_2\n', 'TU110_3\n', 'TU110_4\n', 'TU13\n', 'TU8_1\n', 'TU8_2\n', 'TU96_2\n']
2
['BA113_1\n', 'BA121_1\n', 'BA135_1\n', 'BA168_1\n', 'BA178_1\n', 'BA178_2\n', 'BA181_1\n', 'BA188_1\n', 'BA206_5\n', 'BA21_1\n', 'BA24_1\n', 'BA36a_1\n', 'BA5_3\n', 'BA82_4\n', 'BO18_1\n', 'BO18_2\n', 'BO19_2\n', 'BO5_1\n', 'CC01_1\n', 'CC01_2\n', 'CC01_3\n', 'CC015_3\n', 'CC017_4\n', 'CC02_3\n', 'CC02_5\n', 'CC02_6\n', 'CC5_617_1\n', 'EL03_4\n', 'ER05\n', 'GU008\n', 'GU041\n', 'LP10_1\n', 'LP11_2\n', 'PO027\n', 'TO1_1\n', 'TO2_1\n', 'TO4_1\n', 'TO4_6\n', 'TO4_7\n', 'TU02\n', 'TU11_1\n', 'TU44_2\n', 'TU44_3\n', 'TU84_2\n', 'TU86_1\n', 'TU96_1\n', 'TU97_4\n']
3
['BA102_2\n', 'BA110_1\n', 'BA116_1\n', 'BA11

Unnamed: 0,id,class
0,AL_08C,1
1,AL_13B,1
2,AL_14B,1
3,BA115_4,1
4,BA172_3,1
5,BA191_2,1
6,BA191_3,1
7,BA191_4,1
8,BA208_2,1
9,BA208_3,1


In [3]:
# Load the landmark table, then keep only the part of every `id` that comes
# before its first '.' (presumably a file extension; confirm against the CSV).
df = pd.read_csv('data/input_morphoJ.csv')
df['id'] = df['id'].map(lambda name: name.partition('.')[0])

In [4]:
from PIL import Image, ImageOps

def get_np(name, size=32):
    """Load a profile image, shrink it to ``size`` x ``size`` and flatten it.

    Parameters
    ----------
    name : str
        File stem; the image is read from
        ``data/perfiles_CATA/png_full/<name>.png``.
    size : int, optional
        Edge length, in pixels, of the square the image is resized to.

    Returns
    -------
    numpy.ndarray
        1-D array holding ``1 - pixels`` of the resized image.
    """
    image = Image.open('data/perfiles_CATA/png_full/' + name + '.png')
    # Image.ANTIALIAS was only an alias of Image.LANCZOS and was removed in
    # Pillow 10; LANCZOS selects the same filter on old and new releases.
    image = image.resize((size, size), Image.LANCZOS)
    # NOTE(review): if the PNG decodes to uint8 pixels, `1 - pixels` wraps
    # modulo 256 (0 -> 1, 255 -> 2) instead of producing `255 - pixels`.
    # The arithmetic is kept as it was so downstream results do not shift;
    # confirm which inversion is intended.
    inverted_image = 1 - np.asarray(image)

    return inverted_image.ravel()

In [5]:
# Pair every specimen id (the first column of `df`) with its flattened image.
# Iterating the column directly avoids building a row Series twice per
# specimen and the positional `Series[0]` lookup on a label-indexed Series,
# which pandas has deprecated.
data = [[name, get_np(name)] for name in df.iloc[:, 0]]
df_images = pd.DataFrame(data=data, columns=['id', 'Image'])

In [15]:
# Labels joined with the landmark-coordinate table.
merged_ = df_labels.merge(df, on='id', how='inner')
# Labels joined with the flattened images.
merged = df_labels.merge(df_images, on='id', how='inner')

### Check # of elements per class

In [7]:
# Print the (rows, columns) shape of each class subset, then show the shape
# of the whole merged table as the cell output.
grouped = merged.groupby('class')
for name, group in grouped:
    print(name, group.shape)
merged.shape

1 (30, 3)
2 (47, 3)
3 (75, 3)
4 (10, 3)
5 (56, 3)
6 (52, 3)
7 (50, 3)
8 (293, 3)
9 (22, 3)
10 (372, 3)
11 (125, 3)


(1132, 3)

### Plot t-SNE of raw images

In [16]:
%matplotlib notebook
plt.style.use('ggplot')
import matplotlib.pyplot as plt
from time import time
from utils import utils
from sklearn import manifold

id_ =  merged['id']
class_ = merged['class']
#merged_tsne_2d = merged.drop(['id', 'class'], axis=1)

print("Computing t-SNE embedding of the 11 classes of  vessel")
tsne = manifold.TSNE(n_components=2, init='pca', random_state=0)
#t0 = time()
#print(merged.values)
X_tsne = tsne.fit_transform(np.array(merged['Image'].tolist()))
utils.plot_embedding(X_tsne, merged_,
               "t-SNE embedding of the 11 classes of vessels")

plt.grid()
plt.show()


Computing t-SNE embedding of the 11 classes of  vessel


<IPython.core.display.Javascript object>

### Plot the t-SNE embedding in 3D with images

In [17]:
from sklearn import manifold

# Three-component t-SNE of the flattened images.
# NOTE: this overwrites the X_tsne produced by the 2-D cell; the 3-D plot and
# the KMeans cell read the third column, so they need this version.
tsne = manifold.TSNE(n_components=3, init='pca', random_state=0)
X_tsne = tsne.fit_transform(np.array(merged['Image'].tolist()))

In [18]:
# Draw a single 3-D view at azimuth 40. The commented-out loop header below is
# presumably what remains of a sweep over azimuths (0 to 355 in steps of 5).
#for ii in range(0, 360, 5):
utils.plot_tsne_3D(X_tsne, merged, azim=40)
    

<IPython.core.display.Javascript object>

### PCA

In [19]:
from sklearn.decomposition import PCA # using randomized Singular Value Decomposition

# Identifiers and labels, kept as Series aligned with the rows of `merged`.
id_ =  merged['id']
class_ = merged['class']
# `merged` is intentionally left untouched. Only its 'Image' column feeds the
# PCA, and dropping 'id'/'class' from it made this cell (and every earlier cell
# that reads those columns) fail with a KeyError when run a second time.
Xp = PCA(svd_solver='randomized', n_components=2, random_state=1).fit_transform(np.array(merged['Image'].tolist()))
Xp

array([[-205.75711109,  289.87597641],
       [-223.97536348,  247.33984032],
       [-166.35596739,   80.97645153],
       ...,
       [ 618.25075413, -367.28096557],
       [ 641.13850946,   16.59624255],
       [ 290.13366804,  364.87242287]])

In [20]:
%matplotlib notebook
pca_df = pd.DataFrame(Xp, columns=['x', 'y'])
pca_df = pd.concat([id_, class_, pca_df], axis=1)
pca_df

groups = pca_df.groupby('class')

# Plot
fig, ax = plt.subplots()
ax.margins(0.05) # Optional, just adds 5% padding to the autoscaling
for name, group in groups:
    if name in range(12): # 11, 10 y 9
        ax.plot(group.x, group.y, marker='o', linestyle='', ms=5, label=name, alpha=0.5)
        #print(group)
        #for i, row in group.iterrows():
            #print(row.id)
            #ax.annotate(row.id, (row.x, row.y), alpha=0.5)

#ax.set_xlim([-7000, 50000])
#ax.set_ylim([-3000, 6000])
#ax.legend()
plt.show()

<IPython.core.display.Javascript object>

### KMEANS clustering for geometric vessel clusters

In [21]:
from sklearn.cluster import KMeans
from matplotlib import offsetbox
import PIL
from PIL import Image
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.mplot3d import proj3d

# Single source of truth for the cluster count; it drives both the model and
# the normalisation of labels onto the colour map.
N_CLUSTERS = 11

# Cluster the embedding held in X_tsne. Its third column is plotted below, so
# X_tsne must be the three-component embedding.
kmeans_model = KMeans(n_clusters=N_CLUSTERS, random_state=42).fit(X_tsne)
labels = kmeans_model.predict(X_tsne)

labels_unique = np.unique(labels)
n_clusters_ = len(labels_unique)

print("number of estimated clusters : %d" % n_clusters_)
fig = plt.figure(figsize=(20, 20))
ax = fig.add_subplot(111, projection=Axes3D.name)
ax.view_init(elev=30., azim=105)
# One vectorised scatter call instead of one call (and one artist) per point.
# `c` is an (n, 4) RGBA array, which is unambiguous, whereas a single RGBA
# tuple per call makes matplotlib warn that it looks like a value sequence.
# depthshade=False keeps the flat colouring the one-point-per-call version had.
ax.scatter(X_tsne[:, 0], X_tsne[:, 1], X_tsne[:, 2],
           c=plt.cm.inferno(labels / float(N_CLUSTERS)), s=80, depthshade=False)

ax.set_xlabel('X Label')
ax.set_ylabel('Y Label')
ax.set_zlabel('Z Label')
plt.title('Kmeans over the 11 classes of vessels')
plt.show()


number of estimated clusters : 11


<IPython.core.display.Javascript object>

### Procrustes method

In [22]:
class Shape(object):
    """Landmark configuration that can be Procrustes-aligned onto another one.

    Attributes
    ----------
    xy : numpy.ndarray
        Landmark coordinates as floats, one landmark per row.
    nr_landmarks : int
        Number of rows of ``xy``.
    name : object
        Identifier supplied by the caller.
    centroid : numpy.ndarray
        Column-wise mean of ``xy``.
    """

    def __init__(self, landmarks, name):
        """Store ``landmarks`` as a float array and cache its centroid."""
        # The builtin float replaces np.float, an alias NumPy removed in 1.24.
        self.xy = np.array(landmarks, dtype=float)
        # xy holds one landmark per row (align_shape and plot_shape both index
        # it as a 2-D array), so the row count is the landmark count. The
        # former `shape[0] / 2` produced a float that was half of it.
        self.nr_landmarks = self.xy.shape[0]
        self.name = name
        self.centroid = self.get_centroid()

    def load_shape(self, shape):
        """Placeholder; currently does nothing."""
        pass

    def translate_shape(self, direction):
        """Return a 3x3 homogeneous translation matrix for ``direction``.

        Only the first two components of ``direction`` are used. The shape
        itself is not modified.
        """
        T = np.identity(3)
        T[:2, 2] = direction[:2]

        return T

    def get_centroid(self):
        """Return the column-wise mean of ``xy``."""
        return self.xy.mean(0)

    def align_shape(self, mean_shape, scaling=True, reflection='best'):
        """Procrustes-fit this shape onto ``mean_shape`` and store the result.

        ``self.xy`` is replaced by the transformed coordinates and
        ``self.centroid`` is recomputed from them, so the method can be called
        repeatedly.

        Parameters
        ----------
        mean_shape : Shape
            Reference shape; its attributes are only read. It needs as many
            rows as this shape and at least as many columns.
        scaling : bool, optional
            When True (default) the optimal scale factor is applied; when
            False only rotation/reflection and translation are applied.
        reflection : {'best', True, False}, optional
            'best' (default) keeps whichever orthogonal transform the SVD
            yields. True / False force the solution to contain / not contain
            a reflection.

        Returns
        -------
        dict
            ``'rotation'``, ``'scale'`` and ``'translation'`` of the fitted
            transform, plus ``'distance'`` (the standardised residual).
        """
        n, m = mean_shape.xy.shape
        ny, my = self.xy.shape

        # Centre both configurations (the subtractions create new arrays, so
        # the in-place divisions below never touch the stored coordinates).
        X0 = mean_shape.xy - mean_shape.centroid
        Y0 = self.xy - self.centroid

        ssX = (X0**2.).sum()
        ssY = (Y0**2.).sum()

        # centred Frobenius norm
        normX = np.sqrt(ssX)
        normY = np.sqrt(ssY)

        # scale to equal (unit) norm
        X0 /= normX
        Y0 /= normY

        if my < m:
            # Pad with zero columns up to the reference dimensionality.
            # np.zeros takes the shape as one tuple and the padding is added
            # along axis 1; the previous call raised a TypeError.
            Y0 = np.concatenate((Y0, np.zeros((n, m - my))), 1)

        # optimum rotation matrix of Y
        A = np.dot(X0.T, Y0)
        U, s, Vt = np.linalg.svd(A, full_matrices=False)
        V = Vt.T
        T = np.dot(V, U.T)

        # `!=` compares values; `is not` against a string literal only
        # worked through interning and triggers a SyntaxWarning.
        if reflection != 'best':
            # does the current solution use a reflection?
            have_reflection = np.linalg.det(T) < 0
            # if that's not what was specified, force another reflection
            if reflection != have_reflection:
                V[:, -1] *= -1
                s[-1] *= -1
                T = np.dot(V, U.T)

        traceTA = s.sum()

        if scaling:
            # optimum scaling of Y
            b = traceTA * normX / normY
            # standardised distance between X and b*Y*T + c
            d = 1 - traceTA**2
            # transformed coords
            Z = normX*traceTA*np.dot(Y0, T) + mean_shape.centroid
        else:
            b = 1
            d = 1 + ssY/ssX - 2 * traceTA * normY / normX
            Z = normY*np.dot(Y0, T) + mean_shape.centroid

        # transformation matrix
        if my < m:
            T = T[:my, :]
        # Uses the centroid from before the alignment, so it must be computed
        # before the centroid is refreshed below.
        c = mean_shape.centroid - b*np.dot(self.centroid, T)

        self.xy = Z
        # Keep the cached centroid in step with the new coordinates; a stale
        # one makes any later alignment centre the shape incorrectly.
        self.centroid = self.get_centroid()

        return {'rotation': T, 'scale': b, 'translation': c, 'distance': d}

    def plot_shape(self, color=''):
        """Plot the landmarks as circle markers with ``plt.plot``.

        Column 1 of ``xy`` is used for the horizontal axis and column 0 for
        the vertical axis. ``color`` is appended to the ``'o'`` format
        string. Returns whatever ``plt.plot`` returns.
        """
        return plt.plot(self.xy[:, 1], self.xy[:, 0], 'o' + color)

In [23]:
plt.style.use('ggplot')

# Each row of `df` is one specimen: the first column is its identifier and the
# remaining 200 values are reshaped into 100 (row, 2) landmark coordinates.
# `.iloc` makes the positional access explicit; plain `row[0]` on a
# label-indexed Series is deprecated in pandas.
N_LANDMARKS = 100
my_shapes = [
    Shape(row.iloc[1:].values.reshape((N_LANDMARKS, 2)), row.iloc[0])
    for _, row in df.iterrows()
]

# Align every shape onto the first one.
for shape in my_shapes:
    shape.align_shape(my_shapes[0])

# Draw on a fresh figure. plot_shape() uses the current pyplot axes, which is
# `ax` right after plt.subplots(). The earlier `plt.axes()` call adds a new,
# empty Axes on current Matplotlib rather than returning the plotted one.
fig, ax = plt.subplots()
for shape in my_shapes[:20]:
    shape.plot_shape()

ax.set_aspect('equal', 'datalim')
ax.invert_yaxis()
plt.show()

<IPython.core.display.Javascript object>



In [None]:
# One row per shape: its coordinate array flattened to 200 values.
# The frame is built in a single call instead of being enlarged one row at a
# time with `.loc`, which re-allocates on every insertion.
df_procrustes = pd.DataFrame(
    [shape.xy.reshape((200,)) for shape in my_shapes],
    columns=range(200),
)
df_procrustes.to_csv('data/procrustes_coordinates_homemade.txt', index=False)