In [1]:
import glob
import multiprocessing
import os

import matplotlib
import numpy as np
import pandas as pd
import sklearn as scikit_learn
from matplotlib import pyplot as plt
from sklearn.cluster import DBSCAN
from sklearn.cluster import KMeans
from sklearn.cluster import OPTICS
from sklearn.cluster import SpectralClustering
from sklearn.decomposition import PCA
from sklearn.manifold import MDS
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler

import PersistenceImages.persistence_images as pimg

In [2]:
def get_npy_data(listfoldertypes, numsims, timestep, dimension, dimensionreduction):
    """Load serialized persistence-image data and assemble one feature matrix.

    For every prefix in ``listfoldertypes`` and every simulation index
    ``1..numsims`` the file
    ``<prefix>_<i>_persimg_serialize/<timestep zero-padded to 7 digits>.npy``
    is loaded. Each file must unpickle to a mapping whose ``"H<dimension>"``
    entry can be indexed at positions 0, 1 and 2. Those three entries are
    collected into three separate per-file lists, after passing each through
    ``np.nan_to_num``.

    Parameters
    ----------
    listfoldertypes : sequence of str
        Path prefixes of the simulation folders (everything before ``_<i>``).
    numsims : int
        Number of simulations per prefix; indices run from 1 to ``numsims``.
    timestep : int
        Timestep whose file is read; zero-padded to seven digits.
    dimension : int
        Homology dimension; selects the ``"H<dimension>"`` key.
    dimensionreduction : str
        ``"pca"``, ``"tsne"`` or ``"mds"`` applies ``perform_pca``,
        ``perform_tsne`` or ``perform_mds`` to each of the three lists
        independently. Any other value leaves the data unreduced.

    Returns
    -------
    numpy.ndarray
        The three (optionally reduced) groups joined with ``np.hstack`` in
        index order 0, 1, 2.
    """
    dimstring = "H" + str(dimension)
    timestepname = str(timestep).zfill(7) + ".npy"

    # One list per stored entry (index 0, 1, 2); each gains one item per file.
    channels = ([], [], [])

    for val in listfoldertypes:
        for i in range(1, numsims + 1):
            filepath = val + "_" + str(i) + "_persimg_serialize" + "/" + timestepname
            # SECURITY: allow_pickle=True executes pickle payloads on load.
            # Only point this at files produced by a trusted pipeline.
            singlefile = np.load(filepath, allow_pickle=True).item()
            dimdata = singlefile[dimstring]
            for channel, samples in enumerate(channels):
                samples.append(np.nan_to_num(dimdata[channel]))

    # Resolve the reducer lazily so only the requested helper is looked up.
    if dimensionreduction == "pca":
        reducer = perform_pca
    elif dimensionreduction == "tsne":
        reducer = perform_tsne
    elif dimensionreduction == "mds":
        reducer = perform_mds
    else:
        reducer = None

    if reducer is not None:
        # Each group is embedded on its own, in index order 0, 1, 2.
        channels = tuple(reducer(samples) for samples in channels)

    return np.hstack(channels)
        
def unison_shuffled_copies(listoflistshuffled, random_state=None):
    """Shuffle several equally long arrays with one shared permutation.

    Parameters
    ----------
    listoflistshuffled : sequence of numpy.ndarray
        Arrays to shuffle along their first axis. All must have the same
        length so that row ``k`` of every output still refers to the same
        sample.
    random_state : int or None, optional
        Seed for a private ``np.random.RandomState``. With the default
        ``None`` the permutation is drawn from NumPy's global random state,
        exactly as before this parameter existed.

    Returns
    -------
    list of numpy.ndarray
        New arrays, in input order, each indexed by the same permutation.

    Raises
    ------
    IndexError
        If ``listoflistshuffled`` is empty.
    ValueError
        If the arrays do not all share the length of the first one.
    """
    n = len(listoflistshuffled[0])

    # A longer array would otherwise be silently truncated by the fancy
    # indexing below, breaking the row correspondence without any error.
    for position, val in enumerate(listoflistshuffled):
        if len(val) != n:
            raise ValueError(
                "array at position %d has length %d, expected %d"
                % (position, len(val), n)
            )

    if random_state is None:
        p = np.random.permutation(n)
    else:
        p = np.random.RandomState(random_state).permutation(n)

    return [val[p] for val in listoflistshuffled]

def kmeansclust(h0data, h1data, h2data, useh0, useh1, useh2, n_clusters=3, random_state=0):
    """Cluster one of the three feature matrices with k-means.

    Exactly one matrix is used: the first whose flag is truthy, checked in
    the order ``useh0``, ``useh1``, ``useh2``.

    Parameters
    ----------
    h0data, h1data, h2data : array-like
        Candidate feature matrices; only the selected one is touched.
    useh0, useh1, useh2 : bool
        Selection flags, with priority H0 > H1 > H2.
    n_clusters : int, optional
        Number of clusters (default 3, the previously hard-coded value).
    random_state : int or None, optional
        Seed passed to ``KMeans`` (default 0, the previously hard-coded value).

    Returns
    -------
    numpy.ndarray or None
        Result of ``KMeans.fit_predict`` on the selected matrix, or ``None``
        when no flag is set.
    """
    if useh0:
        data = h0data
    elif useh1:
        data = h1data
    elif useh2:
        data = h2data
    else:
        # No matrix selected: keep the historical "return nothing" outcome.
        return None

    kmeans = KMeans(n_clusters=n_clusters, init='k-means++', max_iter=300,
                    n_init=10, random_state=random_state)
    return kmeans.fit_predict(data)
    
def opticsclust(h0data, h1data, h2data, useh0, useh1, useh2, min_samples=4):
    """Cluster one of the three feature matrices with OPTICS.

    Exactly one matrix is used: the first whose flag is truthy, checked in
    the order ``useh0``, ``useh1``, ``useh2``.

    Parameters
    ----------
    h0data, h1data, h2data : array-like
        Candidate feature matrices; only the selected one is touched.
    useh0, useh1, useh2 : bool
        Selection flags, with priority H0 > H1 > H2.
    min_samples : int, optional
        Passed to ``OPTICS`` (default 4, the previously hard-coded value).

    Returns
    -------
    numpy.ndarray or None
        ``labels_`` of the fitted estimator, or ``None`` when no flag is set.
    """
    if useh0:
        data = h0data
    elif useh1:
        data = h1data
    elif useh2:
        data = h2data
    else:
        # No matrix selected: keep the historical "return nothing" outcome.
        return None

    clusterf = OPTICS(min_samples=min_samples).fit(data)
    return clusterf.labels_
    
def dbscanclust(h0data, h1data, h2data, useh0, useh1, useh2, eps=1.36, min_samples=3):
    """Cluster one of the three feature matrices with DBSCAN.

    Exactly one matrix is used: the first whose flag is truthy, checked in
    the order ``useh0``, ``useh1``, ``useh2``.

    Parameters
    ----------
    h0data, h1data, h2data : array-like
        Candidate feature matrices; only the selected one is touched.
    useh0, useh1, useh2 : bool
        Selection flags, with priority H0 > H1 > H2.
    eps : float, optional
        Passed to ``DBSCAN`` (default 1.36, the previously hard-coded value).
    min_samples : int, optional
        Passed to ``DBSCAN`` (default 3, the previously hard-coded value).

    Returns
    -------
    numpy.ndarray or None
        ``labels_`` of the fitted estimator, or ``None`` when no flag is set.
    """
    if useh0:
        data = h0data
    elif useh1:
        data = h1data
    elif useh2:
        data = h2data
    else:
        # No matrix selected: keep the historical "return nothing" outcome.
        return None

    clusters = DBSCAN(eps=eps, min_samples=min_samples).fit(data)
    return clusters.labels_
    
def perform_pca(datatochange, n_components=5):
    """Standardize the samples and project them onto principal components.

    Parameters
    ----------
    datatochange : array-like
        Samples to reduce; handed to ``StandardScaler().fit_transform``.
    n_components : int, optional
        Number of components kept by ``PCA`` (default 5, the previously
        hard-coded value).

    Returns
    -------
    numpy.ndarray
        Output of ``PCA.fit_transform`` on the standardized data.
    """
    # Standardize first so every feature contributes on the same scale.
    x = StandardScaler().fit_transform(datatochange)
    # PCA comes from sklearn.decomposition and must be imported at file level.
    pca = PCA(n_components=n_components, svd_solver="auto")
    return pca.fit_transform(x)

def perform_tsne(datatochange, n_components=3, random_state=None):
    """Embed the samples with t-SNE.

    Parameters
    ----------
    datatochange : array-like
        Samples to embed; handed to ``TSNE.fit_transform``.
    n_components : int, optional
        Dimension of the embedding (default 3, the previously hard-coded
        value).
    random_state : int or None, optional
        Seed forwarded to ``TSNE``. The default ``None`` leaves the estimator
        unseeded, as before; pass an integer for a repeatable embedding.

    Returns
    -------
    numpy.ndarray
        The embedded samples.
    """
    embedder = TSNE(n_components=n_components, random_state=random_state)
    return embedder.fit_transform(datatochange)

def perform_mds(datatochange, n_components=5, random_state=None):
    """Embed the samples with multidimensional scaling (MDS).

    Parameters
    ----------
    datatochange : array-like
        Samples to embed; handed to ``MDS.fit_transform``.
    n_components : int, optional
        Dimension of the embedding (default 5, the previously hard-coded
        value).
    random_state : int or None, optional
        Seed forwarded to ``MDS``. The default ``None`` leaves the estimator
        unseeded, as before; pass an integer for a repeatable embedding.

    Returns
    -------
    numpy.ndarray
        The embedded samples.
    """
    embedding = MDS(n_components=n_components, random_state=random_state)
    return embedding.fit_transform(datatochange)

In [348]:
# Load the MDS-reduced features for homology dimensions 0, 1 and 2 (in that
# order): three simulation families, 5 runs each, at timestep 5000000.
vectorizedimgslist0, vectorizedimgslist1, vectorizedimgslist2 = [
    get_npy_data(["DAH_Complete_Sorting", "DAH_Lipid_Bilayer", "DAH_Two_Phase"],
                 5, 5000000, homdim, "mds")
    for homdim in (0, 1, 2)
]

# One label per loaded file, in loading order: CS1..CS5, BL1..BL5, BP1..BP5.
labels = np.array([tag + str(run) for tag in ("CS", "BL", "BP") for run in range(1, 6)])

# Shuffle features and labels with the same permutation so rows stay aligned.
totalshuffled = unison_shuffled_copies(
    [vectorizedimgslist0, vectorizedimgslist1, vectorizedimgslist2, labels]
)

In [349]:
# OPTICS on the shuffled H0 features; the H1/H2 matrices are passed but unused.
opticsclust(h0data=totalshuffled[0], h1data=totalshuffled[1], h2data=totalshuffled[2],
            useh0=True, useh1=False, useh2=False)

array([ 0,  2,  1,  1,  1,  0,  1,  2, -1,  2,  2,  0,  1,  0,  1])

In [350]:
# DBSCAN on the shuffled H0 features; the H1/H2 matrices are passed but unused.
dbscanclust(h0data=totalshuffled[0], h1data=totalshuffled[1], h2data=totalshuffled[2],
            useh0=True, useh1=False, useh2=False)

array([0, 1, 2, 2, 2, 0, 2, 1, 2, 1, 1, 0, 2, 0, 2], dtype=int64)

In [351]:
# k-means on the shuffled H0 features; the H1/H2 matrices are passed but unused.
kmeansclust(h0data=totalshuffled[0], h1data=totalshuffled[1], h2data=totalshuffled[2],
            useh0=True, useh1=False, useh2=False)

array([2, 1, 0, 0, 0, 2, 0, 1, 0, 1, 1, 2, 0, 2, 0])

In [352]:
# Shuffled sample labels; row k here corresponds to entry k of each cluster
# assignment array, because all four arrays were permuted together.
totalshuffled[3]

array(['BL3', 'BP2', 'CS2', 'CS5', 'BP5', 'BL4', 'CS3', 'BP3', 'BL5',
       'BP1', 'BP4', 'BL1', 'CS4', 'BL2', 'CS1'], dtype='<U3')