## Read from Da-TACOS feature-fused SSMs and compute shape DNA and hierarchical structure decomposition

### Library importing

In [11]:
import numpy as np
from scipy.spatial.distance import pdist, squareform
from scipy.interpolate import interp2d
from scipy.sparse.csgraph import laplacian
from scipy.spatial.distance import directed_hausdorff
from scipy.cluster import hierarchy
from scipy.linalg import eigh
from scipy.ndimage import median_filter
import cv2
from seaborn import clustermap
import sklearn
import librosa
import glob
import os
import random
import json
import deepdish as dd
import matplotlib.pyplot as plt
%matplotlib inline

### Reading and computing structure

In [37]:
with open('./da-tacos_metadata/da-tacos_benchmark_subset_metadata.json') as f:
    benchmark_metadata = json.load(f)

count=0
#dict = {} #everything stored here keeping W,P hierarchy, as [shapeDNA, dist_set]
all_shapeDNA = []
all_distset = []
y = []
for W in benchmark_metadata.keys():
    #W_dict = {}
    if len(benchmark_metadata[W].keys()) > 4:
        for P in benchmark_metadata[W].keys():
            #Computations
            try:
                SSM = dd.io.load("./da-tacosSSMs/StructureLaplacian_datacos_crema_" + P + ".h5")['WFused']
            except:
                continue
            else:
                N = dd.io.load("./da-tacosSSMs/StructureLaplacian_datacos_crema_" + P + ".h5")['N']

                #Construct square matrix from flattened upper triangle
                A = np.zeros((N,N))
                iN = np.triu_indices(N) #return indices for upper-triangle of (N,N) matrix
                for i in range(len(SSM)):
                    A[iN[0][i]][iN[1][i]] = SSM[i]
                B = np.transpose(A)
                square_SSM = A+B

                #Downsample to 256X256   
                Xindex = np.linspace(0, 1, num=N)
                f = interp2d(Xindex, Xindex, square_SSM.flatten(), kind='linear')
                Xindex_ds = np.linspace(0, 1, num=256)
                SSM_ds = np.reshape(f(Xindex_ds, Xindex_ds), (256,256))

                #Compute the Laplacian
                L = laplacian(SSM_ds, normed=True)

                #Laplacian eigenvalues and eigenvectors
                evals, evecs = eigh(L)

                #Shape DNA
                shapeDNA = evals[:30]

                #Hierarchical structure
                evecs = median_filter(evecs, size=(9, 1))
                Cnorm = np.cumsum(evecs**2, axis=1)**0.5
                dist_set = []
                for k in range(2, 10): #change range here (min value 2)
                    X = evecs[:, :k] / Cnorm[:, k-1:k]
                    distance = squareform(pdist(X, metric='euclidean'))
                    dist_set.append(distance)

                #W_dict[P] = [shapeDNA, dist_set]
                all_shapeDNA.append(shapeDNA)
                all_distset.append(dist_set)
                y.append(W)

                #plt.matshow()
                #plt.colorbar()
                #plt.show()

        count+=1       
        #dict[W] = W_dict 

        if (count >= 10):
            break

### Shape DNA

In [38]:
from sklearn.manifold import TSNE
X2 = TSNE(n_components=2).fit_transform(all_shapeDNA)

In [39]:
#altair
import vega
import altair as alt
import pandas as pd

x_axis = []
y_axis = []

for i in range(len(X2)):
    x_axis.append(X2[i][0])
    y_axis.append(X2[i][1])
    
print(len(x_axis))
print(len(y_axis))
print((y))
    
df1 = pd.DataFrame({'x': np.asarray(x_axis), 'y': np.asarray(y_axis), 'Locations': np.asarray(y)})
viz1 = alt.Chart(df1).mark_circle(opacity=0.6, size=60).encode(x='x', y='y', color='Locations').interactive()
display(viz1)

130
130
['W_3964', 'W_3964', 'W_3964', 'W_3964', 'W_3964', 'W_3964', 'W_3964', 'W_3964', 'W_3964', 'W_3964', 'W_3964', 'W_3964', 'W_3964', 'W_144354', 'W_144354', 'W_144354', 'W_144354', 'W_144354', 'W_144354', 'W_144354', 'W_144354', 'W_144354', 'W_144354', 'W_144354', 'W_144354', 'W_144354', 'W_8339', 'W_8339', 'W_8339', 'W_8339', 'W_8339', 'W_8339', 'W_8339', 'W_8339', 'W_8339', 'W_8339', 'W_8339', 'W_8339', 'W_8339', 'W_12542', 'W_12542', 'W_12542', 'W_12542', 'W_12542', 'W_12542', 'W_12542', 'W_12542', 'W_12542', 'W_12542', 'W_12542', 'W_12542', 'W_12542', 'W_6163', 'W_6163', 'W_6163', 'W_6163', 'W_6163', 'W_6163', 'W_6163', 'W_6163', 'W_6163', 'W_6163', 'W_6163', 'W_6163', 'W_6163', 'W_6753', 'W_6753', 'W_6753', 'W_6753', 'W_6753', 'W_6753', 'W_6753', 'W_6753', 'W_6753', 'W_6753', 'W_6753', 'W_6753', 'W_6753', 'W_112640', 'W_112640', 'W_112640', 'W_112640', 'W_112640', 'W_112640', 'W_112640', 'W_112640', 'W_112640', 'W_112640', 'W_112640', 'W_112640', 'W_112640', 'W_113724', 'W_1

### Hierarchical decomposition

In [42]:
all_flat_set = []
for distset in all_distset:
    flat_set = []
    for indivset in distset:
        flat = indivset.flatten()
        flat_set += flat.tolist()
    all_flat_set.append(flat_set)
print(len(all_flat_set))

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [49]:
from sklearn.manifold import TSNE
X3 = TSNE(n_components=2).fit_transform(all_flat_set)

In [50]:
#altair
import vega
import altair as alt
import pandas as pd

x3_axis = []
y3_axis = []

for i in range(len(X3)):
    x3_axis.append(X3[i][0])
    y3_axis.append(X3[i][1])
    
print(len(x3_axis))
print(len(y3_axis))
print((y))
    
df2 = pd.DataFrame({'x': np.asarray(x3_axis), 'y': np.asarray(y3_axis), 'Locations': np.asarray(y)})
viz2 = alt.Chart(df2).mark_circle(opacity=0.6, size=60).encode(x='x', y='y', color='Locations').interactive()
display(viz2)

130
130
['W_3964', 'W_3964', 'W_3964', 'W_3964', 'W_3964', 'W_3964', 'W_3964', 'W_3964', 'W_3964', 'W_3964', 'W_3964', 'W_3964', 'W_3964', 'W_144354', 'W_144354', 'W_144354', 'W_144354', 'W_144354', 'W_144354', 'W_144354', 'W_144354', 'W_144354', 'W_144354', 'W_144354', 'W_144354', 'W_144354', 'W_8339', 'W_8339', 'W_8339', 'W_8339', 'W_8339', 'W_8339', 'W_8339', 'W_8339', 'W_8339', 'W_8339', 'W_8339', 'W_8339', 'W_8339', 'W_12542', 'W_12542', 'W_12542', 'W_12542', 'W_12542', 'W_12542', 'W_12542', 'W_12542', 'W_12542', 'W_12542', 'W_12542', 'W_12542', 'W_12542', 'W_6163', 'W_6163', 'W_6163', 'W_6163', 'W_6163', 'W_6163', 'W_6163', 'W_6163', 'W_6163', 'W_6163', 'W_6163', 'W_6163', 'W_6163', 'W_6753', 'W_6753', 'W_6753', 'W_6753', 'W_6753', 'W_6753', 'W_6753', 'W_6753', 'W_6753', 'W_6753', 'W_6753', 'W_6753', 'W_6753', 'W_112640', 'W_112640', 'W_112640', 'W_112640', 'W_112640', 'W_112640', 'W_112640', 'W_112640', 'W_112640', 'W_112640', 'W_112640', 'W_112640', 'W_112640', 'W_113724', 'W_1