## Read from Da-TACOS feature-fused SSMs and compute shape DNA and hierarchical structure decomposition

### Library importing

In [1]:
import numpy as np
from scipy.spatial.distance import pdist, squareform
from scipy.interpolate import interp2d
from scipy.sparse.csgraph import laplacian
from scipy.spatial.distance import directed_hausdorff
from scipy.cluster import hierarchy
from scipy.linalg import eigh
from scipy.ndimage import median_filter
import cv2
from seaborn import clustermap
import sklearn
import librosa
import glob
import os
import random
import json
import deepdish as dd
import matplotlib.pyplot as plt
import dill
%matplotlib inline

In [None]:
dill.dumb_session('shapeset.db')

### Reading and computing structure

In [2]:
with open('./da-tacos_metadata/da-tacos_benchmark_subset_metadata.json') as f:
    benchmark_metadata = json.load(f)

count=0
#dict = {} #everything stored here keeping W,P hierarchy, as [shapeDNA, dist_set]
all_shapeDNA = []
all_distset = []
all_WP = []
y = []
for W in benchmark_metadata.keys():
    #W_dict = {}
    if len(benchmark_metadata[W].keys()) > 4:
        for P in benchmark_metadata[W].keys():
            #Computations
            try:
                SSM = dd.io.load("./da-tacosSSMs/StructureLaplacian_datacos_crema_" + P + ".h5")['WFused']
            except:
                continue
            else:
                N = dd.io.load("./da-tacosSSMs/StructureLaplacian_datacos_crema_" + P + ".h5")['N']

                #Construct square matrix from flattened upper triangle
                A = np.zeros((N,N))
                iN = np.triu_indices(N) #return indices for upper-triangle of (N,N) matrix
                for i in range(len(SSM)):
                    A[iN[0][i]][iN[1][i]] = SSM[i]
                B = np.transpose(A)
                square_SSM = A+B

                #Downsample to 256X256   
                Xindex = np.linspace(0, 1, num=N)
                f = interp2d(Xindex, Xindex, square_SSM.flatten(), kind='linear')
                Xindex_ds = np.linspace(0, 1, num=256)
                SSM_ds = np.reshape(f(Xindex_ds, Xindex_ds), (256,256))

                #Compute the Laplacian
                L = laplacian(SSM_ds, normed=True)

                #Laplacian eigenvalues and eigenvectors
                evals, evecs = eigh(L)

                #Shape DNA
                shapeDNA = evals[:30]

                #Hierarchical structure
                evecs = median_filter(evecs, size=(9, 1))
                Cnorm = np.cumsum(evecs**2, axis=1)**0.5
                dist_set = []
                for k in range(2, 10): #change range here (min value 2)
                    X = evecs[:, :k] / Cnorm[:, k-1:k]
                    distance = squareform(pdist(X, metric='euclidean'))
                    dist_set.append(distance)

                #W_dict[P] = [shapeDNA, dist_set]
                all_shapeDNA.append(shapeDNA)
                all_distset.append(dist_set)
                y.append(W)
                
                #append W and P
                all_WP.append([W, P])
                
                #plt.matshow()
                #plt.colorbar()
                #plt.show()

            count+=1  
            print(count, end=", ")
            if (count >= 1000):
                break
        if (count >= 1000):
            break
        #dict[W] = W_dict 

        

1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222

1552, 1553, 1554, 1555, 1556, 1557, 1558, 1559, 1560, 1561, 1562, 1563, 1564, 1565, 1566, 1567, 1568, 1569, 1570, 1571, 1572, 1573, 1574, 1575, 1576, 1577, 1578, 1579, 1580, 1581, 1582, 1583, 1584, 1585, 1586, 1587, 1588, 1589, 1590, 1591, 1592, 1593, 1594, 1595, 1596, 1597, 1598, 1599, 1600, 1601, 1602, 1603, 1604, 1605, 1606, 1607, 1608, 1609, 1610, 1611, 1612, 1613, 1614, 1615, 1616, 1617, 1618, 1619, 1620, 1621, 1622, 1623, 1624, 1625, 1626, 1627, 1628, 1629, 1630, 1631, 1632, 1633, 1634, 1635, 1636, 1637, 1638, 1639, 1640, 1641, 1642, 1643, 1644, 1645, 1646, 1647, 1648, 1649, 1650, 1651, 1652, 1653, 1654, 1655, 1656, 1657, 1658, 1659, 1660, 1661, 1662, 1663, 1664, 1665, 1666, 1667, 1668, 1669, 1670, 1671, 1672, 1673, 1674, 1675, 1676, 1677, 1678, 1679, 1680, 1681, 1682, 1683, 1684, 1685, 1686, 1687, 1688, 1689, 1690, 1691, 1692, 1693, 1694, 1695, 1696, 1697, 1698, 1699, 1700, 1701, 1702, 1703, 1704, 1705, 1706, 1707, 1708, 1709, 1710, 1711, 1712, 1713, 1714, 1715, 1716, 1717, 1718

2919, 2920, 2921, 2922, 2923, 2924, 2925, 2926, 2927, 2928, 2929, 2930, 2931, 2932, 2933, 2934, 2935, 2936, 2937, 2938, 2939, 2940, 2941, 2942, 2943, 2944, 2945, 2946, 2947, 2948, 2949, 2950, 2951, 2952, 2953, 2954, 2955, 2956, 2957, 2958, 2959, 2960, 2961, 2962, 2963, 2964, 2965, 2966, 2967, 2968, 2969, 2970, 2971, 2972, 2973, 2974, 2975, 2976, 2977, 2978, 2979, 2980, 2981, 2982, 2983, 2984, 2985, 2986, 2987, 2988, 2989, 2990, 2991, 2992, 2993, 2994, 2995, 2996, 2997, 2998, 2999, 3000, 3001, 3002, 3003, 3004, 3005, 3006, 3007, 3008, 3009, 3010, 3011, 3012, 3013, 3014, 3015, 3016, 3017, 3018, 3019, 3020, 3021, 3022, 3023, 3024, 3025, 3026, 3027, 3028, 3029, 3030, 3031, 3032, 3033, 3034, 3035, 3036, 3037, 3038, 3039, 3040, 3041, 3042, 3043, 3044, 3045, 3046, 3047, 3048, 3049, 3050, 3051, 3052, 3053, 3054, 3055, 3056, 3057, 3058, 3059, 3060, 3061, 3062, 3063, 3064, 3065, 3066, 3067, 3068, 3069, 3070, 3071, 3072, 3073, 3074, 3075, 3076, 3077, 3078, 3079, 3080, 3081, 3082, 3083, 3084, 3085

4286, 4287, 4288, 4289, 4290, 4291, 4292, 4293, 4294, 4295, 4296, 4297, 4298, 4299, 4300, 4301, 4302, 4303, 4304, 4305, 4306, 4307, 4308, 4309, 4310, 4311, 4312, 4313, 4314, 4315, 4316, 4317, 4318, 4319, 4320, 4321, 4322, 4323, 4324, 4325, 4326, 4327, 4328, 4329, 4330, 4331, 4332, 4333, 4334, 4335, 4336, 4337, 4338, 4339, 4340, 4341, 4342, 4343, 4344, 4345, 4346, 4347, 4348, 4349, 4350, 4351, 4352, 4353, 4354, 4355, 4356, 4357, 4358, 4359, 4360, 4361, 4362, 4363, 4364, 4365, 4366, 4367, 4368, 4369, 4370, 4371, 4372, 4373, 4374, 4375, 4376, 4377, 4378, 4379, 4380, 4381, 4382, 4383, 4384, 4385, 4386, 4387, 4388, 4389, 4390, 4391, 4392, 4393, 4394, 4395, 4396, 4397, 4398, 4399, 4400, 4401, 4402, 4403, 4404, 4405, 4406, 4407, 4408, 4409, 4410, 4411, 4412, 4413, 4414, 4415, 4416, 4417, 4418, 4419, 4420, 4421, 4422, 4423, 4424, 4425, 4426, 4427, 4428, 4429, 4430, 4431, 4432, 4433, 4434, 4435, 4436, 4437, 4438, 4439, 4440, 4441, 4442, 4443, 4444, 4445, 4446, 4447, 4448, 4449, 4450, 4451, 4452

KeyboardInterrupt: 

In [3]:
print(len(all_distset))

5472


### Shape DNA

In [5]:
from sklearn.manifold import TSNE
X2 = TSNE(n_components=2).fit_transform(all_shapeDNA)

In [8]:
#altair
import vega
import altair as alt
import pandas as pd

x_axis = []
y_axis = []

for i in range(len(X2)):
    x_axis.append(X2[i][0])
    y_axis.append(X2[i][1])
    
print(len(x_axis))
print(len(y_axis))
print((y))
    
df1 = pd.DataFrame({'x': np.asarray(x_axis), 'y': np.asarray(y_axis), 'Locations': np.asarray(y)})
viz1 = alt.Chart(df1).mark_circle(opacity=0.6, size=60).encode(x='x', y='y', color='Locations').interactive()
display(viz1)

4000
4000
['W_3964', 'W_3964', 'W_3964', 'W_3964', 'W_3964', 'W_3964', 'W_3964', 'W_3964', 'W_3964', 'W_3964', 'W_3964', 'W_3964', 'W_3964', 'W_144354', 'W_144354', 'W_144354', 'W_144354', 'W_144354', 'W_144354', 'W_144354', 'W_144354', 'W_144354', 'W_144354', 'W_144354', 'W_144354', 'W_144354', 'W_8339', 'W_8339', 'W_8339', 'W_8339', 'W_8339', 'W_8339', 'W_8339', 'W_8339', 'W_8339', 'W_8339', 'W_8339', 'W_8339', 'W_8339', 'W_12542', 'W_12542', 'W_12542', 'W_12542', 'W_12542', 'W_12542', 'W_12542', 'W_12542', 'W_12542', 'W_12542', 'W_12542', 'W_12542', 'W_12542', 'W_6163', 'W_6163', 'W_6163', 'W_6163', 'W_6163', 'W_6163', 'W_6163', 'W_6163', 'W_6163', 'W_6163', 'W_6163', 'W_6163', 'W_6163', 'W_6753', 'W_6753', 'W_6753', 'W_6753', 'W_6753', 'W_6753', 'W_6753', 'W_6753', 'W_6753', 'W_6753', 'W_6753', 'W_6753', 'W_6753', 'W_112640', 'W_112640', 'W_112640', 'W_112640', 'W_112640', 'W_112640', 'W_112640', 'W_112640', 'W_112640', 'W_112640', 'W_112640', 'W_112640', 'W_112640', 'W_113724', 'W

### Flatten hierarchical decomposition

In [42]:
all_flat_set = []
for distset in all_distset:
    flat_set = []
    for indivset in distset:
        flat = indivset.flatten()
        flat_set += flat.tolist()
    all_flat_set.append(flat_set)
print(len(all_flat_set))

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [49]:
from sklearn.manifold import TSNE
X3 = TSNE(n_components=2).fit_transform(all_flat_set)

In [50]:
#altair
import vega
import altair as alt
import pandas as pd

x3_axis = []
y3_axis = []

for i in range(len(X3)):
    x3_axis.append(X3[i][0])
    y3_axis.append(X3[i][1])
    
print(len(x3_axis))
print(len(y3_axis))
print((y))
    
df2 = pd.DataFrame({'x': np.asarray(x3_axis), 'y': np.asarray(y3_axis), 'Locations': np.asarray(y)})
viz2 = alt.Chart(df2).mark_circle(opacity=0.6, size=60).encode(x='x', y='y', color='Locations').interactive()
display(viz2)

130
130
['W_3964', 'W_3964', 'W_3964', 'W_3964', 'W_3964', 'W_3964', 'W_3964', 'W_3964', 'W_3964', 'W_3964', 'W_3964', 'W_3964', 'W_3964', 'W_144354', 'W_144354', 'W_144354', 'W_144354', 'W_144354', 'W_144354', 'W_144354', 'W_144354', 'W_144354', 'W_144354', 'W_144354', 'W_144354', 'W_144354', 'W_8339', 'W_8339', 'W_8339', 'W_8339', 'W_8339', 'W_8339', 'W_8339', 'W_8339', 'W_8339', 'W_8339', 'W_8339', 'W_8339', 'W_8339', 'W_12542', 'W_12542', 'W_12542', 'W_12542', 'W_12542', 'W_12542', 'W_12542', 'W_12542', 'W_12542', 'W_12542', 'W_12542', 'W_12542', 'W_12542', 'W_6163', 'W_6163', 'W_6163', 'W_6163', 'W_6163', 'W_6163', 'W_6163', 'W_6163', 'W_6163', 'W_6163', 'W_6163', 'W_6163', 'W_6163', 'W_6753', 'W_6753', 'W_6753', 'W_6753', 'W_6753', 'W_6753', 'W_6753', 'W_6753', 'W_6753', 'W_6753', 'W_6753', 'W_6753', 'W_6753', 'W_112640', 'W_112640', 'W_112640', 'W_112640', 'W_112640', 'W_112640', 'W_112640', 'W_112640', 'W_112640', 'W_112640', 'W_112640', 'W_112640', 'W_112640', 'W_113724', 'W_1

### Calculate distances between all shape DNAs

### Calculate Hausdorff distances between all sets