In [81]:
import pandas as pd
import numpy as np
from pyemd import emd_samples
from IPython.display import display

# To import packages from the parent directory
import sys
sys.path.insert(0,'..')

# Autoreload allows us to update our code mid-notebook
%load_ext autoreload
%autoreload 2

# Our code
import processing
import visualization

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [26]:
# Load the pickled morphology samples; the path is relative to this notebook's directory.
# NOTE(review): unpickling can execute arbitrary code, so only load files from a trusted source.
pickle_path = '../data/sample_morphologies.pickle'
df = pd.read_pickle(pickle_path)
print(f'Length of df: {len(df)}')

Length of df: 496


In [27]:
# For every (BR, CHI, version) group keep the first row recorded at that
# group's largest timestep.
final_states = [
    run[run.timestep == max(run.timestep)].iloc[0]
    for _, run in df.groupby(['BR', 'CHI', 'version'])
]

# One row per group, assembled from the selected rows.
df_finals = pd.DataFrame(final_states)

In [59]:
# Inspect the final-state row at position 40 (the row later reshaped into sample_x).
df_finals.iloc[40]

BR                                                       0.56
CHI                                                       3.4
count                                                     166
image       [0.05490177869796753, 0.471236914396286, 0.918...
timestep                                                  150
version                                                     0
Name: 224, dtype: object

In [66]:
# Inspect the final-state row at position 8 (the row later reshaped into sample_y).
df_finals.iloc[8]

BR                                                        0.5
CHI                                                       3.4
count                                                     287
image       [0.9672163724899292, 0.9605730772018433, 0.961...
timestep                                                  275
version                                                     0
Name: 394, dtype: object

In [64]:
# Inspect the final-state row at position 52 (the row later reshaped into sample_z).
df_finals.iloc[52]

BR                                                       0.59
CHI                                                       2.8
count                                                     263
image       [0.08619406819343567, 0.08618982136249542, 0.0...
timestep                                                  250
version                                                     0
Name: 259, dtype: object

In [71]:
# Each flattened image is viewed as a 100 x 400 array before plotting.
image_shape = (100, 400)
sample_x = df_finals.iloc[40].image.reshape(image_shape)
sample_y = df_finals.iloc[8].image.reshape(image_shape)
sample_z = df_finals.iloc[52].image.reshape(image_shape)

# Render the three samples in order: x, y, z.
for sample_image in (sample_x, sample_y, sample_z):
    display(visualization.get_image_figure(sample_image))

FigureWidget({
    'data': [{'hoverinfo': 'none',
              'showscale': False,
              'type': 'hea…

FigureWidget({
    'data': [{'hoverinfo': 'none',
              'showscale': False,
              'type': 'hea…

FigureWidget({
    'data': [{'hoverinfo': 'none',
              'showscale': False,
              'type': 'hea…

In [72]:
# For each sample: extract its components (with background=1), then apply the
# perimeter/area-ratio signature function to those components.
components_x = processing.extract_components(sample_x, background=1)
sigs_x = processing.apply_to_components(components_x, processing.perimeter_area_ratio_sig)

components_y = processing.extract_components(sample_y, background=1)
sigs_y = processing.apply_to_components(components_y, processing.perimeter_area_ratio_sig)

components_z = processing.extract_components(sample_z, background=1)
sigs_z = processing.apply_to_components(components_z, processing.perimeter_area_ratio_sig)

# Earth mover's distance between each pair of signatures.
emd_xy = emd_samples(sigs_x, sigs_y)
emd_yz = emd_samples(sigs_y, sigs_z)
emd_zx = emd_samples(sigs_z, sigs_x)

print(f'emd_xy: {emd_xy}')
print(f'emd_yz: {emd_yz}')
print(f'emd_zx: {emd_zx}')

emd_xy: 0.0966751026163832
emd_yz: 0.02066419410407383
emd_zx: 0.08997156016455239


# Similarity Query

Sort the dataset by most/least similar.

First, calculate signatures for each sample:

In [73]:
# Work on a copy so df_finals itself is left unchanged.
df_sigs = df_finals.copy()

# Signature column name -> function applied to each sample's components.
signature_funcs = {
    'perimeter_area_ratio_sig': processing.perimeter_area_ratio_sig,
    'rect_area_ratio_sig': processing.rect_area_ratio_sig,
}

# Every cell stores a whole signature, so the columns must have object dtype.
# Use the builtin `object`: the `np.object` alias was deprecated in NumPy 1.20
# and removed in NumPy 1.24, where accessing it raises AttributeError.
for sig_column in signature_funcs:
    df_sigs[sig_column] = np.empty(len(df_sigs), dtype=object)

for idx, row in df_sigs.iterrows():
    sample = row.image.reshape((100, 400))
    # Components are extracted once per sample and shared by both signatures.
    components = processing.extract_components(sample, background=1)
    for sig_column, sig_func in signature_funcs.items():
        df_sigs.at[idx, sig_column] = processing.apply_to_components(components, sig_func)

In [74]:
# Distinct BR values present in the signature table.
df_sigs.BR.unique()

array([0.5 , 0.53, 0.56, 0.59])

In [75]:
# Distinct CHI values present in the signature table.
df_sigs.CHI.unique()

array([2.2, 2.8, 3.4, 4. ])

We will be querying based on this input sample:

In [76]:
# The query sample is the final-state row at position 52; note that sample_z is
# now the full row (a Series), not just the reshaped image.
sample_z = df_finals.iloc[52]
print(sample_z)

# Show the query image as a 100 x 400 array.
query_image = sample_z.image.reshape((100, 400))
display(visualization.get_image_figure(query_image))

BR                                                       0.59
CHI                                                       2.8
count                                                     263
image       [0.08619406819343567, 0.08618982136249542, 0.0...
timestep                                                  250
version                                                     0
Name: 259, dtype: object


FigureWidget({
    'data': [{'hoverinfo': 'none',
              'showscale': False,
              'type': 'hea…

Calculate EMD between query sample signatures and database signatures.

In [77]:
# Query signatures: reuse the ones already stored in df_sigs for the query row
# (looked up by its index label) instead of depending on `components_z` left
# over from an earlier cell, a name that a later cell rebinds to another sample.
perimeter_area_ratio_sig_z = df_sigs.at[sample_z.name, 'perimeter_area_ratio_sig']
rect_area_ratio_sig_z = df_sigs.at[sample_z.name, 'rect_area_ratio_sig']

# EMD from every stored signature to the query signature of the same kind.
# Building each column from a list avoids the `np.object` alias (removed in
# NumPy 1.24) and stores the distances as a numeric column.
df_sigs['emd_perimeter_area_ratio_sig'] = [
    emd_samples(sig, perimeter_area_ratio_sig_z)
    for sig in df_sigs['perimeter_area_ratio_sig']
]
df_sigs['emd_rect_area_ratio_sig'] = [
    emd_samples(sig, rect_area_ratio_sig_z)
    for sig in df_sigs['rect_area_ratio_sig']
]

Display the most similar/dissimilar images for each signature.

In [85]:
# Sort once per signature; the ordering does not change between iterations, so
# re-sorting the whole frame inside the loop was wasted work.
by_perimeter_emd = df_sigs.sort_values('emd_perimeter_area_ratio_sig')
by_rect_emd = df_sigs.sort_values('emd_rect_area_ratio_sig')

# Show the 4 smallest-distance rows for each signature, closest first.
for idx in range(4):
    print('emd_perimeter_area_ratio_sig:')
    nearest = by_perimeter_emd.iloc[idx]
    print(nearest)
    display(visualization.get_image_figure(nearest.image.reshape((100, 400))))

    print('emd_rect_area_ratio_sig:')
    nearest = by_rect_emd.iloc[idx]
    print(nearest)
    display(visualization.get_image_figure(nearest.image.reshape((100, 400))))

    print('\n====================================================\n')

emd_perimeter_area_ratio_sig:
BR                                                                           0.59
CHI                                                                           2.8
count                                                                         263
image                           [0.08619406819343567, 0.08618982136249542, 0.0...
timestep                                                                      250
version                                                                         0
perimeter_area_ratio_sig        [0.5453429643792055, 0.42374368670764573, 0.93...
rect_area_ratio_sig             [0.5357142857142857, 0.5612244897959183, 0.625...
emd_perimeter_area_ratio_sig                                                    0
emd_rect_area_ratio_sig                                                         0
Name: 259, dtype: object


FigureWidget({
    'data': [{'hoverinfo': 'none',
              'showscale': False,
              'type': 'hea…

emd_rect_area_ratio_sig:
BR                                                                           0.59
CHI                                                                           2.8
count                                                                         263
image                           [0.08619406819343567, 0.08618982136249542, 0.0...
timestep                                                                      250
version                                                                         0
perimeter_area_ratio_sig        [0.5453429643792055, 0.42374368670764573, 0.93...
rect_area_ratio_sig             [0.5357142857142857, 0.5612244897959183, 0.625...
emd_perimeter_area_ratio_sig                                                    0
emd_rect_area_ratio_sig                                                         0
Name: 259, dtype: object


FigureWidget({
    'data': [{'hoverinfo': 'none',
              'showscale': False,
              'type': 'hea…



emd_perimeter_area_ratio_sig:
BR                                                                            0.5
CHI                                                                           3.4
count                                                                         287
image                           [0.9672163724899292, 0.9605730772018433, 0.961...
timestep                                                                      275
version                                                                         0
perimeter_area_ratio_sig        [0.2814185431418234, 0.22127872410073543, 0.22...
rect_area_ratio_sig             [0.47039473684210525, 0.014538461538461538, 0....
emd_perimeter_area_ratio_sig                                            0.0206642
emd_rect_area_ratio_sig                                                  0.173196
Name: 394, dtype: object


FigureWidget({
    'data': [{'hoverinfo': 'none',
              'showscale': False,
              'type': 'hea…

emd_rect_area_ratio_sig:
BR                                                                           0.59
CHI                                                                           2.2
count                                                                          48
image                           [0.6712865829467773, 0.6950507760047913, 0.710...
timestep                                                                       25
version                                                                         2
perimeter_area_ratio_sig        [0.5694251482381063, 0.6674563090207275, 0.457...
rect_area_ratio_sig             [0.509090909090909, 0.46875, 0.584415584415584...
emd_perimeter_area_ratio_sig                                            0.0662628
emd_rect_area_ratio_sig                                                  0.016759
Name: 154, dtype: object


FigureWidget({
    'data': [{'hoverinfo': 'none',
              'showscale': False,
              'type': 'hea…



emd_perimeter_area_ratio_sig:
BR                                                                           0.59
CHI                                                                           2.8
count                                                                         227
image                           [0.901650607585907, 0.9031100869178772, 0.9032...
timestep                                                                      225
version                                                                         3
perimeter_area_ratio_sig        [0.6166546413478968, 0.7142857142857143, 0.720...
rect_area_ratio_sig             [0.4666666666666667, 1.0, 0.5, 0.5769230769230...
emd_perimeter_area_ratio_sig                                            0.0208819
emd_rect_area_ratio_sig                                                 0.0217443
Name: 269, dtype: object


FigureWidget({
    'data': [{'hoverinfo': 'none',
              'showscale': False,
              'type': 'hea…

emd_rect_area_ratio_sig:
BR                                                                           0.59
CHI                                                                           2.2
count                                                                          48
image                           [0.3056536018848419, 0.3736875057220459, 0.473...
timestep                                                                       25
version                                                                         1
perimeter_area_ratio_sig        [0.8558375965793723, 1.0295866830266498, 0.381...
rect_area_ratio_sig             [0.5333333333333333, 0.75, 0.63, 0.6125, 0.509...
emd_perimeter_area_ratio_sig                                            0.0589015
emd_rect_area_ratio_sig                                                 0.0182434
Name: 156, dtype: object


FigureWidget({
    'data': [{'hoverinfo': 'none',
              'showscale': False,
              'type': 'hea…



emd_perimeter_area_ratio_sig:
BR                                                                           0.53
CHI                                                                           3.4
count                                                                         261
image                           [0.9460951089859009, 0.8421623110771179, 0.595...
timestep                                                                      250
version                                                                         1
perimeter_area_ratio_sig        [0.3092743392552578, 0.6, 0.3418470416533082, ...
rect_area_ratio_sig             [0.14411764705882352, 1.0, 0.41025641025641024...
emd_perimeter_area_ratio_sig                                            0.0211311
emd_rect_area_ratio_sig                                                  0.209324
Name: 97, dtype: object


FigureWidget({
    'data': [{'hoverinfo': 'none',
              'showscale': False,
              'type': 'hea…

emd_rect_area_ratio_sig:
BR                                                                           0.59
CHI                                                                           2.8
count                                                                         193
image                           [0.08601965010166168, 0.08642247319221497, 0.1...
timestep                                                                      175
version                                                                         2
perimeter_area_ratio_sig        [0.7016400264276649, 0.953359222422738, 0.3226...
rect_area_ratio_sig             [0.625, 0.7, 0.6060606060606061, 0.47727272727...
emd_perimeter_area_ratio_sig                                            0.0256854
emd_rect_area_ratio_sig                                                 0.0209017
Name: 241, dtype: object


FigureWidget({
    'data': [{'hoverinfo': 'none',
              'showscale': False,
              'type': 'hea…





In [34]:
# Sort once per signature; the ordering does not change between iterations, so
# re-sorting the whole frame inside the loop was wasted work.
by_perimeter_emd = df_sigs.sort_values('emd_perimeter_area_ratio_sig')
by_rect_emd = df_sigs.sort_values('emd_rect_area_ratio_sig')

# Show the 4 largest-distance rows for each signature, furthest first
# (position -1 is the last row of the ascending sort).
for idx in range(4):
    print('emd_perimeter_area_ratio_sig:')
    furthest = by_perimeter_emd.iloc[-1 - idx]
    print(furthest)
    display(visualization.get_image_figure(furthest.image.reshape((100, 400))))

    print('emd_rect_area_ratio_sig:')
    furthest = by_rect_emd.iloc[-1 - idx]
    print(furthest)
    display(visualization.get_image_figure(furthest.image.reshape((100, 400))))

    print('\n====================================================\n')

emd_perimeter_area_ratio_sig:
BR                                                                           0.56
CHI                                                                             4
count                                                                         140
image                           [0.002926476998254657, 0.023009920492768288, 0...
timestep                                                                      125
version                                                                         0
perimeter_area_ratio_sig        [0.837802393359958, 0.7387123108385651, 0.9279...
rect_area_ratio_sig             [0.5714285714285714, 0.20105820105820105, 0.57...
emd_perimeter_area_ratio_sig                                             0.193775
emd_rect_area_ratio_sig                                                  0.166524
Name: 61, dtype: object


FigureWidget({
    'data': [{'hoverinfo': 'none',
              'showscale': False,
              'type': 'hea…

emd_rect_area_ratio_sig:
BR                                                                            0.5
CHI                                                                           2.8
count                                                                         208
image                           [0.9122486114501953, 0.9127185940742493, 0.913...
timestep                                                                      200
version                                                                         2
perimeter_area_ratio_sig        [0.2167224658893368, 0.17272317161763714, 0.29...
rect_area_ratio_sig             [0.0575, 0.14857142857142858, 0.54945054945054...
emd_perimeter_area_ratio_sig                                             0.109689
emd_rect_area_ratio_sig                                                   0.31428
Name: 424, dtype: object


FigureWidget({
    'data': [{'hoverinfo': 'none',
              'showscale': False,
              'type': 'hea…



emd_perimeter_area_ratio_sig:
BR                                                                           0.59
CHI                                                                             4
count                                                                         155
image                           [0.01830443926155567, 0.018545100465416908, 0....
timestep                                                                      150
version                                                                         0
perimeter_area_ratio_sig        [0.8744743347942345, 0.4658280525178399, 0.0, ...
rect_area_ratio_sig             [0.45, 0.40404040404040403, 1.0, 0.06818181818...
emd_perimeter_area_ratio_sig                                             0.177945
emd_rect_area_ratio_sig                                                 0.0873534
Name: 186, dtype: object


FigureWidget({
    'data': [{'hoverinfo': 'none',
              'showscale': False,
              'type': 'hea…

emd_rect_area_ratio_sig:
BR                                                                            0.5
CHI                                                                           2.8
count                                                                         201
image                           [0.09411002695560455, 0.09400024265050888, 0.0...
timestep                                                                      200
version                                                                         0
perimeter_area_ratio_sig        [0.2463264943545411, 0.18994276970616839, 0.18...
rect_area_ratio_sig             [0.30340557275541796, 0.10312899106002554, 0.0...
emd_perimeter_area_ratio_sig                                            0.0704301
emd_rect_area_ratio_sig                                                  0.267292
Name: 442, dtype: object


FigureWidget({
    'data': [{'hoverinfo': 'none',
              'showscale': False,
              'type': 'hea…



emd_perimeter_area_ratio_sig:
BR                                                                           0.56
CHI                                                                             4
count                                                                         161
image                           [0.6431540846824646, 0.9664037823677063, 0.974...
timestep                                                                      150
version                                                                         2
perimeter_area_ratio_sig        [0.5971681093532926, 0.5075825214724777, 0.544...
rect_area_ratio_sig             [0.13157894736842105, 0.2976190476190476, 0.05...
emd_perimeter_area_ratio_sig                                             0.151301
emd_rect_area_ratio_sig                                                  0.195532
Name: 45, dtype: object


FigureWidget({
    'data': [{'hoverinfo': 'none',
              'showscale': False,
              'type': 'hea…

emd_rect_area_ratio_sig:
BR                                                                            0.5
CHI                                                                           2.2
count                                                                          46
image                           [0.7341058850288391, 0.7345213294029236, 0.734...
timestep                                                                       25
version                                                                         3
perimeter_area_ratio_sig        [0.27180240488971685, 0.2605627200126459, 0.26...
rect_area_ratio_sig             [0.01818181818181818, 0.07538802660753881, 0.0...
emd_perimeter_area_ratio_sig                                            0.0565701
emd_rect_area_ratio_sig                                                  0.262138
Name: 77, dtype: object


FigureWidget({
    'data': [{'hoverinfo': 'none',
              'showscale': False,
              'type': 'hea…



emd_perimeter_area_ratio_sig:
BR                                                                           0.59
CHI                                                                             4
count                                                                         236
image                           [0.9844369292259216, 0.8907439112663269, 0.146...
timestep                                                                      225
version                                                                         2
perimeter_area_ratio_sig        [0.4928511301977579, 0.4615069229109035, 0.706...
rect_area_ratio_sig             [0.6666666666666666, 0.11312217194570136, 0.5,...
emd_perimeter_area_ratio_sig                                             0.147745
emd_rect_area_ratio_sig                                                 0.0686393
Name: 170, dtype: object


FigureWidget({
    'data': [{'hoverinfo': 'none',
              'showscale': False,
              'type': 'hea…

emd_rect_area_ratio_sig:
BR                                                                            0.5
CHI                                                                           2.2
count                                                                          46
image                           [0.29060789942741394, 0.29163551330566406, 0.3...
timestep                                                                       25
version                                                                         1
perimeter_area_ratio_sig        [0.273310862473115, 0.26589602031159504, 0.256...
rect_area_ratio_sig             [0.09312638580931264, 0.023803620133895365, 0....
emd_perimeter_area_ratio_sig                                            0.0530439
emd_rect_area_ratio_sig                                                  0.251178
Name: 73, dtype: object


FigureWidget({
    'data': [{'hoverinfo': 'none',
              'showscale': False,
              'type': 'hea…





## Distance Matrix

In [35]:
# Distinct parameter values, in order of first appearance in df_sigs.
chi_values = df_sigs['CHI'].unique()
br_values = df_sigs['BR'].unique()
version_values = df_sigs['version'].unique()

In [36]:
def get_dist_matrix(sigs_x, sigs_y=None):
    """Return the matrix of pairwise EMD values between two signature collections.

    Args:
        sigs_x: Signatures that index the columns of the result.
        sigs_y: Signatures that index the rows of the result. When omitted
            (None), `sigs_x` is compared against itself.

    Returns:
        A list of rows; entry [j][i] is emd_samples(sigs_x[i], sigs_y[j]).
    """
    row_sigs = sigs_x if sigs_y is None else sigs_y
    return [
        [emd_samples(col_sig, row_sig) for col_sig in sigs_x]
        for row_sig in row_sigs
    ]

In [58]:
# Candidate query samples, each identified by its (BR, CHI, version) values.
params = [
    { 'BR': 0.59, 'CHI': 2.8, 'version': 0 },
    { 'BR': 0.53, 'CHI': 2.8, 'version': 0 },
    { 'BR': 0.53, 'CHI': 4, 'version': 0 },
]
# Position in `params` of the candidate to use as the query.
idx = 2

# Pick the first row of df_sigs matching all three values of the chosen candidate.
chosen = params[idx]
matches_br = df_sigs['BR'] == chosen['BR']
matches_chi = df_sigs['CHI'] == chosen['CHI']
matches_version = df_sigs['version'] == chosen['version']
sample_z = df_sigs[matches_br & matches_chi & matches_version].iloc[0]
print(sample_z)

# Show the query image, then compute its perimeter/area-ratio signature.
sample_image_z = sample_z.image.reshape((100, 400))
display(visualization.get_image_figure(sample_image_z))
components_z = processing.extract_components(sample_image_z, background=1)
sigs_z = processing.apply_to_components(components_z, processing.perimeter_area_ratio_sig)

BR                                                                           0.53
CHI                                                                             4
count                                                                         178
image                           [0.9801279902458191, 0.9795420169830322, 0.979...
timestep                                                                      175
version                                                                         0
perimeter_area_ratio_sig        [0.4507165642490036, 1.0345177968644246, 0.489...
rect_area_ratio_sig             [0.007142857142857143, 0.7142857142857143, 0.0...
emd_perimeter_area_ratio_sig                                             0.130463
emd_rect_area_ratio_sig                                                  0.233971
Name: 30, dtype: object


FigureWidget({
    'data': [{'hoverinfo': 'none',
              'showscale': False,
              'type': 'hea…

In [48]:
# Grid of mean EMD values: one row per CHI value, one column per BR value,
# averaged over all versions of that (CHI, BR) combination.
dist_matrix = []
sig_name = 'rect_area_ratio_sig'
#sig_name = 'perimeter_area_ratio_sig'

# Compare like with like: take the query signature of the same kind as
# `sig_name` from the query row. Previously the query was always `sigs_z`, which
# is a perimeter/area-ratio signature, so selecting 'rect_area_ratio_sig' mixed
# two different signature kinds in a single distance.
query_sig = sample_z[sig_name]

for chi in chi_values:
    row = []
    for br in br_values:
        version_dists = []
        for version in version_values:
            # First row matching this exact (CHI, BR, version) combination.
            sample = df_sigs[
                (df_sigs.CHI == chi) &
                (df_sigs.BR == br) &
                (df_sigs.version == version)
            ].iloc[0]
            version_dists.append(emd_samples(sample[sig_name], query_sig))
        row.append(np.mean(version_dists))
    dist_matrix.append(row)

In [49]:
# Heatmap of the distance grid: BR values label the x axis, CHI values the y axis.
figure_options = dict(
    labels_x=br_values,
    labels_y=chi_values,
    scaleanchor=None,
)
viz = visualization.get_distance_matrix_figure(dist_matrix, **figure_options)
#viz.data[0].update(zmin=0., zmax=0.4) # this adjusts the color scale
display(viz)

FigureWidget({
    'data': [{'type': 'heatmap',
              'uid': '5670a84f-edec-4125-bdef-b414acf1a5f9',
 …