# What does T.E.D do?

I'll do a deep dive into what the unordered tree edit distance (T.E.D.) does, using two comparisons:

1. Compare Mike's annotations with mine.
2. Compare different algorithms with the ground truth. 

Both comparisons will be based on visualizing the optimal mapping, in descending order of the costs incurred.

Based on this, I'll determine whether the unordered tree edit distance is sufficient or whether we need to take a look at other variations such as:

1. Constrained Tree Edit Distance.
2. Structure-respecting Tree Edit Distance.

In [None]:
######################################################## 
## LOAD DATA and MODEL
######################################################## 
import random
from vectorrvnn.data import *
from vectorrvnn.utils import *
from vectorrvnn.baselines import *
from vectorrvnn.trainutils import *
from vectorrvnn.interfaces import *
from more_itertools import unzip
import svgpathtools as svg
from tqdm import tqdm
import matplotlib.pyplot as plt
from itertools import product

# Annotated dataset. data.metadata[i] is read below as an "<id>, <annotator>"
# string describing data[i]; entries that do not split that way are skipped.
data = TripletDataset('../data/MikeAnnotations')

def getAnnotationsByName(name):
    """Collect the dataset entries annotated by ``name``.

    Each ``data.metadata[i]`` is expected to be an ``"<id>, <annotator>"``
    string describing ``data[i]``. Entries whose metadata cannot be split
    into exactly those two fields (or that fail in any other way) are
    skipped silently.

    Returns a list of ``(id, datapoint)`` tuples, in dataset order, for the
    entries whose annotator field equals ``name``.
    """
    matches = []
    for index, datapoint in enumerate(data):
        try:
            annotationId, annotator = data.metadata[index].split(', ')
            if annotator == name:
                matches.append((annotationId, datapoint))
        except Exception:
            # Best-effort: ignore entries with missing or malformed metadata.
            continue
    return matches

# (id, datapoint) pairs for each annotator.
mike = getAnnotationsByName('mike')
sumit = getAnnotationsByName('sumit')

# Ids annotated by both people. The ids are collected directly instead of via
# unzip(...)[0]: unzip returns an empty tuple for an empty list, so indexing it
# raised IndexError whenever one annotator had no entries.
commonIds = {annId for annId, _ in mike}.intersection(
    {annId for annId, _ in sumit}
)

# Re-key both annotation sets by id, restricted to the shared ids.
mike = {annId: d for annId, d in mike if annId in commonIds}
sumit = {annId: d for annId, d in sumit if annId in commonIds}

# svgFile of every shared graphic (taken from mike's copy of the datapoint).
svgFiles = [mike[annId].svgFile for annId in commonIds]

# Command-line style options for loading the trained PatternGrouping model in
# the test phase. Requests CUDA device 0.
# NOTE(review): the --load_ckpt path is presumably resolved relative to
# --checkpoints_dir — confirm against Options / buildModel.
opts = Options().parse(testing=[
    '--batch_size', '32',
    '--checkpoints_dir', '../results',
    '--dataroot', '../data/All',
    '--embedding_size', '32', 
    '--load_ckpt', 'pattern_oneof_rerun/best_0-785-07-03-2021-20-34-16.pth',                          
    '--modelcls', 'PatternGrouping',
    '--name', 'pattern_oneof',
    '--structure_embedding_size', '8',
    '--samplercls', 'DiscriminativeSampler',
    '--device', 'cuda:0',
    '--phase', 'test',
])

# Build the model described by opts.
model = buildModel(opts)

# Predicted trees for every graphic in mike, keyed by id: one from the
# model's greedyTree and one from suggero.
triplet = {
    key: model.greedyTree(forest2tree(annotation))
    for key, annotation in mike.items()
}
sug = {key: suggero(annotation.doc) for key, annotation in mike.items()}

# Point both predictions at the doc of the matching annotation.
# NOTE: the loop variable is deliberately still named `id` (shadowing the
# builtin) so the notebook's global namespace stays as it was.
for id in triplet:
    triplet[id].doc = mike[id].doc
    sug[id].doc = mike[id].doc

In [None]:
######################################################## 
## VISUALIZE A TREE FOR SANITY
######################################################## 
# Pick one graphic that both annotators labelled and draw all four trees for
# it, each preceded by a caption.
randomId = random.choice(list(commonIds))
for _caption, _trees in (
    ("MIKE's TREE", mike),
    ("SUMIT's TREE", sumit),
    ("TRIPLET's TREE", triplet),
    ("SUGGERO's TREE", sug),
):
    print(_caption)
    treeImageFromGraph(_trees[randomId])
    plt.show()

In [None]:
# Compare suggero's tree with Sumit's annotation for the graphic chosen above.
a, b = sug[randomId], sumit[randomId]
# With matching=True, cted's result is unpacked as (distance, match matrix).
# NOTE(review): cted is presumably the constrained variant, while the intro
# talks about the unordered distance — confirm which one is intended here.
opt, matchMatrix = cted(a, b, matching=True)
print("T.E.D. = ", opt)
# Draw the two trees together with the computed matching.
fig, ax = treeMatchVis(a, b, matchMatrix)
plt.show()

In [None]:
for i in com