# Node Overlap

I gathered partially organized, publicly available graphics. Around 60% of these graphics were created or opened in Inkscape. It is likely that the groups within these graphics were created to make editing easier.

In [None]:
# Loading dataset
from vectorrvnn.utils import *
from vectorrvnn.data import *
from vectorrvnn.utils import *
from vectorrvnn.baselines import *
from vectorrvnn.trainutils import *
from vectorrvnn.interfaces import *
from more_itertools import *
import svgpathtools as svg
import matplotlib.pyplot as plt
from tqdm import tqdm
import random

DATA_DIR = '../data/PublicDomainVectors'

# Keep the first 1000 files under DATA_DIR whose name ends in 'svg' and
# wrap each of them in an SVGData object.
svgFiles = list(filter(lambda path : path.endswith('svg'), allfiles(DATA_DIR)))[:1000]
dataset  = list(map(SVGData, svgFiles))
# Filter out graphics with too many paths (only those with fewer than 40 are kept).
dataset = [graphic for graphic in dataset if graphic.nPaths < 40]

In [None]:
# Visualize some groups: for each of the first 10 graphics, show the whole
# graphic and then one panel per non-root group.
print(len(dataset))
sample = dataset[:10]
for graphic in sample :
    plt.imshow(rasterize(graphic.doc, 200, 200))
    plt.show()
    print(graphic.svgFile)
    groupNodes = nonLeaves(graphic)
    groupNodes.remove(findRoot(graphic))
    pathSets = [graphic.nodes[n]['pathSet'] for n in groupNodes]
    nPs = len(pathSets)
    print(nPs)
    if nPs == 0 :
        # plt.subplots rejects a grid with zero columns, so there is
        # nothing to draw for a graphic without non-root groups.
        continue
    # squeeze=False always yields a 2D array of axes; without it a single
    # column gives back a bare Axes object that cannot be zipped over.
    fig, axes = plt.subplots(1, nPs, squeeze=False)
    # Sampling all nPs items shows every group, in a random order.
    psSample = random.sample(pathSets, k=nPs)
    for ps, ax in zip(psSample, axes[0]) :
        ax.imshow(rasterize(subsetSvg(graphic.doc, ps), 200, 200))
    plt.show()


## What are we measuring?

I compare three methods for graphic organization using this dataset. 

1. Ours
2. Fisher et al.
3. Suggero

For each graphic in the dataset, I use one of the three methods to obtain a complete hierarchical organization, $T$. Then, for each group $G$ in the graphic, I find the node in the hierarchy that maximally overlaps with it. This score is used to rank the three methods. 

$$\mathrm{score}(G, T) = \max_{n \in V(T)} \mathrm{IoU}(\mathrm{leaves}(T, n), G)$$

In [None]:
# Load our model
# The options are given as a list of command-line style flag/value strings.
# NOTE(review): presumably `testing=` makes the parser read this list instead
# of the process arguments -- confirm against Options.parse.
# Paths ('--checkpoints_dir', '--dataroot', '--load_ckpt') are relative and
# '--device' is set to 'cuda:0'; adjust them for the local setup.
opts = Options().parse(testing=[
    '--backbone', 'resnet50',
    '--checkpoints_dir', '../results',
    '--dataroot', '../data/All',
    '--embedding_size', '64',
    '--hidden_size', '128', '128', '128',
    '--load_ckpt', 'aug-21-cropnet_expt-1/training_end.pth',
    '--loss', 'cosineSimilarity',
    '--modelcls', 'CropNet',
    '--name', 'test',
    '--sim_criteria', 'negativeCosineSimilarity',
    '--device', 'cuda:0',
    '--phase', 'test',
    '--temperature', '0.1',
])

# Build the model from the parsed options.
model = buildModel(opts)

In [None]:
from collections import defaultdict

# Maps each graphic to a dict keyed by method name; logResult stores one
# entry (lens, area, scores, tree) per method in it.
results = defaultdict(dict)

def iou (a, b) : 
    """
    Intersection over union of two collections, compared as sets.

    Both arguments are converted to sets, so duplicates and ordering are
    ignored. Returns a float in [0, 1]. When both collections are empty
    the union is empty as well; 0.0 is returned in that case instead of
    raising ZeroDivisionError.
    """
    setA, setB = set(a), set(b)
    either = setA | setB
    if not either :
        return 0.0
    return len(setA & setB) / len(either)

def logResult (T, method, methodName) : 
    """
    Score one organization method on the graphic T and record the outcome
    in the module-level `results` table.

    T          : graphic whose non-root, non-leaf nodes are the reference groups.
    method     : callable that takes T and returns the predicted hierarchy.
    methodName : key under which the outcome is stored in results[T].

    For every reference group the best IoU between its path set and the
    path set of any node of the predicted hierarchy is computed. The stored
    entry is a dict with parallel lists 'lens' (group sizes), 'area' and
    'scores' (best IoUs), plus the predicted hierarchy under 'tree'.
    Returns nothing.
    """
    # `results` is only mutated, never rebound, so no `global` is needed.
    T_ = method(T)
    gNodes = nonLeaves(T)
    gNodes.remove(findRoot(T))
    lens, area, scores = [], [], []
    if gNodes :
        # Neither of these depends on the group, so compute them once
        # instead of once per group.
        candidates = [T_.nodes[m]['pathSet'] for m in T_.nodes]
        paths = cachedPaths(T.doc)
        # NOTE(review): this is the bounding box of ALL paths in the
        # document relative to the document box, so every group gets the
        # same value. It looks like it was meant to cover only the paths
        # of the group -- confirm how 'pathSet' indexes `paths` before
        # changing it.
        bboxArea = union([pathBBox(p.path) for p in paths]).area() / getDocBBox(T.doc).area()
    for n in gNodes : 
        ps = T.nodes[n]['pathSet']
        lens.append(len(ps))
        area.append(bboxArea)
        scores.append(max(iou(ps, c) for c in candidates))
    results[T][methodName] = dict(lens=lens, area=area, scores=scores, tree=T_)

# Each method takes a graphic and returns a hierarchy over it.
ours = model.greedyTree

def fisher (t) :
    """Organize the graphic t by running autogroup on its document."""
    return autogroup(t.doc)

def sug (t) :
    """Organize the graphic t by running suggero on its document."""
    return suggero(t.doc)

In [None]:
# Load the previously saved results table.
# NOTE(review): `pickle` is not imported explicitly in the visible cells;
# presumably it arrives through one of the wildcard imports -- confirm.
# Only unpickle files you produced yourself.
with open('results.pkl', 'rb') as fd :
    results = pickle.load(fd)

# The code below was used to precompute and save the results, as
# this step takes a long time. 
# As written, the loop re-runs our method on every graphic in the loaded table.
for k in tqdm(results.keys()) : 
    logResult(k, ours, 'Ours')
# NOTE(review): the disabled calls below refer to `dataPt`, but the loop
# variable is `k`; rename before re-enabling them.
#     logResult(dataPt, fisher, 'Fisher et. al.')
#     logResult(dataPt, sug, 'Suggero')

# with open('results.pkl', 'wb') as fd : 
#     pickle.dump(results, fd)

## Average MaxIoU

Over 1000 groups are evaluated in this section.

In [None]:
# Flat per-method accumulators: best IoUs, group sizes and area ratios of
# all groups, concatenated over every graphic in the results table.
ourScores, ourLens, ourAreas = [], [], []
fisherScores, fisherLens, fisherAreas = [], [], []
suggeroScores, suggeroLens, suggeroAreas = [], [], []

accumulators = {
    'Ours' : (ourScores, ourLens, ourAreas),
    'Fisher et. al.' : (fisherScores, fisherLens, fisherAreas),
    'Suggero' : (suggeroScores, suggeroLens, suggeroAreas),
}

for perMethod in results.values() :
    for methodName, (scoreAcc, lenAcc, areaAcc) in accumulators.items() :
        # A graphic may lack the entry for a method; skip it then.
        if methodName not in perMethod :
            continue
        entry = perMethod[methodName]
        scoreAcc.extend(entry['scores'])
        lenAcc.extend(entry['lens'])
        areaAcc.extend(entry['area'])

print('Our\t average MaxIoU: ', format(np.mean(ourScores), '.3'))
print('Fisher\t average MaxIoU: ', format(np.mean(fisherScores), '.3'))
print('Suggero\t average MaxIoU: ', format(np.mean(suggeroScores), '.3'))

## Distribution of MaxIoU with number of paths in group

Again, our advantage seems to come from doing well with the smaller groups of which there is a large number in this dataset. That is why we are winning here.

In [None]:
# Base-2 logarithm of every group size, one array per method.
ourLens_, fisherLens_, suggeroLens_ = (
    np.log2(groupSizes) for groupSizes in (ourLens, fisherLens, suggeroLens)
)

def printScoresInLenRange(lens, scores, methodName, lo, hi) : 
    """
    Print mean and standard deviation of the scores whose matching entry
    in `lens` lies in the closed interval [lo, hi].

    lens       : one value per score, compared against lo and hi.
    scores     : the scores, parallel to `lens`.
    methodName : label printed at the start of the line.
    lo, hi     : inclusive bounds on the `lens` values.

    The printed line also reports the number of selected scores. For an
    empty bucket the statistics are printed as nan, without triggering
    numpy's empty-slice warnings.
    """
    scores_ = [s for s, l in zip(scores, lens) if lo <= l <= hi]
    if scores_ :
        mean, std = np.mean(scores_), np.std(scores_)
    else :
        # np.mean / np.std of an empty list also give nan, but emit
        # RuntimeWarnings along the way.
        mean = std = float('nan')
    print(f'{methodName}\t', ':', 
          '{:.4}'.format(mean), '+/-', 
          '{:.3}'.format(std), 
          f'(bucket size = {len(scores_)})')

# Bucket boundaries on the log2 scale; consecutive entries form one bucket.
lenIntervals = [1, 2, 3, 5, 6]

for lo, hi in zip(lenIntervals, lenIntervals[1:]) :
    print("__________________________________________________________")
    print('Average MaxIoU for groups with #paths in range', 2 ** lo, '-', 2 ** hi)
    for lens_, scores_, label in (
        (ourLens_, ourScores, 'Our'),
        (suggeroLens_, suggeroScores, 'Suggero'),
        (fisherLens_, fisherScores, 'Fisher'),
    ) :
        printScoresInLenRange(lens_, scores_, label, lo, hi)

print("__________________________________________________________")

In [None]:
# Visualize groups with between 2^lo and 2^hi paths for which our prediction scores lower than a baseline, along with all three predictions

def findBestMatchPathSet (ps, t) : 
    """
    Find the node of the tree t whose path set overlaps most with ps.

    Returns a pair: the path set selected by argmax over the IoUs with ps,
    and the largest IoU value.
    """
    candidates, overlaps = [], []
    for node in t.nodes :
        candidate = t.nodes[node]['pathSet']
        candidates.append(candidate)
        overlaps.append(iou(ps, candidate))
    return candidates[argmax(overlaps)], max(overlaps)

    
def _showPrediction (label, doc, pathSet, score, tree) :
    """Print label and score, then show the matched path set and the tree."""
    print(label)
    print('MaxIoU:', score)
    plt.imshow(rasterize(subsetSvg(doc, pathSet), 200, 200))
    plt.show()
    treeImageFromGraph(tree)
    plt.show()


# Group sizes of interest: between 2^lo and 2^hi paths, both inclusive.
lo = 1
hi = 3

methodNames = ('Ours', 'Fisher et. al.', 'Suggero')

# Number of cases displayed so far.
i = 0
for T in results.keys() :
    # The break in the inner loop only leaves that loop; this check is what
    # actually stops the display once the cap is exceeded.
    if i > 100 :
        break
    # Skip graphics for which some method has no stored result.
    if any(m not in results[T] for m in methodNames) :
        continue
    root = findRoot(T)
    pathsets = [T.nodes[n]['pathSet'] for n in T.nodes if n != root]
    for ps in pathsets : 
        if not ((1 << lo) <= len(ps) <= (1 << hi)) :
            continue
        tour = results[T]['Ours']['tree']
        tfisher = results[T]['Fisher et. al.']['tree']
        tsug = results[T]['Suggero']['tree']
        # Attach the document of the graphic to each stored tree.
        tour.doc = tfisher.doc = tsug.doc = T.doc
        ops, oiou = findBestMatchPathSet(ps, tour)
        fps, fiou = findBestMatchPathSet(ps, tfisher)
        sps, siou = findBestMatchPathSet(ps, tsug)
        # Only show cases where at least one baseline beats our method.
        if oiou < fiou or oiou < siou : 
            i += 1
            print("___________________________________________________")
            print(T.svgFile)
            print(ps)
            plt.imshow(rasterize(T.doc, 200, 200))
            plt.show()
            plt.imshow(rasterize(subsetSvg(T.doc, ps), 200, 200))
            plt.show()
            _showPrediction('Ours', T.doc, ops, oiou, tour)
            _showPrediction('Fisher', T.doc, fps, fiou, tfisher)
            _showPrediction('Suggero', T.doc, sps, siou, tsug)
            if i > 100 : 
                break

###### 