# Node Overlap

I gathered partially organized, publicly available graphics. Around 60% of these graphics were created or opened in Inkscape. It is likely that the groups within these graphics were created to make editing easier.

In [None]:
# Loading dataset
from vectorrvnn.utils import *
from vectorrvnn.data import *
from vectorrvnn.utils import *
from vectorrvnn.baselines import *
from vectorrvnn.trainutils import *
from vectorrvnn.interfaces import *
from more_itertools import *
import svgpathtools as svg
import matplotlib.pyplot as plt
from tqdm import tqdm
import random

# Root folder of the SVG collection used in this notebook.
DATA_DIR = '../data/PublicDomainVectors'

# Every file under DATA_DIR whose name ends in 'svg'.
svgFiles = list(filter(lambda f : f.endswith('svg'), allfiles(DATA_DIR)))
# Each file is read twice: once as a document and once as a group hierarchy.
docs = list(map(svg.Document, svgFiles))
trees = list(map(getTreeStructureFromSVG, svgFiles))
# A data point is (file, document, tree). Graphics whose tree has 100 or
# more leaves are dropped.
dataset = [
    dataPt
    for dataPt in zip(svgFiles, docs, trees)
    if len(leaves(dataPt[2])) < 100
]

In [None]:
# Visualize some groups
# random.sample raises ValueError when k exceeds the population size, so
# cap the request at the number of graphics actually available.
sample = random.sample(dataset, k=min(100, len(dataset)))
for svgFile, doc, tree in sample :
    # Show the whole graphic first.
    plt.imshow(rasterize(doc, 200, 200))
    plt.show()
    r = findRoot(tree)
    # Groups are the internal (non-leaf) nodes of the tree, root excluded.
    groupNodes = [n for n in tree.nodes if tree.out_degree(n) > 0 and n != r]
    pathSets = [tree.nodes[n]['pathSet'] for n in groupNodes]
    print(svgFile)
    nPs = min(5, len(pathSets))
    if nPs == 0 :
        # Nothing to draw, and plt.subplots(1, 0) would raise.
        continue
    # squeeze=False keeps `axes` a 2D array even when nPs == 1; without it
    # a single Axes object is returned, which cannot be zipped over.
    fig, axes = plt.subplots(1, nPs, squeeze=False)
    psSample = random.sample(pathSets, k=nPs)
    # Show up to five randomly chosen groups of this graphic side by side.
    for ps, ax in zip(psSample, axes.ravel()) :
        print(ps)
        ax.imshow(rasterize(subsetSvg(doc, ps), 200, 200))
    plt.show()


## What are we measuring?

I compare three methods for graphic organization using this dataset. 

1. Ours
2. Fisher et al.
3. Suggero

For each graphic in the dataset, I use one of the three methods to obtain a complete hierarchical organization, $T$. Then, for each group $G$ in the graphic, I find the node in the hierarchy that maximally overlaps with it. This score is used to rank the three methods. 

$$\mathrm{score}(G, T) = \max_{n \in V(T)} \mathrm{IoU}(\mathrm{leaves}(T, n), G)$$

In [None]:
# Load our model
# (flag, value) pairs, flattened below into the token list that is handed
# to Options().parse through its `testing` argument.
flagValues = [
    ('--batch_size', '32'),
    ('--checkpoints_dir', '../results'),
    ('--dataroot', '../data/All'),
    ('--embedding_size', '32'),
    ('--load_ckpt', 'onebranch_oneof/best_0-796-07-13-2021-10-20-04.pth'),
    ('--modelcls', 'OneBranch'),
    ('--name', 'onebranch_oneof'),
    ('--samplercls', 'DiscriminativeSampler'),
    ('--phase', 'test'),
]
testArgs = [token for pair in flagValues for token in pair]

opts = Options().parse(testing=testArgs)
model = buildModel(opts)

In [None]:
from collections import defaultdict

# Evaluation log filled in by logResult: results[dataPt][methodName] is a
# dict holding the per-group lists 'lens', 'area' and 'scores'.
results = defaultdict(dict)

def iou (a, b) : 
    """
    Intersection over union (Jaccard index) of two collections.

    Both arguments are converted to sets, so duplicates and ordering
    are ignored.

    Parameters
    ----------
    a, b : iterables of hashable items.

    Returns
    -------
    float in [0, 1]. When both inputs are empty the ratio is undefined; 
    0.0 is returned instead of raising ZeroDivisionError.
    """
    setA, setB = set(a), set(b)
    unionSet = setA | setB
    if not unionSet : 
        return 0.0
    return len(setA & setB) / len(unionSet)

def logResult (dataPt, method, methodName) : 
    """
    Score one organization method on one graphic and record the outcome.

    For every group of the ground-truth tree (internal node other than 
    the root) this finds the node of the predicted tree whose 'pathSet' 
    has the highest IoU with the group's 'pathSet'.

    Parameters
    ----------
    dataPt : tuple (svgFile, doc, T); T is the ground-truth tree.
    method : callable mapping dataPt to a predicted tree whose nodes 
        carry a 'pathSet' attribute.
    methodName : key under which the outcome is stored.

    Side effect
    -----------
    Sets results[dataPt][methodName] to a dict with three parallel 
    lists, one entry per group: 
        lens   - number of paths in the group, 
        area   - group bounding-box area divided by the document 
                 bounding-box area, 
        scores - best IoU achieved by any predicted node.
    """
    _, doc, T = dataPt
    T_ = method(dataPt)
    root = findRoot(T)
    gNodes = [n for n in T.nodes if T.out_degree(n) > 0 and not n == root]
    # None of these depend on the group, so compute them once.
    predPathSets = [T_.nodes[m]['pathSet'] for m in T_.nodes]
    paths = cachedPaths(doc)
    docArea = getDocBBox(doc).area()
    lens, area, scores = [], [], []
    for n in gNodes : 
        ps = T.nodes[n]['pathSet']
        maxIoU = max(iou(ps, predPs) for predPs in predPathSets)
        # Bounding box of this group's own paths. The previous version 
        # took the union over every path in the document, which gave all 
        # groups of a graphic the same area.
        # NOTE(review): assumes pathSet holds indices into 
        # cachedPaths(doc), as the subsetSvg(doc, ps) usage in this 
        # notebook suggests - confirm.
        groupBBox = union([pathBBox(paths[i].path) for i in ps])
        lens.append(len(ps))
        area.append(groupBBox.area() / docArea)
        scores.append(maxIoU)
    results[dataPt][methodName] = dict(lens=lens, area=area, scores=scores)

def ours (dataPt) : 
    """Predicted tree from our model, via greedyTree on the data point's file and tree."""
    svgFile, groundTruth = dataPt[0], dataPt[2]
    return model.greedyTree(SVGData(svgFile, tree=groundTruth))

def fisher (dataPt) : 
    """Predicted tree from autogroup applied to the data point's document."""
    return autogroup(dataPt[1])

def sug (dataPt) : 
    """Predicted tree from suggero applied to the data point's document."""
    return suggero(dataPt[1])

In [None]:
# I used the code below to precompute and save the results, as 
# this step takes a long time. 
# for dataPt in tqdm(take(200, dataset)) : 
#     try :
#         logResult(dataPt, ours, 'Ours')
#         logResult(dataPt, fisher, 'Fisher et. al.')
#         logResult(dataPt, sug, 'Suggero')
#     except Exception as e : 
#         print(e)

# Imported here explicitly so this cell does not depend on one of the 
# wildcard imports happening to expose `pickle`.
import pickle

# NOTE: unpickling can execute arbitrary code. Only load a results.pkl 
# that you produced yourself with the loop above.
with open('results.pkl', 'rb') as fd :
    results = pickle.load(fd)

## Average MaxIoU

Over 1000 groups are evaluated in this section

In [None]:
# Pooled per-group statistics, one accumulator per (method, statistic).
ourScores, ourLens, ourAreas = [], [], []
fisherScores, fisherLens, fisherAreas = [], [], []
suggeroScores, suggeroLens, suggeroAreas = [], [], []

# Key used in `results` -> (scores, lens, areas) accumulators to fill.
accumulators = {
    'Ours': (ourScores, ourLens, ourAreas),
    'Fisher et. al.': (fisherScores, fisherLens, fisherAreas),
    'Suggero': (suggeroScores, suggeroLens, suggeroAreas),
}

for perMethod in results.values() : 
    for methodKey, (scoreAcc, lenAcc, areaAcc) in accumulators.items() : 
        # A method may be missing for a graphic; skip it in that case.
        if methodKey not in perMethod : 
            continue
        stats = perMethod[methodKey]
        scoreAcc.extend(stats['scores'])
        lenAcc.extend(stats['lens'])
        areaAcc.extend(stats['area'])

# Mean of the best-overlap score over all pooled groups, per method.
summaryRows = (
    ('Our\t average MaxIoU: ', ourScores),
    ('Fisher\t average MaxIoU: ', fisherScores),
    ('Suggero\t average MaxIoU: ', suggeroScores),
)
for label, pooled in summaryRows : 
    print(label, '{:.3}'.format(np.mean(pooled)))

## Distribution of MaxIoU with size of group

Our only advantage over Fisher et al. comes in the category of groups with very small area. In this category, Suggero is actually quite good. In all other categories, we are on par with or worse than Fisher et al.

The uncertainty in these estimates, i.e. the number after +/-, is one standard deviation.

In [None]:
def printScoresInAreaRange(areas, scores, methodName, lo, hi) : 
    """
    Print mean +/- standard deviation of the scores whose area is in [lo, hi].

    `areas` and `scores` are parallel sequences; both bounds are inclusive.
    The line is prefixed with `methodName` followed by a tab.
    """
    picked = []
    for area, score in zip(areas, scores) : 
        if lo <= area <= hi : 
            picked.append(score)
    mean, std = np.mean(picked), np.std(picked)
    print(f'{methodName}\t', ':', f'{mean:.4}', '+/-', f'{std:.3}')

# Bucket edges for the relative area of a group; consecutive edges form
# one bucket.
areaIntervals = [0.0, 0.2, 0.4, 0.6, 1.0]

separator = "__________________________________________________________"
# (areas, scores, label) per method, in the order they are reported.
areaTable = [
    (ourAreas, ourScores, 'Our'),
    (suggeroAreas, suggeroScores, 'Suggero'),
    (fisherAreas, fisherScores, 'Fisher'),
]

for lo, hi in zip(areaIntervals, areaIntervals[1:]) : 
    print(separator)
    print('Average MaxIoU for groups with area in range', lo, '-', hi)
    for methodAreas, methodScores, label in areaTable : 
        printScoresInAreaRange(methodAreas, methodScores, label, lo, hi)

print(separator)


## Distribution of MaxIoU with number of paths in group

Again, our advantage seems to come from doing well on the smaller groups, of which there are a large number in this dataset. That is why we are winning here.

In [None]:
# Group sizes on a log2 scale, so buckets can be given as powers of two.
ourLens_, fisherLens_, suggeroLens_ = (
    np.log2(groupSizes) 
    for groupSizes in (ourLens, fisherLens, suggeroLens)
)

def printScoresInLenRange(lens, scores, methodName, lo, hi) : 
    """
    Print mean +/- standard deviation of the scores whose length is in [lo, hi].

    `lens` and `scores` are parallel sequences; both bounds are inclusive.
    The number of selected scores is appended as the bucket size.
    """
    picked = []
    for length, score in zip(lens, scores) : 
        if lo <= length <= hi : 
            picked.append(score)
    mean, std = np.mean(picked), np.std(picked)
    print(f'{methodName}\t', ':', 
          f'{mean:.4}', '+/-', 
          f'{std:.3}', 
          f'(bucket size = {len(picked)})')

# Bucket edges as exponents of two: edge e stands for 2**e paths.
# NOTE(review): printScoresInLenRange treats both bounds as inclusive, so a
# group whose size is exactly a shared edge (e.g. 4 paths) is counted in
# two adjacent buckets - confirm whether that is intended.
lenIntervals = [1, 2, 3, 5, 6, 7]

separator = "__________________________________________________________"
# (log2 sizes, scores, label) per method, in the order they are reported.
lenTable = [
    (ourLens_, ourScores, 'Our'),
    (suggeroLens_, suggeroScores, 'Suggero'),
    (fisherLens_, fisherScores, 'Fisher'),
]

for lo, hi in zip(lenIntervals, lenIntervals[1:]) : 
    print(separator)
    print('Average MaxIoU for groups with #paths in range', 1 << lo, '-', 1 << hi)
    for methodLens, methodScores, label in lenTable : 
        printScoresInLenRange(methodLens, methodScores, label, lo, hi)

print(separator)

In [None]:
# Visualize all groups with paths in ranges 2 - 4 along with the predictions
lo = 2
hi = 4
methodName = 'Ours'
for pt in results.keys() :
    svgFile, doc, T = pt
    # Entries are keyed by method name ('Ours', 'Fisher et. al.', 'Suggero');
    # the literal key 'method' is never written by logResult and would
    # raise KeyError on a plain dict.
    if methodName not in results[pt] :
        continue
    methodResult = results[pt][methodName]