# svg_dataset

> End-to-end functions that take in centerline-stroke SVGs and output deltas in Stroke-3 format.

In [None]:
#| default_exp stroke3.svg_dataset

In [None]:
#| hide
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
from singleline_dataset.stroke3.path_transforms import *
from singleline_dataset.stroke3.stroke3 import *


In [None]:
#| export


def stroke_rdp_deltas(rescaled_strokes, epsilon=2.0):
    """Simplify strokes with RDP, then encode them as deltas.

    Args:
        rescaled_strokes: strokes handed straight to `rdp_strokes`.
        epsilon: tolerance forwarded to `rdp_strokes`.

    Returns:
        Whatever `strokes_to_deltas` produces for the simplified strokes.
    """
    simplified = rdp_strokes(rescaled_strokes, epsilon)

    # Roundtrip sanity check, kept for manual debugging:
    #   display_plot.plot_strokes(stroke3.deltas_to_strokes(<returned deltas>))
    return strokes_to_deltas(simplified)
    

In [None]:
#| export
import os

import numpy as np

from singleline_dataset.stroke3.display_svg import render_deltas, render_strokes
from singleline_dataset.stroke3.display_plot import *
from singleline_dataset.stroke3.stroke3 import *
from singleline_dataset.stroke3.svg_files import enumerate_files
from singleline_dataset.stroke3.path_joining import merge_until, splice_until


def svgs_to_deltas(input_dir, output_dir=None, target_size=200, total_n=1000, min_n=3, epsilon=1.0, limit=None):
    """Convert the files listed for `input_dir` into per-file delta arrays.

    For every name yielded by `enumerate_files(input_dir)` the file is loaded
    with `svg_to_strokes`, passed through `merge_until` (dist_threshold=10.0)
    and `splice_until` (dist_threshold=30.0), then simplified and
    delta-encoded by `stroke_rdp_deltas`.

    Args:
        input_dir: directory handed to `enumerate_files`; each yielded name is
            joined onto it to build the input path.
        output_dir: optional directory for debug artifacts (PNG plots of each
            stage plus `.raw.svg` / `.preproc.svg` renders). Created if it
            does not exist. When falsy, nothing is written.
        target_size: forwarded to `render_strokes` / `render_deltas`.
        total_n: forwarded to `svg_to_strokes`.
        min_n: forwarded to `svg_to_strokes`.
        epsilon: forwarded to `stroke_rdp_deltas`.
        limit: maximum number of files to process. A falsy value (None or 0)
            means "no limit".

    Returns:
        A 1-D numpy object array holding one deltas array per processed file.

    Raises:
        Any exception raised while processing a file is re-raised after the
        offending path has been printed.
    """
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    def new_suffix(fname, suffix):
        # Swap the ".svg" extension for `suffix` and place the file in output_dir.
        return os.path.join(output_dir, fname.replace(".svg", suffix))

    dataset = []
    for i, fname in enumerate(enumerate_files(input_dir)):
        # `i` is zero-based, so stop once `limit` files have been handled
        # (the previous `i > limit` check processed limit + 1 files).
        if limit and i >= limit:
            break
        input_fname = os.path.join(input_dir, fname)

        try:
            rescaled_strokes = svg_to_strokes(input_fname, total_n=total_n, min_n=min_n)

            joined_strokes, _ = merge_until(rescaled_strokes, dist_threshold=10.0)
            spliced_strokes, _ = splice_until(joined_strokes, dist_threshold=30.0)

            print(f"{fname}: {len(rescaled_strokes)} strokes -> {len(joined_strokes)} joined -> {len(spliced_strokes)} spliced")

            deltas = stroke_rdp_deltas(spliced_strokes, epsilon=epsilon)
            dataset.append(deltas)

            # monitor number of points before/after applying RDP path simplification algorithm
            raw_points = np.vstack(rescaled_strokes).shape[0]
            rdp_points = deltas.shape[0]
            print(f"{input_fname} points: raw={raw_points}, rdp={rdp_points}")

            if output_dir:
                # One plot per pipeline stage so each step can be inspected visually.
                plot_strokes(rescaled_strokes, fname=new_suffix(fname, '.0_strokes.png'))
                plot_strokes(joined_strokes, fname=new_suffix(fname, '.1_joined.png'))
                plot_strokes(spliced_strokes, fname=new_suffix(fname, '.2_spliced.png'))
                plot_strokes(deltas_to_strokes(deltas), fname=new_suffix(fname, '.3_deltas.png'))

                raw_output_fname = new_suffix(fname, ".raw.svg")
                with open(raw_output_fname, "w", encoding="utf-8") as raw_out:
                    raw_dwg = render_strokes(rescaled_strokes, target_size=target_size)
                    raw_dwg.write(raw_out, pretty=True)
                    print(f"\twrote {raw_output_fname}")

                preproc_output_fname = new_suffix(fname, ".preproc.svg")
                with open(preproc_output_fname, "w", encoding="utf-8") as preproc_out:
                    preproc_dwg = render_deltas(deltas, target_size=target_size)
                    # Write through the handle we opened, exactly like the raw
                    # render above. The previous `.save(preproc_output_fname)`
                    # never wrote to this handle.
                    # NOTE(review): assumes render_deltas returns the same kind
                    # of drawing object as render_strokes - confirm.
                    preproc_dwg.write(preproc_out, pretty=True)
                    print(f"\twrote {preproc_output_fname}")
        except Exception as e:
            print(f"error processing {input_fname}: {e}")
            raise

    # Fill a pre-sized object array element by element: np.array(list, dtype=object)
    # would build an N-D array whenever all delta arrays happen to share a shape
    # (e.g. a single file), making the return shape unpredictable.
    result = np.empty(len(dataset), dtype=object)
    for idx, item in enumerate(dataset):
        result[idx] = item
    return result

In [None]:
# Idempotent and cross-platform: unlike `!mkdir`, this does not fail when the
# directory is already there on a re-run.
os.makedirs('../outputs', exist_ok=True)

mkdir: ../outputs: File exists


In [None]:
# Input directory handed to svgs_to_deltas, and the directory that receives
# its per-file debug plots and SVG renders.
input_dir = '../data/svg/'
output_dir = '../outputs'

# debug: cap the number of files processed so this cell stays quick
limit = 10

# The returned dataset is discarded (`_`); this run is only for inspecting the
# printed logs and the artifacts written to output_dir.
_ = svgs_to_deltas(input_dir, output_dir, limit=limit)

Minimum distance: 0.2946925297391963
From 51_-1 (4 points)
To 52_0 (4 points)
[0] - len(curr_strokes) = 52, min_dist = 0.2946925297391963
Minimum distance: 0.4971782261692783
From 42_0 (5 points)
To 48_-1 (5 points)
[1] - len(curr_strokes) = 51, min_dist = 0.4971782261692783
Minimum distance: 0.5111259422916135
From 25_0 (13 points)
To 33_0 (13 points)
[2] - len(curr_strokes) = 50, min_dist = 0.5111259422916135
Minimum distance: 0.5224572989728425
From 29_0 (8 points)
To 39_-1 (8 points)
[3] - len(curr_strokes) = 49, min_dist = 0.5224572989728425
Minimum distance: 0.5393953678378925
From 2_-1 (68 points)
To 39_0 (68 points)
[4] - len(curr_strokes) = 48, min_dist = 0.5393953678378925
Minimum distance: 0.6142969310507422
From 9_0 (31 points)
To 37_0 (31 points)
[5] - len(curr_strokes) = 47, min_dist = 0.6142969310507422
Minimum distance: 0.6550592528202287
From 27_0 (9 points)
To 38_-1 (9 points)
[6] - len(curr_strokes) = 46, min_dist = 0.6550592528202287
Minimum distance: 0.679443022996

array([array([[ 2.70653183e+01,  1.19851617e+02,  0.00000000e+00],
              [-1.83608141e+00, -3.05697395e+01,  0.00000000e+00],
              [ 4.68129860e+00, -2.33840228e+00,  0.00000000e+00],
              [ 2.68481317e+00,  1.83781896e+00,  0.00000000e+00],
              [ 1.12625282e+00,  3.18310248e+00,  0.00000000e+00],
              [-5.54964080e+00,  1.75594504e+01,  0.00000000e+00],
              [ 4.02296933e-01, -1.82689407e+01,  0.00000000e+00],
              [-1.91114335e+00, -6.42232727e+00,  0.00000000e+00],
              [-3.25826265e+00, -2.18031478e+00,  0.00000000e+00],
              [ 8.15003203e+00, -2.06620698e+00,  0.00000000e+00],
              [ 8.34433139e+00,  7.50235267e-01,  0.00000000e+00],
              [-5.62342523e+00,  3.69201519e+00,  0.00000000e+00],
              [ 5.02924552e+00,  3.78669348e+00,  0.00000000e+00],
              [ 8.29961113e-01,  3.05808639e+00,  0.00000000e+00],
              [-8.13401595e+00,  2.21890291e+01,  0.00000000e+

In [None]:
# Full run: limit=None disables the file cap, so every file listed for
# input_dir is processed. Debug artifacts are (re)written to output_dir.
full_dataset = svgs_to_deltas(input_dir, output_dir, limit=None)

Minimum distance: 0.2946925297391963
From 51_-1 (4 points)
To 52_0 (4 points)
[0] - len(curr_strokes) = 52, min_dist = 0.2946925297391963
Minimum distance: 0.4971782261692783
From 42_0 (5 points)
To 48_-1 (5 points)
[1] - len(curr_strokes) = 51, min_dist = 0.4971782261692783
Minimum distance: 0.5111259422916135
From 25_0 (13 points)
To 33_0 (13 points)
[2] - len(curr_strokes) = 50, min_dist = 0.5111259422916135
Minimum distance: 0.5224572989728425
From 29_0 (8 points)
To 39_-1 (8 points)
[3] - len(curr_strokes) = 49, min_dist = 0.5224572989728425
Minimum distance: 0.5393953678378925
From 2_-1 (68 points)
To 39_0 (68 points)
[4] - len(curr_strokes) = 48, min_dist = 0.5393953678378925
Minimum distance: 0.6142969310507422
From 9_0 (31 points)
To 37_0 (31 points)
[5] - len(curr_strokes) = 47, min_dist = 0.6142969310507422
Minimum distance: 0.6550592528202287
From 27_0 (9 points)
To 38_-1 (9 points)
[6] - len(curr_strokes) = 46, min_dist = 0.6550592528202287
Minimum distance: 0.679443022996

In [None]:
# Persist the dataset. It is stored positionally, i.e. under the key 'arr_0'.
# `encoding` and `allow_pickle` are np.load options: np.savez treats unknown
# keyword arguments as additional named arrays, so passing them here only
# added junk entries to the archive. Object arrays are pickled by default;
# pass allow_pickle=True (and encoding, if needed) to np.load when reading.
np.savez('../outputs/subset.npz', full_dataset)

# Last expression of the cell so the number of drawings is actually displayed.
len(full_dataset)

In [None]:
#| hide
import nbdev

# Runs nbdev's export step. Presumably this writes the cells marked
# `#| export` to the module named by `#| default_exp` at the top of the
# notebook - confirm against the project's nbdev settings.
nbdev.nbdev_export()