# svg_dataset

> End-to-end functions that take centerline-stroke SVGs as input and output deltas in Stroke-3 format.

In [None]:
#| default_exp stroke3.svg_dataset

In [None]:
#| hide
%load_ext autoreload
%autoreload 2

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
from singleline_dataset.stroke3.path_transforms import *
from singleline_dataset.stroke3.stroke3 import *

In [None]:
#| export


def stroke_rdp_deltas(rescaled_strokes, epsilon=2.0):
    """Simplify strokes with RDP, then convert them to delta format.

    `rescaled_strokes` is handed to `rdp_strokes` together with `epsilon`
    (the RDP path-simplification tolerance), and the simplified strokes are
    passed through `strokes_to_deltas`, whose result is returned unchanged.
    """
    # Debugging aid: to sanity-check the roundtrip, convert the returned
    # deltas back with `deltas_to_strokes` and plot them with `plot_strokes`.
    return strokes_to_deltas(rdp_strokes(rescaled_strokes, epsilon))

In [None]:
#| export
import os

import numpy as np

from singleline_dataset.stroke3.display_plot import *
from singleline_dataset.stroke3.display_svg import render_deltas, render_strokes
from singleline_dataset.stroke3.path_joining import merge_until, splice_until
from singleline_dataset.stroke3.stroke3 import *
from singleline_dataset.stroke3.svg_files import enumerate_files


def svgs_to_deltas(
    input_dir,
    output_dir=None,
    target_size=200,
    total_n=1000,
    min_n=3,
    epsilon=1.0,
    limit=None,
):
    """Convert the SVG files listed by `enumerate_files(input_dir)` to deltas.

    Each file is loaded with `svg_to_strokes`, its strokes are merged
    (`merge_until`, distance threshold 15.0) and spliced (`splice_until`,
    distance threshold 40.0), and the result is simplified and converted to
    deltas by `stroke_rdp_deltas`. Progress is reported with `print`.

    Parameters
    ----------
    input_dir : directory that is scanned for input files.
    output_dir : optional directory for debug output. When given, one PNG per
        processing stage is written to `png/<number of final strokes>/`.
    target_size : currently unused by the active code; only the commented-out
        SVG rendering at the end of the loop body refers to it.
    total_n, min_n : forwarded to `svg_to_strokes`.
    epsilon : RDP tolerance forwarded to `stroke_rdp_deltas`.
    limit : maximum number of files to visit. `None` (or 0) means no limit.

    Returns
    -------
    A 1-D NumPy object array with one delta array per successfully processed
    file. Files that raise during processing are reported and skipped.
    """
    if output_dir:
        for d in ("svg", "png"):
            os.makedirs(os.path.join(output_dir, d), exist_ok=True)

    def new_suffix(subdir, fname, suffix):
        # Build `<output_dir>/<subdir>/<fname without extension><suffix>` and
        # make sure its directory exists. Using splitext (rather than
        # replacing the literal ".svg") also handles names such as "x.SVG",
        # which would otherwise keep their original extension.
        out_path = os.path.join(
            output_dir, subdir, os.path.splitext(fname)[0] + suffix
        )
        os.makedirs(os.path.dirname(out_path), exist_ok=True)
        return out_path

    all_files = enumerate_files(input_dir)
    print(f"found {len(all_files)} in {input_dir}")
    dataset = []
    for i, fname in enumerate(all_files):
        # `i` is zero-based, so stop once `limit` files have been visited
        # (the previous `i > limit` check let one extra file through).
        if limit and i >= limit:
            break
        input_fname = os.path.join(input_dir, fname)

        try:
            rescaled_strokes = svg_to_strokes(input_fname, total_n=total_n, min_n=min_n)

            joined_strokes, _ = merge_until(rescaled_strokes, dist_threshold=15.0)
            spliced_strokes, _ = splice_until(joined_strokes, dist_threshold=40.0)

            print(
                f"{fname}: {len(rescaled_strokes)} strokes -> {len(joined_strokes)} joined -> {len(spliced_strokes)} spliced"
            )

            deltas = stroke_rdp_deltas(spliced_strokes, epsilon=epsilon)
            dataset.append(deltas)

            # monitor number of points before/after applying RDP path simplification algorithm
            raw_points = np.vstack(rescaled_strokes).shape[0]
            rdp_points = deltas.shape[0]
            print(f"{input_fname} points: raw={raw_points}, rdp={rdp_points}")

            if output_dir:
                # Group the debug plots by the final stroke count so that
                # drawings of similar complexity end up side by side.
                final_n_strokes = len(spliced_strokes)
                subdir = f"png/{final_n_strokes:02d}"
                plot_strokes(
                    rescaled_strokes, fname=new_suffix(subdir, fname, ".0_strokes.png")
                )
                plot_strokes(
                    joined_strokes, fname=new_suffix(subdir, fname, ".1_joined.png")
                )
                plot_strokes(
                    spliced_strokes, fname=new_suffix(subdir, fname, ".2_spliced.png")
                )
                plot_strokes(
                    deltas_to_strokes(deltas),
                    fname=new_suffix(subdir, fname, ".3_deltas.png"),
                )

                # raw_output_fname = new_suffix('svg', fname, ".raw.svg")
                # with open(raw_output_fname, "w", encoding="utf-8") as raw_out:
                #     raw_dwg = render_strokes(rescaled_strokes, target_size=target_size)
                #     raw_dwg.write(raw_out, pretty=True)
                #     print(f"\twrote {raw_output_fname}")

                # preproc_output_fname = new_suffix('svg', fname, ".preproc.svg")
                # with open(preproc_output_fname, "w", encoding="utf-8") as preproc_out:
                #     preproc_dwg = render_deltas(deltas, target_size=target_size)
                #     preproc_dwg.save(preproc_output_fname)
                #     print(f"\twrote {preproc_output_fname}")
        except Exception as e:
            # Deliberate best-effort: one bad file must not abort the batch.
            print(f"error processing idx={i} input_fname={input_fname}: {e}")
            # raise e

    # Fill a pre-allocated 1-D object array element by element.
    # `np.array(dataset, dtype=object)` would instead produce an N-D array
    # whenever every delta array happens to have the same shape (e.g. when
    # only one file was processed), which breaks per-drawing indexing.
    result = np.empty(len(dataset), dtype=object)
    for idx, item in enumerate(dataset):
        result[idx] = item
    return result

In [None]:
# input_dir = '../data/svg/'
# output_dir = '../outputs'

# # debug: only run for the first 10 files
# limit = 10

# _ = svgs_to_deltas(input_dir, output_dir, limit=limit)

In [None]:
# partial_dataset = svgs_to_deltas(input_dir, output_dir, limit=None)

In [None]:
# len(partial_dataset)

# np.savez('../outputs/subset.npz', partial_dataset, encoding='latin1', allow_pickle=True)

In [None]:
## first dataset

# full_dataset = svgs_to_deltas('../../svg-dataset/sketch_mgmt/imgs_sorted/drawings_svg_cropped/', '../outputs', limit=None)
# np.savez('../datasets/v1-splice.npz', full_dataset, encoding='latin1', allow_pickle=True)

In [None]:
%%time

# Build the dataset from every file in the cropped-drawings directory
# (limit=None) with an RDP epsilon of 0.5 instead of the function default of
# 1.0; per-stage debug plots are written under ../outputs_segmented.
full_dataset_eps05 = svgs_to_deltas(
    "../../svg-dataset/sketch_mgmt/imgs_sorted/drawings_svg_cropped/",
    "../outputs_segmented",
    epsilon=0.5,
    limit=None,
)

found 1651 in ../../svg-dataset/sketch_mgmt/imgs_sorted/drawings_svg_cropped/
1485.svg: 23 strokes -> 3 joined -> 1 spliced
../../svg-dataset/sketch_mgmt/imgs_sorted/drawings_svg_cropped/1485.svg points: raw=994, rdp=177
2832.svg: 55 strokes -> 9 joined -> 4 spliced
../../svg-dataset/sketch_mgmt/imgs_sorted/drawings_svg_cropped/2832.svg points: raw=970, rdp=288
2826.svg: 36 strokes -> 7 joined -> 3 spliced
../../svg-dataset/sketch_mgmt/imgs_sorted/drawings_svg_cropped/2826.svg points: raw=994, rdp=226
2198.svg: 39 strokes -> 6 joined -> 4 spliced
../../svg-dataset/sketch_mgmt/imgs_sorted/drawings_svg_cropped/2198.svg points: raw=920, rdp=171
0957.svg: 14 strokes -> 3 joined -> 3 spliced
../../svg-dataset/sketch_mgmt/imgs_sorted/drawings_svg_cropped/0957.svg points: raw=1000, rdp=30
1491.svg: 40 strokes -> 7 joined -> 1 spliced
../../svg-dataset/sketch_mgmt/imgs_sorted/drawings_svg_cropped/1491.svg points: raw=987, rdp=252
0758.svg: 34 strokes -> 9 joined -> 4 spliced
../../svg-dataset/

NameError: name 'full_dataset' is not defined

In [None]:
# Report how many drawings made it into the dataset, then persist it.
print("-" * 50)
print(len(full_dataset_eps05))
print("-" * 50)
# NOTE: `encoding` and `allow_pickle` are options for reading the archive
# (np.load), not for writing it. np.savez stores keyword arguments it does not
# recognise as additional named arrays, so passing them here would add stray
# entries to the file. The dataset is stored under the default key "arr_0";
# load it with
#   np.load(path, allow_pickle=True, encoding="latin1")["arr_0"]
np.savez(
    "../datasets/v2-splice-eps05-j15-s40.npz",
    full_dataset_eps05,
)

--------------------------------------------------
1651
--------------------------------------------------


In [None]:
!ls ../../svg-dataset/sketch_mgmt/imgs_sorted/drawings_svg_cropped/ | wc -l

    1651


In [None]:
# Spot-check a single drawing: load it with the same total_n/min_n defaults as
# svgs_to_deltas, plot the raw strokes, then merge with a distance threshold
# of 10.0 (svgs_to_deltas uses 15.0 for this step).
rescaled_strokes = svg_to_strokes(
    "../../svg-dataset/sketch_mgmt/imgs_sorted/drawings_svg_cropped/1812.svg",
    total_n=1000,
    min_n=3,
)
plot_strokes(rescaled_strokes)
joined_strokes, _ = merge_until(rescaled_strokes, dist_threshold=10.0)

In [None]:
#| hide
import nbdev

# Run nbdev's export step so the cells marked `#| export` in this notebook are
# written out to the library module named by `#| default_exp`.
nbdev.nbdev_export()