# svg_dataset

> End-to-end functions that take in centerline-stroke SVGs and output deltas in Stroke-3 format.

In [None]:
#| default_exp stroke3.svg_dataset

In [None]:
#| hide
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
from singleline_dataset.stroke3.path_transforms import *
from singleline_dataset.stroke3.stroke3 import *


In [None]:
#| export


def stroke_rdp_deltas(rescaled_strokes, epsilon=2.0):
    """Simplify strokes with RDP and return them in delta form.

    `rescaled_strokes` and `epsilon` are handed to `rdp_strokes`; the
    simplified strokes are then passed through `strokes_to_deltas`, whose
    result is returned as-is.
    """
    # Round-trip sanity check, kept for manual debugging:
    # _rdp_result = stroke3.deltas_to_strokes(deltas)
    # display_plot.plot_strokes(_rdp_result)
    return strokes_to_deltas(rdp_strokes(rescaled_strokes, epsilon))
    

In [None]:
#| export
import os

import numpy as np

from singleline_dataset.stroke3.display_svg import render_deltas, render_strokes
from singleline_dataset.stroke3.display_plot import *
from singleline_dataset.stroke3.stroke3 import *
from singleline_dataset.stroke3.svg_files import enumerate_files
from singleline_dataset.stroke3.path_joining import merge_until, splice_until


def svgs_to_deltas(
    input_dir,
    output_dir=None,
    target_size=200,
    total_n=1000,
    min_n=3,
    epsilon=1.0,
    limit=None,
    merge_dist_threshold=10.0,
    splice_dist_threshold=30.0,
):
    """Convert every SVG found under `input_dir` into a deltas array.

    For each file: load strokes with `svg_to_strokes`, join them with
    `merge_until`, splice them with `splice_until`, then simplify and convert
    with `stroke_rdp_deltas`. Files that raise are reported on stdout and
    skipped, so one bad drawing does not abort the whole run.

    Args:
        input_dir: Directory passed to `enumerate_files`; each returned name
            is joined onto it to form the input path.
        output_dir: If truthy, debug artifacts are written to
            `<output_dir>/png` and `<output_dir>/svg` (created if missing).
        target_size: Forwarded to `render_strokes` / `render_deltas`.
        total_n: Forwarded to `svg_to_strokes`.
        min_n: Forwarded to `svg_to_strokes`.
        epsilon: Forwarded to `stroke_rdp_deltas`.
        limit: Maximum number of files to process; `None` means no limit.
        merge_dist_threshold: `dist_threshold` passed to `merge_until`.
        splice_dist_threshold: `dist_threshold` passed to `splice_until`.

    Returns:
        A 1-D NumPy object array with one deltas entry per successfully
        processed file.
    """
    if output_dir:
        for subdir in ('svg', 'png'):
            os.makedirs(os.path.join(output_dir, subdir), exist_ok=True)

    def new_suffix(subdir, fname, suffix):
        # Swap only a trailing ".svg" for the new suffix; names without that
        # extension still get the suffix so the outputs never collide.
        stem = fname[:-len(".svg")] if fname.endswith(".svg") else fname
        return os.path.join(output_dir, subdir, stem + suffix)

    dataset = []
    for i, fname in enumerate(enumerate_files(input_dir)):
        # `i` is zero-based, so stop once `limit` files have been visited.
        if limit is not None and i >= limit:
            break
        input_fname = os.path.join(input_dir, fname)

        try:
            rescaled_strokes = svg_to_strokes(input_fname, total_n=total_n, min_n=min_n)

            joined_strokes, _ = merge_until(rescaled_strokes, dist_threshold=merge_dist_threshold)
            spliced_strokes, _ = splice_until(joined_strokes, dist_threshold=splice_dist_threshold)

            print(f"{fname}: {len(rescaled_strokes)} strokes -> {len(joined_strokes)} joined -> {len(spliced_strokes)} spliced")

            deltas = stroke_rdp_deltas(spliced_strokes, epsilon=epsilon)
            dataset.append(deltas)

            # monitor number of points before/after applying RDP path simplification algorithm
            raw_points = np.vstack(rescaled_strokes).shape[0]
            rdp_points = deltas.shape[0]
            print(f"{input_fname} points: raw={raw_points}, rdp={rdp_points}")

            if output_dir:
                plot_strokes(rescaled_strokes, fname=new_suffix('png', fname, '.0_strokes.png'))
                plot_strokes(joined_strokes, fname=new_suffix('png', fname, '.1_joined.png'))
                plot_strokes(spliced_strokes, fname=new_suffix('png', fname, '.2_spliced.png'))
                plot_strokes(deltas_to_strokes(deltas), fname=new_suffix('png', fname, '.3_deltas.png'))

                # Render before opening so a rendering failure does not leave
                # an empty file behind.
                raw_output_fname = new_suffix('svg', fname, ".raw.svg")
                raw_dwg = render_strokes(rescaled_strokes, target_size=target_size)
                with open(raw_output_fname, "w", encoding="utf-8") as raw_out:
                    raw_dwg.write(raw_out, pretty=True)
                print(f"\twrote {raw_output_fname}")

                # Write through the open handle, exactly like the raw drawing.
                # NOTE(review): assumes render_deltas returns the same drawing
                # type as render_strokes (one with `.write(fileobj, pretty=...)`).
                preproc_output_fname = new_suffix('svg', fname, ".preproc.svg")
                preproc_dwg = render_deltas(deltas, target_size=target_size)
                with open(preproc_output_fname, "w", encoding="utf-8") as preproc_out:
                    preproc_dwg.write(preproc_out, pretty=True)
                print(f"\twrote {preproc_output_fname}")
        except Exception as e:
            # Deliberate best-effort: report the failing file and keep going.
            print(f"error processing idx={i} input_fname={input_fname}: {e}")

    # Fill a pre-sized object array element by element. np.array(dataset,
    # dtype=object) would merge equally-shaped entries into one N-D array
    # instead of keeping one entry per drawing.
    result = np.empty(len(dataset), dtype=object)
    for idx, item in enumerate(dataset):
        result[idx] = item
    return result

In [None]:
# input_dir = '../data/svg/'
# output_dir = '../outputs'

# # debug: only run for the first 10 files
# limit = 10

# _ = svgs_to_deltas(input_dir, output_dir, limit=limit)

In [None]:
# partial_dataset = svgs_to_deltas(input_dir, output_dir, limit=None)

In [None]:
# len(partial_dataset)

# np.savez('../outputs/subset.npz', partial_dataset, encoding='latin1', allow_pickle=True)

In [None]:
# Convert every SVG in the cropped-drawings directory (limit=None means no
# file cap); debug artifacts are written under ../outputs.
full_dataset = svgs_to_deltas('../../svg-dataset/sketch_mgmt/imgs_sorted/drawings_svg_cropped/', '../outputs', limit=None)


Minimum distance: 0.2733557172226476
From 3_0 (13 points)
To 10_0 (13 points)
[0] - len(curr_strokes) = 22, min_dist = 0.2733557172226476
Minimum distance: 0.3594045412019282
From 4_0 (113 points)
To 16_-1 (113 points)
[1] - len(curr_strokes) = 21, min_dist = 0.3594045412019282
Minimum distance: 0.4407728498440062
From 9_0 (13 points)
To 12_0 (13 points)
[2] - len(curr_strokes) = 20, min_dist = 0.4407728498440062
Minimum distance: 0.4653403426611523
From 8_-1 (25 points)
To 13_0 (25 points)
[3] - len(curr_strokes) = 19, min_dist = 0.4653403426611523
Minimum distance: 0.6613861104541229
From 1_-1 (259 points)
To 11_0 (259 points)
[4] - len(curr_strokes) = 18, min_dist = 0.6613861104541229
Minimum distance: 0.7238222327333038
From 3_-1 (130 points)
To 17_0 (130 points)
[5] - len(curr_strokes) = 17, min_dist = 0.7238222327333038
Minimum distance: 0.7904821906758656
From 6_0 (31 points)
To 9_-1 (31 points)
[6] - len(curr_strokes) = 16, min_dist = 0.7904821906758656
Minimum distance: 1.2372

TypeError: list indices must be integers or slices, not NoneType

In [None]:
# Re-run the load and merge steps on a single drawing (1812.svg), with the same
# total_n / min_n / dist_threshold values that svgs_to_deltas uses by default.
# NOTE(review): presumably this is the file behind the TypeError recorded in
# the previous cell's output — confirm.
rescaled_strokes = svg_to_strokes('../../svg-dataset/sketch_mgmt/imgs_sorted/drawings_svg_cropped/1812.svg', total_n=1000, min_n=3)
plot_strokes(rescaled_strokes)  # visual check of the strokes before merging
joined_strokes, _ = merge_until(rescaled_strokes, dist_threshold=10.0)

In [None]:
#| hide
import nbdev

# Run nbdev's export step, which writes the `#| export` cells out as library
# modules (this notebook targets the module named in its `#| default_exp` cell).
nbdev.nbdev_export()