In [20]:
import math
import time
import timeit
from itertools import permutations
from pathlib import Path

import numpy as np

from score_strokes import alignStrokes, strokeErrorMatrix
from xmlparse import getXmlScore, loadGeometryBases, loadRef, minXml

## Dynamic Programming Algorithm

In [42]:
"""
May not be a true dynamic programming implementation. At the moment the algorithm is incompatible
with Holiday's API because it requires scoring multiple stroke orders (not O(n)).

Results:
Currently, the dynamic algorithm is significantly more accurate than the greedy algorithm but is also
inordinately slow, being even slower than the exhaustive search by a giant margin. What costs a lot of
time is the process of finding the candidate stroke orders, not querying the Stylus API. Need to fix
this because in theory the dynamic algorithm should be faster than the exhaustive search.
"""
def dynamic(ref_char, ref_data, char_data):
    """Score every gene with the best of its greedy candidate stroke orders.

    For each gene, every permutation of the reference-stroke indices is used as
    a priority order: reference strokes are visited in that order and each one
    claims the still-unclaimed gene stroke with the smallest error in its row of
    the error matrix. Every distinct stroke map produced this way is scored
    through ``minXml``/``getXmlScore`` and the highest score is kept.

    Args:
        ref_char: Passed through unchanged to ``minXml``.
        ref_data: 3-item sequence. The first two items (reference strokes and
            their companion data) are forwarded to ``strokeErrorMatrix``; the
            third is not used here.
        char_data: 6-item sequence. Items 0, 2, 3 and 5 are iterated in
            parallel as ``(strokes, p_strokes)`` pairs, bases, stroke sets and
            file names; items 1 and 4 are not used here.

    Returns:
        A list with one entry per gene: the maximum score over that gene's
        candidate stroke maps.

    Notes:
        The number of priority orders is ``len(ref)!``, so this is only
        practical for characters with few strokes. A slot in a stroke map holds
        the index of the reference stroke assigned to that gene stroke, or -1
        if none was assigned; maps are shifted by +1 before going to ``minXml``.
    """
    ref, p_ref, _ = ref_data
    g_data, _, base_data, stroke_sets, _, f_names = char_data
    ref_count = len(ref)
    heuristic_scores = []
    # f_names is not read, but stays in the zip so iteration still stops at the
    # shortest of the four parallel sequences.
    for (strokes, p_strokes), bases, stroke_set, _f_name in zip(g_data, base_data, stroke_sets, f_names):
        # The matrix depends only on the gene, never on the priority order, so it
        # is computed once per gene instead of once per permutation.
        # Assumes one row per reference stroke and one column per gene stroke, as
        # the indexing below implies -- TODO confirm against strokeErrorMatrix.
        error_maps = np.asarray(strokeErrorMatrix(strokes, ref, p_strokes, p_ref), dtype=float)
        stroke_count = len(strokes)

        # Find candidate stroke orders (deduplicated, in first-seen order).
        candidates = {}
        for priority in permutations(range(ref_count)):
            stroke_map = np.full(stroke_count, -1)
            for i in priority:
                free_slots = np.flatnonzero(stroke_map == -1)
                if free_slots.size == 0:
                    # More reference strokes than gene strokes: the remaining
                    # reference strokes stay unassigned instead of looping forever.
                    break
                # Choose only among unclaimed gene strokes. This replaces the old
                # "overwrite with 10000 and retry" masking, which mutated the
                # matrix and never terminated once real errors reached 10000.
                row = error_maps[i]
                stroke_map[free_slots[np.argmin(row[free_slots])]] = i
            candidates.setdefault(tuple(stroke_map.tolist()), stroke_map)

        # Retrieve scores for each candidate stroke order and keep the best one.
        candidate_scores = [
            getXmlScore(minXml(ref_char, bases, stroke_set, stroke_map + 1))
            for stroke_map in candidates.values()
        ]
        heuristic_scores.append(max(candidate_scores))
    return heuristic_scores


"""
Alternative function that attempts to estimate the stroke ordering with the smallest error before
querying the Stylus API. Should significantly reduce the number of calls to Stylus. Compatible
with Holiday's API.
"""
def dynamic2(strokes, ref, p_strokes, p_ref):
    """Estimate the lowest-error stroke alignment without querying Stylus.

    Every permutation of the reference-stroke indices is used as a priority
    order: reference strokes are visited in that order and each one claims the
    still-unclaimed gene stroke with the smallest error in its row of the error
    matrix. The stroke map whose claimed errors add up to the smallest total is
    returned, so a caller needs to score only that single alignment.

    The signature matches how ``compareHeuristic`` invokes an algorithm:
    ``algo(geometry, ref_geometry, progress_percentage, ref_progress_percentage)``.

    Args:
        strokes: Gene strokes; only their count is used directly.
        ref: Reference strokes; only their count is used directly.
        p_strokes: Companion data for ``strokes``, forwarded to ``strokeErrorMatrix``.
        p_ref: Companion data for ``ref``, forwarded to ``strokeErrorMatrix``.

    Returns:
        A NumPy integer array of length ``len(strokes)``. Slot ``k`` holds the
        0-based index of the reference stroke assigned to gene stroke ``k``, or
        -1 if no reference stroke was assigned. Ties on total error keep the
        first map found.

    Notes:
        The search visits ``len(ref)!`` priority orders, so it is only practical
        for characters with few strokes.
    """
    # Computed once: the matrix does not depend on the priority order.
    # Assumes one row per reference stroke and one column per gene stroke -- the
    # same layout `dynamic` indexes -- TODO confirm against strokeErrorMatrix.
    error_maps = np.asarray(strokeErrorMatrix(strokes, ref, p_strokes, p_ref), dtype=float)
    stroke_count = len(strokes)

    best_map = np.full(stroke_count, -1)
    best_error = math.inf
    for priority in permutations(range(len(ref))):
        stroke_map = np.full(stroke_count, -1)
        total_error = 0.0
        for i in priority:
            free_slots = np.flatnonzero(stroke_map == -1)
            if free_slots.size == 0:
                # More reference strokes than gene strokes: leave the rest unassigned.
                break
            row = error_maps[i]
            slot = free_slots[np.argmin(row[free_slots])]
            stroke_map[slot] = i
            total_error += row[slot]
        if total_error < best_error:
            best_error = total_error
            best_map = stroke_map
    return best_map
                

# Notebook-level setup followed by a run of `dynamic` on the loaded data.
# Every name assigned here is a notebook global; `compareHeuristic` reads
# `ref_char`, `ref_data` and `char_data` are passed to it further down.
ref_dir = f'{str(Path.home())}/Stylus_Scoring_Generalization/Reference' # archetype directory
data_dir = f'{str(Path.home())}/Stylus_Scoring_Generalization/NewGenes' # gene directory
# Identifier of the reference character, handed to loadRef here and to minXml
# during scoring. NOTE(review): presumably a hexadecimal Unicode code point -- confirm.
ref_char = "6709"

# ref_data is unpacked elsewhere in this notebook as a 3-item sequence; its third
# item (named `output_size` in compareHeuristic) is what loadGeometryBases receives.
ref_data = loadRef(ref_char, ref_dir)
char_data = loadGeometryBases(data_dir, ref_data[2])

# Last expression of the cell: the returned list of per-gene scores is displayed.
dynamic(ref_char, ref_data, char_data)
# NOTE(review): dynamic2 is declared as dynamic2(strokes, ref, p_strokes, p_ref), so
# the disabled call below does not match its signature and would fail if re-enabled.
#dynamic2(ref_char, ref_data, char_data)

[0.007614231345445199]

## Heuristic Comparison Function

In [28]:
"""
Function to compare two heuristic algorithms' accuracy and performance.
"""
def compareHeuristic(algo1, algo2, ref_data, char_data, trials, ref_char=None):
    """Compare two stroke-alignment heuristics on accuracy and run time.

    Each algorithm is called once per gene as
    ``algo(geometry, ref_geometry, progress_percentage, ref_progress_percentage)``
    and must return a 0-based alignment; 1 is added before it is handed to
    ``minXml``. Accuracy is the number of genes on which one algorithm's score
    is strictly higher than the other's. Timing covers the whole per-gene
    pipeline (alignment plus ``minXml``/``getXmlScore``) repeated ``trials`` times.

    Args:
        algo1: First alignment algorithm.
        algo2: Second alignment algorithm.
        ref_data: 3-item sequence; the first two items are forwarded to the
            algorithms as the reference arguments, the third is not used here.
        char_data: 6-item sequence; items 0, 2, 3 and 5 are iterated in parallel
            as ``(geometry, progress_percentage)`` pairs, bases, stroke sets and
            file names; items 1 and 4 are not used here.
        trials: Number of timed repetitions per algorithm.
        ref_char: Reference character passed to ``minXml``. When omitted, the
            notebook-level global ``ref_char`` is used, which is what this
            function always relied on before the parameter existed.

    Returns:
        None. The timings and win counts are printed.

    Raises:
        NameError: If ``ref_char`` is omitted and no global ``ref_char`` exists.
    """
    if ref_char is None:
        # Backward compatibility: fall back to the notebook global, but fail up
        # front with a clear message instead of deep inside the scoring loop.
        try:
            ref_char = globals()["ref_char"]
        except KeyError:
            raise NameError(
                "name 'ref_char' is not defined; pass ref_char=... or define it in the notebook"
            ) from None

    ref_geometry, ref_progress_percentage, _ = ref_data
    g_data, _, base_data, stroke_sets, _, f_names = char_data

    def benchmark(algo):
        """Return the score obtained with ``algo``'s alignment for every gene."""
        scores = []
        # f_names is not read, but stays in the zip so iteration still stops at
        # the shortest of the four parallel sequences.
        for (geometry, progress_percentage), bases, stroke_set, _f_name in zip(
            g_data, base_data, stroke_sets, f_names
        ):
            alignment = np.array(
                algo(geometry, ref_geometry, progress_percentage, ref_progress_percentage)
            ) + 1
            scores.append(getXmlScore(minXml(ref_char, bases, stroke_set, alignment)))
        return scores

    scores1 = benchmark(algo1)
    scores2 = benchmark(algo2)
    wins1 = sum(1 for score1, score2 in zip(scores1, scores2) if score1 > score2)
    wins2 = sum(1 for score1, score2 in zip(scores1, scores2) if score2 > score1)

    # Time callables directly rather than a code string evaluated against locals().
    results1 = timeit.timeit(lambda: benchmark(algo1), number=trials)
    results2 = timeit.timeit(lambda: benchmark(algo2), number=trials)
    time.sleep(1) # to make it easier to find results
    print("The first algorithm took", results1, "seconds to execute", trials, "times.")
    print("The second algorithm took", results2, "seconds to execute", trials, "times.")
    print("The first algorithm scored", wins1, "genes more accurately than the second algorithm.")
    print("The second algorithm scored", wins2, "genes more accurately than the first algorithm.")
    print("The first and second algorithm scored", len(scores1)-wins1-wins2, "genes identically.")

# The higher the trial number, the more accurate the benchmark results, but I can't turn off Stylus output...
compareHeuristic(alignStrokes, alignStrokes, ref_data, char_data, 100)

2024-06-10T19:30:08.272526Z [INFO ] Loaded genome  containing 1 genes - trial set to 0
2024-06-10T19:30:08.272597Z [INFO ] TRIAL 0: Fitness is 0.001342939948137
2024-06-10T19:30:08.280490Z [INFO ] Loaded genome  containing 1 genes - trial set to 0
2024-06-10T19:30:08.280544Z [INFO ] TRIAL 0: Fitness is 0.001342939948137
2024-06-10T19:30:08.287687Z [INFO ] Loaded genome  containing 1 genes - trial set to 0
2024-06-10T19:30:08.287729Z [INFO ] TRIAL 0: Fitness is 0.001342939948137
2024-06-10T19:30:08.295239Z [INFO ] Loaded genome  containing 1 genes - trial set to 0
2024-06-10T19:30:08.295280Z [INFO ] TRIAL 0: Fitness is 0.001342939948137
2024-06-10T19:30:08.301611Z [INFO ] Loaded genome  containing 1 genes - trial set to 0
2024-06-10T19:30:08.301649Z [INFO ] TRIAL 0: Fitness is 0.001342939948137
2024-06-10T19:30:08.305905Z [INFO ] Loaded genome  containing 1 genes - trial set to 0
2024-06-10T19:30:08.305949Z [INFO ] TRIAL 0: Fitness is 0.001342939948137
2024-06-10T19:30:08.310552Z [INFO 

The first algorithm took 0.4302054198924452 seconds to execute 100 times.
The second algorithm took 0.5223035709932446 seconds to execute 100 times.
The first algorithm scored 0 genes more accurately than the second algorithm.
The second algorithm scored 0 genes more accurately than the first algorithm.
The first and second algorithm scored 1 genes identically.
