In [149]:
import time
import timeit
from itertools import permutations
from pathlib import Path

import numpy as np

from score_strokes import alignStrokes, strokeErrorMatrix
from xmlparse import getXmlScore, loadGeometryBases, loadRef, minXml

## Dynamic Programming Algorithm

In [150]:
"""
Experimental search over stroke orders, written as a first step towards a dynamic programming algorithm.
Every priority ordering of the archetype strokes is tried (factorial in the number of archetype strokes) and each
distinct stroke order that results is scored. The design is probably not a real dynamic programming algorithm yet
and still requires heavy testing. At the moment the algorithm is incompatible with Holiday's API because it
requires scoring multiple stroke orders (not O(n)).
"""

# Testing a dynamic programming algorithm
def _greedy_stroke_map(error_maps, priority, stroke_count):
    """
    Build one candidate stroke order by greedy assignment.

    Archetype strokes are processed in `priority` order; each one takes the still-unassigned gene stroke
    with the smallest error in its row of `error_maps`. `error_maps` is only read, never modified, so the
    same matrix can be reused for every priority ordering.

    error_maps   -- indexed as error_maps[archetype_stroke][gene_stroke] (assumed layout; it is how the
                    rows are consumed here -- confirm against strokeErrorMatrix)
    priority     -- iterable of archetype stroke indices
    stroke_count -- number of gene strokes

    Returns an integer array of length `stroke_count` whose entry j is the archetype stroke index assigned
    to gene stroke j, or -1 if gene stroke j was left unassigned.
    """
    stroke_map = np.full(stroke_count, -1)
    for ref_index in priority:
        row = np.asarray(error_maps[ref_index], dtype=float)
        # Only gene strokes that are still free (and that this row has an error for) may be chosen.
        free = np.flatnonzero(stroke_map[:len(row)] == -1)
        if free.size == 0:
            # More archetype strokes than gene strokes: nothing is left to assign, so stop
            # instead of searching forever for a free slot.
            break
        # argmin returns the first minimum, so ties go to the lowest gene stroke index.
        stroke_map[free[np.argmin(row[free])]] = ref_index
    return stroke_map


def dynamic(ref_char, ref_data, char_data):
    """
    Score every distinct greedy stroke order for every gene in `char_data`.

    ref_char  -- reference character identifier, passed through to minXml
    ref_data  -- 3-tuple; the first two elements are the archetype strokes and their progress values
    char_data -- 6-tuple; elements 0, 2, 3 and 5 are per-gene sequences of (strokes, progress) pairs,
                 bases, stroke sets and file names

    Returns a flat list of scores: for each gene in order, one score per distinct candidate stroke order,
    in the order the candidates were first produced.
    """
    ref, p_ref, _ = ref_data
    g_data, _, base_data, stroke_sets, _, f_names = char_data
    heuristic_scores = []
    for (strokes, p_strokes), bases, stroke_set, _ in zip(g_data, base_data, stroke_sets, f_names):
        error_maps = strokeErrorMatrix(strokes, ref, p_strokes, p_ref)
        # Find candidate stroke orders.
        stroke_maps = []
        seen = set()
        # permutations() is a single-pass iterator, so it has to be rebuilt for every gene.
        for priority in permutations(range(len(ref))):
            stroke_map = _greedy_stroke_map(error_maps, priority, len(strokes))
            key = tuple(stroke_map.tolist())
            if key not in seen:
                seen.add(key)
                stroke_maps.append(stroke_map)
        # Retrieve scores for each candidate stroke order (minXml is given 1-based stroke numbers).
        for stroke_map in stroke_maps:
            heuristic_xml = minXml(ref_char, bases, stroke_set, stroke_map + 1)
            heuristic_scores.append(getXmlScore(heuristic_xml))
    return heuristic_scores

# Character to score, plus where its archetype and the candidate genes live under the home directory.
ref_char = "6709"
ref_dir = f'{Path.home()}/Stylus_Scoring_Generalization/Reference'  # archetype directory
data_dir = f'{Path.home()}/Stylus_Scoring_Generalization/NewGenes'  # gene directory

# Load the archetype first: its third element is needed to load the genes.
ref_data = loadRef(ref_char, ref_dir)
char_data = loadGeometryBases(data_dir, ref_data[2])

# Last expression of the cell, so the list of scores is displayed as the cell output.
dynamic(ref_char, ref_data, char_data)

2024-06-05T22:56:01.525440Z [INFO ] Loaded genome  containing 1 genes - trial set to 0


[0.0001031825318209632]

2024-06-05T22:56:01.525508Z [INFO ] TRIAL 0: Fitness is 0.001342939948137


## Heuristic Comparison Function

In [151]:
def compareHeuristic(algo1, algo2, ref_data, char_data, trials, ref_char=None):
    """
    Function to compare two heuristic algorithms' accuracy and performance.

    Each algorithm is called as algo(geometry, ref_geometry, progress_percentage, ref_progress_percentage)
    for every gene; its result is shifted by +1 and handed to minXml, and the resulting XML is scored with
    getXmlScore. A gene counts as a win for the algorithm with the strictly higher score. Both algorithms
    are then timed over `trials` full passes and the timings and win counts are printed.

    algo1, algo2 -- the two alignment heuristics to compare
    ref_data     -- 3-tuple; the first two elements are the archetype geometry and progress values
    char_data    -- 6-tuple; elements 0, 2, 3 and 5 are per-gene sequences of (geometry, progress) pairs,
                    bases, stroke sets and file names
    trials       -- number of timed passes per algorithm
    ref_char     -- reference character passed to minXml; when omitted, the notebook-level `ref_char`
                    is used so existing calls keep working

    Returns None; results are reported with print().
    Raises NameError if `ref_char` is omitted and no notebook-level `ref_char` exists.
    """
    if ref_char is None:
        # Fall back to the notebook-level setting that this function used to read implicitly.
        try:
            ref_char = globals()["ref_char"]
        except KeyError:
            raise NameError("name 'ref_char' is not defined; pass ref_char explicitly") from None

    ref_geometry, ref_progress_percentage, _ = ref_data
    g_data, _, base_data, stroke_sets, _, f_names = char_data

    def benchmark(algo):
        """Run `algo` on every gene; return (scores, alignments) in gene order."""
        heuristic_scores = []
        heuristic_alignments = []
        for (geometry, progress_percentage), bases, stroke_set, _ in zip(g_data, base_data, stroke_sets, f_names):
            # +1 because minXml is given 1-based stroke numbers.
            heuristic_alignment = np.array(algo(geometry, ref_geometry, progress_percentage, ref_progress_percentage)) + 1
            heuristic_alignments.append(heuristic_alignment)
            heuristic_xml = minXml(ref_char, bases, stroke_set, heuristic_alignment)
            heuristic_scores.append(getXmlScore(heuristic_xml))
        return heuristic_scores, heuristic_alignments

    # Accuracy: one untimed pass per algorithm, compared gene by gene (ties count for neither).
    scores1, _ = benchmark(algo1)
    scores2, _ = benchmark(algo2)
    wins1 = 0
    wins2 = 0
    for score1, score2 in zip(scores1, scores2):
        if score1 > score2:
            wins1 += 1
        elif score2 > score1:
            wins2 += 1

    # Performance: time callables directly instead of evaluating source strings against locals().
    results1 = timeit.timeit(lambda: benchmark(algo1), number=trials)
    results2 = timeit.timeit(lambda: benchmark(algo2), number=trials)
    time.sleep(1) # to make it easier to find results
    print("The first algorithm took", results1, "seconds to execute", trials, "times.")
    print("The second algorithm took", results2, "seconds to execute", trials, "times.")
    print("The first algorithm scored", wins1, "genes more accurately than the second algorithm.")
    print("The second algorithm scored", wins2, "genes more accurately than the first algorithm.")

# The higher the trial number, the more accurate the benchmark results, but Stylus logs every scoring call
# and that output cannot be turned off from here, so the cell output grows with the number of trials.
# alignStrokes is compared against itself here, so neither side is expected to win any gene.
compareHeuristic(alignStrokes, alignStrokes, ref_data, char_data, 100)

2024-06-05T22:56:01.541344Z [INFO ] Loaded genome  containing 1 genes - trial set to 0
2024-06-05T22:56:01.541404Z [INFO ] TRIAL 0: Fitness is 0.001342939948137
2024-06-05T22:56:01.547837Z [INFO ] Loaded genome  containing 1 genes - trial set to 0
2024-06-05T22:56:01.547880Z [INFO ] TRIAL 0: Fitness is 0.001342939948137
2024-06-05T22:56:01.554272Z [INFO ] Loaded genome  containing 1 genes - trial set to 0
2024-06-05T22:56:01.554315Z [INFO ] TRIAL 0: Fitness is 0.001342939948137
2024-06-05T22:56:01.560936Z [INFO ] Loaded genome  containing 1 genes - trial set to 0
2024-06-05T22:56:01.560973Z [INFO ] TRIAL 0: Fitness is 0.001342939948137
2024-06-05T22:56:01.567860Z [INFO ] Loaded genome  containing 1 genes - trial set to 0
2024-06-05T22:56:01.567900Z [INFO ] TRIAL 0: Fitness is 0.001342939948137
2024-06-05T22:56:01.574086Z [INFO ] Loaded genome  containing 1 genes - trial set to 0
2024-06-05T22:56:01.574122Z [INFO ] TRIAL 0: Fitness is 0.001342939948137
2024-06-05T22:56:01.580187Z [INFO 

The first algorithm took 0.4470476279966533 seconds to execute 100 times.
The second algorithm took 0.42553735896945 seconds to execute 100 times.
The first algorithm scored 0 genes more accurately than the second algorithm.
The second algorithm scored 0 genes more accurately than the first algorithm.
