In [87]:
from contextlib import contextmanager
from itertools import permutations
from math import factorial
from pathlib import Path

import os
import sys
import time
import timeit

import numpy as np
import pandas as pd
import xmltodict

import logging

from score_strokes import alignStrokes
from xmlparse import loadGeometryBases, loadRef, getXmlScore, minXml

In [88]:
# Edited from exhaustive.py
def computeExhaustive(ref_char, f_read, data_dir, exhaust_dir = "Exhaustive", prog_interval = 100, save = True, xml_dir = "GenXml/Exhaustive", save_file = ""):
    """
    Score every possible stroke ordering of each sample against a reference character.

    This is a generator: for each sample returned by loadGeometryBases it yields
    one numpy array holding the score of every permutation of the stroke
    indices 1..n_strokes, in the order produced by itertools.permutations.

    Parameters
    ----------
    ref_char : str
        Identifier of the reference character; passed to loadRef and minXml
        and used in the default output file name.
    f_read : sequence of str
        Sample file names, forwarded to loadGeometryBases. f_read[i] is used
        to label the files of the i-th loaded sample, so this assumes samples
        come back in the same order as f_read (TODO confirm against xmlparse).
    data_dir : str
        Directory passed to loadGeometryBases.
    exhaust_dir : str
        Directory used for the default .npy output path when save_file is "".
    prog_interval : int
        Currently unused; kept so existing callers keep working.
    save : bool
        When true, each sample's score array is written with np.save.
    xml_dir : str
        Prefix of the two paths handed to getXmlScore for every permutation.
    save_file : str
        Explicit output path. When "" (the default) a per-sample path
        "{exhaust_dir}/exhaust_{ref_char}_{sample}.npy" is used instead.
        NOTE: a non-empty save_file is reused for every sample, so with more
        than one sample the later arrays overwrite the earlier ones.

    Yields
    ------
    numpy.ndarray
        Array of length factorial(n_strokes), where n_strokes is the length of
        the first value returned by loadRef.

    Notes
    -----
    Reads the module-level name `ref_dir`, which must be defined before the
    generator is first advanced.
    """
    ref_g, _, output_size = loadRef(ref_char, ref_dir)
    g_data, _, base_data, stroke_sets, _, _ = loadGeometryBases(data_dir, output_size, f_read = f_read)
    n_strokes = len(ref_g)
    n_perms = factorial(n_strokes)
    for i in range(len(g_data)):
        bases = base_data[i]
        stroke_set = stroke_sets[i]
        exhaustive_scores = np.zeros(n_perms)
        # Stroke indices are 1-based in the permutations handed to minXml.
        for j, p in enumerate(permutations(range(1, n_strokes+1))):
            p_xml = minXml(ref_char, bases, stroke_set, p)
            exhaustive_scores[j] = getXmlScore(p_xml, f"{xml_dir}/{i}_{j}_{f_read[i]}", f"{xml_dir}/{i}_{j}_min_{f_read[i]}")
        if save:
            # Resolve the output path per sample in a local variable: assigning
            # the default back to save_file would make every later sample
            # reuse the first sample's path.
            out_path = save_file
            if out_path == "":
                f_name_cleaned = f_read[i].replace("/", "_")
                out_path = f"{exhaust_dir}/exhaust_{ref_char}_{f_name_cleaned}.npy"
            np.save(out_path, exhaustive_scores)
            # Report only after the write has actually succeeded.
            print(f"Wrote exhaustive scores to {out_path}")
        yield exhaustive_scores

## Exhaustive Comparison Function

In [89]:
"""
Function to compare a heuristic algorithm's accuracy and performance against the exhaustive search.
"""
def compareExhaustive(algo, ref_data, char_data, trials):
    ref_geometry, ref_progress_percentage, output_size = ref_data
    g_data, han_chars, base_data, stroke_sets, _, f_names = char_data
    
    
    def heuristic(algo):
        heuristic_scores = []
        heuristic_alignments = []
        for (geometry_length, bases, stroke_set, f_name) in zip(g_data, base_data, stroke_sets, f_names):
            geometry, progress_percentage = geometry_length
            heuristic_alignment = np.array(algo(geometry, ref_geometry, progress_percentage, ref_progress_percentage))+1
            heuristic_alignments.append(heuristic_alignment)
            heuristic_xml = minXml(ref_char, bases, stroke_set, heuristic_alignment)
            heuristic_score = getXmlScore(heuristic_xml)
            heuristic_scores.append(heuristic_score)
        return heuristic_scores, heuristic_alignments

    def exhaustive():
        exhaustive_scores = []
        for (gl, han_char, bases, f_name) in zip(g_data, han_chars, base_data, f_names):
            g, l = gl
            exhaust_maxes = []
            for e in computeExhaustive(ref_char, [f_name], data_dir, save = False, xml_dir = f'{str(Path.home())}/Stylus_Scoring_Generalization/GenXml/Exhaustive'):
                exhaust_maxes.append(e.max())
            original_score = np.max(exhaust_maxes)
            exhaustive_scores.append(original_score)
        return exhaustive_scores

    wins1 = 0
    wins2 = 0
    scores1, _ = heuristic(algo)
    scores2 = exhaustive()

    for (score1, score2) in zip(scores1, scores2):
        if score1 > score2:
            wins1 += 1
        elif score2 > score1:
            wins2 += 1

    results1 = timeit.timeit("heuristic(algo)", number=trials, globals=locals())
    results2 = timeit.timeit("exhaustive()", number=trials, globals=locals())
    time.sleep(5) # to make it easier to find results
    print("The heuristic algorithm took", results1, "seconds to execute", trials, "times.")
    print("The exhaustive search took", results2, "seconds to execute", trials, "times.")
    print("The heuristic algorithm scored", wins1, "genes more accurately than the exhaustive search.")
    print("The exhaustive search scored", wins2, "genes more accurately than the heuristic algorithm.")
    print("The heuristic algorithm and exhaustive search scored", len(scores1)-wins1-wins2, "genes identically.")

In [90]:
# Reference character to evaluate, plus the directories it is loaded from.
ref_char = "6709"
ref_dir = f'{Path.home()}/Stylus_Scoring_Generalization/Reference' # archetype directory
data_dir = f'{Path.home()}/Stylus_Scoring_Generalization/NewGenes' # gene directory

# Load the archetype first: its third element is handed to the gene loader.
ref_data = loadRef(ref_char, ref_dir)
char_data = loadGeometryBases(data_dir, ref_data[2])

# TODO: still need a way to silence Stylus' per-trial log output; earlier attempts:
#stylusengine.setRecordRate(500, ["none"], "", False)
#logging.disable(logging.CRITICAL)

# Single timed repetition of each approach.
compareExhaustive(alignStrokes, ref_data, char_data, trials=1)

2024-06-10T22:43:46.005100Z [INFO ] Loaded genome  containing 1 genes - trial set to 0
2024-06-10T22:43:46.005152Z [INFO ] TRIAL 0: Fitness is 0.001342939948137
2024-06-10T22:43:46.009968Z [INFO ] Loaded genome  containing 1 genes - trial set to 0
2024-06-10T22:43:46.010008Z [INFO ] TRIAL 0: Fitness is 0.000513359096713
2024-06-10T22:43:46.012658Z [INFO ] Loaded genome  containing 1 genes - trial set to 0
2024-06-10T22:43:46.012695Z [INFO ] TRIAL 0: Fitness is 0.000310655915354
2024-06-10T22:43:46.013470Z [INFO ] Loaded genome  containing 1 genes - trial set to 0
2024-06-10T22:43:46.013502Z [INFO ] TRIAL 0: Fitness is 0.000299573812395
2024-06-10T22:43:46.014228Z [INFO ] Loaded genome  containing 1 genes - trial set to 0
2024-06-10T22:43:46.014258Z [INFO ] TRIAL 0: Fitness is 0.000025056729068
2024-06-10T22:43:46.014983Z [INFO ] Loaded genome  containing 1 genes - trial set to 0
2024-06-10T22:43:46.015013Z [INFO ] TRIAL 0: Fitness is 0.000029431767979
2024-06-10T22:43:46.015732Z [INFO 

The first algorithm took 0.008441396057605743 seconds to execute 1 times.
The second algorithm took 1.1497185779735446 seconds to execute 1 times.
The heuristic algorithm scored 0 genes more accurately than the exhaustive search.
The exhaustive search scored 2 genes more accurately than the heuristic algorithm.
The heuristic algorithm and exhaustive search scored 0 genes identically.


### Ignore the rest (disabled scratch code, kept only for reference)

In [91]:
"""
# Edited from xmlparse.py
def loadRef(ref_char, ref_dir):
    stroke_list = []
    frac_dists = []
    ref_path = f'{ref_dir}/{ref_char}.han'
    ref_xml = open(ref_path, "r").read()
    root = xmltodict.parse(ref_xml)
    bounds = root["hanDefinition"]["bounds"]
    x_min, y_min, x_max, y_max = (float(bounds["@left"]), float(bounds["@bottom"]), float(bounds["@right"]), float(bounds["@top"]))
    scale = (int(x_max-x_min), int(y_max-y_min))
    strokes = root["hanDefinition"]["strokes"]["stroke"]
    if isinstance(strokes, dict):
        strokes = [strokes]
    for stroke in strokes:
        points = stroke["points"]["forward"]
        point_arr = []
        frac_arr = []
        for point in points["pointDistance"]:
            point_arr.append((float(point["@x"])-x_min,
                               float(point["@y"])-y_min))
            frac_arr.append(float(point["@fractionalDistance"]))
        stroke_list.append(np.array(point_arr))
        frac_dists.append(np.array(frac_arr))
    return stroke_list, frac_dists, scale

# Obtaining scores through heuristic algorithm
def heuristicScores(algo, ref_char, ref_data, char_data):
    heuristic_alignments = []
    heuristic_scores = []
    ref_geometry, ref_progress_percentage, output_size = ref_data
    g_data, _, base_data, stroke_sets, _, _ = char_data
    for (geometry_length, bases, stroke_set) in zip(g_data, base_data, stroke_sets):
        geometry, progress_percentage = geometry_length
        heuristic_alignment = np.array(algo(geometry, ref_geometry, progress_percentage, ref_progress_percentage))+1
        heuristic_alignments.append(heuristic_alignment)
        heuristic_xml = minXml(ref_char, bases, stroke_set, heuristic_alignment)
        heuristic_score = getXmlScore(heuristic_xml)
        heuristic_scores.append(heuristic_score)
    return heuristic_scores, heuristic_alignments
"""

'\n# Edited from xmlparse.py\ndef loadRef(ref_char, ref_dir):\n    stroke_list = []\n    frac_dists = []\n    ref_path = f\'{ref_dir}/{ref_char}.han\'\n    ref_xml = open(ref_path, "r").read()\n    root = xmltodict.parse(ref_xml)\n    bounds = root["hanDefinition"]["bounds"]\n    x_min, y_min, x_max, y_max = (float(bounds["@left"]), float(bounds["@bottom"]), float(bounds["@right"]), float(bounds["@top"]))\n    scale = (int(x_max-x_min), int(y_max-y_min))\n    strokes = root["hanDefinition"]["strokes"]["stroke"]\n    if isinstance(strokes, dict):\n        strokes = [strokes]\n    for stroke in strokes:\n        points = stroke["points"]["forward"]\n        point_arr = []\n        frac_arr = []\n        for point in points["pointDistance"]:\n            point_arr.append((float(point["@x"])-x_min,\n                               float(point["@y"])-y_min))\n            frac_arr.append(float(point["@fractionalDistance"]))\n        stroke_list.append(np.array(point_arr))\n        frac_dists.

In [92]:
"""
def getArchetypes(ref_dir):
    ref_chars = []
    ref_datas = []
    for _, _, f_names in os.walk(ref_dir):
        ref_chars.extend(f.split(".")[0] for f in f_names)
        ref_datas.extend(loadRef(f.split(".")[0], ref_dir) for f in f_names)
    ref_chars = list(filter(None, ref_chars))
    ref_datas = list(filter(None, ref_datas))
    return ref_chars, ref_datas

def writeData(archetypes, ref_dir, data_dir):
    all_scores = []
    ref_chars, ref_datas = archetypes
    for (ref_char, ref_data) in zip(ref_chars, ref_datas):
        char_data = loadGeometryBases(data_dir, ref_data[2])
        for stroke in char_data[0][0]:
            if len(ref_data[0]) != len(stroke):
                break
        else:
            heuristic_scores, _ = heuristicScores(alignStrokes, ref_char, ref_data, char_data)
            all_scores.append(heuristic_scores)
    np.array(all_scores)

#def readData():
"""

'\ndef getArchetypes(ref_dir):\n    ref_chars = []\n    ref_datas = []\n    for _, _, f_names in os.walk(ref_dir):\n        ref_chars.extend(f.split(".")[0] for f in f_names)\n        ref_datas.extend(loadRef(f.split(".")[0], ref_dir) for f in f_names)\n    ref_chars = list(filter(None, ref_chars))\n    ref_datas = list(filter(None, ref_datas))\n    return ref_chars, ref_datas\n\ndef writeData(archetypes, ref_dir, data_dir):\n    all_scores = []\n    ref_chars, ref_datas = archetypes\n    for (ref_char, ref_data) in zip(ref_chars, ref_datas):\n        char_data = loadGeometryBases(data_dir, ref_data[2])\n        for stroke in char_data[0][0]:\n            if len(ref_data[0]) != len(stroke):\n                break\n        else:\n            heuristic_scores, _ = heuristicScores(alignStrokes, ref_char, ref_data, char_data)\n            all_scores.append(heuristic_scores)\n    np.array(all_scores)\n\n#def readData():\n'

In [93]:
"""
ref_dir = f'{str(Path.home())}/Stylus_Scoring_Generalization/NewRef' # archetype directory
data_dir = f'{str(Path.home())}/Stylus_Scoring_Generalization/NewGenes' # gene directory

archetypes = getArchetypes(ref_dir)
writeData(archetypes, ref_dir, data_dir)
"""

"\nref_dir = f'{str(Path.home())}/Stylus_Scoring_Generalization/NewRef' # archetype directory\ndata_dir = f'{str(Path.home())}/Stylus_Scoring_Generalization/NewGenes' # gene directory\n\narchetypes = getArchetypes(ref_dir)\nwriteData(archetypes, ref_dir, data_dir)\n"