In [31]:
from pathlib import Path

import os

import numpy as np
import pandas as pd
import xmltodict

from score_strokes import alignStrokes
from xmlparse import loadGeometryBases, getXmlScore, minXml

## Exhaustive Comparison Function

In [32]:
"""
Function to compare a heuristic algorithm's accuracy and performance against the exhaustive search.
"""
# TODO: compareExhaustive is not implemented yet; only its description exists.
# The bare string above is the last expression of this cell, so the notebook
# echoes it back as the cell's output instead of attaching it to a function.
#def compareExhaustive()

"\nFunction to compare a heuristic algorithm's accuracy and performance against the exhaustive search.\n"

## Scoring Functions

In [33]:
# Edited from xmlparse.py
def loadRef(ref_char, ref_dir):
    """
    Load one reference character definition from ``<ref_dir>/<ref_char>.han``.

    Parameters:
        ref_char: File stem of the reference character (without ``.han``).
        ref_dir: Directory that contains the ``.han`` file.

    Returns:
        (stroke_list, frac_dists, scale) where
        - stroke_list holds one array per stroke of (x, y) points, translated
          so that the bounds' left/bottom corner becomes the origin,
        - frac_dists holds one array per stroke with each point's
          ``fractionalDistance`` attribute,
        - scale is the (width, height) of the bounds, truncated to ints.

    Raises:
        OSError: if the file cannot be opened.
        KeyError: if an expected element/attribute is missing from the XML.
    """
    stroke_list = []
    frac_dists = []
    ref_path = f'{ref_dir}/{ref_char}.han'
    # Context manager so the handle is closed even if reading fails.
    with open(ref_path, "r") as ref_file:
        ref_xml = ref_file.read()
    root = xmltodict.parse(ref_xml)
    bounds = root["hanDefinition"]["bounds"]
    x_min, y_min, x_max, y_max = (float(bounds["@left"]), float(bounds["@bottom"]), float(bounds["@right"]), float(bounds["@top"]))
    scale = (int(x_max-x_min), int(y_max-y_min))
    strokes = root["hanDefinition"]["strokes"]["stroke"]
    # xmltodict returns a dict (not a list) when an element occurs only once.
    if isinstance(strokes, dict):
        strokes = [strokes]
    for stroke in strokes:
        points = stroke["points"]["forward"]["pointDistance"]
        # Same single-child normalisation as for strokes: without it a lone
        # point would be iterated key-by-key and indexing would fail.
        if isinstance(points, dict):
            points = [points]
        point_arr = []
        frac_arr = []
        for point in points:
            point_arr.append((float(point["@x"])-x_min,
                              float(point["@y"])-y_min))
            frac_arr.append(float(point["@fractionalDistance"]))
        stroke_list.append(np.array(point_arr))
        frac_dists.append(np.array(frac_arr))
    return stroke_list, frac_dists, scale

# Obtaining scores through heuristic algorithm
def heuristicScores(algo, ref_char, ref_data, char_data):
    """
    Score every gene of a character using the alignment produced by ``algo``.

    Parameters:
        algo: Callable invoked as
            ``algo(geometry, ref_geometry, progress_percentage, ref_progress_percentage)``;
            its result is converted to an array and incremented by one
            (presumably 0-based -> 1-based stroke indices; confirm against minXml).
        ref_char: Reference character identifier forwarded to ``minXml``.
        ref_data: 3-tuple ``(ref_geometry, ref_progress_percentage, output_size)``.
        char_data: 6-tuple; items 0, 2 and 3 are the per-gene geometry pairs,
            bases and stroke sets. The remaining items are not used here.

    Returns:
        (scores, alignments): two parallel lists with one entry per gene.
    """
    ref_geometry, ref_progress, _ = ref_data
    gene_geometries, _, gene_bases, gene_stroke_sets, _, _ = char_data

    scores = []
    alignments = []
    per_gene = zip(gene_geometries, gene_bases, gene_stroke_sets)
    for (geometry, progress), bases, stroke_set in per_gene:
        raw_alignment = algo(geometry, ref_geometry, progress, ref_progress)
        alignment = np.array(raw_alignment) + 1
        alignments.append(alignment)
        scores.append(getXmlScore(minXml(ref_char, bases, stroke_set, alignment)))
    return scores, alignments

## Data Handling Functions

In [34]:
def getArchetypes(ref_dir):
    """
    Load every ``.han`` reference definition found directly inside ``ref_dir``.

    Only files whose extension is exactly ``.han`` are loaded, so stray files
    (e.g. ``.DS_Store``) are skipped instead of being passed to ``loadRef``.
    Subdirectories are not searched because ``loadRef`` opens
    ``<ref_dir>/<name>.han`` and could not find nested files anyway.

    Parameters:
        ref_dir: Directory containing the reference ``.han`` files.

    Returns:
        (ref_chars, ref_datas): parallel lists, sorted by file name, holding
        each file's stem and the corresponding ``loadRef`` result. Both lists
        are empty when the directory is missing or has no ``.han`` files.
    """
    # os.walk yields nothing for an unreadable/missing directory; fall back to
    # an empty listing so the function returns empty lists in that case.
    _, _, f_names = next(os.walk(ref_dir), (ref_dir, [], []))
    ref_chars = []
    ref_datas = []
    # Sorted for a reproducible order; os.walk's order is arbitrary.
    for f_name in sorted(f_names):
        # splitext keeps dots inside the stem ("a.v2.han" -> "a.v2").
        ref_char, extension = os.path.splitext(f_name)
        if extension != ".han":
            continue
        # Append to both lists together so they can never get out of step.
        ref_chars.append(ref_char)
        ref_datas.append(loadRef(ref_char, ref_dir))
    return ref_chars, ref_datas

def writeData(archetypes, ref_dir, data_dir):
    """
    Compute heuristic scores for the genes in ``data_dir`` against each archetype.

    An archetype is skipped when no genes were loaded for it, or when the
    first gene's stroke count differs from the archetype's stroke count.

    Parameters:
        archetypes: ``(ref_chars, ref_datas)`` as returned by ``getArchetypes``.
        ref_dir: Unused; kept so existing calls keep working.
        data_dir: Gene directory handed to ``loadGeometryBases``.

    Returns:
        ``np.array`` built from the collected score lists, one row per
        archetype that was scored (skipped archetypes contribute no row).
        Nothing is written to disk yet.
    """
    all_scores = []
    ref_chars, ref_datas = archetypes
    for (ref_char, ref_data) in zip(ref_chars, ref_datas):
        char_data = loadGeometryBases(data_dir, ref_data[2])
        gene_geometries = char_data[0]
        # Guard the [0] lookup below: no genes means nothing to score.
        if len(gene_geometries) == 0:
            continue
        ref_stroke_count = len(ref_data[0])
        # gene_geometries[0] is the first gene's (geometry, progress_percentage)
        # pair; each part must have one entry per reference stroke.
        if any(len(part) != ref_stroke_count for part in gene_geometries[0]):
            continue
        heuristic_scores, _ = heuristicScores(alignStrokes, ref_char, ref_data, char_data)
        all_scores.append(heuristic_scores)
    # Return the result; it was previously built and then discarded.
    return np.array(all_scores)

#def readData():

In [35]:
# Archetype directory (read by getArchetypes).
ref_dir = f"{Path.home()}/Stylus_Scoring_Generalization/NewRef"
# Gene directory (read by writeData).
data_dir = f"{Path.home()}/Stylus_Scoring_Generalization/NewGenes"

# Load the archetypes first, then score the genes against them.
archetypes = getArchetypes(ref_dir)
writeData(archetypes, ref_dir, data_dir)

2024-06-10T21:40:19.869006Z [INFO ] Loaded genome  containing 1 genes - trial set to 0
2024-06-10T21:40:19.869139Z [INFO ] TRIAL 0: Fitness is 0.001342939948137
2024-06-10T21:40:19.882972Z [INFO ] Loaded genome  containing 1 genes - trial set to 0
2024-06-10T21:40:19.883034Z [INFO ] TRIAL 0: Fitness is 0.000513359096713
2024-06-10T21:40:19.938099Z [INFO ] Loaded genome  containing 1 genes - trial set to 0
2024-06-10T21:40:19.938144Z [INFO ] TRIAL 0: Fitness is 0.000458958955706
2024-06-10T21:40:19.942835Z [INFO ] Loaded genome  containing 1 genes - trial set to 0
2024-06-10T21:40:19.942876Z [INFO ] TRIAL 0: Fitness is 2.919932431679134
2024-06-10T21:40:19.958435Z [INFO ] Loaded genome  containing 1 genes - trial set to 0
2024-06-10T21:40:19.958481Z [INFO ] TRIAL 0: Fitness is 0.000000038696358
2024-06-10T21:40:19.963547Z [INFO ] Loaded genome  containing 1 genes - trial set to 0
2024-06-10T21:40:19.963588Z [INFO ] TRIAL 0: Fitness is 0.000000005414955
2024-06-10T21:40:19.976317Z [INFO 

ValueError: 64 columns passed, passed data had 10 columns