In [10]:
from itertools import permutations
from math import factorial
from pathlib import Path

import csv
import os

import numpy as np
import xmltodict

from compare_genes import getScores
from xmlparse import loadGeometryBases, getXmlScore, minXml
from score_strokes import alignStrokes

## Scoring Functions

In [11]:
# Edited from exhaustive.py
def computeExhaustive(ref_char, f_read, data_dir, exhaust_dir = "Exhaustive", prog_interval = 100, save = True, xml_dir = "GenXml/Exhaustive", save_file = ""):
    """
    Score every stroke permutation of each requested sample against a reference.

    This is a generator: nothing runs until it is iterated. For each sample
    returned by `loadGeometryBases`, every permutation of the stroke numbers
    1..n (n = number of strokes in the reference) is turned into XML with
    `minXml` and scored with `getXmlScore`.

    Parameters
    ----------
    ref_char : reference character; loaded with `loadRef` and passed to `minXml`.
    f_read : list of sample file names; forwarded to `loadGeometryBases` and
        used to build per-sample file names.
    data_dir : directory handed to `loadGeometryBases`.
    exhaust_dir : directory used for the default `.npy` output path.
    prog_interval : currently unused; kept for interface compatibility.
    save : when true, each sample's score array is written with `np.save`.
    xml_dir : prefix of the two per-permutation paths handed to `getXmlScore`
        (presumably where it writes its XML -- confirm in xmlparse).
    save_file : explicit output path. When empty, a per-sample path
        `{exhaust_dir}/exhaust_{ref_char}_{sample}.npy` is used instead.
        NOTE: a non-empty `save_file` is reused for every sample, so with
        several samples each save overwrites the previous one.

    Yields
    ------
    numpy.ndarray of length n!, where entry j is the score of the j-th
    permutation in `itertools.permutations` order.
    """
    # NOTE: `ref_dir` is not a parameter; it is read from the notebook's global
    # namespace and must be defined before this generator is iterated.
    ref_g, _, output_size = loadRef(ref_char, ref_dir)
    g_data, _, base_data, stroke_sets, _, _ = loadGeometryBases(data_dir, output_size, f_read = f_read)
    n_strokes = len(ref_g)
    n_permutations = factorial(n_strokes)  # loop-invariant, computed once
    for i in range(len(g_data)):
        bases = base_data[i]
        stroke_set = stroke_sets[i]
        exhaustive_scores = np.zeros(n_permutations)
        for j, p in enumerate(permutations(range(1, n_strokes+1))):
            p_xml = minXml(ref_char, bases, stroke_set, p)
            exhaustive_scores[j] = getXmlScore(p_xml, f"{xml_dir}/{i}_{j}_{f_read[i]}", f"{xml_dir}/{i}_{j}_min_{f_read[i]}")
        if save:
            # Resolve the destination per sample. Previously the default path
            # was built but never assigned, so np.save received "".
            out_path = save_file
            if out_path == "":
                f_name_cleaned = f_read[i].replace("/", "_")
                out_path = f"{exhaust_dir}/exhaust_{ref_char}_{f_name_cleaned}.npy"
            np.save(out_path, exhaustive_scores)
            print(f"Wrote exhaustive scores to {out_path}")
        yield exhaustive_scores

# Edited from exhaustive.py
def exhaustScore(ref_char, f_name, data_dir, exhaust_dir = "Exhaustive", force_refresh = False, save = True, file_prefix = ""):
    """
    Return the best exhaustive score for one sample against `ref_char`.

    The scores are cached in
    `{exhaust_dir}/exhaust_{file_prefix}{ref_char}_{f_name with "/" -> "_"}.npy`.
    If that file exists and `force_refresh` is false, the maxima are obtained
    from `readExhaustive`; otherwise they are recomputed with
    `computeExhaustive` (which writes the cache file when `save` is true).

    Returns the maximum over all collected values (via `np.max`).
    """
    sanitized_name = f_name.replace("/", "_")
    cache_path = f"{exhaust_dir}/exhaust_{file_prefix}{ref_char}_{sanitized_name}.npy"
    use_cache = os.path.isfile(cache_path) and not force_refresh
    if use_cache:
        # NOTE(review): readExhaustive is not defined or imported in the part
        # of the notebook visible here -- confirm it is in scope.
        best_per_sample = readExhaustive(ref_char, f_name, exhaust_dir, cache_path)
    else:
        score_arrays = computeExhaustive(
            ref_char,
            [f_name],
            data_dir,
            save = save,
            xml_dir = f'{str(Path.home())}/Stylus_Scoring_Generalization/GenXml/Exhaustive',
            save_file = cache_path,
        )
        # One maximum per yielded score array.
        best_per_sample = [scores.max() for scores in score_arrays]
    return np.max(best_per_sample)

# Edited from xmlparse.py
def loadRef(han_char, ref_dir = "Reference"):
    """
    Load a reference character definition from `{ref_dir}/{han_char}.han`.

    The file is parsed as XML with xmltodict. Point coordinates are shifted so
    that the bounds' left/bottom corner becomes the origin.

    Returns
    -------
    stroke_list : list of numpy arrays, one per stroke, each holding the
        stroke's forward points as (x - left, y - bottom) rows.
    frac_dists : list of numpy arrays, one per stroke, holding each point's
        `fractionalDistance` attribute.
    scale : tuple (int(right - left), int(top - bottom)) from the bounds.
    """
    ref_path = f"{ref_dir}/{han_char}.han"
    # Context manager so the file handle is closed even if parsing fails.
    with open(ref_path, "r") as ref_file:
        root = xmltodict.parse(ref_file.read())
    definition = root["hanDefinition"]
    bounds = definition["bounds"]
    x_min, y_min = float(bounds["@left"]), float(bounds["@bottom"])
    x_max, y_max = float(bounds["@right"]), float(bounds["@top"])
    scale = (int(x_max-x_min), int(y_max-y_min))

    # xmltodict yields a list only for repeated elements; a lone <stroke> or
    # <pointDistance> comes back as a single mapping, which would otherwise be
    # iterated key by key. Normalise both to lists.
    strokes = definition["strokes"]["stroke"]
    if not isinstance(strokes, list):
        strokes = [strokes]

    stroke_list = []
    frac_dists = []
    for stroke in strokes:
        points = stroke["points"]["forward"]["pointDistance"]
        if not isinstance(points, list):
            points = [points]
        point_arr = [(float(point["@x"])-x_min, float(point["@y"])-y_min) for point in points]
        frac_arr = [float(point["@fractionalDistance"]) for point in points]
        stroke_list.append(np.array(point_arr))
        frac_dists.append(np.array(frac_arr))
    return stroke_list, frac_dists, scale

# Obtaining scores through heuristic algorithm
def heuristicScores(algo, ref_char, ref_data, char_data):
    """
    Score every sample in `char_data` using the alignment produced by `algo`.

    `ref_data` is the (strokes, fractional distances, output size) triple for
    the reference; `char_data` is a 6-tuple whose first, third, fourth, fifth
    and sixth entries are iterated in lockstep (one item per sample). For each
    sample, `algo(geometry, ref_geometry, progress, ref_progress)` is called,
    its result is converted to a numpy array and shifted by +1, and that
    alignment is passed to `minXml` / `getXmlScore`.

    Returns (scores, alignments) as two parallel lists.
    """
    ref_strokes, ref_fractions, _output_size = ref_data
    samples, _, all_bases, all_stroke_sets, stroke_orders, sample_names = char_data

    alignments = []
    scores = []
    # stroke_orders and sample_names are not used in the body, but they stay in
    # the zip so the iteration length is bounded exactly as before.
    per_sample = zip(samples, all_bases, all_stroke_sets, stroke_orders, sample_names)
    for sample, bases, stroke_set, _order, _name in per_sample:
        strokes, fractions = sample
        raw_alignment = algo(strokes, ref_strokes, fractions, ref_fractions)
        alignment = np.array(raw_alignment) + 1  # +1 shift applied before scoring
        alignments.append(alignment)
        scores.append(getXmlScore(minXml(ref_char, bases, stroke_set, alignment)))
    return scores, alignments

# Obtaining scores through exhaustive search
def exhaustiveScores(ref_char, data_dir, char_data):
    """
    Compute the best exhaustive-search score for every sample in `char_data`.

    `char_data` is a 6-tuple; its first, second, third and sixth entries are
    iterated in lockstep and the sixth (the sample file name) is passed to
    `exhaustScore`, always with `force_refresh=True` and `save=False`.

    Returns a list with one score per sample.
    """
    sample_geometries, sample_chars, sample_bases, _, _, sample_files = char_data
    lockstep = zip(sample_geometries, sample_chars, sample_bases, sample_files)
    # Each geometry entry is still unpacked as a pair, although only the file
    # name is needed for scoring.
    return [
        exhaustScore(ref_char, sample_file, data_dir, force_refresh=True, save=False)
        for ((_geometry, _progress), _char, _bases, sample_file) in lockstep
    ]

## Gene/Archetype Combos

In [12]:
ref_dir = f'{str(Path.home())}/Stylus_Scoring_Generalization/NewRef' # archetype directory
data_dir = f'{str(Path.home())}/Stylus_Scoring_Generalization/NewGenes' # gene directory

# Retrieve all reference characters: the part of each file name before the
# first "." for every file found anywhere under ref_dir.
ref_chars = []
for _, _, f_names in os.walk(ref_dir):
    ref_chars.extend(f.split(".")[0] for f in f_names)
ref_chars = list(filter(None, ref_chars))  # drop names that start with "."

# Gene ids: sorted file names in data_dir with everything from ".gene" on removed.
# Built before the CSV is opened so a missing data_dir does not leave a
# header-only test.csv behind.
gene_names = [g.split(".gene")[0] for g in sorted(os.listdir(data_dir))]

# Retrieve scores for every gene/archetype combo and write data to CSV.
# The context manager guarantees the file is flushed and closed even if
# scoring raises part-way through.
with open('test.csv', 'w', newline='') as cf:
    writer = csv.writer(cf)
    writer.writerow(["GeneId", "ArchetypeId", "ExhaustiveScore", "HeuristicScore", "GeneMap", "HeuristicMap"])
    for ref_char in ref_chars:
        ref_data = loadRef(ref_char, ref_dir)
        char_data = loadGeometryBases(data_dir, ref_data[2])
        # Skip archetypes whose stroke count differs from the gene data.
        # char_data[0][0] is the first gene's (geometry, progress) pair, so both
        # parts must have one entry per reference stroke.
        # NOTE(review): only the first gene is checked -- this assumes every gene
        # in data_dir has the same number of strokes.
        n_ref_strokes = len(ref_data[0])
        if all(len(part) == n_ref_strokes for part in char_data[0][0]):
            stroke_orders = char_data[4]
            heuristic_scores, heuristic_alignments = heuristicScores(alignStrokes, ref_char, ref_data, char_data)
            exhaustive_scores = exhaustiveScores(ref_char, data_dir, char_data)
            # NOTE(review): rows are matched to gene_names by position, which
            # presumably relies on loadGeometryBases returning genes in sorted
            # directory order -- confirm.
            for (gene_name, heuristic_score, exhaustive_score, stroke_order, heuristic_alignment) in zip(gene_names, heuristic_scores, exhaustive_scores, stroke_orders, heuristic_alignments):
                writer.writerow([gene_name, ref_char, exhaustive_score, heuristic_score, stroke_order, heuristic_alignment])