In [10]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from compare_genes import getScores
from xmlparse import loadRef, loadGeometryBases, getXmlScore, minXml, xmlToDict
from score_strokes import alignStrokes
from exhaustive import computeExhaustive, exhaustScore, exhaustScoreAlignment

ImportError: cannot import name 'xmlToDict' from 'xmlparse' (/home/emmaw11/Stylus_Scoring_Generalization/xmlparse.py)

In [None]:
def loadRef2(han_char, ref_dir = "Reference"):
    """
    Load the data for an archetype character given the name of the character.

    The archetype is read from the flat path `{ref_dir}/{han_char}.han`
    (no `{han_char[0]}000` sub-directory is inserted into the path).

    NOTE: this function calls `xmltodict.parse`, so `xmltodict` must be
    importable and bound in the notebook namespace before it is called.

    Input:
    han_char: String naming the character (e.g. "56E0"); used as the file stem
    ref_dir: Directory containing the XML archetype (.han) files
    Output:
    stroke_list: List with one (n_points, 2) array per stroke; coordinates are
                 shifted so that the left/bottom bound of the archetype is (0, 0)
    frac_dists: List with one 1-D array per stroke holding the
                fractionalDistance attribute of each point
    scale: (width, height) of the archetype bounds, truncated to int
    """
    stroke_list = []
    frac_dists = []
    ref_path = f"{ref_dir}/{han_char}.han"
    # Use a context manager so the file handle is closed even if parsing fails.
    with open(ref_path, "r") as ref_file:
        ref_xml = ref_file.read()
    root = xmltodict.parse(ref_xml)
    bounds = root["hanDefinition"]["bounds"]
    x_min, y_min, x_max, y_max = (float(bounds["@left"]), float(bounds["@bottom"]), float(bounds["@right"]), float(bounds["@top"]))
    scale = (int(x_max-x_min), int(y_max-y_min))
    strokes = root["hanDefinition"]["strokes"]["stroke"]
    # xmltodict collapses a single child element into a dict instead of a
    # one-element list; normalise so one-stroke archetypes are handled too.
    if isinstance(strokes, dict):
        strokes = [strokes]
    for stroke in strokes:
        points = stroke["points"]["forward"]["pointDistance"]
        # Same single-child normalisation for a stroke with only one point.
        if isinstance(points, dict):
            points = [points]
        point_arr = []
        frac_arr = []
        for point in points:
            point_arr.append((float(point["@x"])-x_min,
                              float(point["@y"])-y_min))
            frac_arr.append(float(point["@fractionalDistance"]))
        stroke_list.append(np.array(point_arr))
        frac_dists.append(np.array(frac_arr))
    return stroke_list, frac_dists, scale

## Scoring functions

In [None]:
# Obtaining scores through heuristic algorithm without getScores
def heuristicScores(algo, ref_char, char_data, ref_dir="Reference/6-stroke_characters"):
    """
    Score every gene in char_data against one archetype using a heuristic
    stroke-alignment algorithm.

    Input:
    algo: Callable invoked as algo(g, ref_g, l, ref_l); its result is converted
          to an array and incremented by 1 before being handed to minXml
          (presumably a 0-based to 1-based stroke index shift - TODO confirm)
    ref_char: Name of the archetype character, passed to loadRef2 and minXml
    char_data: 6-tuple; elements 0, 2, 3 and 5 are used here as the per-gene
               (g, l) pairs, bases, stroke sets and file names
    ref_dir: Directory the archetype is loaded from; defaults to the
             directory that used to be hard-coded
    Output:
    heuristic_scores: List with one getXmlScore result per gene, in the order
                      of char_data
    """
    ref_g, ref_l, _ = loadRef2(ref_char, ref_dir)
    g_data, _, base_data, stroke_sets, _, f_names = char_data
    heuristic_scores = []
    for (gl, bases, stroke_set, f_name) in zip(g_data, base_data, stroke_sets, f_names):
        g, l = gl
        heuristic_alignment = np.array(algo(g, ref_g, l, ref_l))+1
        heuristic_xml = minXml(ref_char, bases, stroke_set, heuristic_alignment)
        # Progress/debug output: which gene is being scored and the XML used.
        print(f_name)
        print(heuristic_xml)
        heuristic_scores.append(getXmlScore(heuristic_xml))
    return heuristic_scores

# Heuristic scores obtained by delegating all the work to getScores
def heuristicScoresShort(algo, ref_char, data_dir):
    """
    Thin wrapper around getScores.

    Calls getScores(algo, ref_char, data_dir), which must return exactly three
    values, and returns the first and third of them (the heuristic scores and
    the marks); the middle value is discarded.
    """
    scores, _unused, gene_marks = getScores(algo, ref_char, data_dir)
    return scores, gene_marks
# Scores every gene against every archetype found under ref_dir and keeps, for
# each gene, the greatest score obtained over all of the archetypes.
# NOTE: heuristicScores is called with only (algo, ref_char, char_data), so it
# loads each archetype from its own default reference directory rather than
# from the sub-directory the archetype file was listed in.
def heuristicScoresWrap(algo, ref_dir, char_data):
    """
    Best heuristic score per gene over all archetypes below ref_dir.

    Input:
    algo: Alignment algorithm forwarded unchanged to heuristicScores
    ref_dir: Directory whose sub-directories contain the archetype .han files
    char_data: Gene data forwarded unchanged to heuristicScores
    Output:
    List with one score per gene (same length and order as the list returned
    by heuristicScores); each entry is the maximum over all archetypes.
    Returns an empty list when no archetype file is found.
    """
    best_scores = None
    for ref_direct in sorted(os.listdir(ref_dir)):
        sub_dir = f"{ref_dir}/{ref_direct}"
        # Stray files next to the archetype folders cannot be listed; skip them.
        if not os.path.isdir(sub_dir):
            continue
        for ref_char in sorted(os.listdir(sub_dir)):
            # Strip the extension instead of slicing to 4 characters, so names
            # of any length map back to the stem that loadRef2 expects.
            ref_charShort, extension = os.path.splitext(ref_char)
            if extension != ".han":
                continue
            print(ref_char)
            print(ref_charShort)
            score = heuristicScores(algo, ref_charShort, char_data)
            if best_scores is None:
                best_scores = list(score)
            else:
                # Element-wise maximum keeps one running best per gene.
                best_scores = [max(best, new) for best, new in zip(best_scores, score)]
    return [] if best_scores is None else best_scores
    
# Obtaining scores through exhaustive search
def exhaustiveScores(ref_char, char_data, data_dir):
    """
    Score every gene in char_data against one archetype with the exhaustive
    search.

    Input:
    ref_char: Name of the archetype character, forwarded to exhaustScore
    char_data: 6-tuple; element 5 holds the gene file names that are scored.
               Elements 0, 1 and 2 are only used to bound the iteration, so
               the result has the same length as the shortest of them
    data_dir: Directory of the gene files, forwarded to exhaustScore
    Output:
    exhaustive_scores: List with one exhaustScore result per gene, in the
                       order of char_data
    """
    # The archetype itself is not needed here: exhaustScore receives only the
    # character name, the gene file name and data_dir, so the reference file
    # is no longer loaded (and discarded) by this function.
    g_data, han_chars, base_data, _, _, f_names = char_data
    exhaustive_scores = []
    for (_, _, _, f_name) in zip(g_data, han_chars, base_data, f_names):
        # force_refresh=True / save=False are passed through exactly as before.
        original_score = exhaustScore(ref_char, f_name, data_dir, force_refresh=True, save=False)
        exhaustive_scores.append(original_score)
    return exhaustive_scores
    

## Loading genes from a new directory

In [None]:
# Directory with the gene files to score and the archetype to score them against.
data_dir = "Genes/sixgenes/test"

han_char = "56E0"
# output_size (the archetype's scale) is what loadGeometryBases needs below.
ref_g, ref_l, output_size = loadRef2(han_char, "Reference/6-stroke_characters")
char_data = loadGeometryBases(data_dir, output_size)
# Take the gene file names from char_data (its sixth element) instead of a
# separate os.listdir(data_dir): the scoring functions iterate over exactly
# these names, so the table columns stay aligned with the scores even if
# data_dir contains entries that loadGeometryBases did not load.
_, _, _, _, _, f_names = char_data
# marks stays empty unless the heuristicScoresShort alternative below is used.
marks = []
heuristic_scores = heuristicScores(alignStrokes, han_char, char_data)
# Alternative: best score over all archetypes in a reference tree.
#heuristic_scores = heuristicScoresWrap(alignStrokes, "Reference/6-stroke characters", char_data)

# Alternative: scores and marks through getScores.
#heuristic_scores, marks = heuristicScoresShort(alignStrokes, han_char, data_dir)
exhaustive_scores = exhaustiveScores(han_char, char_data, data_dir)

## Generating a table

In [None]:
# Name of the generated-XML directory (not referenced again in this cell).
xml_dir = "GenXml"
#gene_names = [f_name.split(".")[0] for (i, f_name) in enumerate(f_names)]
gene_names = f_names
# One column per list; the "Mark" column is appended only when marks were
# actually produced by the scoring cell.
frame = {
    "Genes": gene_names,
    "Heuristic Scores": heuristic_scores,
    "Exhaustive Scores": exhaustive_scores,
}
if marks != []:
    frame["Mark"] = marks
table = pd.DataFrame(frame)
# Last expression of the cell: rendered as the cell output.
table