# Run an exhaustive search over stroke arrangements, using StylusEngine to score each arrangement and evaluate the results

In [1]:
import os
import re
import sys

import matplotlib.pyplot as plt
import numpy as np

from itertools import permutations
from math import factorial

from xmlparse import loadRef, loadGeometryBases, minXml
from score_strokes import alignStrokes

In [2]:
# If the stylusengine module is not importable, its source directory can be
# put on the path first (machine-specific example, left disabled):
#sys.path.append('/Users/douglasa6/Documents/stylus-master/src')

import stylusengine

# Engine configuration. These are absolute, machine-specific locations;
# adjust them before running on another machine.
STYLUS_LOG_FILE = b'errors.log'
HANS_SCOPE_URL = b'file:///home/tulip/Documents/College/Stewart/stylusapp/hans'
SCHEMAS_SCOPE_URL = b'file:///home/tulip/Documents/College/Stewart/stylus/schemas'

stylusengine.setLogFile(STYLUS_LOG_FILE)

# Kept as the last expression so the call's return value is echoed by the cell.
stylusengine.setScope(HANS_SCOPE_URL, SCHEMAS_SCOPE_URL)


2023-08-29T23:14:26.882073Z [INFO ] Stylus initialized - Stylus 1.5.0 [RELEASE - Aug 29 2023 15:40:46] (c) 2006-2009 Biologic Institute


0

In [3]:
def getXmlScore(xml_bstring):
    """Load a genome into the Stylus engine and return the score it reports.

    The genome is handed to ``stylusengine.setGenome`` and the engine's
    ``getGenome([b"all"])`` dump is searched for the first ``score='...'``
    attribute, which is converted to ``float``.

    Note: this replaces whatever genome the engine currently holds, so calls
    are order-dependent with respect to engine state.

    Args:
        xml_bstring: genome XML as a bytes object (as produced by ``minXml``
            in this notebook).

    Returns:
        float: the value of the first ``score`` attribute in the engine dump.

    Raises:
        ValueError: if the engine dump contains no ``score='...'`` attribute,
            or the attribute text is not a valid float.
    """
    # Second argument is passed as empty bytes, exactly as before; its meaning
    # is defined by the stylusengine API (not visible here).
    stylusengine.setGenome(xml_bstring, b"")
    genome_dump = stylusengine.getGenome([b"all"]).decode()

    match = re.search(r"score='([e\d.+-]+)'", genome_dump)
    if match is None:
        # Previously this surfaced as an opaque AttributeError on None.group.
        raise ValueError(
            "Stylus genome output contains no score='...' attribute; "
            "the genome was probably rejected by the engine (check the engine log)"
        )
    return float(match.group(1))


In [4]:
# Candidate gene files: 4EFB.2.1.gene .. 4EFB.2.(dlen-1).gene (range end is exclusive).
dlen = 20
han_char = "4EFB"
f_read = [f"4EFB.2.{i}.gene" for i in range(1, dlen)]

# Reference character and the gene samples to compare against it.
ref_g, ref_l, output_size = loadRef(han_char, "Reference")
g_data, _, base_data, stroke_sets, stroke_orders, _ = loadGeometryBases(
    "Genes/maint_0.2 on 4EFB.2",
    han_char,
    output_size,
    f_read = f_read,
)

# Disabled alternative, kept for reference: drop mismatching samples in place
# instead of recording the indices of the good ones.
# bad_data = []
# for i in range(len(g_data)):
#     if len(ref_g) is not len(g_data[i][0]):
#         bad_data.append(i)
# for i in bad_data[::-1]:
#     g_data.pop(i)
#     base_data.pop(i)
#     stroke_sets.pop(i)
#     stroke_orders.pop(i)

# A sample counts as "good" when the first element of its g_data entry has
# exactly 6 items (presumably the stroke count of the 4EFB reference - confirm).
good_characters = []
for i in range(len(g_data)):
    if len(g_data[i][0]) != 6:
        continue
    print(f"{i} is good")
    good_characters.append(i)

# Work with the third good sample from here on.
character_num = good_characters[2]
print(f"Testing character {character_num}")

g, l = g_data[character_num]
bases, stroke_set, stroke_order = (
    base_data[character_num],
    stroke_sets[character_num],
    stroke_orders[character_num],
)


0 is good
1 is good
2 is good
3 is good
4 is good
5 is good
6 is good
7 is good
8 is good
9 is good
10 is good
11 is good
12 is good
15 is good
16 is good
17 is good
18 is good
Testing character 2


In [5]:
# Heuristic stroke alignment of the selected sample against the reference.
# The +1 offset puts the result in the same numbering as stroke_order, which is
# printed next to it for comparison.
# NOTE(review): an earlier comment here said the reference-gene alignment is
# "flipped along the index and value" and gets reversed; no reversal happens in
# this cell, so presumably alignStrokes handles it - confirm.

heuristic_alignments = 1 + alignStrokes(g, ref_g, l, ref_l)

print(heuristic_alignments, stroke_order)


[1 2 6 5 3 4] [1 2 6 5 3 4]


In [6]:
# Build one XML document per stroke ordering (heuristic first, then original)...
heuristic_xml, original_xml = (
    minXml(han_char, bases, stroke_set, ordering)
    for ordering in (heuristic_alignments, stroke_order)
)
# ...and score them in that same order (each scoring call reloads the engine's genome).
heuristic_score, original_score = map(getXmlScore, (heuristic_xml, original_xml))

In [7]:
def saveExhaustive(ref_char, han_char, f_read, data_dir="HanBitmap", exhaust_dir = "Exhaustive"):
    """Score every stroke permutation of each usable sample and save the scores.

    For each sample loaded by ``loadGeometryBases`` whose two geometry lists
    have the same length as the reference, every permutation of
    ``1..len(l)`` is turned into XML with ``minXml`` and scored with
    ``getXmlScore``. The scores (in ``itertools.permutations`` order) are
    written to ``{exhaust_dir}/exhaust_{ref_char}_{han_char}_{name}.npy``,
    where ``name`` is the sample's entry in the loader's file-name list with
    ``/`` replaced by ``_``.

    Args:
        ref_char: reference character code passed to ``loadRef`` and ``minXml``.
        han_char: character code passed to ``loadGeometryBases``.
        f_read: list of file names forwarded to ``loadGeometryBases``.
        data_dir: directory forwarded to ``loadGeometryBases``.
        exhaust_dir: output directory; created if it does not exist.

    Returns:
        None. Results are written to disk and progress is printed.
    """
    ref_g, _, output_size = loadRef(ref_char, "Reference")
    g_data, _, base_data, stroke_sets, _, f_names = loadGeometryBases(data_dir, han_char, output_size, f_read = f_read)

    print(f"Pruning bad data from {len(f_read)} files...")
    n_ref = len(ref_g)
    # Keep the ORIGINAL indices of usable samples. Indexing every parallel list
    # (geometry, bases, stroke sets, file names) with the same index keeps them
    # aligned; popping only some of the lists used to mislabel the outputs.
    kept = [
        i for i in range(len(g_data))
        if len(g_data[i][0]) == n_ref and len(g_data[i][1]) == n_ref
    ]
    print(f"Pruning finished, dropped {len(f_read)-len(kept)}/{len(f_read)} bad samples")

    # Fail early (or create the directory) rather than after a long scoring run.
    os.makedirs(exhaust_dir, exist_ok=True)

    for i in kept:
        sample_name = f_names[i]
        print(f"Generating exhaustive scores for sample {sample_name}")
        g, l = g_data[i]
        bases = base_data[i]
        stroke_set = stroke_sets[i]

        # One score per permutation of the 1-based stroke numbers.
        exhaustive_scores = np.zeros(factorial(len(l)))
        for j, p in enumerate(permutations(range(1, len(l)+1))):
            p_xml = minXml(ref_char, bases, stroke_set, p)
            exhaustive_scores[j] = getXmlScore(p_xml)
            if j%10000 == 0:
                print(f"Scoring permutation {j} of {len(exhaustive_scores)}")

        f_name_cleaned = sample_name.replace("/", "_")
        out_path = f"{exhaust_dir}/exhaust_{ref_char}_{han_char}_{f_name_cleaned}.npy"
        np.save(out_path, exhaustive_scores)
        # Report only once the file has actually been written.
        print(f"Wrote exhaustive scores to {out_path}")


In [8]:
# For every good sample, score the heuristic stroke ordering and the sample's
# own stroke ordering and print them side by side.
for c in good_characters:
    (g, l), bases = g_data[c], base_data[c]
    stroke_set, stroke_order = stroke_sets[c], stroke_orders[c]

    # Offset by one so the alignment uses the same numbering as stroke_order.
    heuristic_alignments = 1 + np.array(alignStrokes(g, ref_g, l, ref_l))
    print(stroke_order, heuristic_alignments)

    # XML is built heuristic-first, then scored in the same order.
    heuristic_xml, original_xml = (
        minXml(han_char, bases, stroke_set, ordering)
        for ordering in (heuristic_alignments, stroke_order)
    )
    heuristic_score, original_score = map(getXmlScore, (heuristic_xml, original_xml))
    print(f"{c}: {heuristic_score}, {original_score}")

[1 2 6 5 3 4] [1 2 6 5 3 4]
0: 0.2009721093859917, 0.2009721093859917
[1 2 6 5 3 4] [1 2 6 5 3 4]
1: 0.2013953338277964, 0.2013953338277964
[1 2 6 5 3 4] [1 2 6 5 3 4]
2: 0.2078045801000123, 0.2078045801000123
[1 2 6 5 3 4] [1 2 6 5 3 4]
3: 0.2016256382450882, 0.2016256382450882
[1 2 6 5 3 4] [1 2 6 5 3 4]
4: 0.2145956745779752, 0.2145956745779752
[1 2 6 5 3 4] [1 2 6 5 3 4]
5: 0.2089713007412017, 0.2089713007412017
[1 2 6 5 3 4] [1 2 6 5 3 4]
6: 0.2034092049312096, 0.2034092049312096
[1 2 6 5 3 4] [1 2 6 5 3 4]
7: 0.2230345742767507, 0.2230345742767507
[1 2 6 5 3 4] [1 2 6 5 3 4]
8: 0.2049115164682263, 0.2049115164682263
[1 2 6 5 3 4] [1 2 6 5 3 4]
9: 0.2086181203229363, 0.2086181203229363
[1 2 6 5 3 4] [1 2 6 5 3 4]
10: 0.2125022264934223, 0.2125022264934223
[1 2 6 5 3 4] [1 2 6 5 3 4]
11: 0.207720919891055, 0.207720919891055
[1 2 6 5 3 4] [1 2 6 5 3 4]
12: 0.2107049646295421, 0.2107049646295421
[1 2 6 5 3 4] [1 2 6 5 3 4]
15: 0.2272769228518867, 0.2272769228518867
[1 2 6 5 3 4] [1 2

In [9]:
# Disabled parameter set for a saveExhaustive run on character 56DB.
# NOTE: this is a bare string literal, so none of the names inside it are
# assigned; as the cell's last expression the string is only echoed as output.
"""
f_read = ["56DB.08.15.gene"]
ref_char = "56DB"
han_char = "56DB"
data_dir = "Genes//maint_0.001 on 56DB.08/"
"""

'\nf_read = ["56DB.08.15.gene"]\nref_char = "56DB"\nhan_char = "56DB"\ndata_dir = "Genes//maint_0.001 on 56DB.08/"\n'

In [10]:
#saveExhaustive(ref_char, han_char, f_read, data_dir)