# Run exhaustive search for stroke arrangement using stylusengine to check scores and evaluate results

In [1]:
import os
import re
import sys

import matplotlib.pyplot as plt
import numpy as np

from itertools import permutations
from math import factorial

from xmlparse import loadRef, loadGeometryBases, minXml
from score_strokes import alignStrokes

In [2]:
# Locations needed to import and configure stylusengine. The defaults are the
# paths this notebook was originally run with; set the environment variables
# to run it on another machine without editing the cell.
STYLUS_SRC = os.environ.get("STYLUS_SRC", '/Users/douglasa6/Documents/stylus-master/src')
STYLUS_HAN_SCOPE = os.environ.get("STYLUS_HAN_SCOPE", 'file:///Applications/Stylus.app/Contents/Resources/hans')
STYLUS_SCHEMA_SCOPE = os.environ.get("STYLUS_SCHEMA_SCOPE", 'file:///Applications/Stylus.app/Contents/Resources/stylus/schemas')

# Guard the append so re-running this cell does not pile up duplicate entries.
if STYLUS_SRC not in sys.path:
    sys.path.append(STYLUS_SRC)

# Must come after the sys.path tweak above, which is why it is not in the
# top-of-notebook import cell.
import stylusengine

stylusengine.setLogFile(b'errors.log')
# The engine API is called with bytes throughout this notebook, so encode the
# configured strings. Kept as the last expression so its return value is shown.
stylusengine.setScope(
    STYLUS_HAN_SCOPE.encode(),
    STYLUS_SCHEMA_SCOPE.encode()
)

2022-06-29T22:29:53.889249Z [INFO ] Stylus initialized - Stylus 1.5.0 [RELEASE - Jun 28 2022 11:17:28] (c) 2006-2009 Biologic Institute


0

In [3]:
def getXmlScore(xml_bstring):
    """Load a genome into the Stylus engine and return the score it reports.

    The genome is passed to ``stylusengine.setGenome`` and read back with
    ``stylusengine.getGenome([b"all"])``; the score is taken from the first
    ``score='...'`` attribute found in the returned text.

    Parameters
    ----------
    xml_bstring
        Genome XML handed unchanged to ``stylusengine.setGenome``
        (presumably ``bytes``, like the other engine arguments - confirm).

    Returns
    -------
    float
        The parsed score value.

    Raises
    ------
    ValueError
        If the genome text returned by the engine has no ``score='...'``
        attribute matching the expected numeric pattern.
    """
    stylusengine.setGenome(xml_bstring, b"")
    genome_text = stylusengine.getGenome([b"all"]).decode()
    score_match = re.search(r"score='([e\d.+-]+)'", genome_text)
    if score_match is None:
        # re.search returns None on no match; fail with a message that says
        # what went wrong instead of an AttributeError on `.group`.
        raise ValueError("no score attribute found in genome returned by stylusengine")
    return float(score_match.group(1))


In [4]:
f_read = ["599"]

han_char = "5EA6"

ref_g, ref_l, output_size = loadRef(han_char, "Reference")
g_data, _, base_data, stroke_sets, stroke_orders, _ = loadGeometryBases("HanBitmap", han_char, output_size, f_read = f_read)

# Discard samples whose first component has a different length than the
# reference. The lengths are compared by value with `!=`; `is not` compares
# object identity, which only happens to work for small ints in CPython.
bad_data = [i for i in range(len(g_data)) if len(ref_g) != len(g_data[i][0])]
# Pop from the highest index down so earlier removals do not shift later ones.
for i in bad_data[::-1]:
    g_data.pop(i)
    base_data.pop(i)
    stroke_sets.pop(i)
    stroke_orders.pop(i)

if not g_data:
    raise ValueError(f"no usable samples left for {han_char} after pruning {f_read}")

# Work with the first surviving sample in the cells that follow.
g, l = g_data[0]
bases = base_data[0]
stroke_set = stroke_sets[0]
stroke_order = stroke_orders[0]


In [5]:
# alignStrokes returns the mapping with index and value swapped relative to
# what is needed here, so invert it.
heuristic_alignments_flipped = alignStrokes(g, ref_g, l, ref_l)
heuristic_alignments = np.zeros(len(heuristic_alignments_flipped), dtype=int)
# Scatter 1-based positions: the slot named by flipped[k] receives k + 1.
heuristic_alignments[heuristic_alignments_flipped] = np.arange(
    1, len(heuristic_alignments_flipped) + 1
)

print(heuristic_alignments, stroke_order)

[7 6 9 4 5 1 2 8 3] [6 7 5 4 1 2 3 8 9]


In [6]:
# Build one genome XML per stroke arrangement for the same sample: the
# heuristic alignment computed above and the sample's own stroke order.
heuristic_xml = minXml(han_char, bases, stroke_set, heuristic_alignments)
original_xml = minXml(han_char, bases, stroke_set, stroke_order)
# Each getXmlScore call loads its genome into the engine via setGenome, so
# after these two lines the engine holds the original-order genome.
heuristic_score = getXmlScore(heuristic_xml)
original_score = getXmlScore(original_xml)

In [7]:
def saveExhaustive(ref_char, han_char, f_read):
    """Score every stroke permutation of each usable sample and save the scores.

    Loads the reference for ``ref_char`` and the samples of ``han_char`` listed
    in ``f_read``, drops samples whose two components do not both match the
    reference length, then scores each permutation of ``1..len(l)`` with
    ``getXmlScore`` and writes the scores to
    ``exhaust_{ref_char}_{han_char}_{file name}.npy`` in the working directory.

    Parameters
    ----------
    ref_char
        Character code passed to ``loadRef`` and ``minXml``.
    han_char
        Character code passed to ``loadGeometryBases``; also used in the
        output file name.
    f_read
        Sample selection forwarded to ``loadGeometryBases`` as ``f_read``.

    Notes
    -----
    The work is factorial in the number of strokes: ``factorial(len(l))``
    engine evaluations per sample. Scores are stored in the order
    ``itertools.permutations`` yields them.
    """
    ref_g, ref_l, output_size = loadRef(ref_char, "Reference")
    g_data, _, base_data, stroke_sets, _, f_names = loadGeometryBases("HanBitmap", han_char, output_size, f_read = f_read)
    print(f"Pruning bad data from {len(f_read)} files...")
    n_ref = len(ref_g)
    keep = [
        i for i in range(len(g_data))
        if len(g_data[i][0]) == n_ref and len(g_data[i][1]) == n_ref
    ]
    # Filter every parallel list with the same indices. f_names has to be
    # pruned too, otherwise the labels and output file names below would point
    # at the wrong samples once anything has been dropped.
    # NOTE(review): assumes f_names is index-aligned with g_data - confirm
    # against loadGeometryBases.
    g_data = [g_data[i] for i in keep]
    base_data = [base_data[i] for i in keep]
    stroke_sets = [stroke_sets[i] for i in keep]
    f_names = [f_names[i] for i in keep]
    print(f"Pruning finished, dropped {len(f_read)-len(g_data)}/{len(f_read)} bad samples")
    for (g, l), bases, stroke_set, f_name in zip(g_data, base_data, stroke_sets, f_names):
        print(f"Generating exhaustive scores for sample {f_name}")
        n_perms = factorial(len(l))
        exhaustive_scores = np.zeros(n_perms)
        for j, p in enumerate(permutations(range(1, len(l)+1))):
            p_xml = minXml(ref_char, bases, stroke_set, p)
            exhaustive_scores[j] = getXmlScore(p_xml)
            if j%10000 == 0:
                print(f"Scoring permutation {j} of {n_perms}")
        # Path separators in the sample name would otherwise create directories.
        f_name_cleaned = f_name.replace("/", "_")
        out_path = f"exhaust_{ref_char}_{han_char}_{f_name_cleaned}.npy"
        # Save first, then report, so the message is only printed on success.
        np.save(out_path, exhaustive_scores)
        print(f"Wrote exhaustive scores to {out_path}")


In [8]:
# Re-score the heuristic arrangement through the shared helper rather than
# repeating its body inline. The inline version rebound the global `g` (the
# sample geometry loaded earlier) to the engine's genome string, which broke
# re-running the alignment cell afterwards.
score = getXmlScore(heuristic_xml)

In [9]:
# Display the heuristic-arrangement score next to the original stroke-order score.
score, original_score

(1.875909095828902e-11, 0.01076588719706897)

In [10]:
# Samples to run the exhaustive search on. An earlier single-sample value
# (["400"]) was immediately overwritten here and has been removed as dead code.
# NOTE(review): these ids are ints, while the single-sample cell above passes
# strings (["599"]) - confirm loadGeometryBases accepts both.
f_read = list(range(2, 600))
ref_char = "5EA6"
han_char = "5EA6"

In [11]:
# Long-running: saveExhaustive scores factorial(stroke count) permutations per
# sample (the recorded run below shows 362880 per sample) and only writes a
# sample's .npy file once all of its permutations have been scored.
saveExhaustive(ref_char, han_char, f_read)

Pruning bad data from 600 files...
Pruning finished, dropped 237/600 bad samples
Generating exhaustive scores for sample 0
Scoring permutation 0 of 362880
Scoring permutation 10000 of 362880
Scoring permutation 20000 of 362880
Scoring permutation 30000 of 362880
Scoring permutation 40000 of 362880
Scoring permutation 50000 of 362880
Scoring permutation 60000 of 362880
Scoring permutation 70000 of 362880
Scoring permutation 80000 of 362880
Scoring permutation 90000 of 362880
Scoring permutation 100000 of 362880
Scoring permutation 110000 of 362880
Scoring permutation 120000 of 362880
Scoring permutation 130000 of 362880
Scoring permutation 140000 of 362880
Scoring permutation 150000 of 362880
Scoring permutation 160000 of 362880
Scoring permutation 170000 of 362880
Scoring permutation 180000 of 362880
Scoring permutation 190000 of 362880
Scoring permutation 200000 of 362880
Scoring permutation 210000 of 362880
Scoring permutation 220000 of 362880
Scoring permutation 230000 of 362880
Sco

KeyboardInterrupt: 