# Render Chinese characters in segments as opposed to as a whole

In [1]:
import os
import xmltodict

import matplotlib.pyplot as plt
import numpy as np

from matplotlib.lines import Line2D
from matplotlib import collections as mc
from PIL import Image, ImageDraw

In [2]:
def xmlToSegments(xfile):
    """
    Parse an XML character genome file and render its strokes.

    Parameters:
        xfile: path of the genome XML file to read.

    Returns:
        A tuple ``(han_char, score, drawn_char)`` where ``han_char`` is the
        ``unicode`` attribute of the gene's han reference, ``score`` is the
        ``score`` attribute of the genome statistics (kept as the string
        found in the XML) and ``drawn_char`` is the bytes produced by
        ``drawSegments`` for the gene's segments.
    """
    # Use a context manager so the file handle is released even if parsing fails.
    with open(xfile, "r") as handle:
        root = xmltodict.parse(handle.read())

    genome = root["genome"]
    gene = genome["genes"]["gene"]
    score = genome["statistics"]["@score"]
    han_char = gene["hanReferences"]["hanReference"]["@unicode"]
    drawn_char = drawSegments(gene["segments"]["segment"])
    return (han_char, score, drawn_char)

def _asList(node):
    """Return an xmltodict child as a list (a lone child arrives as a bare dict)."""
    if node is None:
        return []
    return node if isinstance(node, (list, tuple)) else [node]


def _axisMapper(values, extent, border):
    """Build a function that maps source coordinates onto one canvas axis."""
    low = min(values) if values else 0.0
    span = (max(values) - low) if values else 0.0
    if span == 0:
        # All points share this coordinate (or there are none): centre them
        # on the axis instead of dividing by a zero-sized extent.
        return lambda value: extent / 2
    scale = (extent - border) / span
    return lambda value: (value - low) * scale + border / 2


def drawSegments(segments, output_size=(32, 32), border=4):
    """
    Render each coherent stroke of a character onto its own 1-bit image.

    All coherent strokes share one bounding box, which is scaled to fill
    ``output_size`` minus ``border`` pixels in total (half of the border on
    each side), and the y axis is flipped.

    Parameters:
        segments: the ``segment`` entries parsed by xmltodict; either a list
            of mappings or a single mapping when the XML has one segment.
            Each needs an ``@coherent`` attribute and ``point`` children
            carrying ``@x`` / ``@y`` attributes.
        output_size: ``(width, height)`` of every rendered image.
        border: total margin, in pixels, left around the scaled strokes.

    Returns:
        The flattened NumPy array bytes of each stroke image, every image
        followed by ``b"\\n"``; ``b""`` when no segment is coherent.
    """
    # Parse every coordinate once; only segments flagged coherent are drawn.
    strokes = [
        [(float(point["@x"]), float(point["@y"]))
         for point in _asList(segment.get("point"))]
        for segment in _asList(segments)
        if segment["@coherent"] == "true"
    ]
    if not strokes:
        return b""

    width, height = output_size
    mapX = _axisMapper([x for stroke in strokes for x, _ in stroke], width, border)
    mapY = _axisMapper([y for stroke in strokes for _, y in stroke], height, border)
    half_height = height / 2

    rendered = []
    for stroke in strokes:
        img = Image.new(mode="1", size=output_size)
        draw = ImageDraw.Draw(img)
        # Mirror y around the canvas centre after scaling.
        canvas_points = [
            (mapX(x), -(mapY(y) - half_height) + half_height)
            for x, y in stroke
        ]
        # Connect consecutive points of the stroke with 1-pixel lines.
        for start, end in zip(canvas_points, canvas_points[1:]):
            draw.line((start, end), width=1, fill=1)
        rendered.append(np.array(img).flatten().tobytes() + b"\n")
    return b"".join(rendered)

def scanSegments(xdir, out_dir="./HanBitmap"):
    """
    Convert every ``.gene`` file in a directory into a score-plus-bitmap file.

    Each gene file is parsed with ``xmlToSegments`` and written to
    ``<out_dir>/<han_char>/<index>``, where ``index`` counts from 0 for each
    character within this call. The file holds the score encoded as UTF-8, a
    newline, then the rendered segment bytes.

    Parameters:
        xdir: directory containing the ``.gene`` files.
        out_dir: root directory for the output; created on demand.
    """
    next_index = {}
    # Sort so the index assigned to each gene file is reproducible across runs.
    for name in sorted(os.listdir(xdir)):
        if not name.endswith(".gene"):
            continue
        han_char, score, bitmap = xmlToSegments(f"{xdir}/{name}")

        char_dir = f"{out_dir}/{han_char}"
        # makedirs also creates out_dir itself when it does not exist yet.
        os.makedirs(char_dir, exist_ok=True)

        index = next_index.get(han_char, 0)
        # The context manager guarantees the data is flushed and the handle closed.
        with open(f"{char_dir}/{index}", "wb") as dfile:
            dfile.write(bytes(score, "UTF-8") + b"\n" + bitmap)
        next_index[han_char] = index + 1


In [3]:
# Convert every .gene file in the "Genes" directory, writing to the default output directory ("./HanBitmap").
scanSegments("Genes")