In [13]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from compare_genes import getScores
from xmlparse import loadRef, loadRef2, loadGeometryBases, getXmlScore, minXml
from score_strokes import alignStrokes
from exhaustive import computeExhaustive, exhaustScore, exhaustScoreAlignment

## Scoring functions

In [14]:
# Obtaining scores through heuristic algorithm without getScores
def heuristicScores(algo, ref_char, char_data):
    """Score every sample in ``char_data`` against one reference character.

    Args:
        algo: Callable invoked as ``algo(g, ref_g, l, ref_l)``; its result is
            turned into a NumPy array and used as the stroke alignment.
        ref_char: Reference character code handed to ``loadRef2`` and ``minXml``.
        char_data: Six-element sequence; elements 0, 2, 3 and 5 are iterated
            in parallel (element 0 must yield ``(g, l)`` pairs).

    Returns:
        A list holding one ``getXmlScore`` result per sample, in input order.
    """
    ref_g, ref_l, _output_size = loadRef2(ref_char, "Reference/7-stroke_characters")
    sample_pairs, _, sample_bases, sample_strokes, _, sample_files = char_data

    def score_sample(pair, bases, strokes):
        sample_g, sample_l = pair
        # Every alignment entry is shifted up by one before it is handed to minXml
        # (presumably 0-based -> 1-based stroke indices; confirm against minXml).
        alignment = np.array(algo(sample_g, ref_g, sample_l, ref_l)) + 1
        return getXmlScore(minXml(ref_char, bases, strokes, alignment))

    # sample_files is not read, but it stays in the zip so the number of scored
    # samples is still bounded by the shortest of the four sequences.
    return [
        score_sample(pair, bases, strokes)
        for pair, bases, strokes, _ in zip(sample_pairs, sample_bases, sample_strokes, sample_files)
    ]

# Obtaining scores through heuristic algorithm with getScores
def heuristicScoresShort(algo, ref_char, data_dir):
    """Run ``getScores`` and keep only the first and third of its three results.

    Args:
        algo: Heuristic alignment callable, forwarded to ``getScores``.
        ref_char: Reference character code, forwarded to ``getScores``.
        data_dir: Directory of samples, forwarded to ``getScores``.

    Returns:
        The pair ``(scores, marks)``.
    """
    scores, _ignored, marks = getScores(algo, ref_char, data_dir)
    return scores, marks
# Obtaining, for every sample, the best heuristic score over all archetypes in ref_dir.
# Anisa has a separate version of this code that checks multiple archetypes.
def heuristicScoresWrap(algo, ref_dir, char_data):
    """Score the samples against every archetype under ``ref_dir`` and keep the best.

    ``ref_dir`` must contain sub-directories whose entries are archetype
    files. The first four characters of each file name are passed to
    ``heuristicScores`` as the reference character code.

    Args:
        algo: Heuristic alignment callable, forwarded to ``heuristicScores``.
        ref_dir: Directory whose sub-directories hold the archetype files.
        char_data: Sample data, forwarded unchanged to ``heuristicScores``.

    Returns:
        A list with one entry per score returned by ``heuristicScores``:
        the element-wise maximum over all archetypes. An empty list is
        returned when no archetype file is found.
    """
    # NOTE(review): heuristicScores loads references from a fixed directory,
    # not from ref_dir, so every archetype listed here must also be loadable
    # from that directory.
    best_scores = None
    for ref_direct in sorted(os.listdir(ref_dir)):
        for ref_char in sorted(os.listdir(os.path.join(ref_dir, ref_direct))):
            ref_charShort = ref_char[:4]
            scores = heuristicScores(algo, ref_charShort, char_data)
            if best_scores is None:
                best_scores = list(scores)
            else:
                # Keep, position by position, the larger of the running best
                # and this archetype's score for the same sample.
                best_scores = [max(best, new) for best, new in zip(best_scores, scores)]
    return [] if best_scores is None else best_scores
    
# Obtaining scores through exhaustive search
def exhaustiveScores(ref_char, char_data, data_dir):
    """Compute the exhaustive-search score of every sample for one reference character.

    Args:
        ref_char: Reference character code, passed to ``loadRef2`` and ``exhaustScore``.
        char_data: Six-element sequence; elements 0, 1, 2 and 5 are iterated
            in parallel (element 0 must yield two-element pairs). Only the
            file names in element 5 are actually passed on.
        data_dir: Directory forwarded to ``exhaustScore``.

    Returns:
        A list of ``exhaustScore`` results, one per sample, in input order.
    """
    # The loaded reference is not used below; the call is kept so that any
    # error it raises for a missing reference still surfaces here.
    _ref_g, _ref_l, _output_size = loadRef2(ref_char, "Reference/7-stroke_characters")
    sample_pairs, sample_chars, sample_bases, _, _, sample_files = char_data
    samples = zip(sample_pairs, sample_chars, sample_bases, sample_files)
    return [
        exhaustScore(ref_char, sample_file, data_dir, force_refresh=True, save=False)
        for (_g, _l), _han_char, _bases, sample_file in samples
    ]
    

## Loading genes from the directory and storing scores
This compares all of the genes in `data_dir` (currently "Genes/sevengenes/maint_0.05 on 4F4D.01") with all of the archetypes in `ref_dir` (currently "Reference/7-stroke_characters").

`all_scores` is a 2-D list that holds all of the scores. Unfortunately, each Han character (archetype) is a row instead of a column, so the table cell below transposes it before building the data frame.

`ref_chars` holds the file names of all of the Han character archetypes found in `ref_dir`.

### Note about loadRef2!
Holiday's original code didn't include `loadRef2`; I added it. It is exactly the same as `loadRef`, except that the `ref_path` doesn't include the `{han_char[0]}000` part. I did this so I wouldn't have to create all of those files in my test folder.

In [None]:
# Score every sample in data_dir against every archetype in ref_dir with the
# exhaustive search. all_scores receives one row per archetype, in ref_chars order.
all_scores = []
data_dir = "Genes/sevengenes/maint_0.05 on 4F4D.01"
ref_dir = "Reference/7-stroke_characters"
# os.listdir returns entries in arbitrary order; sorting makes the row order of
# all_scores (and the table columns later built from ref_chars) reproducible.
ref_chars = sorted(os.listdir(ref_dir))
# The sample listing does not depend on the archetype, so it is read once here
# rather than on every iteration; this also keeps f_names defined when ref_dir
# is empty.
# NOTE(review): the scores are ordered by the file names inside char_data; this
# assumes loadGeometryBases yields them in the same sorted order - confirm.
f_names = sorted(os.listdir(data_dir))
for char in ref_chars:
    # The first four characters of the archetype file name are the character code.
    han_char = char[:4]
    ref_g, ref_l, output_size = loadRef2(han_char, ref_dir)
    char_data = loadGeometryBases(data_dir, output_size)
    exhaustive_scores = exhaustiveScores(han_char, char_data, data_dir)
    all_scores.append(exhaustive_scores)

## Generating a table

In [None]:
xml_dir = "GenXml"
# File names with their last five characters removed (presumably the file
# extension - confirm). Not used for the table labels below, which keep the
# full file names.
gene_names = [name[:len(name) - 5] for name in f_names]
# all_scores has one row per archetype; transpose it so that each row is a
# sample and each column is an archetype.
all_scores_t = np.array(all_scores).T
# Label column that identifies which sample each row belongs to.
genes = pd.Series(f_names, name="Genes")
frame = pd.DataFrame.from_records(all_scores_t, columns=ref_chars)
# Put the label column in front of the score columns.
result = pd.concat([genes, frame], axis=1)
table = pd.DataFrame(result)
# index must be the boolean False to omit the row index; the string "false"
# is truthy, so the index column was still being written to the CSV.
table.to_csv('ExhaustiveTable.csv', index=False)
