# Heuristic V1

In [1]:
import pickle
import numpy as np
import StrokeDef
import xmlparse
import xmltodict
import score_strokes

from StrokeDef import loadStrokeDict
from xmlparse import loadGeometryBases
from score_strokes import strokeError, strokeErrorScaled

"""

"""

'\n\n'

## Comparing Gene Strokes to Archetypal Strokes

In [29]:
archetypal_strokes = loadStrokeDict()
#for key in archetypal_strokes:
    #print(archetypal_strokes[key].arial)

"""

Parameters:
    g_stroke_list - list of strokes contained for a given gene file
    
    g_percent_list - list of stroke percentage progresses (corresponding to g_stroke_list) for a given gene file

Return Value:
    total_array - an array of 55 elements, containing the total percentages for each stroke type in the entire gene
    
    percent_arrays - a 2D array with the percentages for each individual stroke (dimensions are number of strokes x 55)
"""
def getGeneStrokePercentages(g_stroke_list, g_percent_list):
    total_array = np.zeros(55)
    percent_arrays = []
    stroke_count = 0
    for g_points, g_frac in zip(g_stroke_list, g_percent_list):
        percent_arrays.append(getStrokePercentage(g_points, g_frac))
        total_array = total_array+percent_arrays[stroke_count]
        stroke_count+1
    return total_array, percent_arrays



"""
Gives a list of scores for how well a given gene stroke matches each of the archetypal strokes

Parameters:
    g_points - a list of the points in the gene stroke
    g_frac_prog - a list of fractional progress of each point in the gene stroke
    
Return Value:
    strokePercentages - an array containing 55 elements, each representing how well geneStroke
    compares to each of the archetypal strokes
"""
def getStrokePercentage(g_points, g_frac_prog):
    
    errors = [] # need to convert errors to scores, what is maximum error?  
    for key in archetypal_strokes:
        stroke_arial = archetypal_strokes[key].arial
        points = convertToPointList(stroke_arial)
        frac_prog = getFractionalProgress(points)
        error = strokeErrorScaled(g_points, points, g_frac_prog, frac_prog)
        if error == 0:
            error = 1
        else:
            error = 1.0/error
        errors.append(error)
    errors = errors/sum(errors)
    return errors    
    

"""
Parameters:
    stroke_arial - arial data from an archetypal stroke to be converted
    
Return Value:
    point_list - an array of pairs, stroke_arial converted to the point list format of a gene stroke
"""
def convertToPointList(stroke_arial):
    point_list = []
    for segment in stroke_arial:
        for point in segment:
            point_list.append(point)
    return point_list

def getFractionalProgress(point_list):
    dists = [0]
    for i in range(len(point_list)-1):
        dists.append(np.linalg.norm((point_list[i]-point_list[i+1]))+dists[-1])
    dists = np.array(dists)
    dists /= dists.max()
    return dists

"""
"""


"""
*Might already be a pre-existing function for this*
Makes it so that the elements of a given list sum to 1,
by dividing each element by the sum of all the elements

Parameters:
    list - list to be manipulated

Return value
    stand_list - list whose elements sum to 1
"""
#def sumToOne()

'\n*Might already be a pre-existing function for this*\nMakes it so that the elements of a given list sum to 1,\nby dividing each element by the sum of all the elements\n\nParameters:\n    list - list to be manipulated\n\nReturn value\n    stand_list - list whose elements sum to 1\n'

## Comparing Gene Stroke Percentage to Archetype Stroke Counts

In [3]:
"""
Compares the stroke percentages of a gene to the stroke counts of every archetype in order to
find the top 100 archetypes that most closely resemble the gene

Parameters:
    geneStrokePercents - an array containing the total percentages
    corresponding to the number of archetypal strokes present in the gene
    
    numStrokes - number of strokes contained in the gene

Return Value:
    archetype_matches - a list of the top 50 archetypes whose stroke counts most closely match the gene's stroke percentages

"""
def strokeCountComparison()

SyntaxError: invalid syntax (<ipython-input-3-a928a5df63e4>, line 15)

## Check Archetypes for Highest Score

In [None]:
"""
IDEAS:
"""

In [None]:
# Test archetypal stroke data
"""
stroke_name = "HZZZG"
archetypal_strokes = loadStrokeDict()
stroke_arial = archetypal_strokes[stroke_name].arial
print(stroke_arial)
points = convertToPointList(stroke_arial)
frac_prog = getFractionalProgress(stroke_arial)
print("frac_prog")
print(frac_prog)
"""

In [None]:
# Test getStrokePercentage()
"""
data_dir = "NewGenes"
char_data = loadGeometryBases(data_dir)
g_data, _, base_data, stroke_sets, _, f_names = char_data
p_list = []
for (gl, bases, stroke_set, f_name) in zip(g_data, base_data, stroke_sets, f_names):
        g, l = gl
        print()
        print(l)
        print()
        for g_points, g_frac in zip(g, l):
            print("g_points")
            print(len(g_points))
            print(g_points)
            print()
            print(g_frac)
            p_list.append(getStrokePercentage(g_points, g_frac))
print()
print()
print("Percentages:")
print(p_list)
"""

In [31]:
# Test getGeneStrokePercentages()
data_dir = "NewGenes"
char_data = loadGeometryBases(data_dir)
g_data, _, base_data, stroke_sets, _, f_names = char_data
p_list = []
for (gl, bases, stroke_set, f_name) in zip(g_data, base_data, stroke_sets, f_names):
    g, l = gl
    x, y = getGeneStrokePercentages(g, l)
    print(x)
    print(len(x))
    print(sum(x))
    print(y)
    print(len(y[0]))
    print(sum(y[0]))
    print()

[0.14291234 0.14467698 0.11914005 0.06325556 0.14503249 0.09346554
 0.14633575 0.15315813 0.10389927 0.08937267 0.09514722 0.06656137
 0.04954568 0.15096735 0.09402698 0.08688239 0.11849254 0.09525513
 0.14549774 0.08503969 0.14503249 0.14527691 0.152563   0.13396347
 0.10230695 0.06194516 0.05215552 0.08511885 0.07607016 0.15053549
 0.14815334 0.10108321 0.10819493 0.13959778 0.07342484 0.1521663
 0.10105501 0.04955018 0.11496798 0.14860694 0.14533625 0.07176152
 0.10557041 0.09315017 0.14215939 0.07621142 0.1494263  0.06245163
 0.14633575 0.13181465 0.0678743  0.10493372 0.06295239 0.05674563
 0.1528431 ]
55
6.0
[array([0.02381872, 0.02411283, 0.01985667, 0.01054259, 0.02417208,
       0.01557759, 0.02438929, 0.02552635, 0.01731654, 0.01489544,
       0.01585787, 0.01109356, 0.00825761, 0.02516122, 0.01567116,
       0.0144804 , 0.01974876, 0.01587585, 0.02424962, 0.01417328,
       0.02417208, 0.02421282, 0.02542717, 0.02232725, 0.01705116,
       0.01032419, 0.00869259, 0.01418647,

[0.12101764 0.12018445 0.12505135 0.07488705 0.12011534 0.12504836
 0.12319385 0.12669499 0.12632719 0.11249308 0.12707299 0.08601574
 0.05182106 0.12450373 0.10840296 0.10678123 0.12345312 0.11736294
 0.1226644  0.10074742 0.12011534 0.12036653 0.12703311 0.12454676
 0.12656359 0.06047144 0.06015088 0.11163608 0.08147416 0.1256853
 0.12286755 0.12552063 0.12397575 0.11990543 0.08493876 0.12637725
 0.12229687 0.05226655 0.11674278 0.12383262 0.12083072 0.08943704
 0.11635928 0.12118062 0.12647788 0.08898444 0.12396234 0.08274426
 0.12319385 0.12406499 0.08303141 0.12146292 0.07004742 0.06125303
 0.1263635 ]
55
6.000000000000001
[array([0.02016961, 0.02003074, 0.02084189, 0.01248118, 0.02001922,
       0.02084139, 0.02053231, 0.02111583, 0.02105453, 0.01874885,
       0.02117883, 0.01433596, 0.00863684, 0.02075062, 0.01806716,
       0.01779687, 0.02057552, 0.01956049, 0.02044407, 0.01679124,
       0.02001922, 0.02006109, 0.02117218, 0.02075779, 0.02109393,
       0.01007857, 0.0100251

[0.14491911 0.1468426  0.112576   0.06338206 0.14773701 0.08922594
 0.14106647 0.13847835 0.11151433 0.09655459 0.09482879 0.06642474
 0.05612798 0.14094583 0.09557799 0.09170903 0.11236166 0.10091547
 0.14197998 0.10503858 0.14773701 0.14878487 0.13824248 0.14403686
 0.1178741  0.04975739 0.04924317 0.0894509  0.06962916 0.14055505
 0.14396464 0.09579494 0.10056084 0.14488387 0.07824148 0.13920045
 0.11155826 0.0515101  0.12598308 0.14473177 0.14676891 0.08522225
 0.12670365 0.09027528 0.13615198 0.08030296 0.14342062 0.06208814
 0.14106647 0.13137461 0.06088741 0.09866886 0.07002576 0.05880212
 0.13829404]
55
5.999999999999997
[array([0.02415319, 0.02447377, 0.01876267, 0.01056368, 0.02462283,
       0.01487099, 0.02351108, 0.02307973, 0.01858572, 0.01609243,
       0.0158048 , 0.01107079, 0.00935466, 0.02349097, 0.01592966,
       0.01528484, 0.01872694, 0.01681924, 0.02366333, 0.01750643,
       0.02462283, 0.02479748, 0.02304041, 0.02400614, 0.01964568,
       0.0082929 , 0.008207