In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from compare_genes import getScores
from xmlparse import loadRef, loadGeometryBases, getXmlScore, minXml
from score_strokes import alignStrokes
from exhaustive import computeExhaustive, exhaustScore, exhaustScoreAlignment

2024-06-03T19:28:02.193999Z [INFO ] Stylus initialized - Stylus 1.5.0 [RELEASE - May 21 2024 14:06:24] (c) 2006-2009 Biologic Institute


## Scoring functions

In [2]:
# Obtaining scores through heuristic algorithm without getScores
def heuristicScores(algo, ref_char, char_data):
    """Score each loaded gene against ``ref_char`` using a heuristic alignment.

    Args:
        algo: Callable invoked as ``algo(g, ref_g, l, ref_l)``, where ``g, l``
            come from one gene and ``ref_g, ref_l`` from the reference. Its
            result must be convertible with ``np.array``.
        ref_char: Identifier of the reference character; passed to ``loadRef``
            (with the ``"Reference"`` directory) and to ``minXml``.
        char_data: Six-element sequence. Element 0 holds ``(g, l)`` pairs,
            element 2 the bases, element 3 the stroke sets and element 5 the
            file names; elements 1 and 4 are ignored here.

    Returns:
        A list with one ``getXmlScore`` result per gene, in input order.
    """
    g_ref, l_ref, _ = loadRef(ref_char, "Reference")
    gene_pairs, _, gene_bases, gene_strokes, _, gene_files = char_data

    def _score(pair, bases, stroke_set):
        # One gene: align, build the XML for that alignment, then score it.
        g_gene, l_gene = pair
        # Every alignment index is shifted up by one before it is handed to
        # minXml (presumably minXml expects 1-based stroke indices - TODO confirm).
        shifted = np.array(algo(g_gene, g_ref, l_gene, l_ref)) + 1
        return getXmlScore(minXml(ref_char, bases, stroke_set, shifted))

    # The file names are not needed for scoring, but they stay in the zip so
    # the number of scored genes is still capped by the shortest sequence.
    return [
        _score(pair, bases, stroke_set)
        for pair, bases, stroke_set, _ in zip(gene_pairs, gene_bases, gene_strokes, gene_files)
    ]

# Obtaining scores through heuristic algorithm with getScores
def heuristicScoresShort(algo, ref_char, data_dir):
    """Return the heuristic scores and marks reported by ``getScores``.

    Args:
        algo: Alignment callable forwarded unchanged to ``getScores``.
        ref_char: Reference character identifier forwarded to ``getScores``.
        data_dir: Gene directory forwarded to ``getScores``.

    Returns:
        A ``(scores, marks)`` tuple: the first and third of the three values
        returned by ``getScores``; the middle value is discarded.
    """
    outcome = getScores(algo, ref_char, data_dir)
    # getScores must yield exactly three values; only the outer two are kept.
    scores, _, marks = outcome
    return scores, marks

# Obtaining scores through exhaustive search
def exhaustiveScores(ref_char, char_data, data_dir):
    """Score each loaded gene against ``ref_char`` via ``exhaustScore``.

    Args:
        ref_char: Reference character identifier passed to ``exhaustScore``.
        char_data: Six-element sequence. Only element 5 (the gene file names)
            feeds the scoring; elements 0, 1 and 2 are iterated alongside it,
            and each entry of element 0 must unpack into exactly two values.
        data_dir: Directory passed to ``exhaustScore`` together with each
            file name.

    Returns:
        A list with one ``exhaustScore`` result per gene file, in input order.
    """
    gene_pairs, characters, gene_bases, _, _, gene_files = char_data
    # Each score is requested with force_refresh=True and save=False
    # (presumably: recompute instead of using a cache, and do not persist the
    # result - confirm against exhaustScore).
    return [
        exhaustScore(ref_char, gene_file, data_dir, force_refresh=True, save=False)
        for (_, _), _, _, gene_file in zip(gene_pairs, characters, gene_bases, gene_files)
    ]
    

## Loading genes from a new directory

In [3]:
# Configuration for this run; later cells read these notebook-level names.
data_dir = "NewGenes" # directory containing the gene files to score
han_char = "6709" # reference character the genes are scored against (presumably a hex Unicode code point - confirm)

# Only the third value returned by loadRef is needed here; it is passed on to
# loadGeometryBases when loading the genes.
_, _, output_size = loadRef(han_char, "Reference")
char_data = loadGeometryBases(data_dir, output_size)
# Default for the heuristicScores variant below, which returns no marks;
# the table cell only adds a "Mark" column when this list is non-empty.
marks = []
# Sorted directory listing, used as the gene labels in the table.
# NOTE(review): this assumes the scores come back in the same (sorted) order and
# that data_dir contains only gene files - verify against getScores/loadGeometryBases.
f_names = os.listdir(data_dir)
f_names.sort()
# Alternative that scores from the already-loaded char_data (yields no marks):
#heuristic_scores = heuristicScores(alignStrokes, han_char, char_data)
heuristic_scores, marks = heuristicScoresShort(alignStrokes, han_char, data_dir)
exhaustive_scores = exhaustiveScores(han_char, char_data, data_dir)

## Generating a table

In [4]:
# Build a results table with one row per gene file, comparing the heuristic
# score with the exhaustive-search score.
# Alternative labels without the file extension part:
#gene_names = [f_name.split(".")[0] for (i, f_name) in enumerate(f_names)]
gene_names = f_names
frame = {
    "Genes": gene_names,
    "Heuristic Scores": heuristic_scores,
    "Exhaustive Scores": exhaustive_scores,
}
# Add the "Mark" column only when marks were actually produced. Testing the
# length instead of comparing against a list literal keeps this correct for
# any sized container (list, tuple, NumPy array), where `marks != []` would be
# elementwise/ambiguous or wrongly true for an empty non-list sequence.
if marks is not None and len(marks) > 0:
    frame["Mark"] = marks
table = pd.DataFrame(frame)
table

Unnamed: 0,Genes,Heuristic Scores,Exhaustive Scores,Mark
0,0_4EFB.2.1.gene,0.000103,0.000749,False
1,4EFB.2.1.gene,0.000103,0.000749,False
2,56E0.2.10.gene,0.001819,0.024755,False
