In [42]:
import re
from pathlib import Path
import nltk
from scipy.stats import pearsonr
# import jaro

from machine.annotations.range import Range
from machine.corpora import FileParatextProjectSettingsParser, UsfmFileText, UsfmStylesheet
from machine.tokenization import WhitespaceTokenizer, LatinWordTokenizer

class WhitespaceSlashTokenizer(WhitespaceTokenizer):
    """Whitespace tokenizer that also breaks tokens at USFM marker punctuation.

    Backslashes and asterisks are reported as whitespace, so a marker glued to
    a word (e.g. ``word\\w``) is split off and the marker name is left as a
    bare token without its ``\\`` / ``*`` decoration.
    """

    def _is_whitespace(self, c: str) -> bool:
        """Return True for regular whitespace as well as ``\\`` and ``*``."""
        if super()._is_whitespace(c):
            return True
        return c == "\\" or c == "*"

class LatinMarkerWordTokeinzer(LatinWordTokenizer):
    """Latin word tokenizer that emits every USFM marker as one token.

    A marker starts at a backslash and runs up to (but not including) the next
    space or backslash, or up to and including a closing asterisk, whichever
    comes first. Everything else is delegated to ``LatinWordTokenizer``.
    """

    def _process_character(self, data, data_range, ctxt):
        """Consume a whole marker when the current character is a backslash.

        Args:
            data: The string being tokenized.
            data_range: The range of ``data`` being tokenized; only its ``end``
                is read here.
            ctxt: The tokenizer context; ``ctxt.index`` is the current position
                and is advanced past the marker.

        Returns:
            A ``(marker_range, None)`` pair for a marker, otherwise whatever the
            base class returns for the current character.
        """
        if data[ctxt.index] != "\\":
            return super()._process_character(data, data_range, ctxt)

        marker_start = ctxt.index
        range_end = data_range.end

        def first_index_of(char):
            # Next occurrence of `char` after the backslash, limited to the
            # range being tokenized; the end of the range when there is none.
            found = data.find(char, marker_start + 1, range_end)
            return range_end if found == -1 else found

        marker_end = min(first_index_of(" "), first_index_of("\\"), first_index_of("*"))

        # A closing marker such as "\nd*" owns its asterisk. Only extend the
        # token when an asterisk really sits at marker_end; previously a marker
        # that ran to the end of the range (no asterisk found) was also
        # extended, producing a range one past the end of the data.
        if marker_end < range_end and data[marker_end] == "*":
            marker_end += 1

        # NOTE(review): any word the base class has in progress when the
        # backslash is reached is not flushed here -- confirm against
        # LatinWordTokenizer's context handling.
        token_ranges = (Range.create(marker_start, marker_end), None)
        ctxt.index = marker_end
        return token_ranges


# Correctly spelled alias; the original (misspelled) name is kept for existing callers.
LatinMarkerWordTokenizer = LatinMarkerWordTokeinzer

In [43]:
'''Code from https://github.com/richmilne/JaroWinkler/blob/master/jaro/jaro.py'''

def jaro(s1, s2):
    """Compute the Jaro similarity of two sequences.

    Works on any indexable sequences (strings or token lists). The shorter
    sequence is always scanned against the longer one.

    Returns:
        A ``(similarity, half_transposes)`` pair. ``half_transposes`` counts the
        matched items of the shorter sequence whose counterpart, taken in
        order from the longer sequence, is a different item; half of it is the
        number of transpositions. ``(0, 0)`` is returned, after printing a
        short diagnostic, when a sequence is empty or nothing matches.
    """
    # Keep the shorter sequence first.
    shorter, longer = (s2, s1) if len(s2) < len(s1) else (s1, s2)

    if len(shorter) == 0:
        print("empty sequence")
        print(shorter)
        print(longer)
        return 0, 0

    # Items only match when their positions are at most `window` apart.
    window = max((len(longer) // 2) - 1, 0)
    last_long_pos = len(longer) - 1
    short_flags = [0] * len(shorter)
    long_flags = [0] * len(longer)
    num_matches = 0

    for pos, item in enumerate(shorter):
        candidates = range(max(pos - window, 0), min(pos + window, last_long_pos) + 1)
        # First still-unmatched equal item inside the window, if any.
        hit = next((k for k in candidates if not long_flags[k] and item == longer[k]), None)
        if hit is not None:
            short_flags[pos] = long_flags[hit] = 1
            num_matches += 1

    if num_matches == 0:
        print("no matches")
        print(shorter)
        print(longer)
        return 0, 0

    # Line the matched items of both sequences up in their own linear order;
    # every position where they disagree is one "half transpose".
    short_matched = [shorter[k] for k, flag in enumerate(short_flags) if flag]
    long_matched = [longer[k] for k, flag in enumerate(long_flags) if flag]
    half_transposes = sum(1 for a, b in zip(short_matched, long_matched) if a != b)

    dist = (
        num_matches / len(shorter)
        + num_matches / len(longer)
        + (num_matches - half_transposes // 2) / num_matches
        ) / 3
    return dist, half_transposes

In [44]:
'''
Levenshtein distance
raw score: num of insertions/substitutions/deletions/transpositions
'''
def levenshtein_metrics(gs_toks, ps_toks, num_markers):
    """Edit distance between each gold/predicted sentence pair, raw and normalized.

    Args:
        gs_toks: Gold sentences (token lists or strings), one per sentence.
        ps_toks: Predicted sentences, aligned with ``gs_toks``.
        num_markers: Number of markers in each gold sentence.

    Returns:
        Three parallel lists: the raw edit distances (transpositions count as
        one edit), the distances scaled to "per 10 items" of the gold
        sentence, and the distances divided by the sentence's marker count.
    """
    dists = [
        nltk.edit_distance(gold, pred, transpositions=True)
        for gold, pred in zip(gs_toks, ps_toks)
    ]

    dists_per_10_tokens = []
    for dist, gold in zip(dists, gs_toks):
        dists_per_10_tokens.append(dist * 10 / len(gold))

    dists_per_marker = []
    for dist, marker_count in zip(dists, num_markers):
        dists_per_marker.append(dist / marker_count)

    return dists, dists_per_10_tokens, dists_per_marker

'''
Jaro similarity
raw score:
avg of:
1. % matches in s1 (m / len(s1))
2. % matches in s2 (m / len(s2))
3. % matches not transposed ((m-t)/m), t = # "half transposes" / 2

TODO: check symmetricality of Jaro similarity
'''
def jaro_metrics(gs_toks, ps_toks, num_markers):
    """Jaro similarity and half-transpose counts for each gold/predicted pair.

    Args:
        gs_toks: Gold sentences (token lists or strings), one per sentence.
        ps_toks: Predicted sentences, aligned with ``gs_toks``.
        num_markers: Number of markers in each gold sentence.

    Returns:
        Four parallel lists: Jaro scores, half-transpose counts, half
        transposes scaled to "per 10 items" of the gold sentence, and half
        transposes divided by the sentence's marker count.
    """
    jaro_scores, half_transposes = [], []
    for gold, pred in zip(gs_toks, ps_toks):
        pair_score, pair_hts = jaro(gold, pred)
        jaro_scores.append(pair_score)
        half_transposes.append(pair_hts)

    ht_per_tok = []
    for hts, gold in zip(half_transposes, gs_toks):
        ht_per_tok.append(hts * 10 / len(gold))

    ht_per_mark = []
    for hts, marker_count in zip(half_transposes, num_markers):
        ht_per_mark.append(hts / marker_count)

    return jaro_scores, half_transposes, ht_per_tok, ht_per_mark

### Evaluation

In [45]:
# # gold sentences: constructed "goal" files, ideally based on source project but typically based on target project
# # predicted sentences: translated output, based on source project
# project_path = Path("test_S/Paratext/projects/aArpBT_2024_09_19")
# book = "MAT"
# chapters = {2,3,4}
# gold_file_path = Path("zzz_USFM_eval/aa_gold_standard/tpi_aps_41MATaArp_2_4.SFM")
# pred_file_path = Path("zzz_USFM_eval/tpi_aps/41MAT_hmm.SFM")

In [46]:
# settings = FileParatextProjectSettingsParser(project_path).parse()
# gold_file_text = UsfmFileText(
#     settings.stylesheet,
#     settings.encoding,
#     book,
#     gold_file_path,
#     settings.versification,
#     include_markers=True,
#     include_all_text=True,
#     project=settings.name,
# )
# pred_file_text = UsfmFileText(
#     settings.stylesheet,
#     settings.encoding,
#     book,
#     pred_file_path,
#     settings.versification,
#     include_markers=True,
#     include_all_text=True,
#     project=settings.name,
# )

# '''Preprocess'''
# # have to iterate over individually at first because gold standard file only contains the specific chapters
# ignore = ["r", "rem"]
# all_vrefs = []
# gold_sents = []
# for gs in gold_file_text:
#     if (len(gs.ref.path) > 0 and gs.ref.path[-1].name in ignore) or gs.ref.chapter_num not in chapters:
#         continue
#     all_vrefs.append(gs.ref)
#     gold_sents.append(gs.text)
# pred_sents = []
# pv = []
# for ps in pred_file_text:
#     if (len(ps.ref.path) > 0 and ps.ref.path[-1].name in ignore) or ps.ref.chapter_num not in chapters:
#         continue
#     pred_sents.append(ps.text)
#     pv.append(ps.ref)

# alts = f"({'|'.join(['f', 'x', 'fig', 'va', 'vp', 'ca'])})"
# embeds_re = re.compile(fr"\\{alts}.*?\\{alts}\*")

# # assume gold and pred have the exact same ScriptureRefs, as they should be based off the same original file
# tokenizer = WhitespaceSlashTokenizer() # handles 'word\w' case
# # tokenizer = LatinWordTokenizer()
# vrefs = []
# gold_sent_toks = []
# pred_sent_toks = []
# num_markers = [] # number of USFM markers at each vref
# for ref, gs, ps in zip(all_vrefs, gold_sents, pred_sents):
#     # discard embeds, as of now they will always be at the end so they will only inflate the metrics
#     gs = embeds_re.sub("", gs)
#     ps = embeds_re.sub("", ps)

#     if gs.count("\\") > 0:
#         vrefs.append(ref)
#         gold_sent_toks.append(list(tokenizer.tokenize(gs)))
#         pred_sent_toks.append(list(tokenizer.tokenize(ps)))
#         num_markers.append(gs.count("\\"))

# '''
# Levenshtein distance
# raw score: num of insertions/substitutions/deletions/transpositions
# '''
# dists = [] # edit distance
# for vref, gs, ps, num_mark in zip(vrefs, gold_sent_toks, pred_sent_toks, num_markers):
#     dist = nltk.edit_distance(gs, ps, transpositions=True)
#     dists.append(dist)
#     # print(vref, dist, num_mark)
#     # print(gs)
#     # print(ps)

# dists_per_10_tokens = [d * 10 / len(gs) for d,gs in zip(dists, gold_sent_toks)]
# dists_per_marker = [d / n for d,n in zip(dists, num_markers)]

# '''
# Jaro similarity
# raw score:
# avg of:
# 1. % matches in s1 (m / len(s1))
# 2. % matches in s2 (m / len(s2))
# 3. % matches not transposed ((m-t)/m), t = # "half transposes" / 2

# TODO: check symmetricality of Jaro similarity
# '''
# jaro_scores = []
# half_transposes = []
# for vref, gs, ps, num_mark in zip(vrefs, gold_sent_toks, pred_sent_toks, num_markers):
#     score, hts = jaro(gs, ps)
#     jaro_scores.append(score)
#     half_transposes.append(hts)
#     # print(score, half_transposes, num_mark, vref)
#     # print(gs)
#     # print(ps)

# ht_per_10_tokens = [t * 10 / len(g) for t,g in zip(half_transposes, gold_sent_toks)]
# ht_per_marker = [t / n for t,n in zip(half_transposes, num_markers)]


### Evaluation Evaluation
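Simplified evaluation code for the constructed test files: each prediction file is scored against the gold file, and the resulting metrics are correlated with the human evaluation scores.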

In [47]:
# --- Configuration for one constructed test set ---
# Directory / file-name prefix of the test set; characters 2-4 are the book id
# passed to UsfmFileText.
test_prefix = "23ISA_1_spa" # "41MAT_1_eng"
# Number of prediction files (and human scores) to evaluate.
n = 5
# transpositions per verse, transpositions per marker
# [.92, .92, 1.69, 3.08, 15.62] [.48, .48, .88, 1.60, 8.12]
# [.27, 11.17, 1.73, 5.67, 12.67] [.08, 3.28, .51, 1.67, 3.73]
# human_eval_scores = [.27, 11.17, 1.73, 5.67, 12.67][:n]
human_eval_scores = [.08, 3.28, .51, 1.67, 3.73][:n]

# custom tokenizers handle markers
# tokenizer = WhitespaceSlashTokenizer()
# tokenizer = LatinMarkerWordTokeinzer()
# None means sentences are compared as raw strings (character level).
tokenizer = None

test_dir = Path("zzz_USFM_eval_eval") / test_prefix
book_id = test_prefix[2:5]

gold_file_path = test_dir / f"{test_prefix}_gold.SFM"
gold_file_text = UsfmFileText(
    "usfm.sty",
    "utf-8-sig",
    book_id,
    gold_file_path,
    include_markers=True,
    include_all_text=True,
)

# Prediction files are numbered from 1.
pred_file_paths = [test_dir / f"{test_prefix}_pred_{file_num}.SFM" for file_num in range(1, n + 1)]

# Collect every gold segment that contains at least one USFM marker, in a
# single pass over the gold file (the file used to be parsed twice, with a
# list-membership test per segment on the second pass).
#   sent_idxs      - position of the segment in the file, used to pick the
#                    matching segments out of each prediction file
#   vrefs          - reference of the segment
#   markers        - markers of the segment, in order of appearance
#   gold_sent_toks - token list of the segment, or its raw text when no
#                    tokenizer is configured
sent_idxs = []
vrefs = []
markers = []
gold_sent_toks = []

# Tokenizers other than LatinMarkerWordTokeinzer produce marker tokens without
# the leading backslash / trailing asterisk, so the expected markers are
# stripped the same way.
strip_marker_chars = bool(tokenizer) and not isinstance(tokenizer, LatinMarkerWordTokeinzer)

for i, gs in enumerate(gold_file_text):
    if "\\" not in gs.text:
        continue

    sent_idxs.append(i)
    vrefs.append(gs.ref)

    sent_markers = re.findall(r"\\[^\s\\\*]*\*?", gs.text) # TODO: make sure this works for each gold file, will have to change for the other tokenizers
    if strip_marker_chars:
        sent_markers = [marker.strip("\\*") for marker in sent_markers]
    markers.append(sent_markers)

    if tokenizer:
        gold_sent_toks.append(list(tokenizer.tokenize(gs.text)))
    else:
        gold_sent_toks.append(gs.text)

num_markers = [len(m) for m in markers]

# for sent in gold_sent_toks:
#     print(" ".join(sent))

# for sent in gold_sent_toks:
#     print(" ".join(sent))

'''get scores'''
# One entry per prediction file; each entry is a per-sentence list.
edit_dists, edit_dists_per_10_tokens, edit_dists_per_marker = [], [], []
jaro_scores, half_transposes, hts_per_10_tokens, hts_per_marker = [], [], [], []

# Set lookup instead of scanning the sent_idxs list for every segment.
wanted_idxs = set(sent_idxs)

for pred_file_path in pred_file_paths:
    pred_file_text = UsfmFileText("usfm.sty", "utf-8-sig", test_prefix[2:5], pred_file_path, include_markers=True, include_all_text=True)

    # Pick the segments at the same positions as the gold segments with markers.
    pred_sent_toks = [
        list(tokenizer.tokenize(ps.text)) if tokenizer else ps.text
        for idx, ps in enumerate(pred_file_text)
        if idx in wanted_idxs
    ]

    # Segments are paired with the gold file purely by position, so a shorter
    # prediction file cannot be scored; fail with a clear message instead of an
    # IndexError further down.
    if len(pred_sent_toks) != len(markers):
        raise ValueError(
            f"{pred_file_path}: found {len(pred_sent_toks)} of the {len(markers)} "
            "segments that carry markers in the gold file; the files are not aligned"
        )

    # TODO: this assumes that the markers will come in order, need to do something like find first instance of marker and cut from string, continue to search whole string
    # insert any missing markers at the end
    # if the prediction file leaves paragraphs at the end of a verse empty, they will not be returned by the parser
    for i, expected in enumerate(markers):
        # Count how many of the expected markers occur, in order, in the prediction.
        marker_idx = 0
        if tokenizer:
            for tok in pred_sent_toks[i]:
                if marker_idx >= len(expected):
                    break
                marker_idx += tok == expected[marker_idx]
        else:
            pos = 0
            for marker in expected:
                found = pred_sent_toks[i].find(marker, pos)
                if found == -1:
                    break
                pos = found + len(marker)
                marker_idx += 1

        # Whatever was not found is appended to the end of the prediction.
        missing = expected[marker_idx:]
        if tokenizer:
            pred_sent_toks[i].extend(missing)
        else:
            pred_sent_toks[i] += "".join(" " + marker for marker in missing)

    # for sent in pred_sent_toks:
    #     print(" ".join(sent))

    dists, dists_per_10_tokens, dists_per_marker = levenshtein_metrics(gold_sent_toks, pred_sent_toks, num_markers)
    edit_dists.append(dists)
    edit_dists_per_10_tokens.append(dists_per_10_tokens)
    edit_dists_per_marker.append(dists_per_marker)

    scores, hts, ht_per_tok, ht_per_mark = jaro_metrics(gold_sent_toks, pred_sent_toks, num_markers)
    jaro_scores.append(scores)
    half_transposes.append(hts)
    hts_per_10_tokens.append(ht_per_tok)
    hts_per_marker.append(ht_per_mark)

'''calculate summary stats'''
# Denominators shared by all summary statistics.
num_sents = len(vrefs)
total_gold_len = sum([len(gs) for gs in gold_sent_toks])
total_markers = sum(num_markers)


def _mean_per_sentence(per_file_values):
    """Average each prediction file's per-sentence values over the number of sentences."""
    return [sum(values) / num_sents for values in per_file_values]


# "avg_*" is the mean of the per-sentence values; "overall_*" divides the
# summed raw counts by the summed gold lengths / marker counts instead.
avg_dist = _mean_per_sentence(edit_dists)
avg_dist_per_10_tokens = _mean_per_sentence(edit_dists_per_10_tokens)
overall_avg_dist_per_10_tokens = [sum(ed) * 10 / total_gold_len for ed in edit_dists]
avg_dist_per_marker = _mean_per_sentence(edit_dists_per_marker)
overall_avg_dist_per_marker = [sum(ed) / total_markers for ed in edit_dists]

avg_jaro = _mean_per_sentence(jaro_scores)
avg_transposes = _mean_per_sentence(half_transposes)
avg_ts_per_10_tokens = _mean_per_sentence(hts_per_10_tokens)
overall_avg_ts_per_10_tokens = [sum(hts) * 10 / total_gold_len for hts in half_transposes]
avg_ts_per_marker = _mean_per_sentence(hts_per_marker)
overall_avg_ts_per_marker = [sum(hts) / total_markers for hts in half_transposes]

'''calculate Pearson's correlation coefficient'''
def _pearson_vs_human(metric_values):
    """Correlate one summary metric (one value per prediction file) with the human scores.

    Returns the ``(statistic, pvalue)`` pair of ``scipy.stats.pearsonr``.
    """
    res = pearsonr(human_eval_scores, metric_values)
    return res.statistic, res.pvalue


# Edit-distance based metrics.
avg_dist_pearson, avg_dist_p = _pearson_vs_human(avg_dist)
avg_dist_per_10_tokens_pearson, avg_dist_per_10_tokens_p = _pearson_vs_human(avg_dist_per_10_tokens)
overall_avg_dist_per_10_tokens_pearson, overall_avg_dist_per_10_tokens_p = _pearson_vs_human(overall_avg_dist_per_10_tokens)
avg_dist_per_marker_pearson, avg_dist_per_marker_p = _pearson_vs_human(avg_dist_per_marker)
overall_avg_dist_per_marker_pearson, overall_avg_dist_per_marker_p = _pearson_vs_human(overall_avg_dist_per_marker)

# Jaro based metrics.
avg_jaro_pearson, avg_jaro_p = _pearson_vs_human(avg_jaro)
avg_transposes_pearson, avg_transposes_p = _pearson_vs_human(avg_transposes)
avg_ts_per_10_tokens_pearson, avg_ts_per_10_tokens_p = _pearson_vs_human(avg_ts_per_10_tokens)
overall_avg_ts_per_10_tokens_pearson, overall_avg_ts_per_10_tokens_p = _pearson_vs_human(overall_avg_ts_per_10_tokens)
avg_ts_per_marker_pearson, avg_ts_per_marker_p = _pearson_vs_human(avg_ts_per_marker)
overall_avg_ts_per_marker_pearson, overall_avg_ts_per_marker_p = _pearson_vs_human(overall_avg_ts_per_marker)


In [48]:
# Report for the test set evaluated in the previous cell.
# Overview of the gold data: marker counts and sentence lengths. The lengths
# are token counts when a tokenizer is configured and character counts when
# gold_sent_toks holds raw strings (tokenizer = None).
print("number of markers", num_markers)
print("sent lengths", [len(gs) for gs in gold_sent_toks])
print("avg markers", sum(num_markers) / len(vrefs))
print("avg sent len", sum([len(gs) for gs in gold_sent_toks]) / len(vrefs))
print("all markers", markers)
print("\n")

# Per-sentence edit-distance values, one printed list per prediction file.
print("Edit distance")
print("edit distance")
for ed in edit_dists:
    print(ed)
print("dist per 10 tokens")
for edpt in edit_dists_per_10_tokens:
    print(edpt)
print("dist per marker")
for edpm in edit_dists_per_marker:
    print(edpm)
print("\n")

# Per-sentence Jaro values, one printed list per prediction file.
print("Jaro similarity")
print("jaro")
for js in jaro_scores:
    print(js)
print("half transposes")
for hts in half_transposes:
    print(hts)
print("transposes per 10 tokens")
for htpt in hts_per_10_tokens:
    print(htpt)
print("transposes per marker")
for htpm in hts_per_marker:
    print(htpm)
print("\n")

# Edit-distance summary table: a header line, then one row per prediction file.
# The backslash line continuations sit inside the string literals, so the
# leading indentation of each continued line is part of the printed text.
print("avg dist \
      avg dist per 10 tokens \
      overall avg dist per 10 tokens \
      avg dist per marker \
      overall avg dist per marker")
for i in range(len(pred_file_paths)):
    print(f"{avg_dist[i]}\t \
        {avg_dist_per_10_tokens[i]}\t \
        {overall_avg_dist_per_10_tokens[i]}\t \
        {avg_dist_per_marker[i]}\t \
        {overall_avg_dist_per_marker[i]}")
print("\n")

# Jaro summary table: a header line, then one row per prediction file.
print("avg jaro \
      avg transposes \
      avg t's per 10 tokens \
      overall avg t's per 10 tokens \
      avg t's per marker \
      overall avg t's per marker")
for i in range(len(pred_file_paths)):
    print(f"{avg_jaro[i]}\t \
        {avg_transposes[i]}\t \
        {avg_ts_per_10_tokens[i]}\t \
        {overall_avg_ts_per_10_tokens[i]}\t \
        {avg_ts_per_marker[i]}\t \
        {overall_avg_ts_per_marker[i]}")
print("\n")

# All eleven summary statistics on one tab-separated row per prediction file
# (edit-distance columns first, then the Jaro columns).
print("summary stats")
for i in range(len(pred_file_paths)):
    print(f"{avg_dist[i]}\t \
        {avg_dist_per_10_tokens[i]}\t \
        {overall_avg_dist_per_10_tokens[i]}\t \
        {avg_dist_per_marker[i]}\t \
        {overall_avg_dist_per_marker[i]}\t \
        {avg_jaro[i]}\t \
        {avg_transposes[i]}\t \
        {avg_ts_per_10_tokens[i]}\t \
        {overall_avg_ts_per_10_tokens[i]}\t \
        {avg_ts_per_marker[i]}\t \
        {overall_avg_ts_per_marker[i]}")

# Correlation of each summary statistic with the human scores: first a row of
# Pearson coefficients, then a row of p-values, in the same column order as
# the "summary stats" rows.
print("Pearson correlation coefficients and p-values")
print(f"{avg_dist_pearson}\t \
      {avg_dist_per_10_tokens_pearson}\t \
      {overall_avg_dist_per_10_tokens_pearson}\t \
      {avg_dist_per_marker_pearson}\t \
      {overall_avg_dist_per_marker_pearson}\t \
      {avg_jaro_pearson}\t \
      {avg_transposes_pearson}\t \
      {avg_ts_per_10_tokens_pearson}\t \
      {overall_avg_ts_per_10_tokens_pearson}\t \
      {avg_ts_per_marker_pearson}\t \
      {overall_avg_ts_per_marker_pearson}")
print(f"{avg_dist_p}\t \
      {avg_dist_per_10_tokens_p}\t \
      {overall_avg_dist_per_10_tokens_p}\t \
      {avg_dist_per_marker_p}\t \
      {overall_avg_dist_per_marker_p}\t \
      {avg_jaro_p}\t \
      {avg_transposes_p}\t \
      {avg_ts_per_10_tokens_p}\t \
      {overall_avg_ts_per_10_tokens_p}\t \
      {avg_ts_per_marker_p}\t \
      {overall_avg_ts_per_marker_p}")

number of markers [5, 3, 8, 3, 4, 5, 3, 4, 4, 6, 1, 4, 2, 4, 2, 4, 7, 1, 4, 4, 1, 5, 5, 2, 3, 1, 1, 2, 1, 3]
sent lengths [139, 139, 213, 159, 195, 235, 168, 132, 135, 211, 90, 206, 114, 181, 87, 165, 242, 73, 112, 176, 82, 208, 152, 121, 178, 94, 81, 110, 84, 136]
avg markers 3.4
avg sent len 147.26666666666668
all markers [['\\q1', '\\nd', '\\nd*', '\\q1', '\\q1'], ['\\q1', '\\q1', '\\q1'], ['\\q1', '\\q1', '\\q1', '\\q1', '\\nd', '\\nd*', '\\q1', '\\q1'], ['\\q1', '\\q1', '\\q1'], ['\\q1', '\\q1', '\\q1', '\\q1'], ['\\q1', '\\q1', '\\q1', '\\q1', '\\q1'], ['\\q1', '\\q1', '\\q1'], ['\\nd', '\\nd*', '\\q1', '\\q1'], ['\\nd', '\\nd*', '\\q1', '\\q1'], ['\\nd', '\\nd*', '\\q1', '\\q1', '\\q1', '\\q1'], ['\\q1'], ['\\q1', '\\q1', '\\q1', '\\q1'], ['\\q1', '\\q1'], ['\\q1', '\\q1', '\\q1', '\\q1'], ['\\q1', '\\q1'], ['\\q1', '\\q1', '\\q1', '\\q1'], ['\\nd', '\\nd*', '\\q1', '\\q1', '\\q1', '\\q1', '\\q1'], ['\\q1'], ['\\q1', '\\q1', '\\nd', '\\nd*'], ['\\q1', '\\q1', '\\q1', '\\q1'], ['

In [55]:
# Human scores for ten prediction files: the two per-marker lists noted in the
# configuration cell, concatenated.
human_eval_scores = [.48, .48, .88, 1.60, 8.12, .08, 3.28, .51, 1.67, 3.73]

# One row per metric, one column per prediction file (same order as
# human_eval_scores). Presumably pasted from the "summary stats" output of the
# two test sets -- TODO confirm.
raw_scores = """9.2308	9.3846	1.5385	10.3846	19.6154	2.1333	6.9667	3.6000	2.6667	7.2333
0.8309	0.8925	0.1194	0.9910	1.8781	0.1367	0.4266	0.2253	0.1618	0.5405
0.8721	0.8866	0.1453	0.9811	1.8532	0.1449	0.4731	0.2445	0.1811	0.4912
4.6154	4.6923	0.7692	5.1923	9.8077	0.4225	1.3375	0.6977	0.6700	2.9867
4.6154	4.6923	0.7692	5.1923	9.8077	0.6275	2.0490	1.0588	0.7843	2.1275
0.9830	0.9823	0.9867	0.9442	0.8762	0.9977	0.9626	0.9881	0.9730	0.9464
11.4615	11.6923	10.3846	34.7692	63.7692	2.4000	31.5333	10.5333	28.7000	42.5333
1.0423	1.1170	0.8018	3.3012	6.0543	0.1538	1.8318	0.6877	1.5918	3.0442
1.0828	1.1047	0.9811	3.2849	6.0247	0.1630	2.1412	0.7153	1.9488	2.8882
5.7308	5.8462	5.1923	17.3846	31.8846	0.4754	5.8596	2.0940	6.3886	16.5456
5.7308	5.8462	5.1923	17.3846	31.8846	0.7059	9.2745	3.0980	8.4412	12.5098"""
scores = [[float(cell) for cell in row.split()] for row in raw_scores.splitlines()]
print(scores)

# Pearson correlation of every metric row with the human scores.
pearson_results = [pearsonr(human_eval_scores, stat) for stat in scores]
correlations = [res.statistic for res in pearson_results]
p_values = [res.pvalue for res in pearson_results]
print("\t".join(map(str, correlations)))
print("\t".join(map(str, p_values)))

[[9.2308, 9.3846, 1.5385, 10.3846, 19.6154, 2.1333, 6.9667, 3.6, 2.6667, 7.2333], [0.8309, 0.8925, 0.1194, 0.991, 1.8781, 0.1367, 0.4266, 0.2253, 0.1618, 0.5405], [0.8721, 0.8866, 0.1453, 0.9811, 1.8532, 0.1449, 0.4731, 0.2445, 0.1811, 0.4912], [4.6154, 4.6923, 0.7692, 5.1923, 9.8077, 0.4225, 1.3375, 0.6977, 0.67, 2.9867], [4.6154, 4.6923, 0.7692, 5.1923, 9.8077, 0.6275, 2.049, 1.0588, 0.7843, 2.1275], [0.983, 0.9823, 0.9867, 0.9442, 0.8762, 0.9977, 0.9626, 0.9881, 0.973, 0.9464], [11.4615, 11.6923, 10.3846, 34.7692, 63.7692, 2.4, 31.5333, 10.5333, 28.7, 42.5333], [1.0423, 1.117, 0.8018, 3.3012, 6.0543, 0.1538, 1.8318, 0.6877, 1.5918, 3.0442], [1.0828, 1.1047, 0.9811, 3.2849, 6.0247, 0.163, 2.1412, 0.7153, 1.9488, 2.8882], [5.7308, 5.8462, 5.1923, 17.3846, 31.8846, 0.4754, 5.8596, 2.094, 6.3886, 16.5456], [5.7308, 5.8462, 5.1923, 17.3846, 31.8846, 0.7059, 9.2745, 3.098, 8.4412, 12.5098]]
0.7604658627428915	0.7123505863970165	0.7027875566472089	0.6798388141678817	0.673997650177436	-0.95