In [1]:
import re
import json
import stanza
import argparse

import numpy as np
import pandas as pd

from itertools import chain
from ast import literal_eval
from collections import defaultdict

from functions_score_section import read_alignments, score_vec_rslts_chapter_level, \
build__src_2_tgt_dict, build_tgt_2_src_dict, score_fr_sents

In [2]:
# get vecalign results
vec_rslts_path = "/home/craig.car/repos/chiron/align_texts_project/data/lucretius/lucretius_en1893_vecrslts"
vec_rslts = read_alignments(vec_rslts_path)

In [3]:
lat_dict_path = "/home/craig.car/repos/chiron/align_texts_project/data/lucretius/lat_sent2book_dict.json"

with open(lat_dict_path) as f:
    lat_sent2book_name = json.load(f)

en1893_dict_path = "/home/craig.car/repos/chiron/align_texts_project/data/lucretius/en1893_sent2section_dict.json"

with open(en1893_dict_path) as f:
    en1893_sent2section_name = json.load(f)
    

In [56]:
# get peritext sections in en1893
section_names = []
for item in list(en1893_sent2section_name.values()):
    if isinstance(item, str):
        section_names.append(item)
    else:
        section_names.extend(item)

In [57]:
section_names = np.unique(np.array(extraneous_sections))

In [58]:
section_names

array(['forewordbook0', 'forewordbook0note', 'forewordbook0title',
       'index', 'metric_translationbook0title', 'metric_translationbook1',
       'metric_translationbook1note', 'metric_translationbook1title',
       'metric_translationbook2', 'metric_translationbook2note',
       'metric_translationbook2title', 'metric_translationbook3',
       'metric_translationbook3note', 'metric_translationbook3title',
       'metric_translationbook4', 'metric_translationbook4note',
       'metric_translationbook4title', 'metric_translationbook5',
       'metric_translationbook5note', 'metric_translationbook5title',
       'metric_translationbook6', 'metric_translationbook6note',
       'metric_translationbook6title', 'prose_translationbook0title',
       'prose_translationbook1', 'prose_translationbook1commentary',
       'prose_translationbook1note', 'prose_translationbook1title',
       'prose_translationbook2', 'prose_translationbook2commentary',
       'prose_translationbook2note', 'prose_t

In [60]:
"test1"[-2].isdigit()

False

In [61]:
extraneous_sections = []
for item in section_names:
    if item[-1].isdigit():
        continue
    else:
        extraneous_sections.append(item)
extraneous_sections

['forewordbook0note',
 'forewordbook0title',
 'index',
 'metric_translationbook0title',
 'metric_translationbook1note',
 'metric_translationbook1title',
 'metric_translationbook2note',
 'metric_translationbook2title',
 'metric_translationbook3note',
 'metric_translationbook3title',
 'metric_translationbook4note',
 'metric_translationbook4title',
 'metric_translationbook5note',
 'metric_translationbook5title',
 'metric_translationbook6note',
 'metric_translationbook6title',
 'prose_translationbook0title',
 'prose_translationbook1commentary',
 'prose_translationbook1note',
 'prose_translationbook1title',
 'prose_translationbook2commentary',
 'prose_translationbook2note',
 'prose_translationbook2title',
 'prose_translationbook3commentary',
 'prose_translationbook3note',
 'prose_translationbook3title',
 'prose_translationbook4commentary',
 'prose_translationbook4note',
 'prose_translationbook4title',
 'prose_translationbook5commentary',
 'prose_translationbook5note',
 'prose_translationboo

# Score vecalign results: by prediction

In [62]:
def score_vec_rslts_chapter_level(vr_rslts_lst, el_sent2section_dict,
                                 fr_sent2section_dict, fr_extra_section_names):

    tp_strict = 0 # +1 per alignment if there's an exact match
    tp_lax = 0 # +1 per alignment if there's any overlap
    overlaps = []
    errors = []
    correct_nulls = 0
    correct_text2text = {}

    for idx_align, alignment in enumerate(vr_rslts_lst):
        # skip alignments null on both sides
        if alignment == ([],[]):
            continue
        else:
            src_sents = alignment[0]
            tgt_sents = alignment[1]
            # get set of chapters from src, then from tgt
            chapters_from_src = set()
            chapters_from_tgt = set()
            # if alignment is null on src side, then chapters_from_src remains empty set
            if src_sents != []:
                for src_id in src_sents:
                    if isinstance(el_sent2section_dict[str(src_id)], list):
                        for section_name in el_sent2section_dict[str(src_id)]:
                            chapters_from_src.add(section_name)
                    else:
                        chapters_from_src.add(el_sent2section_dict[str(src_id)])
            # if alignment is null on tgt side, then chapters_from_tgt remains empty set
            if tgt_sents != []:
                for tgt_id in tgt_sents:
                    if isinstance(fr_sent2section_dict[str(tgt_id)], list):
                        for section_name_ in fr_sent2section_dict[str(tgt_id)]:
                            chapters_from_tgt.add(section_name_)
                    else:
                        chapters_from_tgt.add(fr_sent2section_dict[str(tgt_id)])

            # compare the sets: if text, then last 5 characters in format "book[1-6]"
            src_chaps_compare = set()
            for src_chap in chapters_from_src:
                src_chaps_compare.add(src_chap[-5:])
            tgt_chaps_compare = set()
            for tgt_chap in chapters_from_tgt:
                tgt_chaps_compare.add(tgt_chap[-5:])       
            if src_chaps_compare == tgt_chaps_compare:
                tp_strict += 1
                # for correct text2text aligns, en is from prose or metric translation?
                correct_text2text[str(alignment)] = chapters_from_tgt

            # account for correct null : fr extraneous sections 
            elif chapters_from_src == set():
                tgt_counter = 0
                for chapter in chapters_from_tgt:
                    if chapter in fr_extra_section_names:
                        tgt_counter += 1
                # tp_strict if all tgt chapters are extraneous
                if tgt_counter == len(chapters_from_tgt):
                    # tp_strict += 1
                    correct_nulls += 1

            else:
                overlap = src_chaps_compare.intersection(tgt_chaps_compare)
                if len(overlap) != 0:
                    tp_lax += 1
                    overlaps.append(alignment)
                else:
                    # save errors
                    error_dict = {}
                    error_dict["alignment"] = alignment
                    error_dict["alignmnent_idx"] = idx_align
                    error_dict["src_chapters"] = chapters_from_src
                    error_dict["tgt_chapters"] = chapters_from_tgt
                    errors.append(error_dict)
        
    return tp_strict, tp_lax, overlaps, errors, correct_nulls, correct_text2text


In [63]:
test = {}
test[0] = [1,3]

In [64]:
lat_sent2book_name["789"][-5:]

'book3'

In [65]:
en1893_sent2section_name["423"][-5:] == lat_sent2book_name["0"][-5:]

True

In [75]:
tp_strict_, tp_lax_, overlaps_, errors_, correct_nulls_, correct_text2text_ = score_vec_rslts_chapter_level(
    vec_rslts, lat_sent2book_name, en1893_sent2section_name, extraneous_sections)

In [78]:
print(tp_strict_)
print(tp_lax_)
print(len(errors_))
print(correct_nulls_)

1700
455
233
3201


In [68]:
len(vec_rslts)

10534

In [13]:
# was latin text aligned to any metric translation sentences?
correct_lat2entext = set()
for tgt_set in list(correct_text2text_.values()):
    for item in tgt_set:
        correct_lat2entext.add(item)

In [14]:
correct_lat2entext

{'metric_translationbook6',
 'prose_translationbook1',
 'prose_translationbook2',
 'prose_translationbook3',
 'prose_translationbook4',
 'prose_translationbook5',
 'prose_translationbook6'}

In [15]:
correct_metric2lat = []
for keys, values in correct_text2text_.items():
    for section_set in values:
        if section_set.startswith('metric_translation'):
            correct_metric2lat.append((keys, values))

In [16]:
correct_metric2lat

[('([2423], [12667, 12668, 12669, 12670, 12671, 12672, 12673])',
  {'metric_translationbook6'}),
 ('([2424], [12691, 12692, 12693, 12694, 12695, 12696, 12697])',
  {'metric_translationbook6'})]

In [17]:
overlaps_

[([0], [421, 422, 423, 424, 425, 426]),
 ([1], [465, 466, 467, 468, 469, 470, 471]),
 ([6], [477, 478, 479, 480, 481, 482, 483]),
 ([9], [512, 513, 514, 515, 516, 517]),
 ([10], [541, 542, 543, 544, 545, 546]),
 ([15], [583, 584, 585, 586, 587, 588, 589]),
 ([16], [592, 593, 594, 595, 596]),
 ([17], [621, 622]),
 ([23], [631, 632, 633, 634, 635]),
 ([25], [644, 645, 646, 647, 648, 649, 650]),
 ([31], [658, 659, 660, 661, 662, 663, 664]),
 ([34], [673, 674, 675, 676, 677, 678, 679]),
 ([36], [681, 682, 683, 684, 685, 686, 687]),
 ([41], [731, 732, 733, 734, 735, 736]),
 ([48], [746, 747, 748, 749, 750]),
 ([51], [756, 757, 758]),
 ([56], [763, 764, 765, 766, 767, 768, 769]),
 ([58], [777, 778, 779, 780]),
 ([64], [786, 787, 788, 789, 790, 791, 792]),
 ([67], [803, 804]),
 ([76], [831, 832, 833, 834, 835, 836, 837]),
 ([79], [842, 843, 844, 845, 846, 847, 848]),
 ([81], [855, 856, 857, 858, 859, 860, 861]),
 ([89], [875, 876, 877, 878]),
 ([90], [883, 884, 885]),
 ([101], [918, 919]),
 (

In [18]:
errors_

[{'alignment': ([13], [555]),
  'alignmnent_idx': 526,
  'src_chapters': {'book1'},
  'tgt_chapters': {'prose_translationbook1note'}},
 {'alignment': ([14], [569, 570, 571, 572, 573, 574, 575]),
  'alignmnent_idx': 540,
  'src_chapters': {'book1'},
  'tgt_chapters': {'prose_translationbook1note'}},
 {'alignment': ([24], [642, 643]),
  'alignmnent_idx': 589,
  'src_chapters': {'book1'},
  'tgt_chapters': {'prose_translationbook1note'}},
 {'alignment': ([32], [667, 668, 669, 670, 671]),
  'alignmnent_idx': 599,
  'src_chapters': {'book1'},
  'tgt_chapters': {'prose_translationbook1note'}},
 {'alignment': ([33], [672]),
  'alignmnent_idx': 600,
  'src_chapters': {'book1'},
  'tgt_chapters': {'prose_translationbook1note'}},
 {'alignment': ([40], [721, 722, 723, 724, 725, 726, 727]),
  'alignmnent_idx': 635,
  'src_chapters': {'book1'},
  'tgt_chapters': {'prose_translationbook1note'}},
 {'alignment': ([49], [751, 752, 753, 754]),
  'alignmnent_idx': 647,
  'src_chapters': {'book1'},
  'tgt

# Analyze vecalign results: by English sentences

In [79]:
# get dict of tgt sentences' alignments to src sents
en1893_sent2lat_sent_aligns = build_tgt_2_src_dict(vec_rslts)

In [80]:
# test
keys = list(en1893_sent2lat_sent_aligns.keys())
keys = sorted(keys)
keys == [x for x in range(0, 13966)]

True

In [81]:
extraneous_sections

['forewordbook0note',
 'forewordbook0title',
 'index',
 'metric_translationbook0title',
 'metric_translationbook1note',
 'metric_translationbook1title',
 'metric_translationbook2note',
 'metric_translationbook2title',
 'metric_translationbook3note',
 'metric_translationbook3title',
 'metric_translationbook4note',
 'metric_translationbook4title',
 'metric_translationbook5note',
 'metric_translationbook5title',
 'metric_translationbook6note',
 'metric_translationbook6title',
 'prose_translationbook0title',
 'prose_translationbook1commentary',
 'prose_translationbook1note',
 'prose_translationbook1title',
 'prose_translationbook2commentary',
 'prose_translationbook2note',
 'prose_translationbook2title',
 'prose_translationbook3commentary',
 'prose_translationbook3note',
 'prose_translationbook3title',
 'prose_translationbook4commentary',
 'prose_translationbook4note',
 'prose_translationbook4title',
 'prose_translationbook5commentary',
 'prose_translationbook5note',
 'prose_translationboo

In [82]:
"forewordbook0".endswith("0")

True

In [103]:
def score_tgt_sents(tgt2src_sent_aligns_dict, tgt_sent2section_name_dict,
                   src_sent2section_name_dict, extraneous_chapter_names):
    extraneous2null_tpstrict = 0
    extraneous2null_tplax = 0 # at least one overlap
    extraneous2text = 0 # no overlap

    text2text_tpstrict = 0
    text2text_tplax = 0
    text2text_incorrect = 0
    text2text_incorrect_lst = []

    text2null_incorrect = 0
    text2null_lst = []

    for tgt_sent_idx in tgt2src_sent_aligns_dict.keys():
        # get src sentences aligned to it (returns a set)
        src_aligned_sents = tgt2src_sent_aligns_dict[tgt_sent_idx]
        # print(f"src aligned sents is {src_aligned_sents}")
        
        # TODO: necessary? to skip null-null alignments ("null" will not appear as key in dict)
        if str(tgt_sent_idx) in tgt_sent2section_name_dict.keys():
            # get tgt sent chapter (keys are str)
            tgt_sent_chapter = tgt_sent2section_name_dict[str(tgt_sent_idx)]
            # print(f"tgt chapter is {tgt_sent_chapter}")
            # convert to list in case of multiple chapters per sent
            if isinstance(tgt_sent_chapter, str):
                tgt_sent_chapter = [tgt_sent_chapter]
                # print(tgt_sent_chapter)
            
            tgt_chapter_counter = 0
            for tgt_chapter in tgt_sent_chapter:
                if tgt_chapter in extraneous_chapter_names:
                    # print(tgt_sent_idx)
                    # print(tgt_chapter)
                    # get num of tgt to null alignments
                    extraneous2null_counter = 0
                    for item in src_aligned_sents:
                        if item == "null":
                            extraneous2null_counter += 1
                    # compare to number of src sents in alignmnent
                    if extraneous2null_counter == len(src_aligned_sents):
                        # then all src aligned sents are null
                        extraneous2null_tpstrict += 1
                    elif extraneous2null_counter > 0:
                        # then at least one src sent is null (also captures tpstrict)
                        extraneous2null_tplax += 1
                    else:
                        # no src sents are null
                        extraneous2text += 1
                
                #### TODO: ADAPT TO ACCOUNT FOR DIFFERENT FORMATTING ("BOOK 2" for LATIN) ####
                else: # compare src and tgt chapters
                    src_aligned_chapters = set()
                    src_text2text_correct_counter = 0
                    src_text2text_incorrect_counter = 0

                    for item in src_aligned_sents:
                        if item == "null":
                            text2null_incorrect += 1
                            text2null_lst.append(tgt_sent_idx)
                        else:
                            # get chapters of src sent (keys are str)
                            if isinstance(src_sent2section_name_dict[str(item)], list):
                                for section_name in src_sent2section_name_dict[str(item)]:
                                    src_aligned_chapters.add(section_name)
                            else:
                                src_aligned_chapters.add(src_sent2section_name_dict[str(item)])

                    # print(f"src chapters are {src_aligned_chapters}")
                    #### TODO: IS THIS WRONG? ############
                    tgt_chaps_compare = tgt_chapter[-5:]
                    for item in src_aligned_chapters:
                        if tgt_chaps_compare == item:
                            src_text2text_correct_counter += 1
                        else:
                            src_text2text_incorrect_counter += 1

                    if src_text2text_correct_counter == len(src_aligned_sents):
                        text2text_tpstrict += 1
                    elif src_text2text_correct_counter > 0:
                        text2text_tplax += 1
                    else:
                        text2text_incorrect += 1
                        text2text_incorrect_lst.append(tgt_sent_idx)

                tgt_chapter_counter += 1
            if tgt_chapter_counter > 1:
                print(tgt_sent_idx)

    # remove text2null from text2text_incorrect_lst
    text2null_lst = set(text2null_lst)
    text2text_incorrect_lst = set(text2text_incorrect_lst)
    text2text_incorrect_lst -= text2null_lst
    # update num of text2text_incorrect
    text2text_incorrect -= text2null_incorrect
    
    results = [extraneous2null_tpstrict, extraneous2null_tplax, extraneous2text,
               text2text_tpstrict, text2text_tplax, 
               text2text_incorrect, text2text_incorrect_lst,
               text2null_incorrect, text2null_lst]
    
    return results

In [127]:
def score_tgt_sents(tgt2src_sent_aligns_dict, tgt_sent2section_name_dict,
                   src_sent2section_name_dict, extraneous_chapter_names):
    extraneous2null_tpstrict = 0
    extraneous2null_tplax = 0 # at least one overlap
    extraneous2text = 0 # no overlap

    text2text_tpstrict = 0
    text2text_tplax = 0
    text2text_incorrect = 0
    text2text_incorrect_lst = []

    text2null_incorrect = 0
    text2null_lst = []

    for tgt_sent_idx in tgt2src_sent_aligns_dict.keys():
        sent_extraneous2null_tpstrict = 0
        sent_extraneous2null_tplax = 0
        sent_extraneous2text = 0
        sent_text2text_tpstrict = 0
        sent_text2text_tplax = 0
        sent_text2text_incorrect = 0
        
        # get src sentences aligned to it (returns a set)
        src_aligned_sents = tgt2src_sent_aligns_dict[tgt_sent_idx]
        # print(f"src aligned sents is {src_aligned_sents}")
        
        # TODO: necessary? to skip null-null alignments ("null" will not appear as key in dict)
        if str(tgt_sent_idx) in tgt_sent2section_name_dict.keys():
            # get tgt sent chapter (keys are str)
            tgt_sent_chapter = tgt_sent2section_name_dict[str(tgt_sent_idx)]
            # print(f"tgt chapter is {tgt_sent_chapter}")
            # convert to list in case of multiple chapters per sent
            if isinstance(tgt_sent_chapter, str):
                tgt_sent_chapter = [tgt_sent_chapter]
                # print(tgt_sent_chapter)
            
            tgt_chapter_counter = 0
            for tgt_chapter in tgt_sent_chapter:
                if tgt_chapter in extraneous_chapter_names:
                    # print(tgt_sent_idx)
                    # print(tgt_chapter)
                    # get num of tgt to null alignments
                    extraneous2null_counter = 0
                    for item in src_aligned_sents:
                        if item == "null":
                            extraneous2null_counter += 1
                    # compare to number of src sents in alignmnent
                    if extraneous2null_counter == len(src_aligned_sents):
                        # then all src aligned sents are null
                        sent_extraneous2null_tpstrict += 1
                    elif extraneous2null_counter > 0:
                        # then at least one src sent is null (also captures tpstrict)
                        sent_extraneous2null_tplax += 1
                    else:
                        # no src sents are null
                        sent_extraneous2text += 1
                
                #### TODO: ADAPT TO ACCOUNT FOR DIFFERENT FORMATTING ("BOOK 2" for LATIN) ####
                else: # compare src and tgt chapters
                    src_aligned_chapters = set()
                    src_text2text_correct_counter = 0
                    src_text2text_incorrect_counter = 0

                    for item in src_aligned_sents:
                        if item == "null":
                            text2null_incorrect += 1
                            text2null_lst.append(tgt_sent_idx)
                        else:
                            # get chapters of src sent (keys are str)
                            if isinstance(src_sent2section_name_dict[str(item)], list):
                                for section_name in src_sent2section_name_dict[str(item)]:
                                    src_aligned_chapters.add(section_name)
                            else:
                                src_aligned_chapters.add(src_sent2section_name_dict[str(item)])

                    # print(f"src chapters are {src_aligned_chapters}")
                    #### TODO: IS THIS WRONG? ############
                    tgt_chaps_compare = tgt_chapter[-5:]
                    for item in src_aligned_chapters:
                        if tgt_chaps_compare == item:
                            src_text2text_correct_counter += 1
                        else:
                            src_text2text_incorrect_counter += 1

                    if src_text2text_correct_counter == len(src_aligned_sents):
                        sent_text2text_tpstrict += 1
                    elif src_text2text_correct_counter > 0:
                        sent_text2text_tplax += 1
                    else:
                        sent_text2text_incorrect += 1
                        text2text_incorrect_lst.append(tgt_sent_idx)

                tgt_chapter_counter += 1
            
        if tgt_chapter_counter > 1:
            print(tgt_sent_idx)
            if sent_extraneous2null_tpstrict == len(tgt_sent_chapter):
                extraneous2null_tpstrict += 1
            if (sent_extraneous2null_tplax > 0 & sent_extraneous2null_tplax < len(tgt_sent_chapter)):
                extraneous2null_tplax += 1
            if sent_extraneous2text > 0:
                extraneous2text += 1
            if sent_text2text_tpstrict > 0:
                text2text_tpstrict += 1
            if sent_text2text_tplax > 0:
                text2text_tplax += 1
            if sent_text2text_incorrect > 0:
                text2text_incorrect += 1
                
    # remove text2null from text2text_incorrect_lst
    text2null_lst = set(text2null_lst)
    text2text_incorrect_lst = set(text2text_incorrect_lst)
    text2text_incorrect_lst -= text2null_lst
    # update num of text2text_incorrect
    text2text_incorrect -= text2null_incorrect
    
    results = [extraneous2null_tpstrict, extraneous2null_tplax, extraneous2text,
               text2text_tpstrict, text2text_tplax, 
               text2text_incorrect, text2text_incorrect_lst,
               text2null_incorrect, text2null_lst]
    
    return results

In [128]:
rslts_en1893_sents = score_tgt_sents(en1893_sent2lat_sent_aligns, en1893_sent2section_name,
                   lat_sent2book_name, extraneous_sections)

17
18
19
20
21
22
37
38
55
56
57
58
59
69
70
71
88
90
92
93
94
95
96
108
109
110
111
112
113
114
115
116
117
118
119
120
130
132
133
134
135
136
137
138
139
158
159
160
161
185
186
187
188
189
190
191
192
193
215
216
217
218
237
239
240
241
265
266
267
287
288
289
290
293
294
424
478
485
497
509
514
516
532
551
559
593
619
622
631
637
638
659
673
675
681
684
698
704
705
735
748
765
780
790
791
815
846
858
860
876
877
903
911
919
935
938
941
958
961
962
972
1009
1017
1033
1049
1072
1088
1101
1109
1142
1185
1188
1189
1208
1215
1216
1234
1246
1266
1277
1291
1295
1320
1341
1349
1362
1387
1397
1409
1436
1446
1456
1474
1486
1496
1508
1526
1530
1537
1558
1566
1584
1605
1612
1622
1626
1650
1667
1702
1711
1724
1726
1727
1740
1748
1759
1760
1769
1806
1812
1816
1829
1845
1847
1853
1862
1866
1889
1916
1919
1942
1962
1964
1971
1993
2012
2029
2047
2059
2061
2073
2081
2100
2101
2122
2129
2146
2162
2166
2168
2180
2188
2197
2209
2213
2235
2240
2243
2274
2277
2279
2290
2332
2343
2366
2370
2385
2389
2413

In [133]:
en1893_sent2section_name

{'0': 'forewordbook0title',
 '1': 'forewordbook0title',
 '2': 'forewordbook0title',
 '3': 'forewordbook0title',
 '4': 'forewordbook0title',
 '5': 'forewordbook0title',
 '6': 'forewordbook0title',
 '7': 'forewordbook0title',
 '8': 'forewordbook0title',
 '9': 'forewordbook0title',
 '10': 'forewordbook0title',
 '11': 'forewordbook0title',
 '12': 'forewordbook0title',
 '13': 'forewordbook0title',
 '14': 'forewordbook0title',
 '15': 'forewordbook0title',
 '16': 'forewordbook0title',
 '17': ['forewordbook0title', 'forewordbook0note'],
 '18': ['forewordbook0note', 'forewordbook0note'],
 '19': ['forewordbook0note', 'forewordbook0note'],
 '20': ['forewordbook0note', 'forewordbook0note'],
 '21': ['forewordbook0note', 'forewordbook0note'],
 '22': ['forewordbook0note', 'forewordbook0'],
 '23': 'forewordbook0',
 '24': 'forewordbook0',
 '25': 'forewordbook0',
 '26': 'forewordbook0',
 '27': 'forewordbook0',
 '28': 'forewordbook0',
 '29': 'forewordbook0',
 '30': 'forewordbook0',
 '31': 'forewordbook0'

In [130]:
print(rslts_en1893_sents[0])
print(rslts_en1893_sents[1])
print(rslts_en1893_sents[2])
print(rslts_en1893_sents[3])
print(rslts_en1893_sents[4])
print(rslts_en1893_sents[5])
print(rslts_en1893_sents[7])

176
0
428
336
0
-4612
4945


In [131]:
sum(rslts_en1893_sents[0:5])+rslts_en1893_sents[7]

5885

In [121]:
rslts_en1893_sents[5] + 1

-4611