# It's a Machine and Natural Language Tagger

In [1]:
from src.IaMaN.base import LM
from src.utils.data import load_ud
from src.utils.munge import stick_spaces
from pprint import pprint as pprint
from collections import defaultdict
from collections import Counter
from tqdm import tqdm
import numpy as np
import os, re

seed = 691; max_char = 200_000_000
m = 10; space = True; fine_tune = False; num_posttrain = 1000; noise = 0.001
positional = 'dependent'; positionally_encode = True; bits = 50; btype = 'all'
ms_init = 'waiting_time'; update = {'representation', 'contexts', 'tokenizer'}
runners = 10; gpu = False; tokenizer = 'hr-bpe'; decode_method = 'argmax'

print("Loading post-training data...")
posttrain_path = '/data/newstweet/week_2019-40_article_texts/'
total_posttrain = len([posttrain_file for posttrain_file in os.listdir(posttrain_path) if re.search("^\d+.txt$", posttrain_file)])
all_posttrain_files = [posttrain_file for posttrain_file in os.listdir(posttrain_path) if re.search("^\d+.txt$", posttrain_file)]
if num_posttrain:
    np.random.seed(seed)
    posttrain_files = np.random.choice(all_posttrain_files, size=num_posttrain, replace=False)
else:
    posttrain_files = np.array([])
ptdocs = [[open(posttrain_path+posttrain_file).read()] for posttrain_file in tqdm(posttrain_files)]
print("Loading gold-tagged UDs data...")
load_set = "GUM"
all_docs = load_ud("English", num_articles = 0, seed = seed, load_set = load_set, rebuild = True, space = space)
test_docs = [doc for doc in all_docs if 'test' in doc['id'] and len(doc['text']) <= max_char]# [:2]
train_docs = [doc for doc in all_docs if 'test' not in doc['id'] and len(doc['text']) <= max_char]# [:4]
nsamp = len(test_docs)
print('Avail. post-train, total post-train, Avail. gold, total gold-train, total test-gold: ', 
      total_posttrain, len(ptdocs), len(all_docs), len(train_docs), len(test_docs))

Loading post-training data...


100%|██████████| 1000/1000 [00:02<00:00, 473.82it/s]


Loading gold-tagged UDs data...
Avail. post-train, total post-train, Avail. gold, total gold-train, total test-gold:  14198 1000 150 132 18


In [2]:
docs = [["".join([row[1] for row in s]) for s in d['conllu']] for d in train_docs]
tdocs = [["".join([row[1] for row in s]) for s in d['conllu']] for d in test_docs]
covering = [[[row[1] for row in s] for s in d['conllu']] for d in train_docs]
tcovering = [[[row[1] for row in s] for s in d['conllu']] for d in test_docs]
if not space:
    for d_i, d in enumerate(covering):
        for s_i, s in enumerate(d):
            covering[d_i][s_i] = stick_spaces(s)
    for d_i, d in enumerate(tcovering):
        for s_i, s in enumerate(d):
            tcovering[d_i][s_i] = stick_spaces(s)
covering_vocab = set([t for d in covering for s in d for t in s])

# 'lem': [[row[2] for row in s] for s in d['conllu']], # note: for speed, remove lemma layer
train_layers = {d_i: {'sty': [[d['s_type'][s_i] for row in s] for s_i, s in enumerate(d['conllu'])], 
                      'pos': [[row[3] for row in s] for s in d['conllu']], 
                      'sup': [[(str(int(row[6]) - int(row[0])) if int(row[6]) else row[6]) for row in s] for s in d['conllu']], 
                      'dep': [[row[7] for row in s] for s in d['conllu']]}
                for d_i, d in enumerate(train_docs)}
# 'lem': [[row[2] for row in s] for s in d['conllu']], # note: for speed, remove lemma layer
test_layers = {d_i: {'sty': [[d['s_type'][s_i] for row in s] for s_i, s in enumerate(d['conllu'])], 
                     'pos': [[row[3] for row in s] for s in d['conllu']], 
                     'sup': [[(str(int(row[6]) - int(row[0])) if int(row[6]) else row[6]) for row in s] for s in d['conllu']], 
                     'dep': [[row[7] for row in s] for s in d['conllu']]}
               for d_i, d in enumerate(test_docs)}

model = LM(m = m, tokenizer = tokenizer, noise = noise, seed = seed, space = space, positional = positional,
           positionally_encode = positionally_encode, runners = runners, gpu = gpu, bits = bits, 
           btype = btype, ms_init = ms_init)
docs_name = f'{load_set}-{len(train_docs)}'
ptdocs_name = f'newstweet-{num_posttrain}'
data_streams = model.fit(docs, docs_name, covering = covering, all_layers = train_layers, fine_tune = fine_tune)

Training tokenizer


Initializing: 100%|██████████| 6503/6503 [00:01<00:00, 4462.22it/s]
Fitting:  20%|██        | 20/100 [00:43<02:53,  2.17s/it]


Built a vocabulary of 15327 types
Pre-processing data...


Collecting pre-processed data: 100%|██████████| 132/132 [00:00<00:00, 978176.90it/s]
Counting tag-tag transition frequencies: 100%|██████████| 132/132 [00:00<00:00, 224.73it/s]


Counting documents and aggregating counts...


Collecting aggregated counts: 5082533it [03:34, 23669.27it/s] 
Encoding parameters: 100%|██████████| 5082533/5082533 [00:51<00:00, 97986.09it/s] 
Building target vocabularies: 100%|██████████| 23/23 [00:00<00:00, 1020.94it/s]
Stacking output vocabularies for decoders: 100%|██████████| 11/11 [00:00<00:00, 87381.33it/s]
Building dense decoders: 100%|██████████| 23/23 [00:00<00:00, 32.24it/s]
  self._trXs[ltype] = (lambda M: M/M.sum(axis = 1)[:,None])(self._trXs[ltype])
Building transition matrices for tag-sequence decoding: 100%|██████████| 7/7 [00:00<00:00, 3097.39it/s]

Model params, types, encoding size, contexts, vec dim, max sent, and % capacity used: 886810 10568 50 1071 353 258 9.679





In [3]:
model._ms

{'form': 8,
 'bits': 7,
 'sty': 2,
 'pos': 6,
 'sup': 7,
 'dep': 7,
 'nov': 2,
 'iat': 2,
 'bot': 2,
 'eot': 2,
 'eos': 2,
 'eod': 2}

In [4]:
model.interpret([[' Gone again!']], seed = seed)
for doc in model._documents:
    print('opening next doc:')
    for s in doc._sentences:
        print(f'opening next sent: {s._sty}')
        for t in s._tokens:
            print(f'opening next token: {t._form}, {t._lem}, {t._sep}, {t._pos}, {t._sup}, {t._dep}')
            print(["\""+w._form+"\"" for w in t._whatevers])

Pre-processing data...


Collecting pre-processed data: 100%|██████████| 1/1 [00:00<00:00, 20460.02it/s]
Encoding data streams: 100%|██████████| 1/1 [00:00<00:00, 1242.39it/s]
Interpreting documents: 100%|██████████| 1/1 [00:00<00:00, 155.33it/s]


opening next doc:
opening next sent: decl
opening next token:  , None, False, SPACE, 1, space
['" "']
opening next token: Gone, None, False, X, 0, root
['"Gon"', '"e"']
opening next token:  , None, False, SPACE, -1, dep
['" "']
opening next token: again, None, False, PUNCT, -1, punct
['"again"']
opening next token: !, None, True, PUNCT, -1, punct
['"!"']





In [5]:
d_i = 3; s_max = 2
interpret_docs = list([docs[d_i][:s_max]])
print(interpret_docs)
model.interpret(interpret_docs, eval_layers = {0: {tag: train_layers[d_i][tag][:s_max] for tag in train_layers[d_i]}},
                eval_covering = [covering[d_i][:s_max]], seed = seed)

for doc in model._documents:
    print('opening next doc:')
    for s in doc._sentences:
        print(f'opening next sent: {s._sty}')
        for t in s._tokens:
            print(f'opening next token: {t._form}, {t._lem}, {t._sep}, {t._pos}, {t._sup}, {t._dep}')

[[' Emperor Norton ', ' Joshua Abraham Norton (c. 1818 – January 8, 1880), known as Emperor Norton, was a citizen of San Francisco, California, who in 1859 proclaimed himself "Norton I, Emperor of the United States". ']]
Pre-processing data...


Collecting pre-processed data: 100%|██████████| 1/1 [00:00<00:00, 21399.51it/s]
Encoding data streams: 100%|██████████| 1/1 [00:00<00:00, 140.16it/s]
Interpreting documents: 100%|██████████| 1/1 [00:00<00:00,  1.34it/s]

Document 0's Sentence segmentation performance:  {('R', 1.0), ('P', 1.0), ('F', 1.0)}
Document 0's STY accuracy:  0.5
Document 0's Token segmentation performance without space:  {('F', 0.9782608695652174), ('P', 0.9782608695652174), ('R', 0.9782608695652174)}
Document 0's POS accuracy with/out space: 0.7530864197530864 0.5555555555555556
Document 0's SUP:DEP accuracy with/out space:  0.4691358024691358 0.1111111111111111

Overall sentence segmentation performance:  {('R', 1.0), ('P', 1.0), ('F', 1.0)}
Overall STY accuracy:  0.5
Overall Token segmentation performance without space:  {('F', 0.9782608695652174), ('P', 0.9782608695652174), ('R', 0.9782608695652174)}
Overall POS accuracy with/out space: 0.7530864197530864 0.5555555555555556
Overall SUP:DEP accuracy with/out space:  0.4691358024691358 0.1111111111111111
opening next doc:
opening next sent: decl
opening next token:  , None, False, SPACE, 1, space
opening next token: Emperor, None, False, PROPN, 2, vocative
opening next token:




In [6]:
d_i = 0; s_max = 1
interpret_docs = list([tdocs[d_i][:s_max]])
print(interpret_docs)
model.interpret(interpret_docs, eval_layers = {0: {tag: test_layers[d_i][tag][:s_max] for tag in test_layers[d_i]}},
                eval_covering = [tcovering[d_i][:s_max]], seed = seed)

for doc in model._documents:
    print('opening next doc:')
    for s in doc._sentences:
        print(f'opening next sent: {s._sty}')
        for t in s._tokens:
            print(f'opening next token: {t._form}, {t._lem}, {t._sep}, {t._pos}, {t._sup}, {t._dep}')

[[' The prevalence of discrimination across racial groups in contemporary America: ']]
Pre-processing data...


Collecting pre-processed data: 100%|██████████| 1/1 [00:00<00:00, 22192.08it/s]
Encoding data streams: 100%|██████████| 1/1 [00:00<00:00, 350.02it/s]
Interpreting documents: 100%|██████████| 1/1 [00:00<00:00, 15.07it/s]

Document 0's Sentence segmentation performance:  {('R', 1.0), ('P', 1.0), ('F', 1.0)}
Document 0's STY accuracy:  0.0
Document 0's Token segmentation performance without space:  {('R', 1.0), ('P', 1.0), ('F', 1.0)}
Document 0's POS accuracy with/out space: 0.6818181818181818 0.36363636363636365
Document 0's SUP:DEP accuracy with/out space:  0.5454545454545454 0.2727272727272727

Overall sentence segmentation performance:  {('R', 1.0), ('P', 1.0), ('F', 1.0)}
Overall STY accuracy:  0.0
Overall Token segmentation performance without space:  {('R', 1.0), ('P', 1.0), ('F', 1.0)}
Overall POS accuracy with/out space: 0.6818181818181818 0.36363636363636365
Overall SUP:DEP accuracy with/out space:  0.5454545454545454 0.2727272727272727
opening next doc:
opening next sent: decl
opening next token:  , None, False, SPACE, 1, space
opening next token: The, None, False, PROPN, 2, vocative
opening next token:  , None, False, SPACE, 1, space
opening next token: prevalence, None, False, PROPN, 0, root




In [7]:
d_i = 1; s_max = 2
interpret_docs = list([tdocs[d_i][:s_max]])
print(interpret_docs)
model.interpret(interpret_docs, eval_layers = {0: {tag: test_layers[d_i][tag][:s_max] for tag in test_layers[d_i]}},
                eval_covering = [tcovering[d_i][:s_max]], seed = seed)
for doc in model._documents:
    print('opening next doc:')
    for s in doc._sentences:
        print(f'opening next sent: {s._sty}')
        for t in s._tokens:
            print(f'opening next token: {t._form}, {t._lem}, {t._sep}, {t._pos}, {t._sup}, {t._dep}')

[[' 2. GUJJOLAAY EEGIMAA, ITS SPEAKERS AND THEIR NEIGHBOURS ', ' This section briefly presents the Gújjolaay Eegimaa (Eegimaa for short; Ethnologue code: ISO 639-3: bqj), its speakers and its varieties. ']]
Pre-processing data...


Collecting pre-processed data: 100%|██████████| 1/1 [00:00<00:00, 21845.33it/s]
Encoding data streams: 100%|██████████| 1/1 [00:00<00:00, 77.72it/s]
Interpreting documents: 100%|██████████| 1/1 [00:00<00:00,  1.31it/s]

Document 0's Sentence segmentation performance:  {('R', 1.0), ('P', 1.0), ('F', 1.0)}
Document 0's STY accuracy:  0.5
Document 0's Token segmentation performance without space:  {('F', 0.6015037593984962), ('P', 0.5128205128205128), ('R', 0.7272727272727273)}
Document 0's POS accuracy with/out space: 0.6515151515151515 0.3611111111111111
Document 0's SUP:DEP accuracy with/out space:  0.3787878787878788 0.05555555555555555

Overall sentence segmentation performance:  {('R', 1.0), ('P', 1.0), ('F', 1.0)}
Overall STY accuracy:  0.5
Overall Token segmentation performance without space:  {('F', 0.6015037593984962), ('P', 0.5128205128205128), ('R', 0.7272727272727273)}
Overall POS accuracy with/out space: 0.6515151515151515 0.3611111111111111
Overall SUP:DEP accuracy with/out space:  0.3787878787878788 0.05555555555555555
opening next doc:
opening next sent: decl
opening next token:  , None, False, SPACE, 1, space
opening next token: 2, None, False, PROPN, 5, vocative
opening next token: ., 




In [8]:
d_i = 1; s_max = 2
interpret_docs = list([tdocs[d_i][:s_max]])
print(interpret_docs)
model.interpret(interpret_docs, eval_layers = {0: {tag: test_layers[d_i][tag][:s_max] for tag in test_layers[d_i]}},
                covering = [tcovering[d_i][:s_max]], seed = seed)
for doc in model._documents:
    print('opening next doc:')
    for s in doc._sentences:
        print(f'opening next sent: {s._sty}')
        for t in s._tokens:
            print(f'opening next token: {t._form}, {t._lem}, {t._sep}, {t._pos}, {t._sup}, {t._dep}')

[[' 2. GUJJOLAAY EEGIMAA, ITS SPEAKERS AND THEIR NEIGHBOURS ', ' This section briefly presents the Gújjolaay Eegimaa (Eegimaa for short; Ethnologue code: ISO 639-3: bqj), its speakers and its varieties. ']]
Pre-processing data...


Collecting pre-processed data: 100%|██████████| 1/1 [00:00<00:00, 21620.12it/s]
Encoding data streams: 100%|██████████| 1/1 [00:00<00:00, 104.13it/s]
Interpreting documents: 100%|██████████| 1/1 [00:00<00:00,  2.79it/s]

Document 0's STY accuracy:  0.5
Document 0's POS accuracy with/out space: 0.7121212121212122 0.4722222222222222
Document 0's SUP:DEP accuracy with/out space:  0.3484848484848485 0.0

Overall STY accuracy:  0.5
Overall POS accuracy with/out space: 0.7121212121212122 0.4722222222222222
Overall SUP:DEP accuracy with/out space:  0.3484848484848485 0.0
opening next doc:
opening next sent: decl
opening next token:  , None, False, SPACE, 1, space
opening next token: 2., None, False, PROPN, 5, vocative
opening next token:  , None, False, SPACE, 1, space
opening next token: GUJJOLAAY, None, False, PROPN, 0, root
opening next token:  , None, False, SPACE, -1, space
opening next token: EEGIMAA, None, False, PROPN, -2, punct
opening next token: ,, None, False, PUNCT, -1, punct
opening next token:  , None, False, SPACE, 1, space
opening next token: ITS, None, False, PUNCT, -3, punct
opening next token:  , None, False, SPACE, -1, space
opening next token: SPEAKERS, None, False, PROPN, -1, punct
open




In [9]:
model.interpret(tdocs, eval_layers = test_layers, eval_covering = tcovering, seed = seed, verbose_result = False, 
                decode_method = decode_method)

Pre-processing data...


Collecting pre-processed data: 100%|██████████| 18/18 [00:00<00:00, 120602.99it/s]
Encoding data streams: 100%|██████████| 18/18 [00:02<00:00,  6.85it/s]
Interpreting documents:   6%|▌         | 1/18 [00:15<04:15, 15.01s/it]

Document 0's Sentence segmentation performance:  {('R', 1.0), ('P', 1.0), ('F', 1.0)}
Document 0's STY accuracy:  0.6851851851851852
Document 0's Token segmentation performance without space:  {('F', 0.9181619256017505), ('P', 0.9161572052401746), ('R', 0.9201754385964912)}
Document 0's POS accuracy with/out space: 0.7676056338028169 0.560418648905804
Document 0's SUP:DEP accuracy with/out space:  0.556841046277666 0.1912464319695528



Interpreting documents:  11%|█         | 2/18 [00:35<04:52, 18.28s/it]

Document 1's Sentence segmentation performance:  {('R', 1.0), ('P', 1.0), ('F', 1.0)}
Document 1's STY accuracy:  0.8611111111111112
Document 1's Token segmentation performance without space:  {('F', 0.769850866586054), ('P', 0.7095096582466568), ('R', 0.8414096916299559)}
Document 1's POS accuracy with/out space: 0.7576112412177985 0.5394883203559511
Document 1's SUP:DEP accuracy with/out space:  0.5327868852459017 0.14015572858731926



Interpreting documents:  17%|█▋        | 3/18 [00:47<03:50, 15.39s/it]

Document 2's Sentence segmentation performance:  {('R', 1.0), ('P', 1.0), ('F', 1.0)}
Document 2's STY accuracy:  0.9310344827586207
Document 2's Token segmentation performance without space:  {('F', 0.8203939745075318), ('P', 0.7823204419889502), ('R', 0.8623629719853837)}
Document 2's POS accuracy with/out space: 0.7596958174904943 0.5459770114942529
Document 2's SUP:DEP accuracy with/out space:  0.5498098859315589 0.1781609195402299



Interpreting documents:  22%|██▏       | 4/18 [01:04<03:42, 15.92s/it]

Document 3's Sentence segmentation performance:  {('R', 1.0), ('P', 1.0), ('F', 1.0)}
Document 3's STY accuracy:  0.9
Document 3's Token segmentation performance without space:  {('F', 0.8661347517730497), ('P', 0.8466204506065858), ('R', 0.8865698729582577)}
Document 3's POS accuracy with/out space: 0.7895299145299145 0.5991861648016277
Document 3's SUP:DEP accuracy with/out space:  0.5715811965811965 0.20040691759918616



Interpreting documents:  28%|██▊       | 5/18 [01:10<02:43, 12.57s/it]

Document 4's Sentence segmentation performance:  {('R', 1.0), ('P', 1.0), ('F', 1.0)}
Document 4's STY accuracy:  0.46078431372549017
Document 4's Token segmentation performance without space:  {('F', 0.9085992428339643), ('P', 0.9130434782608695), ('R', 0.9041980624327234)}
Document 4's POS accuracy with/out space: 0.7869458128078818 0.5988372093023255
Document 4's SUP:DEP accuracy with/out space:  0.5899014778325123 0.2453488372093023



Interpreting documents:  33%|███▎      | 6/18 [01:22<02:27, 12.28s/it]

Document 5's Sentence segmentation performance:  {('R', 1.0), ('P', 1.0), ('F', 1.0)}
Document 5's STY accuracy:  0.7894736842105263
Document 5's Token segmentation performance without space:  {('F', 0.8612068965517242), ('P', 0.84375), ('R', 0.8794014084507042)}
Document 5's POS accuracy with/out space: 0.7538298996302166 0.5404339250493096
Document 5's SUP:DEP accuracy with/out space:  0.5499207606973059 0.1932938856015779



Interpreting documents:  39%|███▉      | 7/18 [01:37<02:25, 13.23s/it]

Document 6's Sentence segmentation performance:  {('R', 1.0), ('P', 1.0), ('F', 1.0)}
Document 6's STY accuracy:  0.9148936170212766
Document 6's Token segmentation performance without space:  {('F', 0.8839015968925336), ('P', 0.8767123287671232), ('R', 0.8912097476066144)}
Document 6's POS accuracy with/out space: 0.7520576131687243 0.5251231527093596
Document 6's SUP:DEP accuracy with/out space:  0.5457818930041153 0.15467980295566502



Interpreting documents:  44%|████▍     | 8/18 [01:45<01:53, 11.39s/it]

Document 7's Sentence segmentation performance:  {('R', 1.0), ('P', 1.0), ('F', 1.0)}
Document 7's STY accuracy:  0.5344827586206896
Document 7's Token segmentation performance without space:  {('F', 0.8993885491939967), ('P', 0.88998899889989), ('R', 0.9089887640449438)}
Document 7's POS accuracy with/out space: 0.7919038583175205 0.5923172242874845
Document 7's SUP:DEP accuracy with/out space:  0.58191018342821 0.20817843866171004



Interpreting documents:  50%|█████     | 9/18 [01:58<01:48, 12.05s/it]

Document 8's Sentence segmentation performance:  {('R', 1.0), ('P', 1.0), ('F', 1.0)}
Document 8's STY accuracy:  0.5952380952380952
Document 8's Token segmentation performance without space:  {('F', 0.856425702811245), ('P', 0.8305744888023369), ('R', 0.883937823834197)}
Document 8's POS accuracy with/out space: 0.7552404438964242 0.5307328605200946
Document 8's SUP:DEP accuracy with/out space:  0.5443896424167695 0.15011820330969267



Interpreting documents:  56%|█████▌    | 10/18 [02:19<01:58, 14.87s/it]

Document 9's Sentence segmentation performance:  {('R', 1.0), ('P', 1.0), ('F', 1.0)}
Document 9's STY accuracy:  0.72
Document 9's Token segmentation performance without space:  {('F', 0.8424611223799865), ('P', 0.8218997361477572), ('R', 0.8640776699029126)}
Document 9's POS accuracy with/out space: 0.7712743877127439 0.5647709320695102
Document 9's SUP:DEP accuracy with/out space:  0.5396430053964301 0.1524486571879937



Interpreting documents:  61%|██████    | 11/18 [02:28<01:31, 13.01s/it]

Document 10's Sentence segmentation performance:  {('R', 1.0), ('P', 1.0), ('F', 1.0)}
Document 10's STY accuracy:  0.5428571428571428
Document 10's Token segmentation performance without space:  {('F', 0.8525951557093426), ('P', 0.8380952380952381), ('R', 0.8676056338028169)}
Document 10's POS accuracy with/out space: 0.7667785234899329 0.5552
Document 10's SUP:DEP accuracy with/out space:  0.5444630872483222 0.1696



Interpreting documents:  67%|██████▋   | 12/18 [02:42<01:18, 13.13s/it]

Document 11's Sentence segmentation performance:  {('R', 1.0), ('P', 1.0), ('F', 1.0)}
Document 11's STY accuracy:  0.8181818181818182
Document 11's Token segmentation performance without space:  {('F', 0.878585086042065), ('P', 0.8556797020484171), ('R', 0.9027504911591355)}
Document 11's POS accuracy with/out space: 0.7810055865921788 0.5726775956284152
Document 11's SUP:DEP accuracy with/out space:  0.5810055865921788 0.1901639344262295



Interpreting documents:  72%|███████▏  | 13/18 [02:55<01:05, 13.07s/it]

Document 12's Sentence segmentation performance:  {('R', 1.0), ('P', 1.0), ('F', 1.0)}
Document 12's STY accuracy:  0.6545454545454545
Document 12's Token segmentation performance without space:  {('F', 0.8613333333333334), ('P', 0.8426086956521739), ('R', 0.8809090909090909)}
Document 12's POS accuracy with/out space: 0.7636856368563686 0.5544147843942505
Document 12's SUP:DEP accuracy with/out space:  0.5344173441734418 0.14989733059548255



Interpreting documents:  78%|███████▊  | 14/18 [03:06<00:49, 12.47s/it]

Document 13's Sentence segmentation performance:  {('R', 1.0), ('P', 1.0), ('F', 1.0)}
Document 13's STY accuracy:  0.4418604651162791
Document 13's Token segmentation performance without space:  {('F', 0.9105018106570099), ('P', 0.9081527347781218), ('R', 0.9128630705394191)}
Document 13's POS accuracy with/out space: 0.7620429483459082 0.5444444444444444
Document 13's SUP:DEP accuracy with/out space:  0.5461404526987812 0.1511111111111111



Interpreting documents:  83%|████████▎ | 15/18 [03:27<00:45, 15.27s/it]

Document 14's Sentence segmentation performance:  {('R', 1.0), ('P', 1.0), ('F', 1.0)}
Document 14's STY accuracy:  0.8918918918918919
Document 14's Token segmentation performance without space:  {('F', 0.8788484606157536), ('P', 0.8687747035573122), ('R', 0.88915857605178)}
Document 14's POS accuracy with/out space: 0.7784688995215311 0.577939835916135
Document 14's SUP:DEP accuracy with/out space:  0.5574162679425837 0.1741112123974476



Interpreting documents:  89%|████████▉ | 16/18 [03:36<00:26, 13.12s/it]

Document 15's Sentence segmentation performance:  {('R', 1.0), ('P', 1.0), ('F', 1.0)}
Document 15's STY accuracy:  0.8421052631578947
Document 15's Token segmentation performance without space:  {('F', 0.8513157894736841), ('P', 0.8252551020408163), ('R', 0.8790760869565217)}
Document 15's POS accuracy with/out space: 0.7866449511400652 0.5808
Document 15's SUP:DEP accuracy with/out space:  0.5814332247557004 0.1936



Interpreting documents:  94%|█████████▍| 17/18 [03:43<00:11, 11.49s/it]

Document 16's Sentence segmentation performance:  {('R', 1.0), ('P', 1.0), ('F', 1.0)}
Document 16's STY accuracy:  0.32558139534883723
Document 16's Token segmentation performance without space:  {('F', 0.9072164948453609), ('P', 0.9004092769440655), ('R', 0.9141274238227147)}
Document 16's POS accuracy with/out space: 0.7850911974623315 0.5826893353941267
Document 16's SUP:DEP accuracy with/out space:  0.5860428231562252 0.21792890262751158



Interpreting documents: 100%|██████████| 18/18 [03:53<00:00, 12.95s/it]

Document 17's Sentence segmentation performance:  {('R', 1.0), ('P', 1.0), ('F', 1.0)}
Document 17's STY accuracy:  0.4153846153846154
Document 17's Token segmentation performance without space:  {('F', 0.908675799086758), ('P', 0.898014440433213), ('R', 0.9195933456561922)}
Document 17's POS accuracy with/out space: 0.8134981968057702 0.6365461847389559
Document 17's SUP:DEP accuracy with/out space:  0.5734157650695518 0.19477911646586346

Overall sentence segmentation performance:  {('R', 1.0), ('P', 1.0), ('F', 1.0)}
Overall STY accuracy:  0.6588366890380313
Overall Token segmentation performance without space:  {('F', 0.8694372340710215), ('P', 0.850879949717159), ('R', 0.8888220167423537)}
Overall POS accuracy with/out space: 0.7733836137433121 0.5667242229896399
Overall SUP:DEP accuracy with/out space:  0.558241474892026 0.17939072520966945





In [10]:
model.interpret(tdocs, eval_layers = test_layers, covering = tcovering, seed = seed, verbose_result = False, 
                decode_method = decode_method)

Pre-processing data...


Collecting pre-processed data: 100%|██████████| 18/18 [00:00<00:00, 285975.27it/s]
Encoding data streams: 100%|██████████| 18/18 [00:02<00:00,  6.21it/s]
Interpreting documents:   6%|▌         | 1/18 [00:13<03:43, 13.16s/it]

Document 0's STY accuracy:  0.6851851851851852
Document 0's POS accuracy with/out space: 0.7917505030181087 0.6060894386298763
Document 0's SUP:DEP accuracy with/out space:  0.5653923541247485 0.20456707897240722



Interpreting documents:  11%|█         | 2/18 [00:25<03:26, 12.88s/it]

Document 1's STY accuracy:  0.8611111111111112
Document 1's POS accuracy with/out space: 0.7933255269320844 0.60734149054505
Document 1's SUP:DEP accuracy with/out space:  0.5374707259953162 0.14015572858731926



Interpreting documents:  17%|█▋        | 3/18 [00:34<02:43, 10.93s/it]

Document 2's STY accuracy:  0.9310344827586207
Document 2's POS accuracy with/out space: 0.7802281368821293 0.5847701149425287
Document 2's SUP:DEP accuracy with/out space:  0.5467680608365019 0.1724137931034483



Interpreting documents:  22%|██▏       | 4/18 [00:48<02:49, 12.10s/it]

Document 3's STY accuracy:  0.9
Document 3's POS accuracy with/out space: 0.8092948717948718 0.6368260427263479
Document 3's SUP:DEP accuracy with/out space:  0.5731837606837606 0.20244150559511698



Interpreting documents:  28%|██▊       | 5/18 [00:54<02:07,  9.83s/it]

Document 4's STY accuracy:  0.46078431372549017
Document 4's POS accuracy with/out space: 0.8183497536945813 0.6581395348837209
Document 4's SUP:DEP accuracy with/out space:  0.6016009852216748 0.26976744186046514



Interpreting documents:  33%|███▎      | 6/18 [01:03<01:57,  9.80s/it]

Document 5's STY accuracy:  0.7894736842105263
Document 5's POS accuracy with/out space: 0.7897517168515584 0.6074950690335306
Document 5's SUP:DEP accuracy with/out space:  0.5567881669307977 0.20512820512820512



Interpreting documents:  39%|███▉      | 7/18 [01:17<01:59, 10.90s/it]

Document 6's STY accuracy:  0.9148936170212766
Document 6's POS accuracy with/out space: 0.7870370370370371 0.5921182266009852
Document 6's SUP:DEP accuracy with/out space:  0.5524691358024691 0.16551724137931034



Interpreting documents:  44%|████▍     | 8/18 [01:23<01:35,  9.50s/it]

Document 7's STY accuracy:  0.5344827586206896
Document 7's POS accuracy with/out space: 0.8121442125237192 0.6319702602230484
Document 7's SUP:DEP accuracy with/out space:  0.592662871600253 0.23172242874845106



Interpreting documents:  50%|█████     | 9/18 [01:34<01:29,  9.90s/it]

Document 8's STY accuracy:  0.5952380952380952
Document 8's POS accuracy with/out space: 0.781750924784217 0.5815602836879432
Document 8's SUP:DEP accuracy with/out space:  0.5530209617755857 0.1690307328605201



Interpreting documents:  56%|█████▌    | 10/18 [01:51<01:37, 12.18s/it]

Document 9's STY accuracy:  0.72
Document 9's POS accuracy with/out space: 0.7995018679950187 0.6184834123222749
Document 9's SUP:DEP accuracy with/out space:  0.5579078455790785 0.18404423380726698



Interpreting documents:  61%|██████    | 11/18 [01:59<01:15, 10.75s/it]

Document 10's STY accuracy:  0.5428571428571428
Document 10's POS accuracy with/out space: 0.7969798657718121 0.6128
Document 10's SUP:DEP accuracy with/out space:  0.5486577181208053 0.1776



Interpreting documents:  67%|██████▋   | 12/18 [02:10<01:05, 10.93s/it]

Document 11's STY accuracy:  0.8181818181818182
Document 11's POS accuracy with/out space: 0.8016759776536313 0.6131147540983607
Document 11's SUP:DEP accuracy with/out space:  0.5865921787709497 0.2010928961748634



Interpreting documents:  72%|███████▏  | 13/18 [02:21<00:55, 11.11s/it]

Document 12's STY accuracy:  0.6545454545454545
Document 12's POS accuracy with/out space: 0.7821138211382114 0.5893223819301848
Document 12's SUP:DEP accuracy with/out space:  0.5506775067750678 0.1735112936344969



Interpreting documents:  78%|███████▊  | 14/18 [02:32<00:43, 10.85s/it]

Document 13's STY accuracy:  0.4418604651162791
Document 13's POS accuracy with/out space: 0.7858386535113174 0.59
Document 13's SUP:DEP accuracy with/out space:  0.5461404526987812 0.15333333333333332



Interpreting documents:  83%|████████▎ | 15/18 [02:50<00:39, 13.04s/it]

Document 14's STY accuracy:  0.8918918918918919
Document 14's POS accuracy with/out space: 0.7995215311004785 0.618049225159526
Document 14's SUP:DEP accuracy with/out space:  0.5511961722488038 0.1649954421148587



Interpreting documents:  89%|████████▉ | 16/18 [02:56<00:22, 11.05s/it]

Document 15's STY accuracy:  0.8421052631578947
Document 15's POS accuracy with/out space: 0.8281758957654723 0.6624
Document 15's SUP:DEP accuracy with/out space:  0.5806188925081434 0.2



Interpreting documents:  94%|█████████▍| 17/18 [03:03<00:09,  9.74s/it]

Document 16's STY accuracy:  0.32558139534883723
Document 16's POS accuracy with/out space: 0.8049167327517843 0.6213292117465224
Document 16's SUP:DEP accuracy with/out space:  0.5828707375099128 0.23183925811437403



Interpreting documents: 100%|██████████| 18/18 [03:11<00:00, 10.66s/it]

Document 17's STY accuracy:  0.4153846153846154
Document 17's POS accuracy with/out space: 0.8444100978876867 0.6967871485943775
Document 17's SUP:DEP accuracy with/out space:  0.5821741370427614 0.20983935742971888

Overall STY accuracy:  0.6588366890380313
Overall POS accuracy with/out space: 0.8001676013665957 0.6179699062654169
Overall SUP:DEP accuracy with/out space:  0.5643009089151034 0.1910458806117415





In [11]:
_ = model.fit(ptdocs, ptdocs_name, update = update, fine_tune = fine_tune, bits = bits)
if fine_tune and ptdocs:
    model.fine_tune(docs, covering = covering, all_layers = train_layers, streams = data_streams)

Training tokenizer


Initializing: 100%|██████████| 7503/7503 [00:09<00:00, 779.00it/s]
Fitting:  14%|█▍        | 14/100 [02:35<15:54, 11.10s/it]


Built a vocabulary of 41538 types
Pre-processing data...


Collecting pre-processed data: 100%|██████████| 1132/1132 [00:00<00:00, 2831217.73it/s]
Counting tag-tag transition frequencies: 100%|██████████| 1132/1132 [00:01<00:00, 676.40it/s]


Counting documents and aggregating counts...


Collecting aggregated counts: 11229394it [10:44, 17426.61it/s]
Encoding parameters: 100%|██████████| 11229394/11229394 [02:09<00:00, 86639.13it/s]
Building target vocabularies: 100%|██████████| 23/23 [00:00<00:00, 930.87it/s]
Stacking output vocabularies for decoders: 100%|██████████| 11/11 [00:00<00:00, 92645.27it/s]
Building dense decoders: 100%|██████████| 23/23 [00:00<00:00, 34.82it/s]
Building transition matrices for tag-sequence decoding: 100%|██████████| 7/7 [00:00<00:00, 3192.01it/s]

Model params, types, encoding size, contexts, vec dim, max sent, and % capacity used: 2100689 10300 50 1071 353 18310 21.048





In [12]:
if ptdocs:
    model.interpret(tdocs, eval_layers = test_layers, eval_covering = tcovering, seed = seed, verbose_result = False, 
                    decode_method = decode_method)

Pre-processing data...


Collecting pre-processed data: 100%|██████████| 18/18 [00:00<00:00, 285975.27it/s]
Encoding data streams: 100%|██████████| 18/18 [00:02<00:00,  7.21it/s]
Interpreting documents:   6%|▌         | 1/18 [00:14<04:02, 14.25s/it]

Document 0's Sentence segmentation performance:  {('R', 1.0), ('P', 1.0), ('F', 1.0)}
Document 0's STY accuracy:  0.6851851851851852
Document 0's Token segmentation performance without space:  {('F', 0.9302325581395349), ('P', 0.9340659340659341), ('R', 0.9264305177111717)}
Document 0's POS accuracy with/out space: 0.7701207243460765 0.5651760228353948
Document 0's SUP:DEP accuracy with/out space:  0.5628772635814889 0.19695528068506185



Interpreting documents:  11%|█         | 2/18 [00:34<04:45, 17.84s/it]

Document 1's Sentence segmentation performance:  {('R', 1.0), ('P', 1.0), ('F', 1.0)}
Document 1's STY accuracy:  0.8611111111111112
Document 1's Token segmentation performance without space:  {('F', 0.7701102490812577), ('P', 0.7165653495440729), ('R', 0.8323036187113857)}
Document 1's POS accuracy with/out space: 0.7464871194379391 0.5183537263626251
Document 1's SUP:DEP accuracy with/out space:  0.5415690866510539 0.15016685205784205



Interpreting documents:  17%|█▋        | 3/18 [00:45<03:42, 14.86s/it]

Document 2's Sentence segmentation performance:  {('R', 1.0), ('P', 1.0), ('F', 1.0)}
Document 2's STY accuracy:  0.9310344827586207
Document 2's Token segmentation performance without space:  {('F', 0.8422315536892621), ('P', 0.8059701492537313), ('R', 0.8819095477386935)}
Document 2's POS accuracy with/out space: 0.75893536121673 0.5445402298850575
Document 2's SUP:DEP accuracy with/out space:  0.5460076045627377 0.16666666666666666



Interpreting documents:  22%|██▏       | 4/18 [01:02<03:35, 15.38s/it]

Document 3's Sentence segmentation performance:  {('R', 1.0), ('P', 1.0), ('F', 1.0)}
Document 3's STY accuracy:  0.9
Document 3's Token segmentation performance without space:  {('F', 0.891818181818182), ('P', 0.8782452999104745), ('R', 0.9058171745152355)}
Document 3's POS accuracy with/out space: 0.7964743589743589 0.612410986775178
Document 3's SUP:DEP accuracy with/out space:  0.5662393162393162 0.19023397761953204



Interpreting documents:  28%|██▊       | 5/18 [01:09<02:41, 12.41s/it]

Document 4's Sentence segmentation performance:  {('R', 1.0), ('P', 1.0), ('F', 1.0)}
Document 4's STY accuracy:  0.46078431372549017
Document 4's Token segmentation performance without space:  {('F', 0.920265780730897), ('P', 0.9305711086226204), ('R', 0.9101861993428259)}
Document 4's POS accuracy with/out space: 0.7752463054187192 0.5755813953488372
Document 4's SUP:DEP accuracy with/out space:  0.5880541871921182 0.23604651162790696



Interpreting documents:  33%|███▎      | 6/18 [01:20<02:23, 11.94s/it]

Document 5's Sentence segmentation performance:  {('R', 1.0), ('P', 1.0), ('F', 1.0)}
Document 5's STY accuracy:  0.7894736842105263
Document 5's Token segmentation performance without space:  {('F', 0.8806437192668753), ('P', 0.8701413427561837), ('R', 0.8914027149321267)}
Document 5's POS accuracy with/out space: 0.751188589540412 0.5355029585798816
Document 5's SUP:DEP accuracy with/out space:  0.5388272583201268 0.1765285996055227



Interpreting documents:  39%|███▉      | 7/18 [01:35<02:21, 12.86s/it]

Document 6's Sentence segmentation performance:  {('R', 1.0), ('P', 1.0), ('F', 1.0)}
Document 6's STY accuracy:  0.9148936170212766
Document 6's Token segmentation performance without space:  {('F', 0.8912561029738126), ('P', 0.8822495606326889), ('R', 0.9004484304932735)}
Document 6's POS accuracy with/out space: 0.7566872427983539 0.5339901477832513
Document 6's SUP:DEP accuracy with/out space:  0.5498971193415638 0.15566502463054188



Interpreting documents:  44%|████▍     | 8/18 [01:42<01:50, 11.06s/it]

Document 7's Sentence segmentation performance:  {('R', 1.0), ('P', 1.0), ('F', 1.0)}
Document 7's STY accuracy:  0.5344827586206896
Document 7's Token segmentation performance without space:  {('F', 0.9230769230769231), ('P', 0.9130434782608695), ('R', 0.9333333333333333)}
Document 7's POS accuracy with/out space: 0.8070841239721696 0.6220570012391574
Document 7's SUP:DEP accuracy with/out space:  0.58191018342821 0.20446096654275092



Interpreting documents:  50%|█████     | 9/18 [01:54<01:43, 11.45s/it]

Document 8's Sentence segmentation performance:  {('R', 1.0), ('P', 1.0), ('F', 1.0)}
Document 8's STY accuracy:  0.5952380952380952
Document 8's Token segmentation performance without space:  {('F', 0.9017525225703665), ('P', 0.9003181336161188), ('R', 0.9031914893617021)}
Document 8's POS accuracy with/out space: 0.7620221948212084 0.5437352245862884
Document 8's SUP:DEP accuracy with/out space:  0.5487053020961775 0.1453900709219858



Interpreting documents:  56%|█████▌    | 10/18 [02:14<01:51, 13.93s/it]

Document 9's Sentence segmentation performance:  {('R', 1.0), ('P', 1.0), ('F', 1.0)}
Document 9's STY accuracy:  0.72
Document 9's Token segmentation performance without space:  {('F', 0.8847663217980736), ('P', 0.8831908831908832), ('R', 0.8863473909935669)}
Document 9's POS accuracy with/out space: 0.7779161477791615 0.5774091627172195
Document 9's SUP:DEP accuracy with/out space:  0.5425487754254877 0.14691943127962084



Interpreting documents:  61%|██████    | 11/18 [02:22<01:25, 12.21s/it]

Document 10's Sentence segmentation performance:  {('R', 1.0), ('P', 1.0), ('F', 1.0)}
Document 10's STY accuracy:  0.5428571428571428
Document 10's Token segmentation performance without space:  {('F', 0.8845878136200718), ('P', 0.8814285714285715), ('R', 0.8877697841726618)}
Document 10's POS accuracy with/out space: 0.764261744966443 0.5504
Document 10's SUP:DEP accuracy with/out space:  0.5385906040268457 0.1504



Interpreting documents:  67%|██████▋   | 12/18 [02:34<01:13, 12.27s/it]

Document 11's Sentence segmentation performance:  {('R', 1.0), ('P', 1.0), ('F', 1.0)}
Document 11's STY accuracy:  0.8181818181818182
Document 11's Token segmentation performance without space:  {('F', 0.9363636363636363), ('P', 0.9325955734406438), ('R', 0.9401622718052738)}
Document 11's POS accuracy with/out space: 0.7787709497206704 0.5672131147540984
Document 11's SUP:DEP accuracy with/out space:  0.5782122905027933 0.18579234972677597



Interpreting documents:  72%|███████▏  | 13/18 [02:46<01:01, 12.26s/it]

Document 12's Sentence segmentation performance:  {('R', 1.0), ('P', 1.0), ('F', 1.0)}
Document 12's STY accuracy:  0.6545454545454545
Document 12's Token segmentation performance without space:  {('F', 0.8991248272685399), ('P', 0.8945921173235564), ('R', 0.9037037037037037)}
Document 12's POS accuracy with/out space: 0.759349593495935 0.5462012320328542
Document 12's SUP:DEP accuracy with/out space:  0.5425474254742547 0.162217659137577



Interpreting documents:  78%|███████▊  | 14/18 [02:57<00:47, 11.86s/it]

Document 13's Sentence segmentation performance:  {('R', 1.0), ('P', 1.0), ('F', 1.0)}
Document 13's STY accuracy:  0.4418604651162791
Document 13's Token segmentation performance without space:  {('F', 0.9250780437044746), ('P', 0.9299163179916318), ('R', 0.9202898550724637)}
Document 13's POS accuracy with/out space: 0.75507835171213 0.5311111111111111
Document 13's SUP:DEP accuracy with/out space:  0.5449796865931514 0.14777777777777779



Interpreting documents:  83%|████████▎ | 15/18 [03:19<00:44, 14.69s/it]

Document 14's Sentence segmentation performance:  {('R', 1.0), ('P', 1.0), ('F', 1.0)}
Document 14's STY accuracy:  0.8918918918918919
Document 14's Token segmentation performance without space:  {('F', 0.8932515337423313), ('P', 0.8899755501222494), ('R', 0.896551724137931)}
Document 14's POS accuracy with/out space: 0.777511961722488 0.5761166818596172
Document 14's SUP:DEP accuracy with/out space:  0.5502392344497608 0.16043755697356427



Interpreting documents:  89%|████████▉ | 16/18 [03:27<00:25, 12.78s/it]

Document 15's Sentence segmentation performance:  {('R', 1.0), ('P', 1.0), ('F', 1.0)}
Document 15's STY accuracy:  0.8421052631578947
Document 15's Token segmentation performance without space:  {('F', 0.823529411764706), ('P', 0.7952559300873908), ('R', 0.853887399463807)}
Document 15's POS accuracy with/out space: 0.7760586319218241 0.56
Document 15's SUP:DEP accuracy with/out space:  0.5732899022801303 0.1744



Interpreting documents:  94%|█████████▍| 17/18 [03:35<00:11, 11.22s/it]

Document 16's Sentence segmentation performance:  {('R', 1.0), ('P', 1.0), ('F', 1.0)}
Document 16's STY accuracy:  0.32558139534883723
Document 16's Token segmentation performance without space:  {('F', 0.9004871259568545), ('P', 0.8850889192886456), ('R', 0.9164305949008499)}
Document 16's POS accuracy with/out space: 0.7922283901665345 0.5950540958268934
Document 16's SUP:DEP accuracy with/out space:  0.5836637589214909 0.2071097372488408



Interpreting documents: 100%|██████████| 18/18 [03:44<00:00, 12.47s/it]

Document 17's Sentence segmentation performance:  {('R', 1.0), ('P', 1.0), ('F', 1.0)}
Document 17's STY accuracy:  0.4153846153846154
Document 17's Token segmentation performance without space:  {('F', 0.9170549860205033), ('P', 0.9085872576177285), ('R', 0.9256820319849483)}
Document 17's POS accuracy with/out space: 0.8088614116434827 0.6275100401606426
Document 17's SUP:DEP accuracy with/out space:  0.5692941782586296 0.18373493975903615

Overall sentence segmentation performance:  {('R', 1.0), ('P', 1.0), ('F', 1.0)}
Overall STY accuracy:  0.6588366890380313
Overall Token segmentation performance without space:  {('F', 0.889170780134021), ('P', 0.8779611174644666), ('R', 0.9006703910614525)}
Overall POS accuracy with/out space: 0.7731257654870108 0.5660458806117414
Overall SUP:DEP accuracy with/out space:  0.5574356990910849 0.17365564874198322





In [13]:
if ptdocs:
    model.interpret(tdocs, eval_layers = test_layers, covering = tcovering, seed = seed, verbose_result = False, 
                    decode_method = decode_method)

Pre-processing data...


Collecting pre-processed data: 100%|██████████| 18/18 [00:00<00:00, 140853.49it/s]
Encoding data streams: 100%|██████████| 18/18 [00:02<00:00,  7.34it/s]
Interpreting documents:   6%|▌         | 1/18 [00:12<03:40, 12.98s/it]

Document 0's STY accuracy:  0.6851851851851852
Document 0's POS accuracy with/out space: 0.7852112676056338 0.5937202664129401
Document 0's SUP:DEP accuracy with/out space:  0.5699195171026157 0.2093244529019981



Interpreting documents:  11%|█         | 2/18 [00:25<03:25, 12.82s/it]

Document 1's STY accuracy:  0.8611111111111112
Document 1's POS accuracy with/out space: 0.7792740046838408 0.5806451612903226
Document 1's SUP:DEP accuracy with/out space:  0.5298594847775175 0.12458286985539488



Interpreting documents:  17%|█▋        | 3/18 [00:34<02:45, 11.01s/it]

Document 2's STY accuracy:  0.9310344827586207
Document 2's POS accuracy with/out space: 0.7878326996197719 0.5991379310344828
Document 2's SUP:DEP accuracy with/out space:  0.5475285171102662 0.17385057471264367



Interpreting documents:  22%|██▏       | 4/18 [00:48<02:49, 12.11s/it]

Document 3's STY accuracy:  0.9
Document 3's POS accuracy with/out space: 0.811965811965812 0.641912512716175
Document 3's SUP:DEP accuracy with/out space:  0.5673076923076923 0.19023397761953204



Interpreting documents:  28%|██▊       | 5/18 [00:54<02:07,  9.82s/it]

Document 4's STY accuracy:  0.46078431372549017
Document 4's POS accuracy with/out space: 0.8054187192118226 0.6325581395348837
Document 4's SUP:DEP accuracy with/out space:  0.603448275862069 0.26627906976744187



Interpreting documents:  33%|███▎      | 6/18 [01:03<01:56,  9.74s/it]

Document 5's STY accuracy:  0.7894736842105263
Document 5's POS accuracy with/out space: 0.7765451664025357 0.5828402366863905
Document 5's SUP:DEP accuracy with/out space:  0.5467511885895404 0.1893491124260355



Interpreting documents:  39%|███▉      | 7/18 [01:16<01:58, 10.79s/it]

Document 6's STY accuracy:  0.9148936170212766
Document 6's POS accuracy with/out space: 0.7880658436213992 0.594088669950739
Document 6's SUP:DEP accuracy with/out space:  0.5550411522633745 0.1684729064039409



Interpreting documents:  44%|████▍     | 8/18 [01:23<01:34,  9.44s/it]

Document 7's STY accuracy:  0.5344827586206896
Document 7's POS accuracy with/out space: 0.8222643896268185 0.6517967781908303
Document 7's SUP:DEP accuracy with/out space:  0.5939278937381404 0.2218091697645601



Interpreting documents:  50%|█████     | 9/18 [01:33<01:28,  9.85s/it]

Document 8's STY accuracy:  0.5952380952380952
Document 8's POS accuracy with/out space: 0.7842170160295932 0.5862884160756501
Document 8's SUP:DEP accuracy with/out space:  0.5561035758323057 0.1595744680851064



Interpreting documents:  56%|█████▌    | 10/18 [01:52<01:39, 12.42s/it]

Document 9's STY accuracy:  0.72
Document 9's POS accuracy with/out space: 0.7986716479867165 0.6169036334913112
Document 9's SUP:DEP accuracy with/out space:  0.5529265255292652 0.16429699842022116



Interpreting documents:  61%|██████    | 11/18 [01:59<01:16, 10.89s/it]

Document 10's STY accuracy:  0.5428571428571428
Document 10's POS accuracy with/out space: 0.7953020134228188 0.6096
Document 10's SUP:DEP accuracy with/out space:  0.5503355704697986 0.1776



Interpreting documents:  67%|██████▋   | 12/18 [02:10<01:06, 11.03s/it]

Document 11's STY accuracy:  0.8181818181818182
Document 11's POS accuracy with/out space: 0.7910614525139665 0.5912568306010929
Document 11's SUP:DEP accuracy with/out space:  0.5787709497206703 0.18469945355191256



Interpreting documents:  72%|███████▏  | 13/18 [02:21<00:54, 10.99s/it]

Document 12's STY accuracy:  0.6545454545454545
Document 12's POS accuracy with/out space: 0.775609756097561 0.5770020533880903
Document 12's SUP:DEP accuracy with/out space:  0.5495934959349593 0.175564681724846



Interpreting documents:  78%|███████▊  | 14/18 [02:32<00:43, 10.78s/it]

Document 13's STY accuracy:  0.4418604651162791
Document 13's POS accuracy with/out space: 0.7800348229831688 0.5788888888888889
Document 13's SUP:DEP accuracy with/out space:  0.5536854323853744 0.1622222222222222



Interpreting documents:  83%|████████▎ | 15/18 [02:50<00:39, 13.02s/it]

Document 14's STY accuracy:  0.8918918918918919
Document 14's POS accuracy with/out space: 0.792822966507177 0.6052871467639015
Document 14's SUP:DEP accuracy with/out space:  0.554066985645933 0.16681859617137648



Interpreting documents:  89%|████████▉ | 16/18 [02:56<00:22, 11.06s/it]

Document 15's STY accuracy:  0.8421052631578947
Document 15's POS accuracy with/out space: 0.8298045602605864 0.6656
Document 15's SUP:DEP accuracy with/out space:  0.5781758957654723 0.1872



Interpreting documents:  94%|█████████▍| 17/18 [03:03<00:09,  9.74s/it]

Document 16's STY accuracy:  0.32558139534883723
Document 16's POS accuracy with/out space: 0.8112609040444092 0.6321483771251932
Document 16's SUP:DEP accuracy with/out space:  0.5900079302141158 0.22565687789799072



Interpreting documents: 100%|██████████| 18/18 [03:11<00:00, 10.66s/it]

Document 17's STY accuracy:  0.4153846153846154
Document 17's POS accuracy with/out space: 0.8397733127253992 0.6877510040160643
Document 17's SUP:DEP accuracy with/out space:  0.5790829469345699 0.20281124497991967

Overall STY accuracy:  0.6588366890380313
Overall POS accuracy with/out space: 0.7968155740346806 0.6113714849531328
Overall SUP:DEP accuracy with/out space:  0.5634951331141623 0.18494079921065615



