In [5]:
import gensim,logging
import os
import sys
from os import listdir,makedirs
from os.path import isfile, join,exists
import glob
import json
import spacy
import re
from nltk.tokenize import sent_tokenize, word_tokenize
from collections import defaultdict,Counter
from nltk.corpus import stopwords
import string
import nltk
import numpy as np
import codecs
import random
from tqdm import tqdm

In [6]:
# get data
# Full cofea dataset
data_dir = join('data')
data_files = [filename for filename in glob.iglob(join(data_dir,'**/*.json'), recursive=True)]
full_data = []
filter_year = [1750,1810]
filter_source = ['Evans Early American Imprints','HeinOnline','National Archives Founders Online']

"""
for file in data_files: # adjust index slice here for the desired source
    if 'evans' in file or 'hein' in file or 'founders' in file:
        with open(file, encoding = 'utf-8') as f:
            #full_data = full_data + f.read().splitlines()
            # if using json
            docs = json.load(f)
            for doc in docs:
                if 'decade' in doc.keys():
                    if filter_year[0] <= int(doc['decade']) <= filter_year[1]:
                        full_data.append(doc['body'])
"""

        
save_name = 'COFEA'
with open(join('data','phrases.txt'), encoding = 'utf-8') as f:
    phrases = f.read().splitlines()
out_dir = join('data','preprocessed')

In [7]:
docs = []
# clean up cofea
for infile in data_files:
    with open(infile) as f:
        data = json.load(f)        
    print(infile, len(data), type(data))    
    for d in data:
        # fix one document with many problems (http://founders.archives.gov/documents/Jefferson/99-01-02-9951)
        if d['id'] == 'fndrs.jefferson.99-01-02-9951':            
            d['author'] == 'Wright, Robert'
            d['year'] = 1809
            d['decade'] = 1800
            d['collection'] = 'Jefferson Papers'
        # fix another that has year and decade listed as 2000:
        elif d['id'] == 'fndrs.jefferson.01-42-02-0442-0002':
            d['year'] = 1804
            d['decade'] = 1800
        # fix one document that clearly has the wrong year/decade (17626/17606)
        elif d['id'] == 'evans.N07112':
            d['year'] = 1762
            d['decade'] = 1760
        # fix years and decades for Elliot's debates (many listed as "2018")
        elif d['source'] == "Elliot's Debates":
            if 'year' in d and int(d['year']) == 2018:
                d.pop('year')
            d['decade'] = 1780
        # convert all years and decades to ints
        if 'year' in d:
            d['year'] = int(d['year'])
        if 'decade' in d:
            d['decade'] = int(d['decade'])
        if d['title'] != 'Editorial Note':
            docs.append(d)

data/statutes_output.json 481 <class 'list'>
data/hein_output.json 285 <class 'list'>
data/founders_output.json 181309 <class 'list'>
data/farrands_output.json 847 <class 'list'>
data/elliots_output.json 652 <class 'list'>
data/evans_output.json 4977 <class 'list'>


In [8]:
for doc in docs:
    if 'source' in doc.keys() and 'decade' in doc.keys():
        if doc['source'] in filter_source and (filter_year[0] <= doc['decade'] <= filter_year[1]):
            full_data.append(doc['body'])
        

In [9]:
nlp = spacy.load("en_core_web_sm")
tokenizer = nlp.tokenizer
stop = stopwords.words('english')

In [10]:
def clean_doc(doc,p_check=False):
    # break documents up into sentences and make sure each token is separated by just one space
    doc = doc.strip()
    doc = re.sub('(\\n|\\t|\\s)+'," ",doc)
    if p_check:
        for p in phrases:
            psub = p.replace(' ','')
            doc = doc.replace(p,psub)
    sents = sent_tokenize(doc)
    sents = [ ' '.join([y.text for y in tokenizer(x)]).lower() for x in sents] 
    return sents

In [11]:
def run_and_save(fname):
    
    #train model
    model = gensim.models.Word2Vec( alpha=0.025, window=4,vector_size=300, min_count=10, workers=12, sg=1, hs=0, negative=5)
    model.build_vocab(gensim.models.word2vec.LineSentence(fname+'.txt'))
    model.train(gensim.models.word2vec.LineSentence(fname+'.txt'), total_examples=model.corpus_count, epochs=5)
    model.wv.save_word2vec_format(fname+ '.tmp') # the gonen code takes the data in this form
    # save .wv.npy and .vocab
    vec = []
    w = codecs.open(fname + '.vocab', 'w', encoding='utf-8')
    vocab_size, embed_dim = None, None
    with codecs.open(fname + '.tmp', 'r', encoding='utf-8', errors='ignore') as r:
        for line in r:
            items = line.strip().split()
            if not vocab_size:
                assert(len(items) == 2)
                vocab_size, embed_dim = int(items[0]), int(items[1])
            else:
                assert(len(items) == embed_dim + 1)
                vec.append([float(item) for item in items[1:]])
                w.write('%s\n'%items[0])
    w.close()
    vec = np.array(vec, dtype=np.float)
    assert(vec.shape[0] == vocab_size)
    assert(vec.shape[1] == embed_dim)
    np.save(fname + '.wv.npy', vec)
    print('saved %s.wv.npy'%fname)
    print('saved %s.vocab'%fname)
    os.remove(fname + '.tmp')
    

In [14]:
out_name = save_name + '.txt' #change name based on source
# Save to text file so it can be efficiently used by word2vec
with open(join(out_dir,out_name), 'w',encoding = 'utf-8') as filehandle:
    for d in tqdm(full_data):
        # clean up the docs for processing
        sents = [clean_doc(d,True)]
        for s in sents:
            filehandle.write('%s\n' % s)

100%|██████████████████████████████████| 172584/172584 [18:44<00:00, 153.51it/s]


In [None]:
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
out_name = join(out_dir,save_name) 
run_and_save(out_name)

2022-05-22 17:24:03,080 : INFO : Word2Vec lifecycle event {'params': 'Word2Vec<vocab=0, vector_size=300, alpha=0.025>', 'datetime': '2022-05-22T17:24:03.079950', 'gensim': '4.2.0', 'python': '3.9.7 | packaged by conda-forge | (default, Sep 29 2021, 19:23:11) \n[GCC 9.4.0]', 'platform': 'Linux-5.4.0-105-generic-x86_64-with-glibc2.31', 'event': 'created'}
2022-05-22 17:24:03,081 : INFO : collecting all words and their counts
2022-05-22 17:24:03,082 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2022-05-22 17:24:04,618 : INFO : PROGRESS: at sentence #10000, processed 4745780 words, keeping 82091 word types
2022-05-22 17:24:06,362 : INFO : PROGRESS: at sentence #20000, processed 10022701 words, keeping 138176 word types
2022-05-22 17:24:08,227 : INFO : PROGRESS: at sentence #30000, processed 15685926 words, keeping 185714 word types
2022-05-22 17:24:09,252 : INFO : PROGRESS: at sentence #40000, processed 18723053 words, keeping 218903 word types
2022-05-22 17:24

2022-05-22 17:25:24,867 : INFO : EPOCH 0 - PROGRESS: at 10.28% examples, 198959 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:25:25,873 : INFO : EPOCH 0 - PROGRESS: at 10.36% examples, 198780 words/s, in_qsize 23, out_qsize 1
2022-05-22 17:25:26,896 : INFO : EPOCH 0 - PROGRESS: at 10.44% examples, 198399 words/s, in_qsize 24, out_qsize 2
2022-05-22 17:25:27,901 : INFO : EPOCH 0 - PROGRESS: at 10.71% examples, 199283 words/s, in_qsize 24, out_qsize 0
2022-05-22 17:25:28,909 : INFO : EPOCH 0 - PROGRESS: at 10.95% examples, 199223 words/s, in_qsize 24, out_qsize 0
2022-05-22 17:25:29,936 : INFO : EPOCH 0 - PROGRESS: at 11.23% examples, 199019 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:25:30,989 : INFO : EPOCH 0 - PROGRESS: at 11.55% examples, 199152 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:25:31,989 : INFO : EPOCH 0 - PROGRESS: at 11.75% examples, 199187 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:25:33,003 : INFO : EPOCH 0 - PROGRESS: at 11.97% examples, 199110 words/s,

2022-05-22 17:26:39,695 : INFO : EPOCH 0 - PROGRESS: at 37.75% examples, 200601 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:26:40,765 : INFO : EPOCH 0 - PROGRESS: at 38.09% examples, 200586 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:26:41,810 : INFO : EPOCH 0 - PROGRESS: at 38.40% examples, 200543 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:26:42,831 : INFO : EPOCH 0 - PROGRESS: at 38.77% examples, 200758 words/s, in_qsize 22, out_qsize 0
2022-05-22 17:26:43,872 : INFO : EPOCH 0 - PROGRESS: at 39.11% examples, 200479 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:26:44,875 : INFO : EPOCH 0 - PROGRESS: at 39.47% examples, 200500 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:26:45,887 : INFO : EPOCH 0 - PROGRESS: at 39.90% examples, 200657 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:26:46,895 : INFO : EPOCH 0 - PROGRESS: at 40.35% examples, 200644 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:26:47,895 : INFO : EPOCH 0 - PROGRESS: at 40.84% examples, 200679 words/s,

2022-05-22 17:27:53,919 : INFO : EPOCH 0 - PROGRESS: at 73.34% examples, 201150 words/s, in_qsize 24, out_qsize 0
2022-05-22 17:27:54,925 : INFO : EPOCH 0 - PROGRESS: at 73.76% examples, 201106 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:27:55,964 : INFO : EPOCH 0 - PROGRESS: at 74.18% examples, 201032 words/s, in_qsize 22, out_qsize 1
2022-05-22 17:27:56,986 : INFO : EPOCH 0 - PROGRESS: at 74.68% examples, 201179 words/s, in_qsize 24, out_qsize 0
2022-05-22 17:27:58,046 : INFO : EPOCH 0 - PROGRESS: at 75.14% examples, 201121 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:27:59,047 : INFO : EPOCH 0 - PROGRESS: at 75.58% examples, 201128 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:28:00,099 : INFO : EPOCH 0 - PROGRESS: at 76.08% examples, 201160 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:28:01,119 : INFO : EPOCH 0 - PROGRESS: at 76.49% examples, 201159 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:28:02,212 : INFO : EPOCH 0 - PROGRESS: at 76.90% examples, 201102 words/s,

2022-05-22 17:29:08,249 : INFO : EPOCH 0 - PROGRESS: at 95.11% examples, 201212 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:29:09,267 : INFO : EPOCH 0 - PROGRESS: at 95.13% examples, 201201 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:29:10,388 : INFO : EPOCH 0 - PROGRESS: at 95.15% examples, 201110 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:29:11,392 : INFO : EPOCH 0 - PROGRESS: at 95.17% examples, 201123 words/s, in_qsize 22, out_qsize 1
2022-05-22 17:29:12,439 : INFO : EPOCH 0 - PROGRESS: at 95.19% examples, 201140 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:29:13,483 : INFO : EPOCH 0 - PROGRESS: at 95.21% examples, 201128 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:29:14,511 : INFO : EPOCH 0 - PROGRESS: at 95.23% examples, 201163 words/s, in_qsize 23, out_qsize 1
2022-05-22 17:29:15,513 : INFO : EPOCH 0 - PROGRESS: at 95.25% examples, 201152 words/s, in_qsize 24, out_qsize 0
2022-05-22 17:29:16,547 : INFO : EPOCH 0 - PROGRESS: at 95.27% examples, 201092 words/s,

2022-05-22 17:30:23,092 : INFO : EPOCH 0 - PROGRESS: at 96.61% examples, 201074 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:30:24,189 : INFO : EPOCH 0 - PROGRESS: at 96.63% examples, 201011 words/s, in_qsize 23, out_qsize 2
2022-05-22 17:30:25,227 : INFO : EPOCH 0 - PROGRESS: at 96.65% examples, 201066 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:30:26,243 : INFO : EPOCH 0 - PROGRESS: at 96.67% examples, 201057 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:30:27,253 : INFO : EPOCH 0 - PROGRESS: at 96.69% examples, 201116 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:30:28,259 : INFO : EPOCH 0 - PROGRESS: at 96.71% examples, 201039 words/s, in_qsize 21, out_qsize 3
2022-05-22 17:30:29,279 : INFO : EPOCH 0 - PROGRESS: at 96.73% examples, 201037 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:30:30,283 : INFO : EPOCH 0 - PROGRESS: at 96.75% examples, 201063 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:30:31,355 : INFO : EPOCH 0 - PROGRESS: at 96.77% examples, 201085 words/s,

2022-05-22 17:31:37,343 : INFO : EPOCH 0 - PROGRESS: at 97.97% examples, 200959 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:31:38,372 : INFO : EPOCH 0 - PROGRESS: at 97.99% examples, 200954 words/s, in_qsize 22, out_qsize 1
2022-05-22 17:31:39,455 : INFO : EPOCH 0 - PROGRESS: at 98.01% examples, 200946 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:31:40,475 : INFO : EPOCH 0 - PROGRESS: at 98.02% examples, 200911 words/s, in_qsize 22, out_qsize 1
2022-05-22 17:31:41,490 : INFO : EPOCH 0 - PROGRESS: at 98.05% examples, 200993 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:31:42,510 : INFO : EPOCH 0 - PROGRESS: at 98.06% examples, 200941 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:31:43,539 : INFO : EPOCH 0 - PROGRESS: at 98.08% examples, 200913 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:31:44,628 : INFO : EPOCH 0 - PROGRESS: at 98.10% examples, 200900 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:31:45,648 : INFO : EPOCH 0 - PROGRESS: at 98.12% examples, 200941 words/s,

2022-05-22 17:32:52,333 : INFO : EPOCH 0 - PROGRESS: at 99.34% examples, 200847 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:32:53,371 : INFO : EPOCH 0 - PROGRESS: at 99.36% examples, 200905 words/s, in_qsize 23, out_qsize 1
2022-05-22 17:32:54,371 : INFO : EPOCH 0 - PROGRESS: at 99.38% examples, 200902 words/s, in_qsize 22, out_qsize 1
2022-05-22 17:32:55,377 : INFO : EPOCH 0 - PROGRESS: at 99.40% examples, 200905 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:32:56,408 : INFO : EPOCH 0 - PROGRESS: at 99.42% examples, 200879 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:32:57,425 : INFO : EPOCH 0 - PROGRESS: at 99.44% examples, 200889 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:32:58,434 : INFO : EPOCH 0 - PROGRESS: at 99.46% examples, 200904 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:32:59,456 : INFO : EPOCH 0 - PROGRESS: at 99.48% examples, 200918 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:33:00,465 : INFO : EPOCH 0 - PROGRESS: at 99.51% examples, 200918 words/s,

2022-05-22 17:34:06,114 : INFO : EPOCH 1 - PROGRESS: at 12.48% examples, 200484 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:34:07,195 : INFO : EPOCH 1 - PROGRESS: at 12.70% examples, 200424 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:34:08,346 : INFO : EPOCH 1 - PROGRESS: at 12.91% examples, 200019 words/s, in_qsize 24, out_qsize 1
2022-05-22 17:34:09,354 : INFO : EPOCH 1 - PROGRESS: at 13.20% examples, 199975 words/s, in_qsize 23, out_qsize 3
2022-05-22 17:34:10,375 : INFO : EPOCH 1 - PROGRESS: at 13.48% examples, 200490 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:34:11,421 : INFO : EPOCH 1 - PROGRESS: at 13.68% examples, 200385 words/s, in_qsize 22, out_qsize 1
2022-05-22 17:34:12,432 : INFO : EPOCH 1 - PROGRESS: at 13.91% examples, 200497 words/s, in_qsize 22, out_qsize 1
2022-05-22 17:34:13,443 : INFO : EPOCH 1 - PROGRESS: at 14.20% examples, 200681 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:34:14,546 : INFO : EPOCH 1 - PROGRESS: at 14.45% examples, 200200 words/s,

2022-05-22 17:36:52,314 : INFO : EPOCH 1 - PROGRESS: at 86.23% examples, 202636 words/s, in_qsize 22, out_qsize 1
2022-05-22 17:36:53,333 : INFO : EPOCH 1 - PROGRESS: at 86.68% examples, 202708 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:36:54,389 : INFO : EPOCH 1 - PROGRESS: at 87.08% examples, 202665 words/s, in_qsize 22, out_qsize 1
2022-05-22 17:36:55,413 : INFO : EPOCH 1 - PROGRESS: at 87.47% examples, 202643 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:36:56,500 : INFO : EPOCH 1 - PROGRESS: at 87.89% examples, 202626 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:36:57,534 : INFO : EPOCH 1 - PROGRESS: at 88.36% examples, 202738 words/s, in_qsize 22, out_qsize 0
2022-05-22 17:36:58,506 : INFO : EPOCH 1 - PROGRESS: at 88.86% examples, 202703 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:36:59,568 : INFO : EPOCH 1 - PROGRESS: at 89.32% examples, 202590 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:37:00,573 : INFO : EPOCH 1 - PROGRESS: at 89.84% examples, 202704 words/s,

2022-05-22 17:38:06,796 : INFO : EPOCH 1 - PROGRESS: at 95.68% examples, 202259 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:38:07,805 : INFO : EPOCH 1 - PROGRESS: at 95.70% examples, 202315 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:38:08,835 : INFO : EPOCH 1 - PROGRESS: at 95.72% examples, 202308 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:38:09,885 : INFO : EPOCH 1 - PROGRESS: at 95.75% examples, 202273 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:38:10,895 : INFO : EPOCH 1 - PROGRESS: at 95.76% examples, 202267 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:38:11,913 : INFO : EPOCH 1 - PROGRESS: at 95.78% examples, 202307 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:38:12,963 : INFO : EPOCH 1 - PROGRESS: at 95.81% examples, 202243 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:38:13,982 : INFO : EPOCH 1 - PROGRESS: at 95.84% examples, 202258 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:38:15,000 : INFO : EPOCH 1 - PROGRESS: at 95.85% examples, 202313 words/s,

2022-05-22 17:39:21,379 : INFO : EPOCH 1 - PROGRESS: at 97.14% examples, 202041 words/s, in_qsize 22, out_qsize 1
2022-05-22 17:39:22,409 : INFO : EPOCH 1 - PROGRESS: at 97.16% examples, 202045 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:39:23,425 : INFO : EPOCH 1 - PROGRESS: at 97.18% examples, 202070 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:39:24,426 : INFO : EPOCH 1 - PROGRESS: at 97.20% examples, 202049 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:39:25,437 : INFO : EPOCH 1 - PROGRESS: at 97.22% examples, 202078 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:39:26,448 : INFO : EPOCH 1 - PROGRESS: at 97.24% examples, 202096 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:39:27,500 : INFO : EPOCH 1 - PROGRESS: at 97.26% examples, 202046 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:39:28,520 : INFO : EPOCH 1 - PROGRESS: at 97.28% examples, 202039 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:39:29,542 : INFO : EPOCH 1 - PROGRESS: at 97.30% examples, 202047 words/s,

2022-05-22 17:40:35,930 : INFO : EPOCH 1 - PROGRESS: at 98.50% examples, 201820 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:40:36,962 : INFO : EPOCH 1 - PROGRESS: at 98.52% examples, 201806 words/s, in_qsize 24, out_qsize 1
2022-05-22 17:40:37,982 : INFO : EPOCH 1 - PROGRESS: at 98.54% examples, 201839 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:40:39,016 : INFO : EPOCH 1 - PROGRESS: at 98.56% examples, 201813 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:40:40,018 : INFO : EPOCH 1 - PROGRESS: at 98.58% examples, 201810 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:40:41,050 : INFO : EPOCH 1 - PROGRESS: at 98.60% examples, 201813 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:40:42,075 : INFO : EPOCH 1 - PROGRESS: at 98.62% examples, 201844 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:40:43,085 : INFO : EPOCH 1 - PROGRESS: at 98.64% examples, 201778 words/s, in_qsize 23, out_qsize 1
2022-05-22 17:40:44,092 : INFO : EPOCH 1 - PROGRESS: at 98.66% examples, 201793 words/s,

2022-05-22 17:41:50,003 : INFO : EPOCH 1 - PROGRESS: at 99.92% examples, 201675 words/s, in_qsize 22, out_qsize 1
2022-05-22 17:41:51,037 : INFO : EPOCH 1 - PROGRESS: at 99.94% examples, 201722 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:41:52,041 : INFO : EPOCH 1 - PROGRESS: at 99.96% examples, 201717 words/s, in_qsize 23, out_qsize 1
2022-05-22 17:41:53,135 : INFO : EPOCH 1 - PROGRESS: at 99.98% examples, 201693 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:41:53,735 : INFO : EPOCH 1: training on 145309394 raw words (102709210 effective words) took 509.0s, 201798 effective words/s
2022-05-22 17:41:54,827 : INFO : EPOCH 2 - PROGRESS: at 0.42% examples, 157741 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:41:55,857 : INFO : EPOCH 2 - PROGRESS: at 0.95% examples, 182884 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:41:56,879 : INFO : EPOCH 2 - PROGRESS: at 1.63% examples, 182354 words/s, in_qsize 22, out_qsize 1
2022-05-22 17:41:57,887 : INFO : EPOCH 2 - PROGRESS: at 2.38% exa

2022-05-22 17:43:04,720 : INFO : EPOCH 2 - PROGRESS: at 23.83% examples, 201632 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:43:05,726 : INFO : EPOCH 2 - PROGRESS: at 24.29% examples, 201808 words/s, in_qsize 22, out_qsize 1
2022-05-22 17:43:06,728 : INFO : EPOCH 2 - PROGRESS: at 24.65% examples, 201803 words/s, in_qsize 24, out_qsize 0
2022-05-22 17:43:07,759 : INFO : EPOCH 2 - PROGRESS: at 25.09% examples, 201846 words/s, in_qsize 21, out_qsize 2
2022-05-22 17:43:08,819 : INFO : EPOCH 2 - PROGRESS: at 25.75% examples, 201867 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:43:09,856 : INFO : EPOCH 2 - PROGRESS: at 26.13% examples, 201899 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:43:10,890 : INFO : EPOCH 2 - PROGRESS: at 26.46% examples, 201641 words/s, in_qsize 20, out_qsize 4
2022-05-22 17:43:11,997 : INFO : EPOCH 2 - PROGRESS: at 27.01% examples, 201804 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:43:13,077 : INFO : EPOCH 2 - PROGRESS: at 27.48% examples, 201688 words/s,

2022-05-22 17:44:19,409 : INFO : EPOCH 2 - PROGRESS: at 53.50% examples, 201922 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:44:20,427 : INFO : EPOCH 2 - PROGRESS: at 53.87% examples, 202043 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:44:21,479 : INFO : EPOCH 2 - PROGRESS: at 54.18% examples, 201927 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:44:22,499 : INFO : EPOCH 2 - PROGRESS: at 54.64% examples, 202104 words/s, in_qsize 24, out_qsize 1
2022-05-22 17:45:52,541 : INFO : EPOCH 2 - PROGRESS: at 94.81% examples, 202471 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:45:53,557 : INFO : EPOCH 2 - PROGRESS: at 94.83% examples, 202456 words/s, in_qsize 24, out_qsize 2
2022-05-22 17:45:54,600 : INFO : EPOCH 2 - PROGRESS: at 94.85% examples, 202406 words/s, in_qsize 24, out_qsize 1
2022-05-22 17:45:55,627 : INFO : EPOCH 2 - PROGRESS: at 94.87% examples, 202420 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:45:56,645 : INFO : EPOCH 2 - PROGRESS: at 94.89% examples, 202462 words/s,

2022-05-22 17:47:03,073 : INFO : EPOCH 2 - PROGRESS: at 96.26% examples, 202326 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:47:04,109 : INFO : EPOCH 2 - PROGRESS: at 96.27% examples, 202333 words/s, in_qsize 23, out_qsize 1
2022-05-22 17:47:05,109 : INFO : EPOCH 2 - PROGRESS: at 96.29% examples, 202375 words/s, in_qsize 22, out_qsize 0
2022-05-22 17:47:06,115 : INFO : EPOCH 2 - PROGRESS: at 96.31% examples, 202325 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:47:07,131 : INFO : EPOCH 2 - PROGRESS: at 96.33% examples, 202262 words/s, in_qsize 24, out_qsize 0
2022-05-22 17:47:08,143 : INFO : EPOCH 2 - PROGRESS: at 96.35% examples, 202290 words/s, in_qsize 22, out_qsize 1
2022-05-22 17:47:09,148 : INFO : EPOCH 2 - PROGRESS: at 96.38% examples, 202329 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:47:10,165 : INFO : EPOCH 2 - PROGRESS: at 96.39% examples, 202288 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:47:11,175 : INFO : EPOCH 2 - PROGRESS: at 96.41% examples, 202338 words/s,

2022-05-22 17:48:17,640 : INFO : EPOCH 2 - PROGRESS: at 97.65% examples, 202251 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:48:18,654 : INFO : EPOCH 2 - PROGRESS: at 97.67% examples, 202228 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:48:19,682 : INFO : EPOCH 2 - PROGRESS: at 97.69% examples, 202255 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:48:20,684 : INFO : EPOCH 2 - PROGRESS: at 97.71% examples, 202229 words/s, in_qsize 24, out_qsize 1
2022-05-22 17:48:21,714 : INFO : EPOCH 2 - PROGRESS: at 97.73% examples, 202257 words/s, in_qsize 22, out_qsize 1
2022-05-22 17:48:22,781 : INFO : EPOCH 2 - PROGRESS: at 97.75% examples, 202230 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:48:23,804 : INFO : EPOCH 2 - PROGRESS: at 97.77% examples, 202217 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:48:24,855 : INFO : EPOCH 2 - PROGRESS: at 97.78% examples, 202196 words/s, in_qsize 21, out_qsize 2
2022-05-22 17:48:25,887 : INFO : EPOCH 2 - PROGRESS: at 97.80% examples, 202178 words/s,

2022-05-22 17:49:31,943 : INFO : EPOCH 2 - PROGRESS: at 99.02% examples, 202055 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:49:32,965 : INFO : EPOCH 2 - PROGRESS: at 99.04% examples, 202066 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:49:34,079 : INFO : EPOCH 2 - PROGRESS: at 99.06% examples, 202040 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:49:35,104 : INFO : EPOCH 2 - PROGRESS: at 99.08% examples, 202050 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:49:36,167 : INFO : EPOCH 2 - PROGRESS: at 99.10% examples, 202041 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:49:37,179 : INFO : EPOCH 2 - PROGRESS: at 99.12% examples, 202056 words/s, in_qsize 24, out_qsize 0
2022-05-22 17:49:38,180 : INFO : EPOCH 2 - PROGRESS: at 99.14% examples, 202035 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:49:39,256 : INFO : EPOCH 2 - PROGRESS: at 99.16% examples, 202046 words/s, in_qsize 22, out_qsize 1
2022-05-22 17:49:40,330 : INFO : EPOCH 2 - PROGRESS: at 99.18% examples, 202052 words/s,

2022-05-22 17:50:45,730 : INFO : EPOCH 3 - PROGRESS: at 7.66% examples, 199791 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:50:46,731 : INFO : EPOCH 3 - PROGRESS: at 8.11% examples, 200581 words/s, in_qsize 20, out_qsize 1
2022-05-22 17:50:47,751 : INFO : EPOCH 3 - PROGRESS: at 8.38% examples, 199960 words/s, in_qsize 22, out_qsize 1
2022-05-22 17:50:48,791 : INFO : EPOCH 3 - PROGRESS: at 8.87% examples, 200215 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:50:49,798 : INFO : EPOCH 3 - PROGRESS: at 9.41% examples, 200035 words/s, in_qsize 22, out_qsize 1
2022-05-22 17:50:50,843 : INFO : EPOCH 3 - PROGRESS: at 9.93% examples, 199961 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:50:51,877 : INFO : EPOCH 3 - PROGRESS: at 10.17% examples, 200641 words/s, in_qsize 24, out_qsize 0
2022-05-22 17:50:52,951 : INFO : EPOCH 3 - PROGRESS: at 10.29% examples, 200040 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:50:54,039 : INFO : EPOCH 3 - PROGRESS: at 10.38% examples, 200304 words/s, in_qs

2022-05-22 17:52:00,188 : INFO : EPOCH 3 - PROGRESS: at 35.03% examples, 201681 words/s, in_qsize 24, out_qsize 2
2022-05-22 17:52:01,212 : INFO : EPOCH 3 - PROGRESS: at 35.41% examples, 201740 words/s, in_qsize 24, out_qsize 1
2022-05-22 17:52:02,290 : INFO : EPOCH 3 - PROGRESS: at 35.80% examples, 201661 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:52:03,296 : INFO : EPOCH 3 - PROGRESS: at 36.20% examples, 201706 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:52:04,431 : INFO : EPOCH 3 - PROGRESS: at 36.69% examples, 201694 words/s, in_qsize 23, out_qsize 1
2022-05-22 17:52:05,427 : INFO : EPOCH 3 - PROGRESS: at 37.15% examples, 201885 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:52:06,443 : INFO : EPOCH 3 - PROGRESS: at 37.54% examples, 201855 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:52:07,463 : INFO : EPOCH 3 - PROGRESS: at 37.88% examples, 201855 words/s, in_qsize 24, out_qsize 3
2022-05-22 17:52:08,491 : INFO : EPOCH 3 - PROGRESS: at 38.17% examples, 201845 words/s,

2022-05-22 17:53:15,176 : INFO : EPOCH 3 - PROGRESS: at 64.41% examples, 201407 words/s, in_qsize 7, out_qsize 10
2022-05-22 17:53:16,195 : INFO : EPOCH 3 - PROGRESS: at 70.95% examples, 201502 words/s, in_qsize 24, out_qsize 4
2022-05-22 17:53:17,313 : INFO : EPOCH 3 - PROGRESS: at 71.51% examples, 201578 words/s, in_qsize 24, out_qsize 0
2022-05-22 17:53:18,322 : INFO : EPOCH 3 - PROGRESS: at 71.95% examples, 201632 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:53:19,345 : INFO : EPOCH 3 - PROGRESS: at 72.36% examples, 201655 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:53:20,374 : INFO : EPOCH 3 - PROGRESS: at 72.74% examples, 201610 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:53:21,488 : INFO : EPOCH 3 - PROGRESS: at 73.30% examples, 201603 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:53:22,513 : INFO : EPOCH 3 - PROGRESS: at 73.77% examples, 201690 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:53:23,551 : INFO : EPOCH 3 - PROGRESS: at 74.21% examples, 201654 words/s,

2022-05-22 17:54:30,028 : INFO : EPOCH 3 - PROGRESS: at 94.99% examples, 202050 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:54:31,075 : INFO : EPOCH 3 - PROGRESS: at 95.02% examples, 201969 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:54:32,113 : INFO : EPOCH 3 - PROGRESS: at 95.04% examples, 201964 words/s, in_qsize 24, out_qsize 0
2022-05-22 17:54:33,132 : INFO : EPOCH 3 - PROGRESS: at 95.06% examples, 201945 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:54:34,187 : INFO : EPOCH 3 - PROGRESS: at 95.08% examples, 201936 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:54:35,226 : INFO : EPOCH 3 - PROGRESS: at 95.10% examples, 201928 words/s, in_qsize 24, out_qsize 0
2022-05-22 17:54:36,227 : INFO : EPOCH 3 - PROGRESS: at 95.12% examples, 201955 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:54:37,241 : INFO : EPOCH 3 - PROGRESS: at 95.15% examples, 201975 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:54:38,286 : INFO : EPOCH 3 - PROGRESS: at 95.17% examples, 201985 words/s,

2022-05-22 17:55:44,400 : INFO : EPOCH 3 - PROGRESS: at 96.50% examples, 201707 words/s, in_qsize 22, out_qsize 1
2022-05-22 17:55:45,403 : INFO : EPOCH 3 - PROGRESS: at 96.52% examples, 201731 words/s, in_qsize 24, out_qsize 0
2022-05-22 17:55:46,439 : INFO : EPOCH 3 - PROGRESS: at 96.54% examples, 201765 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:55:47,515 : INFO : EPOCH 3 - PROGRESS: at 96.56% examples, 201673 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:55:48,561 : INFO : EPOCH 3 - PROGRESS: at 96.58% examples, 201687 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:55:49,614 : INFO : EPOCH 3 - PROGRESS: at 96.60% examples, 201655 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:55:50,732 : INFO : EPOCH 3 - PROGRESS: at 96.62% examples, 201670 words/s, in_qsize 23, out_qsize 1
2022-05-22 17:55:51,778 : INFO : EPOCH 3 - PROGRESS: at 96.64% examples, 201696 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:55:52,786 : INFO : EPOCH 3 - PROGRESS: at 96.66% examples, 201704 words/s,

2022-05-22 17:56:58,675 : INFO : EPOCH 3 - PROGRESS: at 97.87% examples, 201632 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:56:59,681 : INFO : EPOCH 3 - PROGRESS: at 97.89% examples, 201647 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:57:00,707 : INFO : EPOCH 3 - PROGRESS: at 97.90% examples, 201642 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:57:01,745 : INFO : EPOCH 3 - PROGRESS: at 97.92% examples, 201626 words/s, in_qsize 21, out_qsize 2
2022-05-22 17:57:02,759 : INFO : EPOCH 3 - PROGRESS: at 97.94% examples, 201649 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:57:03,804 : INFO : EPOCH 3 - PROGRESS: at 97.96% examples, 201618 words/s, in_qsize 22, out_qsize 1
2022-05-22 17:57:04,808 : INFO : EPOCH 3 - PROGRESS: at 97.98% examples, 201693 words/s, in_qsize 24, out_qsize 0
2022-05-22 17:57:05,828 : INFO : EPOCH 3 - PROGRESS: at 98.00% examples, 201632 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:57:06,834 : INFO : EPOCH 3 - PROGRESS: at 98.02% examples, 201603 words/s,

2022-05-22 17:58:13,217 : INFO : EPOCH 3 - PROGRESS: at 99.24% examples, 201608 words/s, in_qsize 21, out_qsize 2
2022-05-22 17:58:14,222 : INFO : EPOCH 3 - PROGRESS: at 99.27% examples, 201632 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:58:15,237 : INFO : EPOCH 3 - PROGRESS: at 99.28% examples, 201631 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:58:16,260 : INFO : EPOCH 3 - PROGRESS: at 99.30% examples, 201594 words/s, in_qsize 24, out_qsize 1
2022-05-22 17:58:17,343 : INFO : EPOCH 3 - PROGRESS: at 99.32% examples, 201620 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:58:18,346 : INFO : EPOCH 3 - PROGRESS: at 99.34% examples, 201612 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:58:19,351 : INFO : EPOCH 3 - PROGRESS: at 99.35% examples, 201616 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:58:20,363 : INFO : EPOCH 3 - PROGRESS: at 99.37% examples, 201638 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:58:21,376 : INFO : EPOCH 3 - PROGRESS: at 99.39% examples, 201604 words/s,

2022-05-22 17:59:27,476 : INFO : EPOCH 4 - PROGRESS: at 11.43% examples, 200726 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:59:28,477 : INFO : EPOCH 4 - PROGRESS: at 11.70% examples, 201196 words/s, in_qsize 24, out_qsize 0
2022-05-22 17:59:29,533 : INFO : EPOCH 4 - PROGRESS: at 11.88% examples, 200777 words/s, in_qsize 24, out_qsize 0
2022-05-22 17:59:30,551 : INFO : EPOCH 4 - PROGRESS: at 12.15% examples, 201011 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:59:31,558 : INFO : EPOCH 4 - PROGRESS: at 12.38% examples, 201260 words/s, in_qsize 22, out_qsize 1
2022-05-22 17:59:32,602 : INFO : EPOCH 4 - PROGRESS: at 12.54% examples, 200798 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:59:33,615 : INFO : EPOCH 4 - PROGRESS: at 12.76% examples, 201032 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:59:34,615 : INFO : EPOCH 4 - PROGRESS: at 12.98% examples, 200959 words/s, in_qsize 23, out_qsize 0
2022-05-22 17:59:35,639 : INFO : EPOCH 4 - PROGRESS: at 13.31% examples, 201336 words/s,

2022-05-22 18:00:41,589 : INFO : EPOCH 4 - PROGRESS: at 39.71% examples, 202394 words/s, in_qsize 23, out_qsize 0
2022-05-22 18:00:42,626 : INFO : EPOCH 4 - PROGRESS: at 40.08% examples, 202212 words/s, in_qsize 24, out_qsize 1
2022-05-22 18:00:43,646 : INFO : EPOCH 4 - PROGRESS: at 40.54% examples, 202135 words/s, in_qsize 22, out_qsize 1
2022-05-22 18:00:44,662 : INFO : EPOCH 4 - PROGRESS: at 41.08% examples, 202333 words/s, in_qsize 24, out_qsize 0
2022-05-22 18:00:45,769 : INFO : EPOCH 4 - PROGRESS: at 41.50% examples, 202138 words/s, in_qsize 23, out_qsize 0
2022-05-22 18:00:46,804 : INFO : EPOCH 4 - PROGRESS: at 41.97% examples, 202333 words/s, in_qsize 23, out_qsize 0
2022-05-22 18:00:47,815 : INFO : EPOCH 4 - PROGRESS: at 42.09% examples, 202361 words/s, in_qsize 24, out_qsize 0
2022-05-22 18:00:48,837 : INFO : EPOCH 4 - PROGRESS: at 42.45% examples, 202224 words/s, in_qsize 23, out_qsize 0
2022-05-22 18:00:49,852 : INFO : EPOCH 4 - PROGRESS: at 42.79% examples, 202241 words/s,

2022-05-22 18:01:56,345 : INFO : EPOCH 4 - PROGRESS: at 76.18% examples, 202455 words/s, in_qsize 23, out_qsize 0
2022-05-22 18:01:57,351 : INFO : EPOCH 4 - PROGRESS: at 76.59% examples, 202499 words/s, in_qsize 24, out_qsize 1
2022-05-22 18:01:58,363 : INFO : EPOCH 4 - PROGRESS: at 77.03% examples, 202521 words/s, in_qsize 23, out_qsize 0
2022-05-22 18:01:59,479 : INFO : EPOCH 4 - PROGRESS: at 77.53% examples, 202443 words/s, in_qsize 23, out_qsize 0
2022-05-22 18:02:00,528 : INFO : EPOCH 4 - PROGRESS: at 78.10% examples, 202559 words/s, in_qsize 23, out_qsize 0
2022-05-22 18:02:01,661 : INFO : EPOCH 4 - PROGRESS: at 78.64% examples, 202487 words/s, in_qsize 23, out_qsize 0
2022-05-22 18:02:02,665 : INFO : EPOCH 4 - PROGRESS: at 79.16% examples, 202524 words/s, in_qsize 23, out_qsize 0
2022-05-22 18:02:03,668 : INFO : EPOCH 4 - PROGRESS: at 79.69% examples, 202573 words/s, in_qsize 23, out_qsize 0
2022-05-22 18:02:04,674 : INFO : EPOCH 4 - PROGRESS: at 80.30% examples, 202504 words/s,

2022-05-22 18:03:10,931 : INFO : EPOCH 4 - PROGRESS: at 95.25% examples, 202382 words/s, in_qsize 23, out_qsize 0
2022-05-22 18:03:11,934 : INFO : EPOCH 4 - PROGRESS: at 95.27% examples, 202425 words/s, in_qsize 23, out_qsize 0
2022-05-22 18:03:12,947 : INFO : EPOCH 4 - PROGRESS: at 95.29% examples, 202424 words/s, in_qsize 23, out_qsize 0
2022-05-22 18:03:13,959 : INFO : EPOCH 4 - PROGRESS: at 95.31% examples, 202366 words/s, in_qsize 23, out_qsize 0
2022-05-22 18:03:14,962 : INFO : EPOCH 4 - PROGRESS: at 95.33% examples, 202405 words/s, in_qsize 23, out_qsize 0
2022-05-22 18:03:15,972 : INFO : EPOCH 4 - PROGRESS: at 95.34% examples, 202358 words/s, in_qsize 23, out_qsize 0
2022-05-22 18:03:16,973 : INFO : EPOCH 4 - PROGRESS: at 95.37% examples, 202376 words/s, in_qsize 24, out_qsize 2
2022-05-22 18:03:18,032 : INFO : EPOCH 4 - PROGRESS: at 95.39% examples, 202325 words/s, in_qsize 23, out_qsize 0
2022-05-22 18:03:19,061 : INFO : EPOCH 4 - PROGRESS: at 95.41% examples, 202373 words/s,

2022-05-22 18:04:25,003 : INFO : EPOCH 4 - PROGRESS: at 96.74% examples, 202165 words/s, in_qsize 24, out_qsize 0
2022-05-22 18:04:26,059 : INFO : EPOCH 4 - PROGRESS: at 96.76% examples, 202144 words/s, in_qsize 23, out_qsize 0
2022-05-22 18:04:27,109 : INFO : EPOCH 4 - PROGRESS: at 96.78% examples, 202169 words/s, in_qsize 22, out_qsize 1
2022-05-22 18:04:28,159 : INFO : EPOCH 4 - PROGRESS: at 96.80% examples, 202167 words/s, in_qsize 23, out_qsize 0
2022-05-22 18:04:29,211 : INFO : EPOCH 4 - PROGRESS: at 96.82% examples, 202181 words/s, in_qsize 24, out_qsize 1
2022-05-22 18:04:30,217 : INFO : EPOCH 4 - PROGRESS: at 96.84% examples, 202165 words/s, in_qsize 23, out_qsize 0
2022-05-22 18:04:31,303 : INFO : EPOCH 4 - PROGRESS: at 96.86% examples, 202127 words/s, in_qsize 23, out_qsize 2
2022-05-22 18:04:32,333 : INFO : EPOCH 4 - PROGRESS: at 96.88% examples, 202150 words/s, in_qsize 23, out_qsize 0
2022-05-22 18:04:33,387 : INFO : EPOCH 4 - PROGRESS: at 96.89% examples, 202130 words/s,

2022-05-22 18:05:40,316 : INFO : EPOCH 4 - PROGRESS: at 98.11% examples, 202007 words/s, in_qsize 23, out_qsize 3
2022-05-22 18:05:41,532 : INFO : EPOCH 4 - PROGRESS: at 98.13% examples, 202004 words/s, in_qsize 23, out_qsize 0
2022-05-22 18:05:42,552 : INFO : EPOCH 4 - PROGRESS: at 98.15% examples, 202041 words/s, in_qsize 23, out_qsize 0
2022-05-22 18:05:43,715 : INFO : EPOCH 4 - PROGRESS: at 98.17% examples, 201940 words/s, in_qsize 23, out_qsize 0
2022-05-22 18:05:44,819 : INFO : EPOCH 4 - PROGRESS: at 98.19% examples, 202018 words/s, in_qsize 23, out_qsize 0
2022-05-22 18:05:45,861 : INFO : EPOCH 4 - PROGRESS: at 98.21% examples, 201972 words/s, in_qsize 23, out_qsize 2
2022-05-22 18:05:46,867 : INFO : EPOCH 4 - PROGRESS: at 98.23% examples, 201986 words/s, in_qsize 23, out_qsize 0
2022-05-22 18:05:47,948 : INFO : EPOCH 4 - PROGRESS: at 98.24% examples, 201967 words/s, in_qsize 23, out_qsize 0
2022-05-22 18:05:48,967 : INFO : EPOCH 4 - PROGRESS: at 98.26% examples, 201987 words/s,

In [16]:
'h'

'h'