Organizing English LibreSpeech Corpus into Manfests for Speech

In [15]:
import os
import re
import json
import glob
from pathlib import Path

from pathlib import PurePath
from pydub import AudioSegment

from nemo_text_processing.text_normalization.normalize import Normalizer
from nemo.collections import nlp as nemo_nlp



### Intitiate text normalizer and puctuator

In [16]:
normalizer = Normalizer(input_case='lower_cased', lang="en")
punctuator = nemo_nlp.models.PunctuationCapitalizationModel.from_pretrained("punctuation_en_distilbert")

 NeMo-text-processing :: INFO     :: Creating ClassifyFst grammars.
 NeMo-text-processing :: DEBUG    :: cardinal:  0.42s -- 6247 nodes
 NeMo-text-processing :: DEBUG    :: ordinal:  1.04s -- 1478 nodes
 NeMo-text-processing :: DEBUG    :: decimal:  0.20s -- 3151 nodes
 NeMo-text-processing :: DEBUG    :: fraction:  0.19s -- 4254 nodes
 NeMo-text-processing :: DEBUG    :: measure:  9.44s -- 49430 nodes
 NeMo-text-processing :: DEBUG    :: date:  0.53s -- 4456 nodes
 NeMo-text-processing :: DEBUG    :: time:  0.12s -- 418 nodes
 NeMo-text-processing :: DEBUG    :: telephone:  0.46s -- 3467 nodes
 NeMo-text-processing :: DEBUG    :: electronic:  0.14s -- 902 nodes
 NeMo-text-processing :: DEBUG    :: money:  7.76s -- 13153 nodes
 NeMo-text-processing :: DEBUG    :: whitelist:  1.19s -- 16688 nodes
 NeMo-text-processing :: DEBUG    :: punct:  2.21s -- 259 nodes
 NeMo-text-processing :: DEBUG    :: word:  4.78s -- 1295 nodes
 NeMo-text-processing :: DEBUG    :: serial:  7.76s -- 10772 node

[NeMo I 2023-11-12 08:23:32 cloud:58] Found existing object /home/ubuntu/.cache/torch/NeMo/NeMo_1.21.0rc0/punctuation_en_distilbert/6bdea9786c4395fbbe02e4143d2e1cee/punctuation_en_distilbert.nemo.
[NeMo I 2023-11-12 08:23:32 cloud:64] Re-using file from: /home/ubuntu/.cache/torch/NeMo/NeMo_1.21.0rc0/punctuation_en_distilbert/6bdea9786c4395fbbe02e4143d2e1cee/punctuation_en_distilbert.nemo
[NeMo I 2023-11-12 08:23:32 common:913] Instantiating model from pre-trained checkpoint
[NeMo I 2023-11-12 08:23:34 tokenizer_utils:130] Getting HuggingFace AutoTokenizer with pretrained_model_name: distilbert-base-uncased, vocab_file: /tmp/tmpb7mnbbwt/tokenizer.vocab_file, merges_files: None, special_tokens_dict: {}, and use_fast: False


[NeMo W 2023-11-12 08:23:35 modelPT:258] You tried to register an artifact under config key=tokenizer.vocab_file but an artifact for it has already been registered.
[NeMo W 2023-11-12 08:23:35 modelPT:161] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
    Train config : 
    use_audio: false
    audio_file: null
    sample_rate: 16000
    use_bucketing: true
    batch_size: 32
    preload_audios: true
    use_tarred_dataset: false
    label_info_save_dir: null
    text_file: text_train.txt
    labels_file: labels_train.txt
    tokens_in_batch: null
    max_seq_length: 128
    num_samples: -1
    use_cache: true
    cache_dir: null
    get_label_frequences: false
    verbose: true
    n_jobs: 0
    tar_metadata_file: null
    tar_shuffle_n: 1
    shard_strategy: scatter
    shuffle: true
    drop_last: false
    pin_memory: true
    num_workers: 8
    persistent_wor

[NeMo I 2023-11-12 08:23:38 save_restore_connector:249] Model PunctuationCapitalizationModel was successfully restored from /home/ubuntu/.cache/torch/NeMo/NeMo_1.21.0rc0/punctuation_en_distilbert/6bdea9786c4395fbbe02e4143d2e1cee/punctuation_en_distilbert.nemo.


### Start Hugging Face NLP systems

In [17]:
from transformers import AutoTokenizer, AutoModelForTokenClassification
from transformers import pipeline

entity_tokenizer = AutoTokenizer.from_pretrained("Babelscape/wikineural-multilingual-ner")
entity_model = AutoModelForTokenClassification.from_pretrained("Babelscape/wikineural-multilingual-ner")

hf_nlp = pipeline("ner", model=entity_model, tokenizer=entity_tokenizer, grouped_entities=True)


def tag_entities(text):

    ner_results = hf_nlp(text)
    print(ner_results)

    # example: [{'entity_group': 'PER', 'score': 0.8913538, 'word': 'Min', 'start': 0, 'end': 3}, {'entity_group': 'LOC', 'score': 0.9983326, 'word': 'West Van Buren Street', 'start': 93, 'end': 114}]
    for ner_dict in ner_results:

        entity_group = ner_dict['entity_group']
        start = ner_dict['start']
        end = ner_dict['end']
        word = ner_dict['word']

        text = text.replace(word, "B-"+entity_group+" "+word+" E-"+entity_group)

    print("ner tagged text", text)


    return text

    


### Start pretrained Emotion Classification system

In [18]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchaudio
from transformers import AutoConfig, Wav2Vec2FeatureExtractor
from src.models import Wav2Vec2ForSpeechClassification, HubertForSpeechClassification

emotion_model = HubertForSpeechClassification.from_pretrained("Rajaram1996/Hubert_emotion")
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained("facebook/hubert-base-ls960")
sampling_rate=16000 # defined by the model; must convert mp3 to this rate.
config = AutoConfig.from_pretrained("Rajaram1996/Hubert_emotion")

def speech_file_to_array_fn(path, sampling_rate):
    speech_array, _sampling_rate = torchaudio.load(path)
    resampler = torchaudio.transforms.Resample(_sampling_rate, sampling_rate)
    speech = resampler(speech_array).squeeze().numpy()
    return speech


def predict(path, sampling_rate):
    speech = speech_file_to_array_fn(path, sampling_rate)
    inputs = feature_extractor(speech, sampling_rate=sampling_rate, return_tensors="pt", padding=True)
    inputs = {key: inputs[key].to(device) for key in inputs}

    with torch.no_grad():
        logits = model(**inputs).logits

    scores = F.softmax(logits, dim=1).detach().cpu().numpy()[0]
    outputs = [{"Emotion": config.id2label[i], "Score": f"{round(score * 100, 3):.1f}%"} for i, score in
               enumerate(scores)]
    return outputs

def get_emotion_labels(audio_file, sampling_rate=16000, score=50.0):
    sound_array = speech_file_to_array_fn(audio_file, sampling_rate)
    
    inputs = feature_extractor(sound_array, sampling_rate=sampling_rate, return_tensors="pt", padding=True)
    inputs = {key: inputs[key].to("cpu").float() for key in inputs}

    with torch.no_grad():
        logits = emotion_model(**inputs).logits

    scores = F.softmax(logits, dim=1).detach().cpu().numpy()[0]

    outputs = [{
        "emo": config.id2label[i],
        "score": round(score * 100, 1)}
        for i, score in enumerate(scores)
    ]

    #[{'emo': 'female_neutral', 'score': 73.9}, {'emo': 'female_happy', 'score': 24.8}]
    emotion_labels = [row for row in sorted(outputs, key=lambda x:x["score"], reverse=True) if row['score'] != '0.0%'][:2]

    all_labels = []
    for emotion_dict in emotion_labels:
        label = emotion_dict['emo'].split("_")[1].upper()
        score = emotion_dict['score']

        if score > 50.0:
            all_labels.append(label)

    return all_labels


Some weights of the model checkpoint at Rajaram1996/Hubert_emotion were not used when initializing HubertForSpeechClassification: ['hubert.encoder.pos_conv_embed.conv.weight_v', 'hubert.encoder.pos_conv_embed.conv.weight_g']
- This IS expected if you are initializing HubertForSpeechClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing HubertForSpeechClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of HubertForSpeechClassification were not initialized from the model checkpoint at Rajaram1996/Hubert_emotion and are newly initialized: ['hubert.encoder.pos_conv_embed.conv.parametrizations.weight.original0', 'hubert.encoder.pos_conv_embed.conv.parametrizations.weight

### Librespeech: Get data, un-compress it and then set paths

In [19]:
#unzip downloaded files
os.system("tar -xzvf train.tar.gz")
os.system("tar -xzvf test.tar.gz")
os.system("tar -xzvf dev.tar.gz")

tar (child): train.tar.gz: Cannot open: No such file or directory
tar (child): Error is not recoverable: exiting now
tar: Child returned status 2
tar: Error is not recoverable: exiting now
tar (child): test.tar.gz: Cannot open: No such file or directory
tar (child): Error is not recoverable: exiting now
tar: Child returned status 2
tar: Error is not recoverable: exiting now
tar (child): dev.tar.gz: Cannot open: No such file or directory
tar (child): Error is not recoverable: exiting now
tar: Child returned status 2
tar: Error is not recoverable: exiting now


512

In [20]:
#define paths to folders created afte unzipping
LIBRE = '/n/disk1/audio_datasets/EN_libre/'
TRAIN_DATA = Path(LIBRE+'/LibriSpeech/train-clean-360/')
TRAIN_DATA_WAV = str(TRAIN_DATA) + '-wav/'
os.system('mkdir -p ' + TRAIN_DATA_WAV)
TRAIN_DATA_WAV = Path(TRAIN_DATA_WAV)


DEV_DATA = Path(LIBRE+'/LibriSpeech/dev-clean/')
DEV_DATA_WAV = str(DEV_DATA) + '-wav/'
os.system('mkdir -p ' + DEV_DATA_WAV)
DEV_DATA_WAV = Path(DEV_DATA_WAV)

TEST_DATA = Path(LIBRE+'/LibriSpeech/test-clean/')
TEST_DATA_WAV = str(TEST_DATA) + '-wav/'
os.system('mkdir -p ' + TEST_DATA_WAV)
TEST_DATA_WAV = Path(TEST_DATA_WAV)

In [21]:
allpath = [TRAIN_DATA, DEV_DATA, TEST_DATA]


def normalize(text):

    text = text.lower()
    normalized = normalizer.normalize(text, verbose=True, punct_post_process=True)
    normalized = [normalized]
    norm_punctuated = punctuator.add_punctuation_capitalization(normalized)[0]
    return norm_punctuated

def read_transcription(filepath):

    trans = open(filepath,'r').readlines()
    trans_dict = {}
    for line in trans:
        line = line.strip().split()
        text = ' '.join(line[1:])
        text = normalize(text)

        trans_dict[line[0]] = text
    
    return trans_dict

def process_librispeech(datakey):

    print(datakey)
    datafolders = glob.glob(str(datakey)+'/*')
    print(datafolders)

    datakey_wav = str(datakey) + '-wav/'
    os.system('mkdir -p ' + datakey_wav)
    datakey_wav = Path(datakey_wav)

    manifest = open(datakey_wav.name+'.json','w')

    for folder in datafolders[:4]:
        sessdirs = glob.glob(folder + '/*')

        for sessdir in sessdirs:
            print("ss", sessdir)
            segments = glob.glob(sessdir + '/*')
            print("segments", segments)

            transcription = [x for x in segments if re.search(".txt", x)][0]
            trans_dict = read_transcription(transcription)
            #print(trans_dict)


            for segment in segments:
                #allfiles = glob.glob(segment + '/*')
                
                filepath = PurePath(segment)

                if ".flac" in str(filepath):
                    
                    sample_dict = {}

                    print("Old path", filepath)

                    filekey = filepath.name.replace(filepath.suffix, "")
                    transcription = trans_dict[filekey]
                    wav_filepath = str(datakey_wav) + "/" + filekey + ".wav"
                    sample_dict['audiofilepath'] = wav_filepath
                    sample_dict['text'] = transcription
                    sample_dict['tagged_text'] = transcription

                    flac_tmp_audio_data = AudioSegment.from_file(filepath, filepath.suffix[1:])
                    flac_tmp_audio_data.export(wav_filepath, format="wav")
                    sample_dict['instruction'] = "transcribe speech"

                    json.dump(sample_dict, manifest)
                    manifest.write("\n")


                    tagged_transcription = tag_entities(transcription)
                    sample_dict['text'] = transcription
                    sample_dict['tagged_text'] = tagged_transcription
                    sample_dict['instruction'] = "transcribe and mark named entities"
                    json.dump(sample_dict, manifest)
                    manifest.write("\n")


                    emotion_labels = get_emotion_labels(audio_file=wav_filepath, sampling_rate=16000)
                    print("Emotion Labels", emotion_labels)
                    emotion_labels = ' '.join(emotion_labels)

                    final_transcription = tagged_transcription + " " + emotion_labels

                    sample_dict['text'] = final_transcription
                    sample_dict['tagged_text'] = transcription
                    sample_dict['instruction'] = "transcribe, mark named entitites and track speaker emotion"
                    json.dump(sample_dict, manifest)
                    manifest.write("\n")

                    sample_dict['prompt'] = final_transcription
                    print("tagged transcription", tagged_transcription)
    
    manifest.close()

for datakey in allpath[1:2]:
    #print(datapath)
    print(datakey)
    process_librispeech(datakey)

 NeMo-text-processing :: DEBUG    :: tokens { name: "he" } tokens { name: "was" } tokens { name: "in" } tokens { name: "a" } tokens { name: "fevered" } tokens { name: "state" } tokens { name: "of" } tokens { name: "mind" } tokens { name: "owing" } tokens { name: "to" } tokens { name: "the" } tokens { name: "blight" } tokens { name: "his" } tokens { name: "wife's" } tokens { name: "action" } tokens { name: "threatened" } tokens { name: "to" } tokens { name: "cast" } tokens { name: "upon" } tokens { name: "his" } tokens { name: "entire" } tokens { name: "future" }


/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean
/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean
['/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2277', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2035', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2086', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/7976', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/1988', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/777', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/84', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/1673', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/8297', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/5536', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/6345', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/6295', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/6313', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/5338', '/n/disk1/

100%|██████████| 1/1 [00:00<00:00, 12.66batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "he" } tokens { name: "would" } tokens { name: "have" } tokens { name: "to" } tokens { name: "pay" } tokens { name: "her" } tokens { name: "the" } tokens { name: "money" } tokens { name: "which" } tokens { name: "she" } tokens { name: "would" } tokens { name: "now" } tokens { name: "regularly" } tokens { name: "demand" } tokens { name: "or" } tokens { name: "there" } tokens { name: "would" } tokens { name: "be" } tokens { name: "trouble" } tokens { name: "it" } tokens { name: "did" } tokens { name: "not" } tokens { name: "matter" } tokens { name: "what" } tokens { name: "he" } tokens { name: "did" }


[NeMo I 2023-11-12 08:24:22 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:22 punctuation_capitalization_infer_dataset:127] Max length: 28
[NeMo I 2023-11-12 08:24:22 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:22 data_preprocessing:406] Min: 26 |                  Max: 26 |                  Mean: 26.0 |                  Median: 26.0
[NeMo I 2023-11-12 08:24:22 data_preprocessing:412] 75 percentile: 26.00
[NeMo I 2023-11-12 08:24:22 data_preprocessing:413] 99 percentile: 26.00


100%|██████████| 1/1 [00:00<00:00,  9.02batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "hurstwood" } tokens { name: "walked" } tokens { name: "the" } tokens { name: "floor" } tokens { name: "mentally" } tokens { name: "arranging" } tokens { name: "the" } tokens { name: "chief" } tokens { name: "points" } tokens { name: "of" } tokens { name: "his" } tokens { name: "situation" }


[NeMo I 2023-11-12 08:24:22 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:22 punctuation_capitalization_infer_dataset:127] Max length: 15
[NeMo I 2023-11-12 08:24:22 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:22 data_preprocessing:406] Min: 13 |                  Max: 13 |                  Mean: 13.0 |                  Median: 13.0
[NeMo I 2023-11-12 08:24:22 data_preprocessing:412] 75 percentile: 13.00
[NeMo I 2023-11-12 08:24:22 data_preprocessing:413] 99 percentile: 13.00


100%|██████████| 1/1 [00:00<00:00, 15.61batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "he" } tokens { name: "also" } tokens { name: "thought" } tokens { name: "of" } tokens { name: "his" } tokens { name: "managerial" } tokens { name: "position" }


[NeMo I 2023-11-12 08:24:22 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:22 punctuation_capitalization_infer_dataset:127] Max length: 9
[NeMo I 2023-11-12 08:24:22 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:22 data_preprocessing:406] Min: 7 |                  Max: 7 |                  Mean: 7.0 |                  Median: 7.0
[NeMo I 2023-11-12 08:24:22 data_preprocessing:412] 75 percentile: 7.00
[NeMo I 2023-11-12 08:24:22 data_preprocessing:413] 99 percentile: 7.00


100%|██████████| 1/1 [00:00<00:00, 26.71batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "how" } tokens { name: "would" } tokens { name: "the" } tokens { name: "papers" } tokens { name: "talk" } tokens { name: "about" } tokens { name: "it" }


[NeMo I 2023-11-12 08:24:22 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:22 punctuation_capitalization_infer_dataset:127] Max length: 9
[NeMo I 2023-11-12 08:24:22 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:22 data_preprocessing:406] Min: 7 |                  Max: 7 |                  Mean: 7.0 |                  Median: 7.0
[NeMo I 2023-11-12 08:24:22 data_preprocessing:412] 75 percentile: 7.00
[NeMo I 2023-11-12 08:24:22 data_preprocessing:413] 99 percentile: 7.00


100%|██████████| 1/1 [00:00<00:00, 24.15batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "many" } tokens { name: "little" } tokens { name: "wrinkles" } tokens { name: "gathered" } tokens { name: "between" } tokens { name: "his" } tokens { name: "eyes" } tokens { name: "as" } tokens { name: "he" } tokens { name: "contemplated" } tokens { name: "this" } tokens { name: "and" } tokens { name: "his" } tokens { name: "brow" } tokens { name: "moistened" }


[NeMo I 2023-11-12 08:24:22 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:22 punctuation_capitalization_infer_dataset:127] Max length: 19
[NeMo I 2023-11-12 08:24:22 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:22 data_preprocessing:406] Min: 17 |                  Max: 17 |                  Mean: 17.0 |                  Median: 17.0
[NeMo I 2023-11-12 08:24:22 data_preprocessing:412] 75 percentile: 17.00
[NeMo I 2023-11-12 08:24:22 data_preprocessing:413] 99 percentile: 17.00


100%|██████████| 1/1 [00:00<00:00, 17.66batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "he" } tokens { name: "could" } tokens { name: "arrange" } tokens { name: "that" } tokens { name: "satisfactorily" } tokens { name: "for" } tokens { name: "carrie" } tokens { name: "would" } tokens { name: "be" } tokens { name: "glad" } tokens { name: "to" } tokens { name: "wait" } tokens { name: "if" } tokens { name: "necessary" }


[NeMo I 2023-11-12 08:24:22 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:22 punctuation_capitalization_infer_dataset:127] Max length: 20
[NeMo I 2023-11-12 08:24:22 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:22 data_preprocessing:406] Min: 18 |                  Max: 18 |                  Mean: 18.0 |                  Median: 18.0
[NeMo I 2023-11-12 08:24:22 data_preprocessing:412] 75 percentile: 18.00
[NeMo I 2023-11-12 08:24:22 data_preprocessing:413] 99 percentile: 18.00


100%|██████████| 1/1 [00:00<00:00, 16.72batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "he" } tokens { name: "would" } tokens { name: "see" } tokens { name: "how" } tokens { name: "things" } tokens { name: "turned" } tokens { name: "out" } tokens { name: "to" } tokens { name: "morrow" } tokens { name: "and" } tokens { name: "then" } tokens { name: "he" } tokens { name: "would" } tokens { name: "talk" } tokens { name: "to" } tokens { name: "her" } tokens { name: "they" } tokens { name: "were" } tokens { name: "going" } tokens { name: "to" } tokens { name: "meet" } tokens { name: "as" } tokens { name: "usual" }


[NeMo I 2023-11-12 08:24:23 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:23 punctuation_capitalization_infer_dataset:127] Max length: 25
[NeMo I 2023-11-12 08:24:23 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:23 data_preprocessing:406] Min: 23 |                  Max: 23 |                  Mean: 23.0 |                  Median: 23.0
[NeMo I 2023-11-12 08:24:23 data_preprocessing:412] 75 percentile: 23.00
[NeMo I 2023-11-12 08:24:23 data_preprocessing:413] 99 percentile: 23.00


100%|██████████| 1/1 [00:00<00:00, 13.41batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "for" } tokens { name: "some" } tokens { name: "reason" } tokens { name: "he" } tokens { name: "felt" } tokens { name: "as" } tokens { name: "if" } tokens { name: "something" } tokens { name: "might" } tokens { name: "come" } tokens { name: "that" } tokens { name: "way" } tokens { name: "and" } tokens { name: "was" } tokens { name: "relieved" } tokens { name: "when" } tokens { name: "all" } tokens { name: "the" } tokens { name: "envelopes" } tokens { name: "had" } tokens { name: "been" } tokens { name: "scanned" } tokens { name: "and" } tokens { name: "nothing" } tokens { name: "suspicious" } tokens { name: "noticed" }


[NeMo I 2023-11-12 08:24:23 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:23 punctuation_capitalization_infer_dataset:127] Max length: 29
[NeMo I 2023-11-12 08:24:23 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:23 data_preprocessing:406] Min: 27 |                  Max: 27 |                  Mean: 27.0 |                  Median: 27.0
[NeMo I 2023-11-12 08:24:23 data_preprocessing:412] 75 percentile: 27.00
[NeMo I 2023-11-12 08:24:23 data_preprocessing:413] 99 percentile: 27.00


100%|██████████| 1/1 [00:00<00:00, 12.25batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "while" } tokens { name: "the" } tokens { name: "danger" } tokens { name: "had" } tokens { name: "not" } tokens { name: "lessened" } tokens { name: "it" } tokens { name: "had" } tokens { name: "not" } tokens { name: "as" } tokens { name: "yet" } tokens { name: "materialised" } tokens { name: "and" } tokens { name: "with" } tokens { name: "him" } tokens { name: "no" } tokens { name: "news" } tokens { name: "was" } tokens { name: "good" } tokens { name: "news" }


[NeMo I 2023-11-12 08:24:23 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:23 punctuation_capitalization_infer_dataset:127] Max length: 24
[NeMo I 2023-11-12 08:24:23 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:23 data_preprocessing:406] Min: 22 |                  Max: 22 |                  Mean: 22.0 |                  Median: 22.0
[NeMo I 2023-11-12 08:24:23 data_preprocessing:412] 75 percentile: 22.00
[NeMo I 2023-11-12 08:24:23 data_preprocessing:413] 99 percentile: 22.00


100%|██████████| 1/1 [00:00<00:00, 17.64batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "so" } tokens { name: "little" } tokens { name: "did" } tokens { name: "he" } tokens { name: "consider" } tokens { name: "drouet" } tokens { name: "that" } tokens { name: "it" } tokens { name: "never" } tokens { name: "once" } tokens { name: "occurred" } tokens { name: "to" } tokens { name: "him" } tokens { name: "to" } tokens { name: "worry" } tokens { name: "about" } tokens { name: "his" } tokens { name: "finding" } tokens { name: "out" }


[NeMo I 2023-11-12 08:24:23 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:23 punctuation_capitalization_infer_dataset:127] Max length: 23
[NeMo I 2023-11-12 08:24:23 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:23 data_preprocessing:406] Min: 21 |                  Max: 21 |                  Mean: 21.0 |                  Median: 21.0
[NeMo I 2023-11-12 08:24:23 data_preprocessing:412] 75 percentile: 21.00
[NeMo I 2023-11-12 08:24:23 data_preprocessing:413] 99 percentile: 21.00


100%|██████████| 1/1 [00:00<00:00, 19.97batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "he" } tokens { name: "grew" } tokens { name: "restless" } tokens { name: "as" } tokens { name: "he" } tokens { name: "ruminated" } tokens { name: "and" } tokens { name: "then" } tokens { name: "decided" } tokens { name: "that" } tokens { name: "perhaps" } tokens { name: "it" } tokens { name: "was" } tokens { name: "nothing" }


[NeMo I 2023-11-12 08:24:24 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:24 punctuation_capitalization_infer_dataset:127] Max length: 17
[NeMo I 2023-11-12 08:24:24 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:24 data_preprocessing:406] Min: 15 |                  Max: 15 |                  Mean: 15.0 |                  Median: 15.0
[NeMo I 2023-11-12 08:24:24 data_preprocessing:412] 75 percentile: 15.00
[NeMo I 2023-11-12 08:24:24 data_preprocessing:413] 99 percentile: 15.00


100%|██████████| 1/1 [00:00<00:00, 17.77batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "she" } tokens { name: "had" } tokens { name: "not" } tokens { name: "been" } tokens { name: "able" } tokens { name: "to" } tokens { name: "get" } tokens { name: "away" } tokens { name: "this" } tokens { name: "morning" }


[NeMo I 2023-11-12 08:24:24 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:24 punctuation_capitalization_infer_dataset:127] Max length: 12
[NeMo I 2023-11-12 08:24:24 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:24 data_preprocessing:406] Min: 10 |                  Max: 10 |                  Mean: 10.0 |                  Median: 10.0
[NeMo I 2023-11-12 08:24:24 data_preprocessing:412] 75 percentile: 10.00
[NeMo I 2023-11-12 08:24:24 data_preprocessing:413] 99 percentile: 10.00


100%|██████████| 1/1 [00:00<00:00, 22.29batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "he" } tokens { name: "would" } tokens { name: "get" } tokens { name: "one" } tokens { name: "to" } tokens { name: "day" } tokens { name: "it" } tokens { name: "would" } tokens { name: "probably" } tokens { name: "be" } tokens { name: "on" } tokens { name: "his" } tokens { name: "desk" } tokens { name: "when" } tokens { name: "he" } tokens { name: "got" } tokens { name: "back" } tokens { name: "he" } tokens { name: "would" } tokens { name: "look" } tokens { name: "for" } tokens { name: "it" } tokens { name: "at" } tokens { name: "once" }


[NeMo I 2023-11-12 08:24:24 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:24 punctuation_capitalization_infer_dataset:127] Max length: 26
[NeMo I 2023-11-12 08:24:24 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:24 data_preprocessing:406] Min: 24 |                  Max: 24 |                  Mean: 24.0 |                  Median: 24.0
[NeMo I 2023-11-12 08:24:24 data_preprocessing:412] 75 percentile: 24.00
[NeMo I 2023-11-12 08:24:24 data_preprocessing:413] 99 percentile: 24.00


100%|██████████| 1/1 [00:00<00:00, 16.44batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "after" } tokens { name: "a" } tokens { name: "time" } tokens { name: "he" } tokens { name: "gave" } tokens { name: "up" } tokens { name: "waiting" } tokens { name: "and" } tokens { name: "drearily" } tokens { name: "headed" } tokens { name: "for" } tokens { name: "the" } tokens { name: "madison" } tokens { name: "car" }


[NeMo I 2023-11-12 08:24:24 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:24 punctuation_capitalization_infer_dataset:127] Max length: 18
[NeMo I 2023-11-12 08:24:24 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:24 data_preprocessing:406] Min: 16 |                  Max: 16 |                  Mean: 16.0 |                  Median: 16.0
[NeMo I 2023-11-12 08:24:24 data_preprocessing:412] 75 percentile: 16.00
[NeMo I 2023-11-12 08:24:24 data_preprocessing:413] 99 percentile: 16.00


100%|██████████| 1/1 [00:00<00:00, 18.04batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "he" } tokens { name: "went" } tokens { name: "in" } tokens { name: "and" } tokens { name: "examined" } tokens { name: "his" } tokens { name: "letters" } tokens { name: "but" } tokens { name: "there" } tokens { name: "was" } tokens { name: "nothing" } tokens { name: "from" } tokens { name: "carrie" }


[NeMo I 2023-11-12 08:24:24 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:24 punctuation_capitalization_infer_dataset:127] Max length: 15
[NeMo I 2023-11-12 08:24:24 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:24 data_preprocessing:406] Min: 13 |                  Max: 13 |                  Mean: 13.0 |                  Median: 13.0
[NeMo I 2023-11-12 08:24:24 data_preprocessing:412] 75 percentile: 13.00
[NeMo I 2023-11-12 08:24:24 data_preprocessing:413] 99 percentile: 13.00


100%|██████████| 1/1 [00:00<00:00, 19.69batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "fortunately" } tokens { name: "there" } tokens { name: "was" } tokens { name: "nothing" } tokens { name: "from" } tokens { name: "his" } tokens { name: "wife" } tokens { name: "either" }


[NeMo I 2023-11-12 08:24:24 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:24 punctuation_capitalization_infer_dataset:127] Max length: 10
[NeMo I 2023-11-12 08:24:24 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:24 data_preprocessing:406] Min: 8 |                  Max: 8 |                  Mean: 8.0 |                  Median: 8.0
[NeMo I 2023-11-12 08:24:24 data_preprocessing:412] 75 percentile: 8.00
[NeMo I 2023-11-12 08:24:24 data_preprocessing:413] 99 percentile: 8.00


100%|██████████| 1/1 [00:00<00:00, 24.88batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "at" } tokens { name: "one" } tokens { name: "thirty" } tokens { name: "he" } tokens { name: "went" } tokens { name: "to" } tokens { name: "rector's" } tokens { name: "for" } tokens { name: "lunch" } tokens { name: "and" } tokens { name: "when" } tokens { name: "he" } tokens { name: "returned" } tokens { name: "a" } tokens { name: "messenger" } tokens { name: "was" } tokens { name: "waiting" } tokens { name: "for" } tokens { name: "him" }


[NeMo I 2023-11-12 08:24:25 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:25 punctuation_capitalization_infer_dataset:127] Max length: 23
[NeMo I 2023-11-12 08:24:25 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:25 data_preprocessing:406] Min: 21 |                  Max: 21 |                  Mean: 21.0 |                  Median: 21.0
[NeMo I 2023-11-12 08:24:25 data_preprocessing:412] 75 percentile: 21.00
[NeMo I 2023-11-12 08:24:25 data_preprocessing:413] 99 percentile: 21.00


100%|██████████| 1/1 [00:00<00:00, 14.76batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "his" } tokens { name: "first" } tokens { name: "impulse" } tokens { name: "was" } tokens { name: "to" } tokens { name: "write" } tokens { name: "but" } tokens { name: "four" } tokens { name: "words" } tokens { name: "in" } tokens { name: "reply" } tokens { name: "go" } tokens { name: "to" } tokens { name: "the" } tokens { name: "devil" }


[NeMo I 2023-11-12 08:24:25 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:25 punctuation_capitalization_infer_dataset:127] Max length: 17
[NeMo I 2023-11-12 08:24:25 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:25 data_preprocessing:406] Min: 15 |                  Max: 15 |                  Mean: 15.0 |                  Median: 15.0
[NeMo I 2023-11-12 08:24:25 data_preprocessing:412] 75 percentile: 15.00
[NeMo I 2023-11-12 08:24:25 data_preprocessing:413] 99 percentile: 15.00


100%|██████████| 1/1 [00:00<00:00, 15.28batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "but" } tokens { name: "he" } tokens { name: "compromised" } tokens { name: "by" } tokens { name: "telling" } tokens { name: "the" } tokens { name: "boy" } tokens { name: "that" } tokens { name: "there" } tokens { name: "would" } tokens { name: "be" } tokens { name: "no" } tokens { name: "reply" }


[NeMo I 2023-11-12 08:24:25 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:25 punctuation_capitalization_infer_dataset:127] Max length: 15
[NeMo I 2023-11-12 08:24:25 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:25 data_preprocessing:406] Min: 13 |                  Max: 13 |                  Mean: 13.0 |                  Median: 13.0
[NeMo I 2023-11-12 08:24:25 data_preprocessing:412] 75 percentile: 13.00
[NeMo I 2023-11-12 08:24:25 data_preprocessing:413] 99 percentile: 13.00


100%|██████████| 1/1 [00:00<00:00, 21.68batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "then" } tokens { name: "he" } tokens { name: "sat" } tokens { name: "down" } tokens { name: "in" } tokens { name: "his" } tokens { name: "chair" } tokens { name: "and" } tokens { name: "gazed" } tokens { name: "without" } tokens { name: "seeing" } tokens { name: "contemplating" } tokens { name: "the" } tokens { name: "result" } tokens { name: "of" } tokens { name: "his" } tokens { name: "work" }


[NeMo I 2023-11-12 08:24:25 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:25 punctuation_capitalization_infer_dataset:127] Max length: 19
[NeMo I 2023-11-12 08:24:25 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:25 data_preprocessing:406] Min: 17 |                  Max: 17 |                  Mean: 17.0 |                  Median: 17.0
[NeMo I 2023-11-12 08:24:25 data_preprocessing:412] 75 percentile: 17.00
[NeMo I 2023-11-12 08:24:25 data_preprocessing:413] 99 percentile: 17.00


100%|██████████| 1/1 [00:00<00:00, 15.98batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "what" } tokens { name: "would" } tokens { name: "she" } tokens { name: "do" } tokens { name: "about" } tokens { name: "that" } tokens { name: "the" } tokens { name: "confounded" } tokens { name: "wretch" }


[NeMo I 2023-11-12 08:24:25 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:25 punctuation_capitalization_infer_dataset:127] Max length: 14
[NeMo I 2023-11-12 08:24:25 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:25 data_preprocessing:406] Min: 12 |                  Max: 12 |                  Mean: 12.0 |                  Median: 12.0
[NeMo I 2023-11-12 08:24:25 data_preprocessing:412] 75 percentile: 12.00
[NeMo I 2023-11-12 08:24:25 data_preprocessing:413] 99 percentile: 12.00


100%|██████████| 1/1 [00:00<00:00, 16.78batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "later" } tokens { name: "however" } tokens { name: "his" } tokens { name: "old" } tokens { name: "discretion" } tokens { name: "asserted" } tokens { name: "itself" }


[NeMo I 2023-11-12 08:24:25 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:25 punctuation_capitalization_infer_dataset:127] Max length: 9
[NeMo I 2023-11-12 08:24:25 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:25 data_preprocessing:406] Min: 7 |                  Max: 7 |                  Mean: 7.0 |                  Median: 7.0
[NeMo I 2023-11-12 08:24:25 data_preprocessing:412] 75 percentile: 7.00
[NeMo I 2023-11-12 08:24:25 data_preprocessing:413] 99 percentile: 7.00


100%|██████████| 1/1 [00:00<00:00, 35.61batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "something" } tokens { name: "had" } tokens { name: "to" } tokens { name: "be" } tokens { name: "done" } tokens { name: "a" } tokens { name: "climax" } tokens { name: "was" } tokens { name: "near" } tokens { name: "and" } tokens { name: "she" } tokens { name: "would" } tokens { name: "not" } tokens { name: "sit" } tokens { name: "idle" }


[NeMo I 2023-11-12 08:24:25 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:25 punctuation_capitalization_infer_dataset:127] Max length: 17
[NeMo I 2023-11-12 08:24:25 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:25 data_preprocessing:406] Min: 15 |                  Max: 15 |                  Mean: 15.0 |                  Median: 15.0
[NeMo I 2023-11-12 08:24:25 data_preprocessing:412] 75 percentile: 15.00
[NeMo I 2023-11-12 08:24:25 data_preprocessing:413] 99 percentile: 15.00


100%|██████████| 1/1 [00:00<00:00, 15.96batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "he" } tokens { name: "knew" } tokens { name: "her" } tokens { name: "well" } tokens { name: "enough" } tokens { name: "to" } tokens { name: "know" } tokens { name: "that" } tokens { name: "when" } tokens { name: "she" } tokens { name: "had" } tokens { name: "decided" } tokens { name: "upon" } tokens { name: "a" } tokens { name: "plan" } tokens { name: "she" } tokens { name: "would" } tokens { name: "follow" } tokens { name: "it" } tokens { name: "up" }


[NeMo I 2023-11-12 08:24:26 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:26 punctuation_capitalization_infer_dataset:127] Max length: 22
[NeMo I 2023-11-12 08:24:26 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:26 data_preprocessing:406] Min: 20 |                  Max: 20 |                  Mean: 20.0 |                  Median: 20.0
[NeMo I 2023-11-12 08:24:26 data_preprocessing:412] 75 percentile: 20.00
[NeMo I 2023-11-12 08:24:26 data_preprocessing:413] 99 percentile: 20.00


100%|██████████| 1/1 [00:00<00:00,  9.52batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "he" } tokens { name: "arose" } tokens { name: "from" } tokens { name: "his" } tokens { name: "chair" } tokens { name: "and" } tokens { name: "went" } tokens { name: "and" } tokens { name: "looked" } tokens { name: "out" } tokens { name: "into" } tokens { name: "the" } tokens { name: "street" }


[NeMo I 2023-11-12 08:24:26 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:26 punctuation_capitalization_infer_dataset:127] Max length: 15
[NeMo I 2023-11-12 08:24:26 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:26 data_preprocessing:406] Min: 13 |                  Max: 13 |                  Mean: 13.0 |                  Median: 13.0
[NeMo I 2023-11-12 08:24:26 data_preprocessing:412] 75 percentile: 13.00
[NeMo I 2023-11-12 08:24:26 data_preprocessing:413] 99 percentile: 13.00


100%|██████████| 1/1 [00:00<00:00, 24.57batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "the" } tokens { name: "long" } tokens { name: "drizzle" } tokens { name: "had" } tokens { name: "begun" } tokens { name: "pedestrians" } tokens { name: "had" } tokens { name: "turned" } tokens { name: "up" } tokens { name: "collars" } tokens { name: "and" } tokens { name: "trousers" } tokens { name: "at" } tokens { name: "the" } tokens { name: "bottom" }


[NeMo I 2023-11-12 08:24:26 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:26 punctuation_capitalization_infer_dataset:127] Max length: 20
[NeMo I 2023-11-12 08:24:26 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:26 data_preprocessing:406] Min: 18 |                  Max: 18 |                  Mean: 18.0 |                  Median: 18.0
[NeMo I 2023-11-12 08:24:26 data_preprocessing:412] 75 percentile: 18.00
[NeMo I 2023-11-12 08:24:26 data_preprocessing:413] 99 percentile: 18.00


100%|██████████| 1/1 [00:00<00:00, 19.37batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "hurstwood" } tokens { name: "almost" } tokens { name: "exclaimed" } tokens { name: "out" } tokens { name: "loud" } tokens { name: "at" } tokens { name: "the" } tokens { name: "insistency" } tokens { name: "of" } tokens { name: "this" } tokens { name: "thing" }


[NeMo I 2023-11-12 08:24:26 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:26 punctuation_capitalization_infer_dataset:127] Max length: 15
[NeMo I 2023-11-12 08:24:26 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:26 data_preprocessing:406] Min: 13 |                  Max: 13 |                  Mean: 13.0 |                  Median: 13.0
[NeMo I 2023-11-12 08:24:26 data_preprocessing:412] 75 percentile: 13.00
[NeMo I 2023-11-12 08:24:26 data_preprocessing:413] 99 percentile: 13.00


100%|██████████| 1/1 [00:00<00:00, 22.77batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "he" } tokens { name: "put" } tokens { name: "on" } tokens { name: "his" } tokens { name: "hat" } tokens { name: "and" } tokens { name: "looked" } tokens { name: "around" } tokens { name: "for" } tokens { name: "his" } tokens { name: "umbrella" }


[NeMo I 2023-11-12 08:24:26 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:26 punctuation_capitalization_infer_dataset:127] Max length: 13
[NeMo I 2023-11-12 08:24:26 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:26 data_preprocessing:406] Min: 11 |                  Max: 11 |                  Mean: 11.0 |                  Median: 11.0
[NeMo I 2023-11-12 08:24:26 data_preprocessing:412] 75 percentile: 11.00
[NeMo I 2023-11-12 08:24:26 data_preprocessing:413] 99 percentile: 11.00


100%|██████████| 1/1 [00:00<00:00, 24.83batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "he" } tokens { name: "would" } tokens { name: "have" } tokens { name: "some" } tokens { name: "arrangement" } tokens { name: "of" } tokens { name: "this" } tokens { name: "thing" }


[NeMo I 2023-11-12 08:24:26 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:26 punctuation_capitalization_infer_dataset:127] Max length: 10
[NeMo I 2023-11-12 08:24:26 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:26 data_preprocessing:406] Min: 8 |                  Max: 8 |                  Mean: 8.0 |                  Median: 8.0
[NeMo I 2023-11-12 08:24:26 data_preprocessing:412] 75 percentile: 8.00
[NeMo I 2023-11-12 08:24:26 data_preprocessing:413] 99 percentile: 8.00


100%|██████████| 1/1 [00:00<00:00, 21.85batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "he" } tokens { name: "began" } tokens { name: "to" } tokens { name: "wish" } tokens { name: "that" } tokens { name: "he" } tokens { name: "had" } tokens { name: "compromised" } tokens { name: "in" } tokens { name: "some" } tokens { name: "way" } tokens { name: "or" } tokens { name: "other" } tokens { name: "that" } tokens { name: "he" } tokens { name: "had" } tokens { name: "sent" } tokens { name: "the" } tokens { name: "money" } tokens { name: "perhaps" } tokens { name: "he" } tokens { name: "could" } tokens { name: "do" } tokens { name: "it" } tokens { name: "up" } tokens { name: "here" }


[NeMo I 2023-11-12 08:24:27 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:27 punctuation_capitalization_infer_dataset:127] Max length: 28
[NeMo I 2023-11-12 08:24:27 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:27 data_preprocessing:406] Min: 26 |                  Max: 26 |                  Mean: 26.0 |                  Median: 26.0
[NeMo I 2023-11-12 08:24:27 data_preprocessing:412] 75 percentile: 26.00
[NeMo I 2023-11-12 08:24:27 data_preprocessing:413] 99 percentile: 26.00


100%|██████████| 1/1 [00:00<00:00,  6.79batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "he" } tokens { name: "would" } tokens { name: "go" } tokens { name: "in" } tokens { name: "and" } tokens { name: "see" } tokens { name: "anyhow" } tokens { name: "he" } tokens { name: "would" } tokens { name: "have" } tokens { name: "no" } tokens { name: "row" }


[NeMo I 2023-11-12 08:24:27 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:27 punctuation_capitalization_infer_dataset:127] Max length: 15
[NeMo I 2023-11-12 08:24:27 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:27 data_preprocessing:406] Min: 13 |                  Max: 13 |                  Mean: 13.0 |                  Median: 13.0
[NeMo I 2023-11-12 08:24:27 data_preprocessing:412] 75 percentile: 13.00
[NeMo I 2023-11-12 08:24:27 data_preprocessing:413] 99 percentile: 13.00


100%|██████████| 1/1 [00:00<00:00, 24.97batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "by" } tokens { name: "the" } tokens { name: "time" } tokens { name: "he" } tokens { name: "reached" } tokens { name: "his" } tokens { name: "own" } tokens { name: "street" } tokens { name: "he" } tokens { name: "was" } tokens { name: "keenly" } tokens { name: "alive" } tokens { name: "to" } tokens { name: "the" } tokens { name: "difficulties" } tokens { name: "of" } tokens { name: "his" } tokens { name: "situation" } tokens { name: "and" } tokens { name: "wished" } tokens { name: "over" } tokens { name: "and" } tokens { name: "over" } tokens { name: "that" } tokens { name: "some" } tokens { name: "solution" } tokens { name: "would" } tokens { name: "offer" } tokens { name: "itself" } tokens { name: "that" } tokens { name: "he" } tokens { name: "could" } tokens { name: "see" } tokens { name: "his" } tokens { name: "way" } tokens { name: "out" }


[NeMo I 2023-11-12 08:24:27 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:27 punctuation_capitalization_infer_dataset:127] Max length: 39
[NeMo I 2023-11-12 08:24:27 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:27 data_preprocessing:406] Min: 37 |                  Max: 37 |                  Mean: 37.0 |                  Median: 37.0
[NeMo I 2023-11-12 08:24:27 data_preprocessing:412] 75 percentile: 37.00
[NeMo I 2023-11-12 08:24:27 data_preprocessing:413] 99 percentile: 37.00


100%|██████████| 1/1 [00:00<00:00,  6.11batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "then" } tokens { name: "he" } tokens { name: "rang" } tokens { name: "the" } tokens { name: "bell" } tokens { name: "no" } tokens { name: "answer" }


[NeMo I 2023-11-12 08:24:27 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:27 punctuation_capitalization_infer_dataset:127] Max length: 9
[NeMo I 2023-11-12 08:24:27 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:27 data_preprocessing:406] Min: 7 |                  Max: 7 |                  Mean: 7.0 |                  Median: 7.0
[NeMo I 2023-11-12 08:24:27 data_preprocessing:412] 75 percentile: 7.00
[NeMo I 2023-11-12 08:24:27 data_preprocessing:413] 99 percentile: 7.00


100%|██████████| 1/1 [00:00<00:00, 23.23batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "he" } tokens { name: "rang" } tokens { name: "again" } tokens { name: "this" } tokens { name: "time" } tokens { name: "harder" } tokens { name: "still" } tokens { name: "no" } tokens { name: "answer" }


[NeMo I 2023-11-12 08:24:28 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:28 punctuation_capitalization_infer_dataset:127] Max length: 11
[NeMo I 2023-11-12 08:24:28 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:28 data_preprocessing:406] Min: 9 |                  Max: 9 |                  Mean: 9.0 |                  Median: 9.0
[NeMo I 2023-11-12 08:24:28 data_preprocessing:412] 75 percentile: 9.00
[NeMo I 2023-11-12 08:24:28 data_preprocessing:413] 99 percentile: 9.00


100%|██████████| 1/1 [00:00<00:00, 24.53batch/s]


Old path /n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2277/149896/2277-149896-0026.flac
[]
ner tagged text The long drizzle had begun, pedestrians had turned up collars and trousers at the bottom.
Emotion Labels ['NEUTRAL']
tagged transcription The long drizzle had begun, pedestrians had turned up collars and trousers at the bottom.
Old path /n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2277/149896/2277-149896-0005.flac
[]
ner tagged text Many little wrinkles gathered between his eyes as he contemplated this and his brow moistened.
Emotion Labels ['NEUTRAL']
tagged transcription Many little wrinkles gathered between his eyes as he contemplated this and his brow moistened.
Old path /n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2277/149896/2277-149896-0033.flac
[{'entity_group': 'MISC', 'score': 0.7273847, 'word': 'bell', 'start': 17, 'end': 21}]
ner tagged text Then he rang the B-MISC bell E-MISC, no answer.
Emotion Labels ['SAD']
tagged transcription Then he

 NeMo-text-processing :: DEBUG    :: tokens { name: "when" } tokens { name: "hurstwood" } tokens { name: "got" } tokens { name: "back" } tokens { name: "to" } tokens { name: "his" } tokens { name: "office" } tokens { name: "again" } tokens { name: "he" } tokens { name: "was" } tokens { name: "in" } tokens { name: "a" } tokens { name: "greater" } tokens { name: "quandary" } tokens { name: "than" } tokens { name: "ever" }


Emotion Labels ['NEUTRAL']
tagged transcription Then he sat down in his chair and gazed without seeing contemplating the result of his work.
ss /n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2277/149897
segments ['/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2277/149897/2277-149897-0021.flac', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2277/149897/2277-149897-0001.flac', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2277/149897/2277-149897-0031.flac', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2277/149897/2277-149897-0017.flac', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2277/149897/2277-149897-0005.flac', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2277/149897/2277-149897-0025.flac', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2277/149897/2277-149897-0034.flac', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2277/149897/2277-149897-0036.flac', '/n/disk1/audio_datasets/EN_libre/LibriSpee

100%|██████████| 1/1 [00:00<00:00, 17.97batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "he" } tokens { name: "could" } tokens { name: "hardly" } tokens { name: "realise" } tokens { name: "how" } tokens { name: "it" } tokens { name: "had" } tokens { name: "all" } tokens { name: "come" } tokens { name: "about" }


[NeMo I 2023-11-12 08:24:48 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:48 punctuation_capitalization_infer_dataset:127] Max length: 12
[NeMo I 2023-11-12 08:24:48 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:48 data_preprocessing:406] Min: 10 |                  Max: 10 |                  Mean: 10.0 |                  Median: 10.0
[NeMo I 2023-11-12 08:24:48 data_preprocessing:412] 75 percentile: 10.00
[NeMo I 2023-11-12 08:24:48 data_preprocessing:413] 99 percentile: 10.00


100%|██████████| 1/1 [00:00<00:00, 16.21batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "no" } tokens { name: "letter" } tokens { name: "had" } tokens { name: "come" } tokens { name: "no" } tokens { name: "word" } tokens { name: "of" } tokens { name: "any" } tokens { name: "kind" } tokens { name: "and" } tokens { name: "yet" } tokens { name: "here" } tokens { name: "it" } tokens { name: "was" } tokens { name: "late" } tokens { name: "in" } tokens { name: "the" } tokens { name: "evening" } tokens { name: "and" } tokens { name: "she" } tokens { name: "had" } tokens { name: "agreed" } tokens { name: "to" } tokens { name: "meet" } tokens { name: "him" } tokens { name: "that" } tokens { name: "morning" }


[NeMo I 2023-11-12 08:24:49 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:49 punctuation_capitalization_infer_dataset:127] Max length: 29
[NeMo I 2023-11-12 08:24:49 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:49 data_preprocessing:406] Min: 27 |                  Max: 27 |                  Mean: 27.0 |                  Median: 27.0
[NeMo I 2023-11-12 08:24:49 data_preprocessing:412] 75 percentile: 27.00
[NeMo I 2023-11-12 08:24:49 data_preprocessing:413] 99 percentile: 27.00


100%|██████████| 1/1 [00:00<00:00, 14.38batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "he" } tokens { name: "saw" } tokens { name: "that" } tokens { name: "in" } tokens { name: "the" } tokens { name: "excitement" } tokens { name: "of" } tokens { name: "recent" } tokens { name: "events" } tokens { name: "he" } tokens { name: "had" } tokens { name: "not" } tokens { name: "formulated" } tokens { name: "a" } tokens { name: "plan" } tokens { name: "upon" } tokens { name: "that" } tokens { name: "score" }


[NeMo I 2023-11-12 08:24:49 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:49 punctuation_capitalization_infer_dataset:127] Max length: 20
[NeMo I 2023-11-12 08:24:49 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:49 data_preprocessing:406] Min: 18 |                  Max: 18 |                  Mean: 18.0 |                  Median: 18.0
[NeMo I 2023-11-12 08:24:49 data_preprocessing:412] 75 percentile: 18.00
[NeMo I 2023-11-12 08:24:49 data_preprocessing:413] 99 percentile: 18.00


100%|██████████| 1/1 [00:00<00:00, 17.99batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "he" } tokens { name: "was" } tokens { name: "getting" } tokens { name: "some" } tokens { name: "vague" } tokens { name: "comfort" } tokens { name: "out" } tokens { name: "of" } tokens { name: "a" } tokens { name: "good" } tokens { name: "cigar" } tokens { name: "but" } tokens { name: "it" } tokens { name: "was" } tokens { name: "no" } tokens { name: "panacea" } tokens { name: "for" } tokens { name: "the" } tokens { name: "ill" } tokens { name: "which" } tokens { name: "affected" } tokens { name: "him" }


[NeMo I 2023-11-12 08:24:49 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:49 punctuation_capitalization_infer_dataset:127] Max length: 26
[NeMo I 2023-11-12 08:24:49 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:49 data_preprocessing:406] Min: 24 |                  Max: 24 |                  Mean: 24.0 |                  Median: 24.0
[NeMo I 2023-11-12 08:24:49 data_preprocessing:412] 75 percentile: 24.00
[NeMo I 2023-11-12 08:24:49 data_preprocessing:413] 99 percentile: 24.00


100%|██████████| 1/1 [00:00<00:00, 16.39batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "it" } tokens { name: "was" } tokens { name: "with" } tokens { name: "great" } tokens { name: "opposition" } tokens { name: "after" } tokens { name: "two" } tokens { name: "or" } tokens { name: "three" } tokens { name: "hours" } tokens { name: "of" } tokens { name: "the" } tokens { name: "most" } tokens { name: "urgent" } tokens { name: "mental" } tokens { name: "affirmation" } tokens { name: "and" } tokens { name: "denial" } tokens { name: "that" } tokens { name: "at" } tokens { name: "last" } tokens { name: "he" } tokens { name: "got" } tokens { name: "an" } tokens { name: "envelope" } tokens { name: "placed" } tokens { name: "in" } tokens { name: "it" } tokens { name: "the" } tokens { name: "requested" } tokens { name: "amount" } tokens { name: "and" } tokens { name: "slowly" } tokens { name: "sealed" } tokens { name: "it" } tokens { name: "up" }


[NeMo I 2023-11-12 08:24:49 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:49 punctuation_capitalization_infer_dataset:127] Max length: 40
[NeMo I 2023-11-12 08:24:49 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:49 data_preprocessing:406] Min: 38 |                  Max: 38 |                  Mean: 38.0 |                  Median: 38.0
[NeMo I 2023-11-12 08:24:49 data_preprocessing:412] 75 percentile: 38.00
[NeMo I 2023-11-12 08:24:49 data_preprocessing:413] 99 percentile: 38.00


100%|██████████| 1/1 [00:00<00:00, 13.18batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "then" } tokens { name: "he" } tokens { name: "called" } tokens { name: "harry" } tokens { name: "the" } tokens { name: "boy" } tokens { name: "of" } tokens { name: "all" } tokens { name: "work" } tokens { name: "around" } tokens { name: "the" } tokens { name: "place" }


[NeMo I 2023-11-12 08:24:50 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:50 punctuation_capitalization_infer_dataset:127] Max length: 14
[NeMo I 2023-11-12 08:24:50 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:50 data_preprocessing:406] Min: 12 |                  Max: 12 |                  Mean: 12.0 |                  Median: 12.0
[NeMo I 2023-11-12 08:24:50 data_preprocessing:412] 75 percentile: 12.00
[NeMo I 2023-11-12 08:24:50 data_preprocessing:413] 99 percentile: 12.00


100%|██████████| 1/1 [00:00<00:00, 29.96batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "you" } tokens { name: "take" } tokens { name: "this" } tokens { name: "to" } tokens { name: "this" } tokens { name: "address" } tokens { name: "he" } tokens { name: "said" } tokens { name: "handing" } tokens { name: "him" } tokens { name: "the" } tokens { name: "envelope" } tokens { name: "and" } tokens { name: "give" } tokens { name: "it" } tokens { name: "to" } tokens { name: "missus" } tokens { name: "hurstwood" } tokens { name: "yes" } tokens { name: "sir" } tokens { name: "said" } tokens { name: "the" } tokens { name: "boy" }


[NeMo I 2023-11-12 08:24:50 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:50 punctuation_capitalization_infer_dataset:127] Max length: 27
[NeMo I 2023-11-12 08:24:50 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:50 data_preprocessing:406] Min: 25 |                  Max: 25 |                  Mean: 25.0 |                  Median: 25.0
[NeMo I 2023-11-12 08:24:50 data_preprocessing:412] 75 percentile: 25.00
[NeMo I 2023-11-12 08:24:50 data_preprocessing:413] 99 percentile: 25.00


100%|██████████| 1/1 [00:00<00:00, 14.77batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "any" } tokens { name: "answer" } tokens { name: "i" } tokens { name: "guess" } tokens { name: "not" }


[NeMo I 2023-11-12 08:24:50 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:50 punctuation_capitalization_infer_dataset:127] Max length: 7
[NeMo I 2023-11-12 08:24:50 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:50 data_preprocessing:406] Min: 5 |                  Max: 5 |                  Mean: 5.0 |                  Median: 5.0
[NeMo I 2023-11-12 08:24:50 data_preprocessing:412] 75 percentile: 5.00
[NeMo I 2023-11-12 08:24:50 data_preprocessing:413] 99 percentile: 5.00


100%|██████████| 1/1 [00:00<00:00, 11.34batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "the" } tokens { name: "boy" } tokens { name: "hastened" } tokens { name: "away" } tokens { name: "and" } tokens { name: "the" } tokens { name: "manager" } tokens { name: "fell" } tokens { name: "to" } tokens { name: "his" } tokens { name: "musings" }


[NeMo I 2023-11-12 08:24:50 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:50 punctuation_capitalization_infer_dataset:127] Max length: 16
[NeMo I 2023-11-12 08:24:50 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:50 data_preprocessing:406] Min: 14 |                  Max: 14 |                  Mean: 14.0 |                  Median: 14.0
[NeMo I 2023-11-12 08:24:50 data_preprocessing:412] 75 percentile: 14.00
[NeMo I 2023-11-12 08:24:50 data_preprocessing:413] 99 percentile: 14.00


100%|██████████| 1/1 [00:00<00:00, 19.35batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "he" } tokens { name: "was" } tokens { name: "beaten" } tokens { name: "for" } tokens { name: "to" } tokens { name: "night" } tokens { name: "and" } tokens { name: "he" } tokens { name: "might" } tokens { name: "just" } tokens { name: "as" } tokens { name: "well" } tokens { name: "make" } tokens { name: "the" } tokens { name: "best" } tokens { name: "of" } tokens { name: "it" }


[NeMo I 2023-11-12 08:24:50 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:50 punctuation_capitalization_infer_dataset:127] Max length: 19
[NeMo I 2023-11-12 08:24:50 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:50 data_preprocessing:406] Min: 17 |                  Max: 17 |                  Mean: 17.0 |                  Median: 17.0
[NeMo I 2023-11-12 08:24:50 data_preprocessing:412] 75 percentile: 17.00
[NeMo I 2023-11-12 08:24:50 data_preprocessing:413] 99 percentile: 17.00


100%|██████████| 1/1 [00:00<00:00, 19.11batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "she" } tokens { name: "would" } tokens { name: "take" } tokens { name: "the" } tokens { name: "envelope" } tokens { name: "and" } tokens { name: "know" } tokens { name: "that" } tokens { name: "she" } tokens { name: "had" } tokens { name: "triumphed" }


[NeMo I 2023-11-12 08:24:50 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:50 punctuation_capitalization_infer_dataset:127] Max length: 14
[NeMo I 2023-11-12 08:24:50 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:50 data_preprocessing:406] Min: 12 |                  Max: 12 |                  Mean: 12.0 |                  Median: 12.0
[NeMo I 2023-11-12 08:24:50 data_preprocessing:412] 75 percentile: 12.00
[NeMo I 2023-11-12 08:24:50 data_preprocessing:413] 99 percentile: 12.00


100%|██████████| 1/1 [00:00<00:00, 21.74batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "if" } tokens { name: "he" } tokens { name: "only" } tokens { name: "had" } tokens { name: "that" } tokens { name: "letter" } tokens { name: "back" } tokens { name: "he" } tokens { name: "wouldn't" } tokens { name: "send" } tokens { name: "it" }


[NeMo I 2023-11-12 08:24:51 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:51 punctuation_capitalization_infer_dataset:127] Max length: 15
[NeMo I 2023-11-12 08:24:51 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:51 data_preprocessing:406] Min: 13 |                  Max: 13 |                  Mean: 13.0 |                  Median: 13.0
[NeMo I 2023-11-12 08:24:51 data_preprocessing:412] 75 percentile: 13.00
[NeMo I 2023-11-12 08:24:51 data_preprocessing:413] 99 percentile: 13.00


100%|██████████| 1/1 [00:00<00:00, 19.89batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "for" } tokens { name: "relief" } tokens { name: "he" } tokens { name: "arose" } tokens { name: "and" } tokens { name: "joined" } tokens { name: "in" } tokens { name: "conversation" } tokens { name: "with" } tokens { name: "a" } tokens { name: "few" } tokens { name: "friends" } tokens { name: "who" } tokens { name: "were" } tokens { name: "drinking" }


[NeMo I 2023-11-12 08:24:51 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:51 punctuation_capitalization_infer_dataset:127] Max length: 17
[NeMo I 2023-11-12 08:24:51 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:51 data_preprocessing:406] Min: 15 |                  Max: 15 |                  Mean: 15.0 |                  Median: 15.0
[NeMo I 2023-11-12 08:24:51 data_preprocessing:412] 75 percentile: 15.00
[NeMo I 2023-11-12 08:24:51 data_preprocessing:413] 99 percentile: 15.00


100%|██████████| 1/1 [00:00<00:00, 17.18batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "all" } tokens { name: "the" } tokens { name: "time" } tokens { name: "his" } tokens { name: "thoughts" } tokens { name: "would" } tokens { name: "run" } tokens { name: "out" } tokens { name: "to" } tokens { name: "his" } tokens { name: "home" } tokens { name: "and" } tokens { name: "see" } tokens { name: "the" } tokens { name: "scene" } tokens { name: "being" } tokens { name: "therein" } tokens { name: "enacted" }


[NeMo I 2023-11-12 08:24:51 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:51 punctuation_capitalization_infer_dataset:127] Max length: 21
[NeMo I 2023-11-12 08:24:51 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:51 data_preprocessing:406] Min: 19 |                  Max: 19 |                  Mean: 19.0 |                  Median: 19.0
[NeMo I 2023-11-12 08:24:51 data_preprocessing:412] 75 percentile: 19.00
[NeMo I 2023-11-12 08:24:51 data_preprocessing:413] 99 percentile: 19.00


100%|██████████| 1/1 [00:00<00:00, 18.02batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "in" } tokens { name: "about" } tokens { name: "an" } tokens { name: "hour" } tokens { name: "and" } tokens { name: "three" } tokens { name: "quarters" } tokens { name: "the" } tokens { name: "boy" } tokens { name: "returned" }


[NeMo I 2023-11-12 08:24:51 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:51 punctuation_capitalization_infer_dataset:127] Max length: 12
[NeMo I 2023-11-12 08:24:51 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:51 data_preprocessing:406] Min: 10 |                  Max: 10 |                  Mean: 10.0 |                  Median: 10.0
[NeMo I 2023-11-12 08:24:51 data_preprocessing:412] 75 percentile: 10.00
[NeMo I 2023-11-12 08:24:51 data_preprocessing:413] 99 percentile: 10.00


100%|██████████| 1/1 [00:00<00:00, 23.61batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "he" } tokens { name: "fancied" } tokens { name: "as" } tokens { name: "he" } tokens { name: "sat" } tokens { name: "at" } tokens { name: "his" } tokens { name: "desk" } tokens { name: "that" } tokens { name: "nothing" } tokens { name: "would" } tokens { name: "be" } tokens { name: "done" } tokens { name: "for" } tokens { name: "a" } tokens { name: "week" } tokens { name: "or" } tokens { name: "two" } tokens { name: "meanwhile" } tokens { name: "he" } tokens { name: "would" } tokens { name: "have" } tokens { name: "time" } tokens { name: "to" } tokens { name: "think" }


[NeMo I 2023-11-12 08:24:51 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:51 punctuation_capitalization_infer_dataset:127] Max length: 29
[NeMo I 2023-11-12 08:24:51 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:51 data_preprocessing:406] Min: 27 |                  Max: 27 |                  Mean: 27.0 |                  Median: 27.0
[NeMo I 2023-11-12 08:24:51 data_preprocessing:412] 75 percentile: 27.00
[NeMo I 2023-11-12 08:24:51 data_preprocessing:413] 99 percentile: 27.00


100%|██████████| 1/1 [00:00<00:00, 16.84batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "how" } tokens { name: "about" } tokens { name: "that" } tokens { name: "now" } tokens { name: "his" } tokens { name: "pain" } tokens { name: "at" } tokens { name: "her" } tokens { name: "failure" } tokens { name: "to" } tokens { name: "meet" } tokens { name: "or" } tokens { name: "write" } tokens { name: "him" } tokens { name: "rapidly" } tokens { name: "increased" } tokens { name: "as" } tokens { name: "he" } tokens { name: "devoted" } tokens { name: "himself" } tokens { name: "to" } tokens { name: "this" } tokens { name: "subject" }


[NeMo I 2023-11-12 08:24:51 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:51 punctuation_capitalization_infer_dataset:127] Max length: 25
[NeMo I 2023-11-12 08:24:51 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:51 data_preprocessing:406] Min: 23 |                  Max: 23 |                  Mean: 23.0 |                  Median: 23.0
[NeMo I 2023-11-12 08:24:51 data_preprocessing:412] 75 percentile: 23.00
[NeMo I 2023-11-12 08:24:51 data_preprocessing:413] 99 percentile: 23.00


100%|██████████| 1/1 [00:00<00:00, 15.95batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "he" } tokens { name: "decided" } tokens { name: "to" } tokens { name: "write" } tokens { name: "her" } tokens { name: "care" } tokens { name: "of" } tokens { name: "the" } tokens { name: "west" } tokens { name: "side" } tokens { name: "post" } tokens { name: "office" } tokens { name: "and" } tokens { name: "ask" } tokens { name: "for" } tokens { name: "an" } tokens { name: "explanation" } tokens { name: "as" } tokens { name: "well" } tokens { name: "as" } tokens { name: "to" } tokens { name: "have" } tokens { name: "her" } tokens { name: "meet" } tokens { name: "him" }


[NeMo I 2023-11-12 08:24:52 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:52 punctuation_capitalization_infer_dataset:127] Max length: 27
[NeMo I 2023-11-12 08:24:52 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:52 data_preprocessing:406] Min: 25 |                  Max: 25 |                  Mean: 25.0 |                  Median: 25.0
[NeMo I 2023-11-12 08:24:52 data_preprocessing:412] 75 percentile: 25.00
[NeMo I 2023-11-12 08:24:52 data_preprocessing:413] 99 percentile: 25.00


100%|██████████| 1/1 [00:00<00:00, 17.56batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "three" } tokens { name: "o'clock" } tokens { name: "came" } tokens { name: "four" } tokens { name: "five" } tokens { name: "six" } tokens { name: "and" } tokens { name: "no" } tokens { name: "letter" }


[NeMo I 2023-11-12 08:24:52 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:52 punctuation_capitalization_infer_dataset:127] Max length: 13
[NeMo I 2023-11-12 08:24:52 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:52 data_preprocessing:406] Min: 11 |                  Max: 11 |                  Mean: 11.0 |                  Median: 11.0
[NeMo I 2023-11-12 08:24:52 data_preprocessing:412] 75 percentile: 11.00
[NeMo I 2023-11-12 08:24:52 data_preprocessing:413] 99 percentile: 11.00


100%|██████████| 1/1 [00:00<00:00, 22.63batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "the" } tokens { name: "helpless" } tokens { name: "manager" } tokens { name: "paced" } tokens { name: "the" } tokens { name: "floor" } tokens { name: "and" } tokens { name: "grimly" } tokens { name: "endured" } tokens { name: "the" } tokens { name: "gloom" } tokens { name: "of" } tokens { name: "defeat" }


[NeMo I 2023-11-12 08:24:52 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:52 punctuation_capitalization_infer_dataset:127] Max length: 15
[NeMo I 2023-11-12 08:24:52 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:52 data_preprocessing:406] Min: 13 |                  Max: 13 |                  Mean: 13.0 |                  Median: 13.0
[NeMo I 2023-11-12 08:24:52 data_preprocessing:412] 75 percentile: 13.00
[NeMo I 2023-11-12 08:24:52 data_preprocessing:413] 99 percentile: 13.00


100%|██████████| 1/1 [00:00<00:00, 16.43batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "he" } tokens { name: "saw" } tokens { name: "a" } tokens { name: "busy" } tokens { name: "saturday" } tokens { name: "ushered" } tokens { name: "out" } tokens { name: "the" } tokens { name: "sabbath" } tokens { name: "in" } tokens { name: "and" } tokens { name: "nothing" } tokens { name: "done" }


[NeMo I 2023-11-12 08:24:52 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:52 punctuation_capitalization_infer_dataset:127] Max length: 15
[NeMo I 2023-11-12 08:24:52 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:52 data_preprocessing:406] Min: 13 |                  Max: 13 |                  Mean: 13.0 |                  Median: 13.0
[NeMo I 2023-11-12 08:24:52 data_preprocessing:412] 75 percentile: 13.00
[NeMo I 2023-11-12 08:24:52 data_preprocessing:413] 99 percentile: 13.00


100%|██████████| 1/1 [00:00<00:00, 22.29batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "all" } tokens { name: "day" } tokens { name: "the" } tokens { name: "bar" } tokens { name: "being" } tokens { name: "closed" } tokens { name: "he" } tokens { name: "brooded" } tokens { name: "alone" } tokens { name: "shut" } tokens { name: "out" } tokens { name: "from" } tokens { name: "home" } tokens { name: "from" } tokens { name: "the" } tokens { name: "excitement" } tokens { name: "of" } tokens { name: "his" } tokens { name: "resort" } tokens { name: "from" } tokens { name: "carrie" } tokens { name: "and" } tokens { name: "without" } tokens { name: "the" } tokens { name: "ability" } tokens { name: "to" } tokens { name: "alter" } tokens { name: "his" } tokens { name: "condition" } tokens { name: "one" } tokens { name: "iota" }


[NeMo I 2023-11-12 08:24:52 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:52 punctuation_capitalization_infer_dataset:127] Max length: 35
[NeMo I 2023-11-12 08:24:52 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:52 data_preprocessing:406] Min: 33 |                  Max: 33 |                  Mean: 33.0 |                  Median: 33.0
[NeMo I 2023-11-12 08:24:52 data_preprocessing:412] 75 percentile: 33.00
[NeMo I 2023-11-12 08:24:52 data_preprocessing:413] 99 percentile: 33.00


100%|██████████| 1/1 [00:00<00:00, 14.47batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "it" } tokens { name: "was" } tokens { name: "the" } tokens { name: "worst" } tokens { name: "sunday" } tokens { name: "he" } tokens { name: "had" } tokens { name: "spent" } tokens { name: "in" } tokens { name: "his" } tokens { name: "life" }


[NeMo I 2023-11-12 08:24:53 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:53 punctuation_capitalization_infer_dataset:127] Max length: 13
[NeMo I 2023-11-12 08:24:53 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:53 data_preprocessing:406] Min: 11 |                  Max: 11 |                  Mean: 11.0 |                  Median: 11.0
[NeMo I 2023-11-12 08:24:53 data_preprocessing:412] 75 percentile: 11.00
[NeMo I 2023-11-12 08:24:53 data_preprocessing:413] 99 percentile: 11.00


100%|██████████| 1/1 [00:00<00:00, 21.04batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "it" } tokens { name: "seemed" } tokens { name: "as" } tokens { name: "if" } tokens { name: "his" } tokens { name: "family" } tokens { name: "troubles" } tokens { name: "were" } tokens { name: "just" } tokens { name: "beginning" }


[NeMo I 2023-11-12 08:24:53 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:53 punctuation_capitalization_infer_dataset:127] Max length: 12
[NeMo I 2023-11-12 08:24:53 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:53 data_preprocessing:406] Min: 10 |                  Max: 10 |                  Mean: 10.0 |                  Median: 10.0
[NeMo I 2023-11-12 08:24:53 data_preprocessing:412] 75 percentile: 10.00
[NeMo I 2023-11-12 08:24:53 data_preprocessing:413] 99 percentile: 10.00


100%|██████████| 1/1 [00:00<00:00, 15.77batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "he" } tokens { name: "was" } tokens { name: "quite" } tokens { name: "certain" } tokens { name: "now" } tokens { name: "that" } tokens { name: "she" } tokens { name: "knew" } tokens { name: "he" } tokens { name: "was" } tokens { name: "married" } tokens { name: "and" } tokens { name: "was" } tokens { name: "angered" } tokens { name: "at" } tokens { name: "his" } tokens { name: "perfidy" }


[NeMo I 2023-11-12 08:24:53 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:53 punctuation_capitalization_infer_dataset:127] Max length: 21
[NeMo I 2023-11-12 08:24:53 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:53 data_preprocessing:406] Min: 19 |                  Max: 19 |                  Mean: 19.0 |                  Median: 19.0
[NeMo I 2023-11-12 08:24:53 data_preprocessing:412] 75 percentile: 19.00
[NeMo I 2023-11-12 08:24:53 data_preprocessing:413] 99 percentile: 19.00


100%|██████████| 1/1 [00:00<00:00,  9.26batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "he" } tokens { name: "had" } tokens { name: "loved" } tokens { name: "her" } tokens { name: "earnestly" } tokens { name: "enough" } tokens { name: "but" } tokens { name: "now" } tokens { name: "that" } tokens { name: "the" } tokens { name: "possibility" } tokens { name: "of" } tokens { name: "losing" } tokens { name: "her" } tokens { name: "stared" } tokens { name: "him" } tokens { name: "in" } tokens { name: "the" } tokens { name: "face" } tokens { name: "she" } tokens { name: "seemed" } tokens { name: "much" } tokens { name: "more" } tokens { name: "attractive" }


[NeMo I 2023-11-12 08:24:53 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:53 punctuation_capitalization_infer_dataset:127] Max length: 27
[NeMo I 2023-11-12 08:24:53 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:53 data_preprocessing:406] Min: 25 |                  Max: 25 |                  Mean: 25.0 |                  Median: 25.0
[NeMo I 2023-11-12 08:24:53 data_preprocessing:412] 75 percentile: 25.00
[NeMo I 2023-11-12 08:24:53 data_preprocessing:413] 99 percentile: 25.00


100%|██████████| 1/1 [00:00<00:00, 12.77batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "he" } tokens { name: "would" } tokens { name: "go" } tokens { name: "to" } tokens { name: "her" } tokens { name: "and" } tokens { name: "tell" } tokens { name: "her" } tokens { name: "all" } tokens { name: "his" } tokens { name: "family" } tokens { name: "complications" }


[NeMo I 2023-11-12 08:24:53 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:53 punctuation_capitalization_infer_dataset:127] Max length: 14
[NeMo I 2023-11-12 08:24:53 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:53 data_preprocessing:406] Min: 12 |                  Max: 12 |                  Mean: 12.0 |                  Median: 12.0
[NeMo I 2023-11-12 08:24:53 data_preprocessing:412] 75 percentile: 12.00
[NeMo I 2023-11-12 08:24:53 data_preprocessing:413] 99 percentile: 12.00


100%|██████████| 1/1 [00:00<00:00, 12.60batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "he" } tokens { name: "would" } tokens { name: "explain" } tokens { name: "to" } tokens { name: "her" } tokens { name: "just" } tokens { name: "where" } tokens { name: "he" } tokens { name: "stood" } tokens { name: "and" } tokens { name: "how" } tokens { name: "much" } tokens { name: "he" } tokens { name: "needed" } tokens { name: "her" }


[NeMo I 2023-11-12 08:24:54 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:54 punctuation_capitalization_infer_dataset:127] Max length: 17
[NeMo I 2023-11-12 08:24:54 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:54 data_preprocessing:406] Min: 15 |                  Max: 15 |                  Mean: 15.0 |                  Median: 15.0
[NeMo I 2023-11-12 08:24:54 data_preprocessing:412] 75 percentile: 15.00
[NeMo I 2023-11-12 08:24:54 data_preprocessing:413] 99 percentile: 15.00


100%|██████████| 1/1 [00:00<00:00, 17.73batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "he" } tokens { name: "did" } tokens { name: "manage" } tokens { name: "to" } tokens { name: "bring" } tokens { name: "himself" } tokens { name: "into" } tokens { name: "the" } tokens { name: "mood" } tokens { name: "to" } tokens { name: "go" } tokens { name: "out" } tokens { name: "to" } tokens { name: "carrie" } tokens { name: "but" } tokens { name: "when" } tokens { name: "he" } tokens { name: "got" } tokens { name: "in" } tokens { name: "ogden" } tokens { name: "place" } tokens { name: "he" } tokens { name: "thought" } tokens { name: "he" } tokens { name: "saw" } tokens { name: "a" } tokens { name: "man" } tokens { name: "watching" } tokens { name: "him" } tokens { name: "and" } tokens { name: "went" } tokens { name: "away" }


[NeMo I 2023-11-12 08:24:54 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:54 punctuation_capitalization_infer_dataset:127] Max length: 34
[NeMo I 2023-11-12 08:24:54 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:54 data_preprocessing:406] Min: 32 |                  Max: 32 |                  Mean: 32.0 |                  Median: 32.0
[NeMo I 2023-11-12 08:24:54 data_preprocessing:412] 75 percentile: 32.00
[NeMo I 2023-11-12 08:24:54 data_preprocessing:413] 99 percentile: 32.00


100%|██████████| 1/1 [00:00<00:00, 15.71batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "he" } tokens { name: "did" } tokens { name: "not" } tokens { name: "go" } tokens { name: "within" } tokens { name: "a" } tokens { name: "block" } tokens { name: "of" } tokens { name: "the" } tokens { name: "house" }


[NeMo I 2023-11-12 08:24:54 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:54 punctuation_capitalization_infer_dataset:127] Max length: 12
[NeMo I 2023-11-12 08:24:54 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:54 data_preprocessing:406] Min: 10 |                  Max: 10 |                  Mean: 10.0 |                  Median: 10.0
[NeMo I 2023-11-12 08:24:54 data_preprocessing:412] 75 percentile: 10.00
[NeMo I 2023-11-12 08:24:54 data_preprocessing:413] 99 percentile: 10.00


100%|██████████| 1/1 [00:00<00:00, 30.85batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "he" } tokens { name: "troubled" } tokens { name: "over" } tokens { name: "many" } tokens { name: "little" } tokens { name: "details" } tokens { name: "and" } tokens { name: "talked" } tokens { name: "perfunctorily" } tokens { name: "to" } tokens { name: "everybody" }


[NeMo I 2023-11-12 08:24:54 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:54 punctuation_capitalization_infer_dataset:127] Max length: 17
[NeMo I 2023-11-12 08:24:54 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:54 data_preprocessing:406] Min: 15 |                  Max: 15 |                  Mean: 15.0 |                  Median: 15.0
[NeMo I 2023-11-12 08:24:54 data_preprocessing:412] 75 percentile: 15.00
[NeMo I 2023-11-12 08:24:54 data_preprocessing:413] 99 percentile: 15.00


100%|██████████| 1/1 [00:00<00:00, 16.54batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "he" } tokens { name: "stayed" } tokens { name: "at" } tokens { name: "his" } tokens { name: "desk" } tokens { name: "long" } tokens { name: "after" } tokens { name: "all" } tokens { name: "others" } tokens { name: "had" } tokens { name: "gone" } tokens { name: "and" } tokens { name: "only" } tokens { name: "quitted" } tokens { name: "it" } tokens { name: "when" } tokens { name: "the" } tokens { name: "night" } tokens { name: "watchman" } tokens { name: "on" } tokens { name: "his" } tokens { name: "round" } tokens { name: "pulled" } tokens { name: "at" } tokens { name: "the" } tokens { name: "front" } tokens { name: "door" } tokens { name: "to" } tokens { name: "see" } tokens { name: "if" } tokens { name: "it" } tokens { name: "was" } tokens { name: "safely" } tokens { name: "locked" }


[NeMo I 2023-11-12 08:24:54 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:54 punctuation_capitalization_infer_dataset:127] Max length: 38
[NeMo I 2023-11-12 08:24:54 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:54 data_preprocessing:406] Min: 36 |                  Max: 36 |                  Mean: 36.0 |                  Median: 36.0
[NeMo I 2023-11-12 08:24:54 data_preprocessing:412] 75 percentile: 36.00
[NeMo I 2023-11-12 08:24:54 data_preprocessing:413] 99 percentile: 36.00


100%|██████████| 1/1 [00:00<00:00, 14.45batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "on" } tokens { name: "wednesday" } tokens { name: "he" } tokens { name: "received" } tokens { name: "another" } tokens { name: "polite" } tokens { name: "note" } tokens { name: "from" } tokens { name: "mc" } tokens { name: "gregor" } tokens { name: "james" } tokens { name: "and" } tokens { name: "hay" } tokens { name: "it" } tokens { name: "read" }


[NeMo I 2023-11-12 08:24:55 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:55 punctuation_capitalization_infer_dataset:127] Max length: 17
[NeMo I 2023-11-12 08:24:55 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:55 data_preprocessing:406] Min: 15 |                  Max: 15 |                  Mean: 15.0 |                  Median: 15.0
[NeMo I 2023-11-12 08:24:55 data_preprocessing:412] 75 percentile: 15.00
[NeMo I 2023-11-12 08:24:55 data_preprocessing:413] 99 percentile: 15.00


100%|██████████| 1/1 [00:00<00:00, 22.03batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "dear" } tokens { name: "sir" } tokens { name: "we" } tokens { name: "beg" } tokens { name: "to" } tokens { name: "inform" } tokens { name: "you" } tokens { name: "that" } tokens { name: "we" } tokens { name: "are" } tokens { name: "instructed" } tokens { name: "to" } tokens { name: "wait" } tokens { name: "until" } tokens { name: "to" } tokens { name: "morrow" } tokens { name: "thursday" } tokens { name: "at" } tokens { name: "one" } tokens { name: "o'clock" } tokens { name: "before" } tokens { name: "filing" } tokens { name: "suit" } tokens { name: "against" } tokens { name: "you" } tokens { name: "on" } tokens { name: "behalf" } tokens { name: "of" } tokens { name: "missus" } tokens { name: "julia" } tokens { name: "hurstwood" } tokens { name: "for" } tokens { name: "divorce" } tokens { name: "and" } tokens { name: "alimony" }


[NeMo I 2023-11-12 08:24:55 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:55 punctuation_capitalization_infer_dataset:127] Max length: 42
[NeMo I 2023-11-12 08:24:55 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:55 data_preprocessing:406] Min: 40 |                  Max: 40 |                  Mean: 40.0 |                  Median: 40.0
[NeMo I 2023-11-12 08:24:55 data_preprocessing:412] 75 percentile: 40.00
[NeMo I 2023-11-12 08:24:55 data_preprocessing:413] 99 percentile: 40.00


100%|██████████| 1/1 [00:00<00:00, 16.58batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "very" } tokens { name: "truly" } tokens { name: "yours" } tokens { name: "et" } tokens { name: "cetera" } tokens { name: "compromise" }


[NeMo I 2023-11-12 08:24:55 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:55 punctuation_capitalization_infer_dataset:127] Max length: 9
[NeMo I 2023-11-12 08:24:55 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:55 data_preprocessing:406] Min: 7 |                  Max: 7 |                  Mean: 7.0 |                  Median: 7.0
[NeMo I 2023-11-12 08:24:55 data_preprocessing:412] 75 percentile: 7.00
[NeMo I 2023-11-12 08:24:55 data_preprocessing:413] 99 percentile: 7.00


100%|██████████| 1/1 [00:00<00:00, 36.69batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "so" } tokens { name: "here" } tokens { name: "it" } tokens { name: "was" } tokens { name: "spread" } tokens { name: "out" } tokens { name: "clear" } tokens { name: "before" } tokens { name: "him" } tokens { name: "and" } tokens { name: "now" } tokens { name: "he" } tokens { name: "knew" } tokens { name: "what" } tokens { name: "to" } tokens { name: "expect" }


[NeMo I 2023-11-12 08:24:55 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:55 punctuation_capitalization_infer_dataset:127] Max length: 18
[NeMo I 2023-11-12 08:24:55 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:55 data_preprocessing:406] Min: 16 |                  Max: 16 |                  Mean: 16.0 |                  Median: 16.0
[NeMo I 2023-11-12 08:24:55 data_preprocessing:412] 75 percentile: 16.00
[NeMo I 2023-11-12 08:24:55 data_preprocessing:413] 99 percentile: 16.00


100%|██████████| 1/1 [00:00<00:00, 13.09batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "if" } tokens { name: "he" } tokens { name: "didn't" } tokens { name: "go" } tokens { name: "and" } tokens { name: "see" } tokens { name: "them" } tokens { name: "they" } tokens { name: "would" } tokens { name: "sue" } tokens { name: "him" } tokens { name: "promptly" }


[NeMo I 2023-11-12 08:24:55 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:24:55 punctuation_capitalization_infer_dataset:127] Max length: 16
[NeMo I 2023-11-12 08:24:55 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:24:55 data_preprocessing:406] Min: 14 |                  Max: 14 |                  Mean: 14.0 |                  Median: 14.0
[NeMo I 2023-11-12 08:24:55 data_preprocessing:412] 75 percentile: 14.00
[NeMo I 2023-11-12 08:24:55 data_preprocessing:413] 99 percentile: 14.00


100%|██████████| 1/1 [00:00<00:00, 13.74batch/s]


Old path /n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2277/149897/2277-149897-0021.flac
[{'entity_group': 'MISC', 'score': 0.85783535, 'word': 'Saturday', 'start': 14, 'end': 22}]
ner tagged text He saw a busy B-MISC Saturday E-MISC ushered out the sabbath in and nothing done.
Emotion Labels ['HAPPY']
tagged transcription He saw a busy B-MISC Saturday E-MISC ushered out the sabbath in and nothing done.
Old path /n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2277/149897/2277-149897-0001.flac
[]
ner tagged text He could hardly realise how it had all come about.
Emotion Labels ['HAPPY']
tagged transcription He could hardly realise how it had all come about.
Old path /n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2277/149897/2277-149897-0031.flac
[]
ner tagged text He troubled over many little details and talked perfunctorily to everybody.
Emotion Labels ['HAPPY']
tagged transcription He troubled over many little details and talked perfunctorily to everybody.
Old 

 NeMo-text-processing :: DEBUG    :: tokens { name: "minnie's" } tokens { name: "flat" } tokens { name: "as" } tokens { name: "the" } tokens { name: "one" } tokens { name: "floor" } tokens { name: "resident" } tokens { name: "apartments" } tokens { name: "were" } tokens { name: "then" } tokens { name: "being" } tokens { name: "called" } tokens { name: "was" } tokens { name: "in" } tokens { name: "a" } tokens { name: "part" } tokens { name: "of" } tokens { name: "west" } tokens { name: "van" } tokens { name: "buren" } tokens { name: "street" } tokens { name: "inhabited" } tokens { name: "by" } tokens { name: "families" } tokens { name: "of" } tokens { name: "labourers" } tokens { name: "and" } tokens { name: "clerks" } tokens { name: "men" } tokens { name: "who" } tokens { name: "had" } tokens { name: "come" } tokens { name: "and" } tokens { name: "were" } tokens { name: "still" } tokens { name: "coming" } tokens { name: "with" } tokens { name: "the" } tokens { name: "rush" } tokens { n

[NeMo I 2023-11-12 08:25:20 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:20 punctuation_capitalization_infer_dataset:127] Max length: 55
[NeMo I 2023-11-12 08:25:20 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:20 data_preprocessing:406] Min: 53 |                  Max: 53 |                  Mean: 53.0 |                  Median: 53.0
[NeMo I 2023-11-12 08:25:20 data_preprocessing:412] 75 percentile: 53.00
[NeMo I 2023-11-12 08:25:20 data_preprocessing:413] 99 percentile: 53.00


100%|██████████| 1/1 [00:00<00:00, 15.48batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "to" } tokens { name: "carrie" } tokens { name: "the" } tokens { name: "sound" } tokens { name: "of" } tokens { name: "the" } tokens { name: "little" } tokens { name: "bells" } tokens { name: "upon" } tokens { name: "the" } tokens { name: "horse" } tokens { name: "cars" } tokens { name: "as" } tokens { name: "they" } tokens { name: "tinkled" } tokens { name: "in" } tokens { name: "and" } tokens { name: "out" } tokens { name: "of" } tokens { name: "hearing" } tokens { name: "was" } tokens { name: "as" } tokens { name: "pleasing" } tokens { name: "as" } tokens { name: "it" } tokens { name: "was" } tokens { name: "novel" }


[NeMo I 2023-11-12 08:25:20 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:20 punctuation_capitalization_infer_dataset:127] Max length: 30
[NeMo I 2023-11-12 08:25:20 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:20 data_preprocessing:406] Min: 28 |                  Max: 28 |                  Mean: 28.0 |                  Median: 28.0
[NeMo I 2023-11-12 08:25:20 data_preprocessing:412] 75 percentile: 28.00
[NeMo I 2023-11-12 08:25:20 data_preprocessing:413] 99 percentile: 28.00


100%|██████████| 1/1 [00:00<00:00, 17.66batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "to" } tokens { name: "him" } tokens { name: "the" } tokens { name: "presence" } tokens { name: "or" } tokens { name: "absence" } tokens { name: "of" } tokens { name: "his" } tokens { name: "wife's" } tokens { name: "sister" } tokens { name: "was" } tokens { name: "a" } tokens { name: "matter" } tokens { name: "of" } tokens { name: "indifference" }


[NeMo I 2023-11-12 08:25:20 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:20 punctuation_capitalization_infer_dataset:127] Max length: 19
[NeMo I 2023-11-12 08:25:20 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:20 data_preprocessing:406] Min: 17 |                  Max: 17 |                  Mean: 17.0 |                  Median: 17.0
[NeMo I 2023-11-12 08:25:20 data_preprocessing:412] 75 percentile: 17.00
[NeMo I 2023-11-12 08:25:20 data_preprocessing:413] 99 percentile: 17.00


100%|██████████| 1/1 [00:00<00:00, 21.09batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "he" } tokens { name: "was" } tokens { name: "of" } tokens { name: "a" } tokens { name: "clean" } tokens { name: "saving" } tokens { name: "disposition" } tokens { name: "and" } tokens { name: "had" } tokens { name: "already" } tokens { name: "paid" } tokens { name: "a" } tokens { name: "number" } tokens { name: "of" } tokens { name: "monthly" } tokens { name: "instalments" } tokens { name: "on" } tokens { name: "two" } tokens { name: "lots" } tokens { name: "far" } tokens { name: "out" } tokens { name: "on" } tokens { name: "the" } tokens { name: "west" } tokens { name: "side" }


[NeMo I 2023-11-12 08:25:20 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:20 punctuation_capitalization_infer_dataset:127] Max length: 29
[NeMo I 2023-11-12 08:25:20 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:20 data_preprocessing:406] Min: 27 |                  Max: 27 |                  Mean: 27.0 |                  Median: 27.0
[NeMo I 2023-11-12 08:25:20 data_preprocessing:412] 75 percentile: 27.00
[NeMo I 2023-11-12 08:25:20 data_preprocessing:413] 99 percentile: 27.00


100%|██████████| 1/1 [00:00<00:00, 16.43batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "his" } tokens { name: "ambition" } tokens { name: "was" } tokens { name: "some" } tokens { name: "day" } tokens { name: "to" } tokens { name: "build" } tokens { name: "a" } tokens { name: "house" } tokens { name: "on" } tokens { name: "them" }


[NeMo I 2023-11-12 08:25:20 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:20 punctuation_capitalization_infer_dataset:127] Max length: 13
[NeMo I 2023-11-12 08:25:20 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:20 data_preprocessing:406] Min: 11 |                  Max: 11 |                  Mean: 11.0 |                  Median: 11.0
[NeMo I 2023-11-12 08:25:20 data_preprocessing:412] 75 percentile: 11.00
[NeMo I 2023-11-12 08:25:20 data_preprocessing:413] 99 percentile: 11.00


100%|██████████| 1/1 [00:00<00:00, 23.20batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "she" } tokens { name: "had" } tokens { name: "some" } tokens { name: "slight" } tokens { name: "gift" } tokens { name: "of" } tokens { name: "observation" } tokens { name: "and" } tokens { name: "that" } tokens { name: "sense" } tokens { name: "so" } tokens { name: "rich" } tokens { name: "in" } tokens { name: "every" } tokens { name: "woman" } tokens { name: "intuition" }


[NeMo I 2023-11-12 08:25:21 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:21 punctuation_capitalization_infer_dataset:127] Max length: 18
[NeMo I 2023-11-12 08:25:21 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:21 data_preprocessing:406] Min: 16 |                  Max: 16 |                  Mean: 16.0 |                  Median: 16.0
[NeMo I 2023-11-12 08:25:21 data_preprocessing:412] 75 percentile: 16.00
[NeMo I 2023-11-12 08:25:21 data_preprocessing:413] 99 percentile: 16.00


100%|██████████| 1/1 [00:00<00:00, 16.20batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "the" } tokens { name: "walls" } tokens { name: "of" } tokens { name: "the" } tokens { name: "rooms" } tokens { name: "were" } tokens { name: "discordantly" } tokens { name: "papered" }


[NeMo I 2023-11-12 08:25:21 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:21 punctuation_capitalization_infer_dataset:127] Max length: 13
[NeMo I 2023-11-12 08:25:21 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:21 data_preprocessing:406] Min: 11 |                  Max: 11 |                  Mean: 11.0 |                  Median: 11.0
[NeMo I 2023-11-12 08:25:21 data_preprocessing:412] 75 percentile: 11.00
[NeMo I 2023-11-12 08:25:21 data_preprocessing:413] 99 percentile: 11.00


100%|██████████| 1/1 [00:00<00:00, 22.50batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "the" } tokens { name: "floors" } tokens { name: "were" } tokens { name: "covered" } tokens { name: "with" } tokens { name: "matting" } tokens { name: "and" } tokens { name: "the" } tokens { name: "hall" } tokens { name: "laid" } tokens { name: "with" } tokens { name: "a" } tokens { name: "thin" } tokens { name: "rag" } tokens { name: "carpet" }


[NeMo I 2023-11-12 08:25:21 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:21 punctuation_capitalization_infer_dataset:127] Max length: 18
[NeMo I 2023-11-12 08:25:21 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:21 data_preprocessing:406] Min: 16 |                  Max: 16 |                  Mean: 16.0 |                  Median: 16.0
[NeMo I 2023-11-12 08:25:21 data_preprocessing:412] 75 percentile: 16.00
[NeMo I 2023-11-12 08:25:21 data_preprocessing:413] 99 percentile: 16.00


100%|██████████| 1/1 [00:00<00:00, 14.96batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "then" } tokens { name: "she" } tokens { name: "walked" } tokens { name: "and" } tokens { name: "sang" } tokens { name: "to" } tokens { name: "it" } tokens { name: "until" } tokens { name: "hanson" } tokens { name: "disturbed" } tokens { name: "in" } tokens { name: "his" } tokens { name: "reading" } tokens { name: "came" } tokens { name: "and" } tokens { name: "took" } tokens { name: "it" }


[NeMo I 2023-11-12 08:25:21 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:21 punctuation_capitalization_infer_dataset:127] Max length: 19
[NeMo I 2023-11-12 08:25:21 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:21 data_preprocessing:406] Min: 17 |                  Max: 17 |                  Mean: 17.0 |                  Median: 17.0
[NeMo I 2023-11-12 08:25:21 data_preprocessing:412] 75 percentile: 17.00
[NeMo I 2023-11-12 08:25:21 data_preprocessing:413] 99 percentile: 17.00


100%|██████████| 1/1 [00:00<00:00, 17.83batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "one" } tokens { name: "could" } tokens { name: "see" } tokens { name: "that" } tokens { name: "he" } tokens { name: "was" } tokens { name: "very" } tokens { name: "much" } tokens { name: "wrapped" } tokens { name: "up" } tokens { name: "in" } tokens { name: "his" } tokens { name: "offspring" }


[NeMo I 2023-11-12 08:25:21 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:21 punctuation_capitalization_infer_dataset:127] Max length: 15
[NeMo I 2023-11-12 08:25:21 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:21 data_preprocessing:406] Min: 13 |                  Max: 13 |                  Mean: 13.0 |                  Median: 13.0
[NeMo I 2023-11-12 08:25:21 data_preprocessing:412] 75 percentile: 13.00
[NeMo I 2023-11-12 08:25:21 data_preprocessing:413] 99 percentile: 13.00


100%|██████████| 1/1 [00:00<00:00, 19.28batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "now" } tokens { name: "now" } tokens { name: "he" } tokens { name: "said" } tokens { name: "walking" } tokens { name: "there" } tokens { name: "there" } tokens { name: "and" } tokens { name: "there" } tokens { name: "was" } tokens { name: "a" } tokens { name: "certain" } tokens { name: "swedish" } tokens { name: "accent" } tokens { name: "noticeable" } tokens { name: "in" } tokens { name: "his" } tokens { name: "voice" }


[NeMo I 2023-11-12 08:25:22 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:22 punctuation_capitalization_infer_dataset:127] Max length: 20
[NeMo I 2023-11-12 08:25:22 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:22 data_preprocessing:406] Min: 18 |                  Max: 18 |                  Mean: 18.0 |                  Median: 18.0
[NeMo I 2023-11-12 08:25:22 data_preprocessing:412] 75 percentile: 18.00
[NeMo I 2023-11-12 08:25:22 data_preprocessing:413] 99 percentile: 18.00


100%|██████████| 1/1 [00:00<00:00, 14.73batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "he" } tokens { name: "seemed" } tokens { name: "to" } tokens { name: "be" } tokens { name: "thinking" } tokens { name: "of" } tokens { name: "something" } tokens { name: "else" }


[NeMo I 2023-11-12 08:25:22 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:22 punctuation_capitalization_infer_dataset:127] Max length: 10
[NeMo I 2023-11-12 08:25:22 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:22 data_preprocessing:406] Min: 8 |                  Max: 8 |                  Mean: 8.0 |                  Median: 8.0
[NeMo I 2023-11-12 08:25:22 data_preprocessing:412] 75 percentile: 8.00
[NeMo I 2023-11-12 08:25:22 data_preprocessing:413] 99 percentile: 8.00


100%|██████████| 1/1 [00:00<00:00, 26.17batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "minnie" } tokens { name: "began" } tokens { name: "to" } tokens { name: "explain" } tokens { name: "but" } tokens { name: "her" } tokens { name: "husband" } tokens { name: "took" } tokens { name: "this" } tokens { name: "part" } tokens { name: "of" } tokens { name: "the" } tokens { name: "conversation" } tokens { name: "to" } tokens { name: "himself" }


[NeMo I 2023-11-12 08:25:22 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:22 punctuation_capitalization_infer_dataset:127] Max length: 17
[NeMo I 2023-11-12 08:25:22 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:22 data_preprocessing:406] Min: 15 |                  Max: 15 |                  Mean: 15.0 |                  Median: 15.0
[NeMo I 2023-11-12 08:25:22 data_preprocessing:412] 75 percentile: 15.00
[NeMo I 2023-11-12 08:25:22 data_preprocessing:413] 99 percentile: 15.00


100%|██████████| 1/1 [00:00<00:00, 16.60batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "you" } tokens { name: "could" } tokens { name: "get" } tokens { name: "home" } tokens { name: "easy" } tokens { name: "too" } tokens { name: "it" } tokens { name: "isn't" } tokens { name: "very" } tokens { name: "far" }


[NeMo I 2023-11-12 08:25:22 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:22 punctuation_capitalization_infer_dataset:127] Max length: 14
[NeMo I 2023-11-12 08:25:22 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:22 data_preprocessing:406] Min: 12 |                  Max: 12 |                  Mean: 12.0 |                  Median: 12.0
[NeMo I 2023-11-12 08:25:22 data_preprocessing:412] 75 percentile: 12.00
[NeMo I 2023-11-12 08:25:22 data_preprocessing:413] 99 percentile: 12.00


100%|██████████| 1/1 [00:00<00:00, 19.54batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "she" } tokens { name: "asked" } tokens { name: "minnie" } tokens { name: "for" } tokens { name: "ink" } tokens { name: "and" } tokens { name: "paper" } tokens { name: "which" } tokens { name: "were" } tokens { name: "upon" } tokens { name: "the" } tokens { name: "mantel" } tokens { name: "in" } tokens { name: "the" } tokens { name: "dining" } tokens { name: "room" } tokens { name: "and" } tokens { name: "when" } tokens { name: "the" } tokens { name: "latter" } tokens { name: "had" } tokens { name: "gone" } tokens { name: "to" } tokens { name: "bed" } tokens { name: "at" } tokens { name: "ten" } tokens { name: "got" } tokens { name: "out" } tokens { name: "drouet's" } tokens { name: "card" } tokens { name: "and" } tokens { name: "wrote" } tokens { name: "him" }


[NeMo I 2023-11-12 08:25:22 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:22 punctuation_capitalization_infer_dataset:127] Max length: 40
[NeMo I 2023-11-12 08:25:22 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:22 data_preprocessing:406] Min: 38 |                  Max: 38 |                  Mean: 38.0 |                  Median: 38.0
[NeMo I 2023-11-12 08:25:22 data_preprocessing:412] 75 percentile: 38.00
[NeMo I 2023-11-12 08:25:22 data_preprocessing:413] 99 percentile: 38.00


100%|██████████| 1/1 [00:00<00:00, 10.85batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "she" } tokens { name: "wanted" } tokens { name: "to" } tokens { name: "make" } tokens { name: "some" } tokens { name: "reference" } tokens { name: "to" } tokens { name: "their" } tokens { name: "relations" } tokens { name: "upon" } tokens { name: "the" } tokens { name: "train" } tokens { name: "but" } tokens { name: "was" } tokens { name: "too" } tokens { name: "timid" }


[NeMo I 2023-11-12 08:25:22 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:22 punctuation_capitalization_infer_dataset:127] Max length: 19
[NeMo I 2023-11-12 08:25:22 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:22 data_preprocessing:406] Min: 17 |                  Max: 17 |                  Mean: 17.0 |                  Median: 17.0
[NeMo I 2023-11-12 08:25:22 data_preprocessing:412] 75 percentile: 17.00
[NeMo I 2023-11-12 08:25:22 data_preprocessing:413] 99 percentile: 17.00


100%|██████████| 1/1 [00:00<00:00, 17.97batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "anything" } tokens { name: "was" } tokens { name: "good" } tokens { name: "enough" } tokens { name: "so" } tokens { name: "long" } tokens { name: "as" } tokens { name: "it" } tokens { name: "paid" } tokens { name: "say" } tokens { name: "five" } tokens { name: "dollars" } tokens { name: "a" } tokens { name: "week" } tokens { name: "to" } tokens { name: "begin" } tokens { name: "with" }


[NeMo I 2023-11-12 08:25:23 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:23 punctuation_capitalization_infer_dataset:127] Max length: 19
[NeMo I 2023-11-12 08:25:23 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:23 data_preprocessing:406] Min: 17 |                  Max: 17 |                  Mean: 17.0 |                  Median: 17.0
[NeMo I 2023-11-12 08:25:23 data_preprocessing:412] 75 percentile: 17.00
[NeMo I 2023-11-12 08:25:23 data_preprocessing:413] 99 percentile: 17.00


100%|██████████| 1/1 [00:00<00:00, 18.52batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "a" } tokens { name: "shop" } tokens { name: "girl" } tokens { name: "was" } tokens { name: "the" } tokens { name: "destiny" } tokens { name: "prefigured" } tokens { name: "for" } tokens { name: "the" } tokens { name: "newcomer" }


[NeMo I 2023-11-12 08:25:23 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:23 punctuation_capitalization_infer_dataset:127] Max length: 15
[NeMo I 2023-11-12 08:25:23 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:23 data_preprocessing:406] Min: 13 |                  Max: 13 |                  Mean: 13.0 |                  Median: 13.0
[NeMo I 2023-11-12 08:25:23 data_preprocessing:412] 75 percentile: 13.00
[NeMo I 2023-11-12 08:25:23 data_preprocessing:413] 99 percentile: 13.00


100%|██████████| 1/1 [00:00<00:00, 22.92batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "it" } tokens { name: "was" } tokens { name: "under" } tokens { name: "such" } tokens { name: "auspicious" } tokens { name: "circumstances" } tokens { name: "that" } tokens { name: "she" } tokens { name: "started" } tokens { name: "out" } tokens { name: "this" } tokens { name: "morning" } tokens { name: "to" } tokens { name: "look" } tokens { name: "for" } tokens { name: "work" }


[NeMo I 2023-11-12 08:25:23 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:23 punctuation_capitalization_infer_dataset:127] Max length: 20
[NeMo I 2023-11-12 08:25:23 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:23 data_preprocessing:406] Min: 18 |                  Max: 18 |                  Mean: 18.0 |                  Median: 18.0
[NeMo I 2023-11-12 08:25:23 data_preprocessing:412] 75 percentile: 18.00
[NeMo I 2023-11-12 08:25:23 data_preprocessing:413] 99 percentile: 18.00


100%|██████████| 1/1 [00:00<00:00, 17.01batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "narrow" } tokens { name: "board" } tokens { name: "walks" } tokens { name: "extended" } tokens { name: "out" } tokens { name: "passing" } tokens { name: "here" } tokens { name: "a" } tokens { name: "house" } tokens { name: "and" } tokens { name: "there" } tokens { name: "a" } tokens { name: "store" } tokens { name: "at" } tokens { name: "far" } tokens { name: "intervals" } tokens { name: "eventually" } tokens { name: "ending" } tokens { name: "on" } tokens { name: "the" } tokens { name: "open" } tokens { name: "prairie" }


[NeMo I 2023-11-12 08:25:23 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:23 punctuation_capitalization_infer_dataset:127] Max length: 24
[NeMo I 2023-11-12 08:25:23 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:23 data_preprocessing:406] Min: 22 |                  Max: 22 |                  Mean: 22.0 |                  Median: 22.0
[NeMo I 2023-11-12 08:25:23 data_preprocessing:412] 75 percentile: 22.00
[NeMo I 2023-11-12 08:25:23 data_preprocessing:413] 99 percentile: 22.00


100%|██████████| 1/1 [00:00<00:00, 13.32batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "it" } tokens { name: "gave" } tokens { name: "an" } tokens { name: "imposing" } tokens { name: "appearance" } tokens { name: "to" } tokens { name: "most" } tokens { name: "of" } tokens { name: "the" } tokens { name: "wholesale" } tokens { name: "houses" } tokens { name: "whose" } tokens { name: "offices" } tokens { name: "were" } tokens { name: "upon" } tokens { name: "the" } tokens { name: "ground" } tokens { name: "floor" } tokens { name: "and" } tokens { name: "in" } tokens { name: "plain" } tokens { name: "view" } tokens { name: "of" } tokens { name: "the" } tokens { name: "street" }


[NeMo I 2023-11-12 08:25:23 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:23 punctuation_capitalization_infer_dataset:127] Max length: 27
[NeMo I 2023-11-12 08:25:23 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:23 data_preprocessing:406] Min: 25 |                  Max: 25 |                  Mean: 25.0 |                  Median: 25.0
[NeMo I 2023-11-12 08:25:23 data_preprocessing:412] 75 percentile: 25.00
[NeMo I 2023-11-12 08:25:23 data_preprocessing:413] 99 percentile: 25.00


100%|██████████| 1/1 [00:00<00:00, 15.54batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "these" } tokens { name: "vast" } tokens { name: "buildings" } tokens { name: "what" } tokens { name: "were" } tokens { name: "they" }


[NeMo I 2023-11-12 08:25:23 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:23 punctuation_capitalization_infer_dataset:127] Max length: 8
[NeMo I 2023-11-12 08:25:23 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:23 data_preprocessing:406] Min: 6 |                  Max: 6 |                  Mean: 6.0 |                  Median: 6.0
[NeMo I 2023-11-12 08:25:23 data_preprocessing:412] 75 percentile: 6.00
[NeMo I 2023-11-12 08:25:23 data_preprocessing:413] 99 percentile: 6.00


100%|██████████| 1/1 [00:00<00:00, 22.28batch/s]


Old path /n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2277/149874/2277-149874-0000.flac
[{'entity_group': 'PER', 'score': 0.8913538, 'word': 'Min', 'start': 0, 'end': 3}, {'entity_group': 'LOC', 'score': 0.9983326, 'word': 'West Van Buren Street', 'start': 93, 'end': 114}]
ner tagged text B-PER Min E-PERnie's flat, as the one floor resident apartments were then being called, was in a part of B-LOC West Van Buren Street E-LOC, inhabited by families of labourers and clerks, men who had come and were still coming with the rush of population pouring in at the rate of fifty thousand a year.
Emotion Labels ['DISGUST']
tagged transcription B-PER Min E-PERnie's flat, as the one floor resident apartments were then being called, was in a part of B-LOC West Van Buren Street E-LOC, inhabited by families of labourers and clerks, men who had come and were still coming with the rush of population pouring in at the rate of fifty thousand a year.
Old path /n/disk1/audio_datasets/EN_libre/Libr

 NeMo-text-processing :: DEBUG    :: tokens { name: "she" } tokens { name: "was" } tokens { name: "four" } tokens { name: "years" } tokens { name: "older" } tokens { name: "than" } tokens { name: "i" } tokens { name: "to" } tokens { name: "be" } tokens { name: "sure" } tokens { name: "and" } tokens { name: "had" } tokens { name: "seen" } tokens { name: "more" } tokens { name: "of" } tokens { name: "the" } tokens { name: "world" } tokens { name: "but" } tokens { name: "i" } tokens { name: "was" } tokens { name: "a" } tokens { name: "boy" } tokens { name: "and" } tokens { name: "she" } tokens { name: "was" } tokens { name: "a" } tokens { name: "girl" } tokens { name: "and" } tokens { name: "i" } tokens { name: "resented" } tokens { name: "her" } tokens { name: "protecting" } tokens { name: "manner" }


Emotion Labels ['HAPPY']
tagged transcription These vast buildings, what were they?
ss /n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2035/147960
segments ['/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2035/147960/2035-147960-0013.flac', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2035/147960/2035-147960-0008.flac', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2035/147960/2035-147960-0014.flac', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2035/147960/2035-147960-0011.flac', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2035/147960/2035-147960-0000.flac', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2035/147960/2035-147960-0010.flac', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2035/147960/2035-147960-0005.flac', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2035/147960/2035-147960.trans.txt', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2035/147960/2035-147960-0001.flac', '/n/disk

100%|██████████| 1/1 [00:00<00:00, 17.83batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "this" } tokens { name: "change" } tokens { name: "came" } tokens { name: "about" } tokens { name: "from" } tokens { name: "an" } tokens { name: "adventure" } tokens { name: "we" } tokens { name: "had" } tokens { name: "together" }


[NeMo I 2023-11-12 08:25:39 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:39 punctuation_capitalization_infer_dataset:127] Max length: 12
[NeMo I 2023-11-12 08:25:39 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:39 data_preprocessing:406] Min: 10 |                  Max: 10 |                  Mean: 10.0 |                  Median: 10.0
[NeMo I 2023-11-12 08:25:39 data_preprocessing:412] 75 percentile: 10.00
[NeMo I 2023-11-12 08:25:39 data_preprocessing:413] 99 percentile: 10.00


100%|██████████| 1/1 [00:00<00:00, 22.42batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "one" } tokens { name: "day" } tokens { name: "when" } tokens { name: "i" } tokens { name: "rode" } tokens { name: "over" } tokens { name: "to" } tokens { name: "the" } tokens { name: "shimerdas" } tokens { name: "i" } tokens { name: "found" } tokens { name: "antonia" } tokens { name: "starting" } tokens { name: "off" } tokens { name: "on" } tokens { name: "foot" } tokens { name: "for" } tokens { name: "russian" } tokens { name: "peter's" } tokens { name: "house" } tokens { name: "to" } tokens { name: "borrow" } tokens { name: "a" } tokens { name: "spade" } tokens { name: "ambrosch" } tokens { name: "needed" }


[NeMo I 2023-11-12 08:25:39 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:39 punctuation_capitalization_infer_dataset:127] Max length: 34
[NeMo I 2023-11-12 08:25:39 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:39 data_preprocessing:406] Min: 32 |                  Max: 32 |                  Mean: 32.0 |                  Median: 32.0
[NeMo I 2023-11-12 08:25:39 data_preprocessing:412] 75 percentile: 32.00
[NeMo I 2023-11-12 08:25:39 data_preprocessing:413] 99 percentile: 32.00


100%|██████████| 1/1 [00:00<00:00, 17.01batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "there" } tokens { name: "had" } tokens { name: "been" } tokens { name: "another" } tokens { name: "black" } tokens { name: "frost" } tokens { name: "the" } tokens { name: "night" } tokens { name: "before" } tokens { name: "and" } tokens { name: "the" } tokens { name: "air" } tokens { name: "was" } tokens { name: "clear" } tokens { name: "and" } tokens { name: "heady" } tokens { name: "as" } tokens { name: "wine" }


[NeMo I 2023-11-12 08:25:39 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:39 punctuation_capitalization_infer_dataset:127] Max length: 21
[NeMo I 2023-11-12 08:25:39 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:39 data_preprocessing:406] Min: 19 |                  Max: 19 |                  Mean: 19.0 |                  Median: 19.0
[NeMo I 2023-11-12 08:25:39 data_preprocessing:412] 75 percentile: 19.00
[NeMo I 2023-11-12 08:25:39 data_preprocessing:413] 99 percentile: 19.00


100%|██████████| 1/1 [00:00<00:00, 17.01batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "it" } tokens { name: "was" } tokens { name: "on" } tokens { name: "one" } tokens { name: "of" } tokens { name: "these" } tokens { name: "gravel" } tokens { name: "beds" } tokens { name: "that" } tokens { name: "i" } tokens { name: "met" } tokens { name: "my" } tokens { name: "adventure" }


[NeMo I 2023-11-12 08:25:39 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:39 punctuation_capitalization_infer_dataset:127] Max length: 15
[NeMo I 2023-11-12 08:25:39 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:39 data_preprocessing:406] Min: 13 |                  Max: 13 |                  Mean: 13.0 |                  Median: 13.0
[NeMo I 2023-11-12 08:25:39 data_preprocessing:412] 75 percentile: 13.00
[NeMo I 2023-11-12 08:25:39 data_preprocessing:413] 99 percentile: 13.00


100%|██████████| 1/1 [00:00<00:00, 14.84batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "i" } tokens { name: "whirled" } tokens { name: "round" } tokens { name: "and" } tokens { name: "there" } tokens { name: "on" } tokens { name: "one" } tokens { name: "of" } tokens { name: "those" } tokens { name: "dry" } tokens { name: "gravel" } tokens { name: "beds" } tokens { name: "was" } tokens { name: "the" } tokens { name: "biggest" } tokens { name: "snake" } tokens { name: "i" } tokens { name: "had" } tokens { name: "ever" } tokens { name: "seen" }


[NeMo I 2023-11-12 08:25:39 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:39 punctuation_capitalization_infer_dataset:127] Max length: 22
[NeMo I 2023-11-12 08:25:39 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:39 data_preprocessing:406] Min: 20 |                  Max: 20 |                  Mean: 20.0 |                  Median: 20.0
[NeMo I 2023-11-12 08:25:39 data_preprocessing:412] 75 percentile: 20.00
[NeMo I 2023-11-12 08:25:39 data_preprocessing:413] 99 percentile: 20.00


100%|██████████| 1/1 [00:00<00:00, 15.23batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "i" } tokens { name: "know" } tokens { name: "i" } tokens { name: "am" } tokens { name: "just" } tokens { name: "awful" } tokens { name: "jim" } tokens { name: "i" } tokens { name: "was" } tokens { name: "so" } tokens { name: "scared" }


[NeMo I 2023-11-12 08:25:40 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:40 punctuation_capitalization_infer_dataset:127] Max length: 13
[NeMo I 2023-11-12 08:25:40 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:40 data_preprocessing:406] Min: 11 |                  Max: 11 |                  Mean: 11.0 |                  Median: 11.0
[NeMo I 2023-11-12 08:25:40 data_preprocessing:412] 75 percentile: 11.00
[NeMo I 2023-11-12 08:25:40 data_preprocessing:413] 99 percentile: 11.00


100%|██████████| 1/1 [00:00<00:00, 12.01batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "i" } tokens { name: "never" } tokens { name: "know" } tokens { name: "you" } tokens { name: "was" } tokens { name: "so" } tokens { name: "brave" } tokens { name: "jim" } tokens { name: "she" } tokens { name: "went" } tokens { name: "on" } tokens { name: "comfortingly" }


[NeMo I 2023-11-12 08:25:40 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:40 punctuation_capitalization_infer_dataset:127] Max length: 15
[NeMo I 2023-11-12 08:25:40 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:40 data_preprocessing:406] Min: 13 |                  Max: 13 |                  Mean: 13.0 |                  Median: 13.0
[NeMo I 2023-11-12 08:25:40 data_preprocessing:412] 75 percentile: 13.00
[NeMo I 2023-11-12 08:25:40 data_preprocessing:413] 99 percentile: 13.00


100%|██████████| 1/1 [00:00<00:00, 20.51batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "a" } tokens { name: "faint" } tokens { name: "fetid" } tokens { name: "smell" } tokens { name: "came" } tokens { name: "from" } tokens { name: "him" } tokens { name: "and" } tokens { name: "a" } tokens { name: "thread" } tokens { name: "of" } tokens { name: "green" } tokens { name: "liquid" } tokens { name: "oozed" } tokens { name: "from" } tokens { name: "his" } tokens { name: "crushed" } tokens { name: "head" }


[NeMo I 2023-11-12 08:25:40 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:40 punctuation_capitalization_infer_dataset:127] Max length: 24
[NeMo I 2023-11-12 08:25:40 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:40 data_preprocessing:406] Min: 22 |                  Max: 22 |                  Mean: 22.0 |                  Median: 22.0
[NeMo I 2023-11-12 08:25:40 data_preprocessing:412] 75 percentile: 22.00
[NeMo I 2023-11-12 08:25:40 data_preprocessing:413] 99 percentile: 22.00


100%|██████████| 1/1 [00:00<00:00, 12.41batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "look" } tokens { name: "tony" } tokens { name: "that's" } tokens { name: "his" } tokens { name: "poison" } tokens { name: "i" } tokens { name: "said" }


[NeMo I 2023-11-12 08:25:40 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:40 punctuation_capitalization_infer_dataset:127] Max length: 11
[NeMo I 2023-11-12 08:25:40 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:40 data_preprocessing:406] Min: 9 |                  Max: 9 |                  Mean: 9.0 |                  Median: 9.0
[NeMo I 2023-11-12 08:25:40 data_preprocessing:412] 75 percentile: 9.00
[NeMo I 2023-11-12 08:25:40 data_preprocessing:413] 99 percentile: 9.00


100%|██████████| 1/1 [00:00<00:00, 24.18batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "i" } tokens { name: "explained" } tokens { name: "to" } tokens { name: "antonia" } tokens { name: "how" } tokens { name: "this" } tokens { name: "meant" } tokens { name: "that" } tokens { name: "he" } tokens { name: "was" } tokens { name: "twenty" } tokens { name: "four" } tokens { name: "years" } tokens { name: "old" } tokens { name: "that" } tokens { name: "he" } tokens { name: "must" } tokens { name: "have" } tokens { name: "been" } tokens { name: "there" } tokens { name: "when" } tokens { name: "white" } tokens { name: "men" } tokens { name: "first" } tokens { name: "came" } tokens { name: "left" } tokens { name: "on" } tokens { name: "from" } tokens { name: "buffalo" } tokens { name: "and" } tokens { name: "indian" } tokens { name: "times" }


[NeMo I 2023-11-12 08:25:40 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:40 punctuation_capitalization_infer_dataset:127] Max length: 34
[NeMo I 2023-11-12 08:25:40 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:40 data_preprocessing:406] Min: 32 |                  Max: 32 |                  Mean: 32.0 |                  Median: 32.0
[NeMo I 2023-11-12 08:25:40 data_preprocessing:412] 75 percentile: 32.00
[NeMo I 2023-11-12 08:25:40 data_preprocessing:413] 99 percentile: 32.00


100%|██████████| 1/1 [00:00<00:00, 18.00batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "we" } tokens { name: "decided" } tokens { name: "that" } tokens { name: "antonia" } tokens { name: "should" } tokens { name: "ride" } tokens { name: "dude" } tokens { name: "home" } tokens { name: "and" } tokens { name: "i" } tokens { name: "would" } tokens { name: "walk" }


[NeMo I 2023-11-12 08:25:40 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:40 punctuation_capitalization_infer_dataset:127] Max length: 14
[NeMo I 2023-11-12 08:25:40 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:40 data_preprocessing:406] Min: 12 |                  Max: 12 |                  Mean: 12.0 |                  Median: 12.0
[NeMo I 2023-11-12 08:25:40 data_preprocessing:412] 75 percentile: 12.00
[NeMo I 2023-11-12 08:25:40 data_preprocessing:413] 99 percentile: 12.00


100%|██████████| 1/1 [00:00<00:00, 24.87batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "i" } tokens { name: "followed" } tokens { name: "with" } tokens { name: "the" } tokens { name: "spade" } tokens { name: "over" } tokens { name: "my" } tokens { name: "shoulder" } tokens { name: "dragging" } tokens { name: "my" } tokens { name: "snake" }


[NeMo I 2023-11-12 08:25:41 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:41 punctuation_capitalization_infer_dataset:127] Max length: 13
[NeMo I 2023-11-12 08:25:41 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:41 data_preprocessing:406] Min: 11 |                  Max: 11 |                  Mean: 11.0 |                  Median: 11.0
[NeMo I 2023-11-12 08:25:41 data_preprocessing:412] 75 percentile: 11.00
[NeMo I 2023-11-12 08:25:41 data_preprocessing:413] 99 percentile: 11.00


100%|██████████| 1/1 [00:00<00:00, 24.77batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "otto" } tokens { name: "fuchs" } tokens { name: "was" } tokens { name: "the" } tokens { name: "first" } tokens { name: "one" } tokens { name: "we" } tokens { name: "met" }


[NeMo I 2023-11-12 08:25:41 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:41 punctuation_capitalization_infer_dataset:127] Max length: 11
[NeMo I 2023-11-12 08:25:41 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:41 data_preprocessing:406] Min: 9 |                  Max: 9 |                  Mean: 9.0 |                  Median: 9.0
[NeMo I 2023-11-12 08:25:41 data_preprocessing:412] 75 percentile: 9.00
[NeMo I 2023-11-12 08:25:41 data_preprocessing:413] 99 percentile: 9.00


100%|██████████| 1/1 [00:00<00:00, 26.02batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "he" } tokens { name: "could" } tokens { name: "stand" } tokens { name: "right" } tokens { name: "up" } tokens { name: "and" } tokens { name: "talk" } tokens { name: "to" } tokens { name: "you" } tokens { name: "he" } tokens { name: "could" } tokens { name: "did" } tokens { name: "he" } tokens { name: "fight" } tokens { name: "hard" }


[NeMo I 2023-11-12 08:25:41 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:41 punctuation_capitalization_infer_dataset:127] Max length: 17
[NeMo I 2023-11-12 08:25:41 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:41 data_preprocessing:406] Min: 15 |                  Max: 15 |                  Mean: 15.0 |                  Median: 15.0
[NeMo I 2023-11-12 08:25:41 data_preprocessing:412] 75 percentile: 15.00
[NeMo I 2023-11-12 08:25:41 data_preprocessing:413] 99 percentile: 15.00


100%|██████████| 1/1 [00:00<00:00, 19.84batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "otto" } tokens { name: "winked" } tokens { name: "at" } tokens { name: "me" }


[NeMo I 2023-11-12 08:25:41 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:41 punctuation_capitalization_infer_dataset:127] Max length: 6
[NeMo I 2023-11-12 08:25:41 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:41 data_preprocessing:406] Min: 4 |                  Max: 4 |                  Mean: 4.0 |                  Median: 4.0
[NeMo I 2023-11-12 08:25:41 data_preprocessing:412] 75 percentile: 4.00
[NeMo I 2023-11-12 08:25:41 data_preprocessing:413] 99 percentile: 4.00


100%|██████████| 1/1 [00:00<00:00, 35.07batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "a" } tokens { name: "snake" } tokens { name: "of" } tokens { name: "his" } tokens { name: "size" } tokens { name: "in" } tokens { name: "fighting" } tokens { name: "trim" } tokens { name: "would" } tokens { name: "be" } tokens { name: "more" } tokens { name: "than" } tokens { name: "any" } tokens { name: "boy" } tokens { name: "could" } tokens { name: "handle" }


[NeMo I 2023-11-12 08:25:41 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:41 punctuation_capitalization_infer_dataset:127] Max length: 18
[NeMo I 2023-11-12 08:25:41 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:41 data_preprocessing:406] Min: 16 |                  Max: 16 |                  Mean: 16.0 |                  Median: 16.0
[NeMo I 2023-11-12 08:25:41 data_preprocessing:412] 75 percentile: 16.00
[NeMo I 2023-11-12 08:25:41 data_preprocessing:413] 99 percentile: 16.00


100%|██████████| 1/1 [00:00<00:00, 19.28batch/s]


Old path /n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2035/147960/2035-147960-0013.flac
[{'entity_group': 'PER', 'score': 0.99992615, 'word': 'Otto Fuchs', 'start': 0, 'end': 10}]
ner tagged text B-PER Otto Fuchs E-PER was the first one we met.
Emotion Labels ['NEUTRAL']
tagged transcription B-PER Otto Fuchs E-PER was the first one we met.
Old path /n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2035/147960/2035-147960-0008.flac
[]
ner tagged text A faint fetid smell came from him and a thread of green liquid oozed from his crushed head.
Emotion Labels ['NEUTRAL']
tagged transcription A faint fetid smell came from him and a thread of green liquid oozed from his crushed head.
Old path /n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2035/147960/2035-147960-0014.flac
[]
ner tagged text He could stand right up and talk to you. He could, Did he fight hard?
Emotion Labels ['HAPPY']
tagged transcription He could stand right up and talk to you. He could, Did he fight ha

 NeMo-text-processing :: DEBUG    :: tokens { name: "throughout" } tokens { name: "this" } tokens { name: "century" } tokens { name: "the" } tokens { name: "power" } tokens { name: "of" } tokens { name: "the" } tokens { name: "church" } tokens { name: "was" } tokens { name: "constantly" } tokens { name: "on" } tokens { name: "the" } tokens { name: "increase" } tokens { name: "and" } tokens { name: "is" } tokens { name: "visible" } tokens { name: "in" } tokens { name: "many" } tokens { name: "important" } tokens { name: "changes" }


Emotion Labels ['NEUTRAL']
tagged transcription I followed with the spade over my shoulder, dragging my snake.
ss /n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2035/152373
segments ['/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2035/152373/2035-152373-0003.flac', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2035/152373/2035-152373-0014.flac', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2035/152373/2035-152373-0005.flac', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2035/152373/2035-152373-0006.flac', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2035/152373/2035-152373-0002.flac', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2035/152373/2035-152373-0013.flac', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2035/152373/2035-152373-0016.flac', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2035/152373/2035-152373-0012.flac', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2035/152373/2035-

100%|██████████| 1/1 [00:00<00:00, 11.28batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "the" } tokens { name: "ancestors" } tokens { name: "of" } tokens { name: "the" } tokens { name: "present" } tokens { name: "pretender" } tokens { name: "congal" } tokens { name: "surnamed" } tokens { name: "the" } tokens { name: "squint" } tokens { name: "eyed" } tokens { name: "had" } tokens { name: "twice" } tokens { name: "received" } tokens { name: "and" } tokens { name: "cherished" } tokens { name: "the" } tokens { name: "licentious" } tokens { name: "bards" } tokens { name: "when" } tokens { name: "under" } tokens { name: "the" } tokens { name: "ban" } tokens { name: "of" } tokens { name: "tara" } tokens { name: "and" } tokens { name: "his" } tokens { name: "popularity" } tokens { name: "with" } tokens { name: "that" } tokens { name: "still" } tokens { name: "powerful" } tokens { name: "order" } tokens { name: "was" } tokens { name: "one" } tokens { name: "prop" } tokens { name: 

[NeMo I 2023-11-12 08:25:53 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:53 punctuation_capitalization_infer_dataset:127] Max length: 50
[NeMo I 2023-11-12 08:25:53 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:53 data_preprocessing:406] Min: 48 |                  Max: 48 |                  Mean: 48.0 |                  Median: 48.0
[NeMo I 2023-11-12 08:25:53 data_preprocessing:412] 75 percentile: 48.00
[NeMo I 2023-11-12 08:25:53 data_preprocessing:413] 99 percentile: 48.00


100%|██████████| 1/1 [00:00<00:00,  9.71batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "it" } tokens { name: "is" } tokens { name: "pretty" } tokens { name: "clear" } tokens { name: "also" } tokens { name: "that" } tokens { name: "the" } tokens { name: "last" } tokens { name: "rally" } tokens { name: "of" } tokens { name: "druidism" } tokens { name: "against" } tokens { name: "christianity" } tokens { name: "took" } tokens { name: "place" } tokens { name: "behind" } tokens { name: "his" } tokens { name: "banner" } tokens { name: "on" } tokens { name: "the" } tokens { name: "plain" } tokens { name: "of" } tokens { name: "moira" }


[NeMo I 2023-11-12 08:25:53 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:53 punctuation_capitalization_infer_dataset:127] Max length: 28
[NeMo I 2023-11-12 08:25:53 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:53 data_preprocessing:406] Min: 26 |                  Max: 26 |                  Mean: 26.0 |                  Median: 26.0
[NeMo I 2023-11-12 08:25:53 data_preprocessing:412] 75 percentile: 26.00
[NeMo I 2023-11-12 08:25:53 data_preprocessing:413] 99 percentile: 26.00


100%|██████████| 1/1 [00:00<00:00, 12.69batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "the" } tokens { name: "poets" } tokens { name: "of" } tokens { name: "succeeding" } tokens { name: "ages" } tokens { name: "have" } tokens { name: "dwelt" } tokens { name: "much" } tokens { name: "in" } tokens { name: "detail" } tokens { name: "on" } tokens { name: "the" } tokens { name: "occurrences" } tokens { name: "of" } tokens { name: "this" } tokens { name: "memorable" } tokens { name: "day" }


[NeMo I 2023-11-12 08:25:53 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:53 punctuation_capitalization_infer_dataset:127] Max length: 21
[NeMo I 2023-11-12 08:25:53 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:53 data_preprocessing:406] Min: 19 |                  Max: 19 |                  Mean: 19.0 |                  Median: 19.0
[NeMo I 2023-11-12 08:25:53 data_preprocessing:412] 75 percentile: 19.00
[NeMo I 2023-11-12 08:25:53 data_preprocessing:413] 99 percentile: 19.00


100%|██████████| 1/1 [00:00<00:00, 16.98batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "like" } tokens { name: "the" } tokens { name: "two" } tokens { name: "kings" } tokens { name: "of" } tokens { name: "sparta" } tokens { name: "they" } tokens { name: "reigned" } tokens { name: "jointly" } tokens { name: "dividing" } tokens { name: "between" } tokens { name: "them" } tokens { name: "the" } tokens { name: "labours" } tokens { name: "and" } tokens { name: "cares" } tokens { name: "of" } tokens { name: "state" }


[NeMo I 2023-11-12 08:25:53 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:53 punctuation_capitalization_infer_dataset:127] Max length: 21
[NeMo I 2023-11-12 08:25:53 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:53 data_preprocessing:406] Min: 19 |                  Max: 19 |                  Mean: 19.0 |                  Median: 19.0
[NeMo I 2023-11-12 08:25:53 data_preprocessing:412] 75 percentile: 19.00
[NeMo I 2023-11-12 08:25:53 data_preprocessing:413] 99 percentile: 19.00


100%|██████████| 1/1 [00:00<00:00, 18.07batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "it" } tokens { name: "was" } tokens { name: "the" } tokens { name: "season" } tokens { name: "when" } tokens { name: "the" } tokens { name: "ancient" } tokens { name: "sun" } tokens { name: "god" } tokens { name: "had" } tokens { name: "been" } tokens { name: "accustomed" } tokens { name: "to" } tokens { name: "receive" } tokens { name: "his" } tokens { name: "annual" } tokens { name: "oblations" } tokens { name: "and" } tokens { name: "we" } tokens { name: "can" } tokens { name: "well" } tokens { name: "believe" } tokens { name: "that" } tokens { name: "those" } tokens { name: "whose" } tokens { name: "hearts" } tokens { name: "still" } tokens { name: "trembled" } tokens { name: "at" } tokens { name: "the" } tokens { name: "name" } tokens { name: "of" } tokens { name: "bel" } tokens { name: "must" } tokens { name: "have" } tokens { name: "connected" } tokens { name: "the" } tokens { n

[NeMo I 2023-11-12 08:25:54 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:54 punctuation_capitalization_infer_dataset:127] Max length: 64
[NeMo I 2023-11-12 08:25:54 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:54 data_preprocessing:406] Min: 76 |                  Max: 76 |                  Mean: 76.0 |                  Median: 76.0
[NeMo I 2023-11-12 08:25:54 data_preprocessing:412] 75 percentile: 76.00
[NeMo I 2023-11-12 08:25:54 data_preprocessing:413] 99 percentile: 76.00


100%|██████████| 3/3 [00:00<00:00, 15.63batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "lastly" } tokens { name: "the" } tokens { name: "royal" } tokens { name: "brothers" } tokens { name: "fell" } tokens { name: "themselves" } tokens { name: "victims" } tokens { name: "to" } tokens { name: "the" } tokens { name: "epidemic" } tokens { name: "which" } tokens { name: "so" } tokens { name: "sadly" } tokens { name: "signalizes" } tokens { name: "their" } tokens { name: "reign" }


[NeMo I 2023-11-12 08:25:54 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:54 punctuation_capitalization_infer_dataset:127] Max length: 19
[NeMo I 2023-11-12 08:25:54 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:54 data_preprocessing:406] Min: 17 |                  Max: 17 |                  Mean: 17.0 |                  Median: 17.0
[NeMo I 2023-11-12 08:25:54 data_preprocessing:412] 75 percentile: 17.00
[NeMo I 2023-11-12 08:25:54 data_preprocessing:413] 99 percentile: 17.00


100%|██████████| 1/1 [00:00<00:00, 22.88batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "the" } tokens { name: "only" } tokens { name: "conflicts" } tokens { name: "that" } tokens { name: "occurred" } tokens { name: "on" } tokens { name: "irish" } tokens { name: "soil" } tokens { name: "with" } tokens { name: "a" } tokens { name: "pictish" } tokens { name: "or" } tokens { name: "an" } tokens { name: "anglo" } tokens { name: "saxon" } tokens { name: "force" } tokens { name: "if" } tokens { name: "we" } tokens { name: "except" } tokens { name: "those" } tokens { name: "who" } tokens { name: "formed" } tokens { name: "a" } tokens { name: "contingent" } tokens { name: "of" } tokens { name: "congal's" } tokens { name: "army" } tokens { name: "at" } tokens { name: "moira" } tokens { name: "occurred" } tokens { name: "in" } tokens { name: "the" } tokens { name: "time" } tokens { name: "of" } tokens { name: "the" } tokens { name: "hospitable" } tokens { name: "finnacta" }


[NeMo I 2023-11-12 08:25:54 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:54 punctuation_capitalization_infer_dataset:127] Max length: 49
[NeMo I 2023-11-12 08:25:54 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:54 data_preprocessing:406] Min: 47 |                  Max: 47 |                  Mean: 47.0 |                  Median: 47.0
[NeMo I 2023-11-12 08:25:54 data_preprocessing:412] 75 percentile: 47.00
[NeMo I 2023-11-12 08:25:54 data_preprocessing:413] 99 percentile: 47.00


100%|██████████| 1/1 [00:00<00:00, 16.24batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "as" } tokens { name: "leading" } tokens { name: "to" } tokens { name: "the" } tokens { name: "mention" } tokens { name: "of" } tokens { name: "other" } tokens { name: "interesting" } tokens { name: "events" } tokens { name: "we" } tokens { name: "must" } tokens { name: "set" } tokens { name: "this" } tokens { name: "inroad" } tokens { name: "clearly" } tokens { name: "before" } tokens { name: "the" } tokens { name: "reader" }


[NeMo I 2023-11-12 08:25:55 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:55 punctuation_capitalization_infer_dataset:127] Max length: 22
[NeMo I 2023-11-12 08:25:55 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:55 data_preprocessing:406] Min: 20 |                  Max: 20 |                  Mean: 20.0 |                  Median: 20.0
[NeMo I 2023-11-12 08:25:55 data_preprocessing:412] 75 percentile: 20.00
[NeMo I 2023-11-12 08:25:55 data_preprocessing:413] 99 percentile: 20.00


100%|██████████| 1/1 [00:00<00:00,  9.71batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "the" } tokens { name: "saxons" } tokens { name: "of" } tokens { name: "kent" } tokens { name: "and" } tokens { name: "the" } tokens { name: "southern" } tokens { name: "kingdoms" } tokens { name: "generally" } tokens { name: "were" } tokens { name: "converted" } tokens { name: "by" } tokens { name: "missionaries" } tokens { name: "from" } tokens { name: "france" } tokens { name: "or" } tokens { name: "rome" } tokens { name: "or" } tokens { name: "native" } tokens { name: "preachers" } tokens { name: "of" } tokens { name: "the" } tokens { name: "first" } tokens { name: "or" } tokens { name: "second" } tokens { name: "christian" } tokens { name: "generation" } tokens { name: "those" } tokens { name: "of" } tokens { name: "northumbria" } tokens { name: "recognise" } tokens { name: "as" } tokens { name: "their" } tokens { name: "apostles" } tokens { name: "saint" } tokens { name: "aidan" }

[NeMo I 2023-11-12 08:25:55 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:55 punctuation_capitalization_infer_dataset:127] Max length: 48
[NeMo I 2023-11-12 08:25:55 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:55 data_preprocessing:406] Min: 46 |                  Max: 46 |                  Mean: 46.0 |                  Median: 46.0
[NeMo I 2023-11-12 08:25:55 data_preprocessing:412] 75 percentile: 46.00
[NeMo I 2023-11-12 08:25:55 data_preprocessing:413] 99 percentile: 46.00


100%|██████████| 1/1 [00:00<00:00, 16.08batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "the" } tokens { name: "kingdom" } tokens { name: "of" } tokens { name: "northumbria" } tokens { name: "as" } tokens { name: "the" } tokens { name: "name" } tokens { name: "implies" } tokens { name: "embraced" } tokens { name: "nearly" } tokens { name: "all" } tokens { name: "the" } tokens { name: "country" } tokens { name: "from" } tokens { name: "the" } tokens { name: "humber" } tokens { name: "to" } tokens { name: "the" } tokens { name: "pictish" } tokens { name: "border" }


[NeMo I 2023-11-12 08:25:55 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:55 punctuation_capitalization_infer_dataset:127] Max length: 26
[NeMo I 2023-11-12 08:25:55 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:55 data_preprocessing:406] Min: 24 |                  Max: 24 |                  Mean: 24.0 |                  Median: 24.0
[NeMo I 2023-11-12 08:25:55 data_preprocessing:412] 75 percentile: 24.00
[NeMo I 2023-11-12 08:25:55 data_preprocessing:413] 99 percentile: 24.00


100%|██████████| 1/1 [00:00<00:00, 14.65batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "the" } tokens { name: "barren" } tokens { name: "rock" } tokens { name: "about" } tokens { name: "three" } tokens { name: "miles" } tokens { name: "in" } tokens { name: "length" } tokens { name: "was" } tokens { name: "covered" } tokens { name: "with" } tokens { name: "monastic" } tokens { name: "buildings" } tokens { name: "and" } tokens { name: "its" } tokens { name: "cemetery" } tokens { name: "was" } tokens { name: "already" } tokens { name: "adorned" } tokens { name: "with" } tokens { name: "the" } tokens { name: "tombs" } tokens { name: "of" } tokens { name: "saints" } tokens { name: "and" } tokens { name: "kings" }


[NeMo I 2023-11-12 08:25:55 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:55 punctuation_capitalization_infer_dataset:127] Max length: 28
[NeMo I 2023-11-12 08:25:55 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:55 data_preprocessing:406] Min: 26 |                  Max: 26 |                  Mean: 26.0 |                  Median: 26.0
[NeMo I 2023-11-12 08:25:55 data_preprocessing:412] 75 percentile: 26.00
[NeMo I 2023-11-12 08:25:55 data_preprocessing:413] 99 percentile: 26.00


100%|██████████| 1/1 [00:00<00:00, 16.23batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "now" } tokens { name: "every" } tokens { name: "missionary" } tokens { name: "that" } tokens { name: "ever" } tokens { name: "went" } tokens { name: "out" } tokens { name: "from" } tokens { name: "iona" } tokens { name: "had" } tokens { name: "taught" } tokens { name: "that" } tokens { name: "to" } tokens { name: "reduce" } tokens { name: "christians" } tokens { name: "to" } tokens { name: "slavery" } tokens { name: "was" } tokens { name: "wholly" } tokens { name: "inconsistent" } tokens { name: "with" } tokens { name: "a" } tokens { name: "belief" } tokens { name: "in" } tokens { name: "the" } tokens { name: "doctrines" } tokens { name: "of" } tokens { name: "the" } tokens { name: "gospel" }


[NeMo I 2023-11-12 08:25:56 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:56 punctuation_capitalization_infer_dataset:127] Max length: 31
[NeMo I 2023-11-12 08:25:56 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:56 data_preprocessing:406] Min: 29 |                  Max: 29 |                  Mean: 29.0 |                  Median: 29.0
[NeMo I 2023-11-12 08:25:56 data_preprocessing:412] 75 percentile: 29.00
[NeMo I 2023-11-12 08:25:56 data_preprocessing:413] 99 percentile: 29.00


100%|██████████| 1/1 [00:00<00:00, 16.35batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "while" } tokens { name: "the" } tokens { name: "liberated" } tokens { name: "exiles" } tokens { name: "rejoiced" } tokens { name: "on" } tokens { name: "the" } tokens { name: "plain" } tokens { name: "of" } tokens { name: "meath" } tokens { name: "the" } tokens { name: "tent" } tokens { name: "of" } tokens { name: "the" } tokens { name: "abbot" } tokens { name: "of" } tokens { name: "iona" } tokens { name: "was" } tokens { name: "pitched" } tokens { name: "on" } tokens { name: "the" } tokens { name: "rath" } tokens { name: "of" } tokens { name: "tara" } tokens { name: "a" } tokens { name: "fact" } tokens { name: "which" } tokens { name: "would" } tokens { name: "seem" } tokens { name: "to" } tokens { name: "indicate" } tokens { name: "that" } tokens { name: "already" } tokens { name: "in" } tokens { name: "little" } tokens { name: "more" } tokens { name: "than" } tokens { name: "a" } t

[NeMo I 2023-11-12 08:25:56 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:56 punctuation_capitalization_infer_dataset:127] Max length: 64
[NeMo I 2023-11-12 08:25:56 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:56 data_preprocessing:406] Min: 74 |                  Max: 74 |                  Mean: 74.0 |                  Median: 74.0
[NeMo I 2023-11-12 08:25:56 data_preprocessing:412] 75 percentile: 74.00
[NeMo I 2023-11-12 08:25:56 data_preprocessing:413] 99 percentile: 74.00


100%|██████████| 3/3 [00:00<00:00, 16.36batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "so" } tokens { name: "slow" } tokens { name: "and" } tokens { name: "patient" } tokens { name: "is" } tokens { name: "the" } tokens { name: "process" } tokens { name: "by" } tokens { name: "which" } tokens { name: "christianity" } tokens { name: "infuses" } tokens { name: "itself" } tokens { name: "into" } tokens { name: "the" } tokens { name: "social" } tokens { name: "life" } tokens { name: "of" } tokens { name: "a" } tokens { name: "converted" } tokens { name: "people" }


[NeMo I 2023-11-12 08:25:56 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:56 punctuation_capitalization_infer_dataset:127] Max length: 24
[NeMo I 2023-11-12 08:25:56 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:56 data_preprocessing:406] Min: 22 |                  Max: 22 |                  Mean: 22.0 |                  Median: 22.0
[NeMo I 2023-11-12 08:25:56 data_preprocessing:412] 75 percentile: 22.00
[NeMo I 2023-11-12 08:25:56 data_preprocessing:413] 99 percentile: 22.00


100%|██████████| 1/1 [00:00<00:00, 19.01batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "here" } tokens { name: "the" } tokens { name: "holy" } tokens { name: "prelate" } tokens { name: "of" } tokens { name: "ferns" } tokens { name: "met" } tokens { name: "him" } tokens { name: "and" } tokens { name: "related" } tokens { name: "a" } tokens { name: "vision" } tokens { name: "in" } tokens { name: "which" } tokens { name: "he" } tokens { name: "had" } tokens { name: "been" } tokens { name: "instructed" } tokens { name: "to" } tokens { name: "demand" } tokens { name: "the" } tokens { name: "abolition" } tokens { name: "of" } tokens { name: "the" } tokens { name: "impost" }


[NeMo I 2023-11-12 08:25:57 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:57 punctuation_capitalization_infer_dataset:127] Max length: 28
[NeMo I 2023-11-12 08:25:57 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:57 data_preprocessing:406] Min: 26 |                  Max: 26 |                  Mean: 26.0 |                  Median: 26.0
[NeMo I 2023-11-12 08:25:57 data_preprocessing:412] 75 percentile: 26.00
[NeMo I 2023-11-12 08:25:57 data_preprocessing:413] 99 percentile: 26.00


100%|██████████| 1/1 [00:00<00:00, 19.92batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "the" } tokens { name: "tribute" } tokens { name: "was" } tokens { name: "at" } tokens { name: "this" } tokens { name: "period" } tokens { name: "enormous" } tokens { name: "fifteen" } tokens { name: "thousand" } tokens { name: "head" } tokens { name: "of" } tokens { name: "cattle" } tokens { name: "annually" }


[NeMo I 2023-11-12 08:25:57 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:57 punctuation_capitalization_infer_dataset:127] Max length: 15
[NeMo I 2023-11-12 08:25:57 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:57 data_preprocessing:406] Min: 13 |                  Max: 13 |                  Mean: 13.0 |                  Median: 13.0
[NeMo I 2023-11-12 08:25:57 data_preprocessing:412] 75 percentile: 13.00
[NeMo I 2023-11-12 08:25:57 data_preprocessing:413] 99 percentile: 13.00


100%|██████████| 1/1 [00:00<00:00, 28.14batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "saint" } tokens { name: "moling" } tokens { name: "survived" } tokens { name: "him" } tokens { name: "three" } tokens { name: "years" } tokens { name: "and" } tokens { name: "saint" } tokens { name: "adamnan" } tokens { name: "so" } tokens { name: "intimately" } tokens { name: "connected" } tokens { name: "with" } tokens { name: "his" } tokens { name: "reign" } tokens { name: "ten" } tokens { name: "years" }


[NeMo I 2023-11-12 08:25:57 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:57 punctuation_capitalization_infer_dataset:127] Max length: 21
[NeMo I 2023-11-12 08:25:57 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:57 data_preprocessing:406] Min: 19 |                  Max: 19 |                  Mean: 19.0 |                  Median: 19.0
[NeMo I 2023-11-12 08:25:57 data_preprocessing:412] 75 percentile: 19.00
[NeMo I 2023-11-12 08:25:57 data_preprocessing:413] 99 percentile: 19.00


100%|██████████| 1/1 [00:00<00:00, 16.45batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "nothing" } tokens { name: "could" } tokens { name: "be" } tokens { name: "more" } tokens { name: "natural" } tokens { name: "than" } tokens { name: "such" } tokens { name: "an" } tokens { name: "assembly" } tokens { name: "in" } tokens { name: "such" } tokens { name: "a" } tokens { name: "place" } tokens { name: "at" } tokens { name: "such" } tokens { name: "a" } tokens { name: "period" }


[NeMo I 2023-11-12 08:25:57 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:25:57 punctuation_capitalization_infer_dataset:127] Max length: 19
[NeMo I 2023-11-12 08:25:57 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:25:57 data_preprocessing:406] Min: 17 |                  Max: 17 |                  Mean: 17.0 |                  Median: 17.0
[NeMo I 2023-11-12 08:25:57 data_preprocessing:412] 75 percentile: 17.00
[NeMo I 2023-11-12 08:25:57 data_preprocessing:413] 99 percentile: 17.00


100%|██████████| 1/1 [00:00<00:00, 17.50batch/s]


Old path /n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2035/152373/2035-152373-0003.flac
[]
ner tagged text The poets of succeeding ages have dwelt much in detail on the occurrences of this memorable day.
Emotion Labels ['NEUTRAL']
tagged transcription The poets of succeeding ages have dwelt much in detail on the occurrences of this memorable day.
Old path /n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2035/152373/2035-152373-0014.flac
[{'entity_group': 'MISC', 'score': 0.99134505, 'word': 'Christianity', 'start': 44, 'end': 56}]
ner tagged text So slow and patient is the process by which B-MISC Christianity E-MISC infuses itself into the social life of a converted people.
Emotion Labels ['NEUTRAL']
tagged transcription So slow and patient is the process by which B-MISC Christianity E-MISC infuses itself into the social life of a converted people.
Old path /n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2035/152373/2035-152373-0005.flac
[{'entity_group': 'PER', 

 NeMo-text-processing :: DEBUG    :: tokens { name: "peter" } tokens { name: "told" } tokens { name: "his" } tokens { name: "troubles" } tokens { name: "to" } tokens { name: "mister" } tokens { name: "shimerda" } tokens { name: "he" } tokens { name: "was" } tokens { name: "unable" } tokens { name: "to" } tokens { name: "meet" } tokens { name: "a" } tokens { name: "note" } tokens { name: "which" } tokens { name: "fell" } tokens { name: "due" } tokens { name: "on" } tokens { name: "the" } tokens { name: "first" } tokens { name: "of" } tokens { name: "november" } tokens { name: "had" } tokens { name: "to" } tokens { name: "pay" } tokens { name: "an" } tokens { name: "exorbitant" } tokens { name: "bonus" } tokens { name: "on" } tokens { name: "renewing" } tokens { name: "it" } tokens { name: "and" } tokens { name: "to" } tokens { name: "give" } tokens { name: "a" } tokens { name: "mortgage" } tokens { name: "on" } tokens { name: "his" } tokens { name: "pigs" } tokens { name: "and" } tokens

[NeMo I 2023-11-12 08:26:22 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:26:22 punctuation_capitalization_infer_dataset:127] Max length: 54
[NeMo I 2023-11-12 08:26:22 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:26:22 data_preprocessing:406] Min: 52 |                  Max: 52 |                  Mean: 52.0 |                  Median: 52.0
[NeMo I 2023-11-12 08:26:22 data_preprocessing:412] 75 percentile: 52.00
[NeMo I 2023-11-12 08:26:22 data_preprocessing:413] 99 percentile: 52.00


100%|██████████| 1/1 [00:00<00:00, 11.70batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "peter" } tokens { name: "could" } tokens { name: "give" } tokens { name: "no" } tokens { name: "very" } tokens { name: "clear" } tokens { name: "account" } tokens { name: "of" } tokens { name: "his" } tokens { name: "transactions" } tokens { name: "with" } tokens { name: "cutter" }


[NeMo I 2023-11-12 08:26:22 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:26:22 punctuation_capitalization_infer_dataset:127] Max length: 14
[NeMo I 2023-11-12 08:26:22 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:26:22 data_preprocessing:406] Min: 12 |                  Max: 12 |                  Mean: 12.0 |                  Median: 12.0
[NeMo I 2023-11-12 08:26:22 data_preprocessing:412] 75 percentile: 12.00
[NeMo I 2023-11-12 08:26:22 data_preprocessing:413] 99 percentile: 12.00


100%|██████████| 1/1 [00:00<00:00, 16.11batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "she" } tokens { name: "asked" } tokens { name: "peter" } tokens { name: "to" } tokens { name: "wait" } tokens { name: "a" } tokens { name: "moment" } tokens { name: "and" } tokens { name: "when" } tokens { name: "she" } tokens { name: "came" } tokens { name: "back" } tokens { name: "from" } tokens { name: "the" } tokens { name: "kitchen" } tokens { name: "she" } tokens { name: "brought" } tokens { name: "a" } tokens { name: "bag" } tokens { name: "of" } tokens { name: "sandwiches" } tokens { name: "and" } tokens { name: "doughnuts" } tokens { name: "for" } tokens { name: "us" }


[NeMo I 2023-11-12 08:26:22 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:26:22 punctuation_capitalization_infer_dataset:127] Max length: 29
[NeMo I 2023-11-12 08:26:22 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:26:22 data_preprocessing:406] Min: 27 |                  Max: 27 |                  Mean: 27.0 |                  Median: 27.0
[NeMo I 2023-11-12 08:26:22 data_preprocessing:412] 75 percentile: 27.00
[NeMo I 2023-11-12 08:26:22 data_preprocessing:413] 99 percentile: 27.00


100%|██████████| 1/1 [00:00<00:00, 15.69batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "we" } tokens { name: "lay" } tokens { name: "still" } tokens { name: "and" } tokens { name: "did" } tokens { name: "not" } tokens { name: "talk" }


[NeMo I 2023-11-12 08:26:22 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:26:22 punctuation_capitalization_infer_dataset:127] Max length: 9
[NeMo I 2023-11-12 08:26:22 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:26:22 data_preprocessing:406] Min: 7 |                  Max: 7 |                  Mean: 7.0 |                  Median: 7.0
[NeMo I 2023-11-12 08:26:22 data_preprocessing:412] 75 percentile: 7.00
[NeMo I 2023-11-12 08:26:22 data_preprocessing:413] 99 percentile: 7.00


100%|██████████| 1/1 [00:00<00:00, 33.35batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "the" } tokens { name: "little" } tokens { name: "house" } tokens { name: "on" } tokens { name: "the" } tokens { name: "hillside" } tokens { name: "was" } tokens { name: "so" } tokens { name: "much" } tokens { name: "the" } tokens { name: "color" } tokens { name: "of" } tokens { name: "the" } tokens { name: "night" } tokens { name: "that" } tokens { name: "we" } tokens { name: "could" } tokens { name: "not" } tokens { name: "see" } tokens { name: "it" } tokens { name: "as" } tokens { name: "we" } tokens { name: "came" } tokens { name: "up" } tokens { name: "the" } tokens { name: "draw" }


[NeMo I 2023-11-12 08:26:22 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:26:22 punctuation_capitalization_infer_dataset:127] Max length: 28
[NeMo I 2023-11-12 08:26:22 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:26:22 data_preprocessing:406] Min: 26 |                  Max: 26 |                  Mean: 26.0 |                  Median: 26.0
[NeMo I 2023-11-12 08:26:22 data_preprocessing:412] 75 percentile: 26.00
[NeMo I 2023-11-12 08:26:22 data_preprocessing:413] 99 percentile: 26.00


100%|██████████| 1/1 [00:00<00:00, 12.90batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "they" } tokens { name: "made" } tokens { name: "me" } tokens { name: "think" } tokens { name: "of" } tokens { name: "defeated" } tokens { name: "armies" } tokens { name: "retreating" } tokens { name: "or" } tokens { name: "of" } tokens { name: "ghosts" } tokens { name: "who" } tokens { name: "were" } tokens { name: "trying" } tokens { name: "desperately" } tokens { name: "to" } tokens { name: "get" } tokens { name: "in" } tokens { name: "for" } tokens { name: "shelter" } tokens { name: "and" } tokens { name: "then" } tokens { name: "went" } tokens { name: "moaning" } tokens { name: "on" }


[NeMo I 2023-11-12 08:26:23 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:26:23 punctuation_capitalization_infer_dataset:127] Max length: 27
[NeMo I 2023-11-12 08:26:23 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:26:23 data_preprocessing:406] Min: 25 |                  Max: 25 |                  Mean: 25.0 |                  Median: 25.0
[NeMo I 2023-11-12 08:26:23 data_preprocessing:412] 75 percentile: 25.00
[NeMo I 2023-11-12 08:26:23 data_preprocessing:413] 99 percentile: 25.00


100%|██████████| 1/1 [00:00<00:00, 12.10batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "i" } tokens { name: "could" } tokens { name: "not" } tokens { name: "take" } tokens { name: "my" } tokens { name: "eyes" } tokens { name: "off" } tokens { name: "the" } tokens { name: "man" } tokens { name: "in" } tokens { name: "the" } tokens { name: "bed" }


[NeMo I 2023-11-12 08:26:23 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:26:23 punctuation_capitalization_infer_dataset:127] Max length: 14
[NeMo I 2023-11-12 08:26:23 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:26:23 data_preprocessing:406] Min: 12 |                  Max: 12 |                  Mean: 12.0 |                  Median: 12.0
[NeMo I 2023-11-12 08:26:23 data_preprocessing:412] 75 percentile: 12.00
[NeMo I 2023-11-12 08:26:23 data_preprocessing:413] 99 percentile: 12.00


100%|██████████| 1/1 [00:00<00:00, 19.82batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "the" } tokens { name: "sharp" } tokens { name: "smell" } tokens { name: "of" } tokens { name: "spirits" } tokens { name: "went" } tokens { name: "through" } tokens { name: "the" } tokens { name: "room" }


[NeMo I 2023-11-12 08:26:23 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:26:23 punctuation_capitalization_infer_dataset:127] Max length: 11
[NeMo I 2023-11-12 08:26:23 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:26:23 data_preprocessing:406] Min: 9 |                  Max: 9 |                  Mean: 9.0 |                  Median: 9.0
[NeMo I 2023-11-12 08:26:23 data_preprocessing:412] 75 percentile: 9.00
[NeMo I 2023-11-12 08:26:23 data_preprocessing:413] 99 percentile: 9.00


100%|██████████| 1/1 [00:00<00:00, 25.70batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "it" } tokens { name: "seemed" } tokens { name: "to" } tokens { name: "me" } tokens { name: "that" } tokens { name: "he" } tokens { name: "despised" } tokens { name: "him" } tokens { name: "for" } tokens { name: "being" } tokens { name: "so" } tokens { name: "simple" } tokens { name: "and" } tokens { name: "docile" }


[NeMo I 2023-11-12 08:26:23 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:26:23 punctuation_capitalization_infer_dataset:127] Max length: 17
[NeMo I 2023-11-12 08:26:23 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:26:23 data_preprocessing:406] Min: 15 |                  Max: 15 |                  Mean: 15.0 |                  Median: 15.0
[NeMo I 2023-11-12 08:26:23 data_preprocessing:412] 75 percentile: 15.00
[NeMo I 2023-11-12 08:26:23 data_preprocessing:413] 99 percentile: 15.00


100%|██████████| 1/1 [00:00<00:00, 15.92batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "the" } tokens { name: "sick" } tokens { name: "man" } tokens { name: "raged" } tokens { name: "and" } tokens { name: "shook" } tokens { name: "his" } tokens { name: "fist" }


[NeMo I 2023-11-12 08:26:23 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:26:23 punctuation_capitalization_infer_dataset:127] Max length: 10
[NeMo I 2023-11-12 08:26:23 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:26:23 data_preprocessing:406] Min: 8 |                  Max: 8 |                  Mean: 8.0 |                  Median: 8.0
[NeMo I 2023-11-12 08:26:23 data_preprocessing:412] 75 percentile: 8.00
[NeMo I 2023-11-12 08:26:23 data_preprocessing:413] 99 percentile: 8.00


100%|██████████| 1/1 [00:00<00:00, 22.15batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "he" } tokens { name: "seemed" } tokens { name: "to" } tokens { name: "be" } tokens { name: "cursing" } tokens { name: "people" } tokens { name: "who" } tokens { name: "had" } tokens { name: "wronged" } tokens { name: "him" }


[NeMo I 2023-11-12 08:26:23 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:26:23 punctuation_capitalization_infer_dataset:127] Max length: 13
[NeMo I 2023-11-12 08:26:23 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:26:23 data_preprocessing:406] Min: 11 |                  Max: 11 |                  Mean: 11.0 |                  Median: 11.0
[NeMo I 2023-11-12 08:26:23 data_preprocessing:412] 75 percentile: 11.00
[NeMo I 2023-11-12 08:26:23 data_preprocessing:413] 99 percentile: 11.00


100%|██████████| 1/1 [00:00<00:00, 21.53batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "quickly" } tokens { name: "it" } tokens { name: "was" } tokens { name: "covered" } tokens { name: "with" } tokens { name: "bright" } tokens { name: "red" } tokens { name: "spots" } tokens { name: "i" } tokens { name: "thought" } tokens { name: "i" } tokens { name: "had" } tokens { name: "never" } tokens { name: "seen" } tokens { name: "any" } tokens { name: "blood" } tokens { name: "so" } tokens { name: "bright" }


[NeMo I 2023-11-12 08:26:24 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:26:24 punctuation_capitalization_infer_dataset:127] Max length: 20
[NeMo I 2023-11-12 08:26:24 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:26:24 data_preprocessing:406] Min: 18 |                  Max: 18 |                  Mean: 18.0 |                  Median: 18.0
[NeMo I 2023-11-12 08:26:24 data_preprocessing:412] 75 percentile: 18.00
[NeMo I 2023-11-12 08:26:24 data_preprocessing:413] 99 percentile: 18.00


100%|██████████| 1/1 [00:00<00:00, 18.10batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "he" } tokens { name: "lay" } tokens { name: "patiently" } tokens { name: "fighting" } tokens { name: "for" } tokens { name: "breath" } tokens { name: "like" } tokens { name: "a" } tokens { name: "child" } tokens { name: "with" } tokens { name: "croup" }


[NeMo I 2023-11-12 08:26:24 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:26:24 punctuation_capitalization_infer_dataset:127] Max length: 15
[NeMo I 2023-11-12 08:26:24 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:26:24 data_preprocessing:406] Min: 13 |                  Max: 13 |                  Mean: 13.0 |                  Median: 13.0
[NeMo I 2023-11-12 08:26:24 data_preprocessing:412] 75 percentile: 13.00
[NeMo I 2023-11-12 08:26:24 data_preprocessing:413] 99 percentile: 13.00


100%|██████████| 1/1 [00:00<00:00, 19.00batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "antonia's" } tokens { name: "father" } tokens { name: "uncovered" } tokens { name: "one" } tokens { name: "of" } tokens { name: "his" } tokens { name: "long" } tokens { name: "bony" } tokens { name: "legs" } tokens { name: "and" } tokens { name: "rubbed" } tokens { name: "it" } tokens { name: "rhythmically" }


[NeMo I 2023-11-12 08:26:24 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:26:24 punctuation_capitalization_infer_dataset:127] Max length: 18
[NeMo I 2023-11-12 08:26:24 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:26:24 data_preprocessing:406] Min: 16 |                  Max: 16 |                  Mean: 16.0 |                  Median: 16.0
[NeMo I 2023-11-12 08:26:24 data_preprocessing:412] 75 percentile: 16.00
[NeMo I 2023-11-12 08:26:24 data_preprocessing:413] 99 percentile: 16.00


100%|██████████| 1/1 [00:00<00:00, 17.17batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "from" } tokens { name: "our" } tokens { name: "bench" } tokens { name: "we" } tokens { name: "could" } tokens { name: "see" } tokens { name: "what" } tokens { name: "a" } tokens { name: "hollow" } tokens { name: "case" } tokens { name: "his" } tokens { name: "body" } tokens { name: "was" }


[NeMo I 2023-11-12 08:26:24 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:26:24 punctuation_capitalization_infer_dataset:127] Max length: 15
[NeMo I 2023-11-12 08:26:24 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:26:24 data_preprocessing:406] Min: 13 |                  Max: 13 |                  Mean: 13.0 |                  Median: 13.0
[NeMo I 2023-11-12 08:26:24 data_preprocessing:412] 75 percentile: 13.00
[NeMo I 2023-11-12 08:26:24 data_preprocessing:413] 99 percentile: 13.00


100%|██████████| 1/1 [00:00<00:00, 21.65batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "gradually" } tokens { name: "relief" } tokens { name: "came" } tokens { name: "to" } tokens { name: "all" } tokens { name: "of" } tokens { name: "us" }


[NeMo I 2023-11-12 08:26:24 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:26:24 punctuation_capitalization_infer_dataset:127] Max length: 9
[NeMo I 2023-11-12 08:26:24 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:26:24 data_preprocessing:406] Min: 7 |                  Max: 7 |                  Mean: 7.0 |                  Median: 7.0
[NeMo I 2023-11-12 08:26:24 data_preprocessing:412] 75 percentile: 7.00
[NeMo I 2023-11-12 08:26:24 data_preprocessing:413] 99 percentile: 7.00


100%|██████████| 1/1 [00:00<00:00, 35.06batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "without" } tokens { name: "a" } tokens { name: "word" } tokens { name: "peter" } tokens { name: "got" } tokens { name: "up" } tokens { name: "and" } tokens { name: "lit" } tokens { name: "his" } tokens { name: "lantern" }


[NeMo I 2023-11-12 08:26:24 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:26:24 punctuation_capitalization_infer_dataset:127] Max length: 12
[NeMo I 2023-11-12 08:26:24 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:26:24 data_preprocessing:406] Min: 10 |                  Max: 10 |                  Mean: 10.0 |                  Median: 10.0
[NeMo I 2023-11-12 08:26:24 data_preprocessing:412] 75 percentile: 10.00
[NeMo I 2023-11-12 08:26:24 data_preprocessing:413] 99 percentile: 10.00


100%|██████████| 1/1 [00:00<00:00, 21.54batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "mister" } tokens { name: "shimerda" } tokens { name: "went" } tokens { name: "with" } tokens { name: "him" }


[NeMo I 2023-11-12 08:26:24 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:26:24 punctuation_capitalization_infer_dataset:127] Max length: 9
[NeMo I 2023-11-12 08:26:24 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:26:24 data_preprocessing:406] Min: 7 |                  Max: 7 |                  Mean: 7.0 |                  Median: 7.0
[NeMo I 2023-11-12 08:26:24 data_preprocessing:412] 75 percentile: 7.00
[NeMo I 2023-11-12 08:26:24 data_preprocessing:413] 99 percentile: 7.00


100%|██████████| 1/1 [00:00<00:00, 20.64batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "after" } tokens { name: "the" } tokens { name: "ceremony" } tokens { name: "at" } tokens { name: "the" } tokens { name: "church" } tokens { name: "the" } tokens { name: "party" } tokens { name: "went" } tokens { name: "to" } tokens { name: "a" } tokens { name: "dinner" } tokens { name: "given" } tokens { name: "by" } tokens { name: "the" } tokens { name: "parents" } tokens { name: "of" } tokens { name: "the" } tokens { name: "bride" }


[NeMo I 2023-11-12 08:26:25 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:26:25 punctuation_capitalization_infer_dataset:127] Max length: 21
[NeMo I 2023-11-12 08:26:25 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:26:25 data_preprocessing:406] Min: 19 |                  Max: 19 |                  Mean: 19.0 |                  Median: 19.0
[NeMo I 2023-11-12 08:26:25 data_preprocessing:412] 75 percentile: 19.00
[NeMo I 2023-11-12 08:26:25 data_preprocessing:413] 99 percentile: 19.00


100%|██████████| 1/1 [00:00<00:00, 16.22batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "the" } tokens { name: "first" } tokens { name: "howls" } tokens { name: "were" } tokens { name: "taken" } tokens { name: "up" } tokens { name: "and" } tokens { name: "echoed" } tokens { name: "and" } tokens { name: "with" } tokens { name: "quickening" } tokens { name: "repetitions" }


[NeMo I 2023-11-12 08:26:25 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:26:25 punctuation_capitalization_infer_dataset:127] Max length: 17
[NeMo I 2023-11-12 08:26:25 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:26:25 data_preprocessing:406] Min: 15 |                  Max: 15 |                  Mean: 15.0 |                  Median: 15.0
[NeMo I 2023-11-12 08:26:25 data_preprocessing:412] 75 percentile: 15.00
[NeMo I 2023-11-12 08:26:25 data_preprocessing:413] 99 percentile: 15.00


100%|██████████| 1/1 [00:00<00:00, 10.91batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "a" } tokens { name: "black" } tokens { name: "drove" } tokens { name: "came" } tokens { name: "up" } tokens { name: "over" } tokens { name: "the" } tokens { name: "hill" } tokens { name: "behind" } tokens { name: "the" } tokens { name: "wedding" } tokens { name: "party" }


[NeMo I 2023-11-12 08:26:25 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:26:25 punctuation_capitalization_infer_dataset:127] Max length: 14
[NeMo I 2023-11-12 08:26:25 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:26:25 data_preprocessing:406] Min: 12 |                  Max: 12 |                  Mean: 12.0 |                  Median: 12.0
[NeMo I 2023-11-12 08:26:25 data_preprocessing:412] 75 percentile: 12.00
[NeMo I 2023-11-12 08:26:25 data_preprocessing:413] 99 percentile: 12.00


100%|██████████| 1/1 [00:00<00:00, 21.64batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "something" } tokens { name: "happened" } tokens { name: "to" } tokens { name: "the" } tokens { name: "hindmost" } tokens { name: "sledge" } tokens { name: "the" } tokens { name: "driver" } tokens { name: "lost" } tokens { name: "control" } tokens { name: "he" } tokens { name: "was" } tokens { name: "probably" } tokens { name: "very" } tokens { name: "drunk" } tokens { name: "the" } tokens { name: "horses" } tokens { name: "left" } tokens { name: "the" } tokens { name: "road" } tokens { name: "the" } tokens { name: "sledge" } tokens { name: "was" } tokens { name: "caught" } tokens { name: "in" } tokens { name: "a" } tokens { name: "clump" } tokens { name: "of" } tokens { name: "trees" } tokens { name: "and" } tokens { name: "overturned" }


[NeMo I 2023-11-12 08:26:25 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:26:25 punctuation_capitalization_infer_dataset:127] Max length: 37
[NeMo I 2023-11-12 08:26:25 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:26:25 data_preprocessing:406] Min: 35 |                  Max: 35 |                  Mean: 35.0 |                  Median: 35.0
[NeMo I 2023-11-12 08:26:25 data_preprocessing:412] 75 percentile: 35.00
[NeMo I 2023-11-12 08:26:25 data_preprocessing:413] 99 percentile: 35.00


100%|██████████| 1/1 [00:00<00:00, 14.54batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "the" } tokens { name: "shrieks" } tokens { name: "that" } tokens { name: "followed" } tokens { name: "made" } tokens { name: "everybody" } tokens { name: "sober" }


[NeMo I 2023-11-12 08:26:25 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:26:25 punctuation_capitalization_infer_dataset:127] Max length: 10
[NeMo I 2023-11-12 08:26:25 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:26:25 data_preprocessing:406] Min: 8 |                  Max: 8 |                  Mean: 8.0 |                  Median: 8.0
[NeMo I 2023-11-12 08:26:25 data_preprocessing:412] 75 percentile: 8.00
[NeMo I 2023-11-12 08:26:25 data_preprocessing:413] 99 percentile: 8.00


100%|██████████| 1/1 [00:00<00:00, 26.67batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "the" } tokens { name: "road" } tokens { name: "was" } tokens { name: "clear" } tokens { name: "and" } tokens { name: "white" } tokens { name: "and" } tokens { name: "the" } tokens { name: "groom's" } tokens { name: "three" } tokens { name: "blacks" } tokens { name: "went" } tokens { name: "like" } tokens { name: "the" } tokens { name: "wind" }


[NeMo I 2023-11-12 08:26:25 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:26:25 punctuation_capitalization_infer_dataset:127] Max length: 19
[NeMo I 2023-11-12 08:26:25 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:26:25 data_preprocessing:406] Min: 17 |                  Max: 17 |                  Mean: 17.0 |                  Median: 17.0
[NeMo I 2023-11-12 08:26:25 data_preprocessing:412] 75 percentile: 17.00
[NeMo I 2023-11-12 08:26:25 data_preprocessing:413] 99 percentile: 17.00


100%|██████████| 1/1 [00:00<00:00, 12.21batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "there" } tokens { name: "are" } tokens { name: "only" } tokens { name: "three" } tokens { name: "sledges" } tokens { name: "left" } tokens { name: "he" } tokens { name: "whispered" }


[NeMo I 2023-11-12 08:26:26 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:26:26 punctuation_capitalization_infer_dataset:127] Max length: 12
[NeMo I 2023-11-12 08:26:26 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:26:26 data_preprocessing:406] Min: 10 |                  Max: 10 |                  Mean: 10.0 |                  Median: 10.0
[NeMo I 2023-11-12 08:26:26 data_preprocessing:412] 75 percentile: 10.00
[NeMo I 2023-11-12 08:26:26 data_preprocessing:413] 99 percentile: 10.00


100%|██████████| 1/1 [00:00<00:00, 25.06batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "and" } tokens { name: "the" } tokens { name: "wolves" } tokens { name: "pavel" } tokens { name: "asked" } tokens { name: "enough" } tokens { name: "enough" } tokens { name: "for" } tokens { name: "all" } tokens { name: "of" } tokens { name: "us" }


[NeMo I 2023-11-12 08:26:26 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:26:26 punctuation_capitalization_infer_dataset:127] Max length: 13
[NeMo I 2023-11-12 08:26:26 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:26:26 data_preprocessing:406] Min: 11 |                  Max: 11 |                  Mean: 11.0 |                  Median: 11.0
[NeMo I 2023-11-12 08:26:26 data_preprocessing:412] 75 percentile: 11.00
[NeMo I 2023-11-12 08:26:26 data_preprocessing:413] 99 percentile: 11.00


100%|██████████| 1/1 [00:00<00:00, 16.44batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "they" } tokens { name: "were" } tokens { name: "within" } tokens { name: "a" } tokens { name: "few" } tokens { name: "miles" } tokens { name: "of" } tokens { name: "their" } tokens { name: "village" } tokens { name: "now" }


[NeMo I 2023-11-12 08:26:26 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:26:26 punctuation_capitalization_infer_dataset:127] Max length: 12
[NeMo I 2023-11-12 08:26:26 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:26:26 data_preprocessing:406] Min: 10 |                  Max: 10 |                  Mean: 10.0 |                  Median: 10.0
[NeMo I 2023-11-12 08:26:26 data_preprocessing:412] 75 percentile: 10.00
[NeMo I 2023-11-12 08:26:26 data_preprocessing:413] 99 percentile: 10.00


100%|██████████| 1/1 [00:00<00:00, 17.22batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "yes" } tokens { name: "how" } tokens { name: "many" }


[NeMo I 2023-11-12 08:26:26 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:26:26 punctuation_capitalization_infer_dataset:127] Max length: 5
[NeMo I 2023-11-12 08:26:26 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:26:26 data_preprocessing:406] Min: 3 |                  Max: 3 |                  Mean: 3.0 |                  Median: 3.0
[NeMo I 2023-11-12 08:26:26 data_preprocessing:412] 75 percentile: 3.00
[NeMo I 2023-11-12 08:26:26 data_preprocessing:413] 99 percentile: 3.00


100%|██████████| 1/1 [00:00<00:00, 22.22batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "twenty" } tokens { name: "thirty" } tokens { name: "enough" }


[NeMo I 2023-11-12 08:26:26 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:26:26 punctuation_capitalization_infer_dataset:127] Max length: 5
[NeMo I 2023-11-12 08:26:26 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:26:26 data_preprocessing:406] Min: 3 |                  Max: 3 |                  Mean: 3.0 |                  Median: 3.0
[NeMo I 2023-11-12 08:26:26 data_preprocessing:412] 75 percentile: 3.00
[NeMo I 2023-11-12 08:26:26 data_preprocessing:413] 99 percentile: 3.00


100%|██████████| 1/1 [00:00<00:00, 31.50batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "now" } tokens { name: "his" } tokens { name: "middle" } tokens { name: "horse" } tokens { name: "was" } tokens { name: "being" } tokens { name: "almost" } tokens { name: "dragged" } tokens { name: "by" } tokens { name: "the" } tokens { name: "other" } tokens { name: "two" }


[NeMo I 2023-11-12 08:26:26 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:26:26 punctuation_capitalization_infer_dataset:127] Max length: 14
[NeMo I 2023-11-12 08:26:26 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:26:26 data_preprocessing:406] Min: 12 |                  Max: 12 |                  Mean: 12.0 |                  Median: 12.0
[NeMo I 2023-11-12 08:26:26 data_preprocessing:412] 75 percentile: 12.00
[NeMo I 2023-11-12 08:26:26 data_preprocessing:413] 99 percentile: 12.00


100%|██████████| 1/1 [00:00<00:00, 12.79batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "pavel" } tokens { name: "knocked" } tokens { name: "him" } tokens { name: "over" } tokens { name: "the" } tokens { name: "side" } tokens { name: "of" } tokens { name: "the" } tokens { name: "sledge" } tokens { name: "and" } tokens { name: "threw" } tokens { name: "the" } tokens { name: "girl" } tokens { name: "after" } tokens { name: "him" }


[NeMo I 2023-11-12 08:26:26 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:26:26 punctuation_capitalization_infer_dataset:127] Max length: 18
[NeMo I 2023-11-12 08:26:26 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:26:26 data_preprocessing:406] Min: 16 |                  Max: 16 |                  Mean: 16.0 |                  Median: 16.0
[NeMo I 2023-11-12 08:26:26 data_preprocessing:412] 75 percentile: 16.00
[NeMo I 2023-11-12 08:26:26 data_preprocessing:413] 99 percentile: 16.00


100%|██████████| 1/1 [00:00<00:00, 20.18batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "peter" } tokens { name: "crouching" } tokens { name: "in" } tokens { name: "the" } tokens { name: "front" } tokens { name: "seat" } tokens { name: "saw" } tokens { name: "nothing" }


[NeMo I 2023-11-12 08:26:27 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:26:27 punctuation_capitalization_infer_dataset:127] Max length: 11
[NeMo I 2023-11-12 08:26:27 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:26:27 data_preprocessing:406] Min: 9 |                  Max: 9 |                  Mean: 9.0 |                  Median: 9.0
[NeMo I 2023-11-12 08:26:27 data_preprocessing:412] 75 percentile: 9.00
[NeMo I 2023-11-12 08:26:27 data_preprocessing:413] 99 percentile: 9.00


100%|██████████| 1/1 [00:00<00:00, 27.13batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "the" } tokens { name: "first" } tokens { name: "thing" } tokens { name: "either" } tokens { name: "of" } tokens { name: "them" } tokens { name: "noticed" } tokens { name: "was" } tokens { name: "a" } tokens { name: "new" } tokens { name: "sound" } tokens { name: "that" } tokens { name: "broke" } tokens { name: "into" } tokens { name: "the" } tokens { name: "clear" } tokens { name: "air" } tokens { name: "louder" } tokens { name: "than" } tokens { name: "they" } tokens { name: "had" } tokens { name: "ever" } tokens { name: "heard" } tokens { name: "it" } tokens { name: "before" } tokens { name: "the" } tokens { name: "bell" } tokens { name: "of" } tokens { name: "the" } tokens { name: "monastery" } tokens { name: "of" } tokens { name: "their" } tokens { name: "own" } tokens { name: "village" } tokens { name: "ringing" } tokens { name: "for" } tokens { name: "early" } tokens { name: "pra

[NeMo I 2023-11-12 08:26:27 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:26:27 punctuation_capitalization_infer_dataset:127] Max length: 40
[NeMo I 2023-11-12 08:26:27 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:26:27 data_preprocessing:406] Min: 38 |                  Max: 38 |                  Mean: 38.0 |                  Median: 38.0
[NeMo I 2023-11-12 08:26:27 data_preprocessing:412] 75 percentile: 38.00
[NeMo I 2023-11-12 08:26:27 data_preprocessing:413] 99 percentile: 38.00


100%|██████████| 1/1 [00:00<00:00, 13.74batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "they" } tokens { name: "were" } tokens { name: "run" } tokens { name: "out" } tokens { name: "of" } tokens { name: "their" } tokens { name: "village" }


[NeMo I 2023-11-12 08:26:27 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:26:27 punctuation_capitalization_infer_dataset:127] Max length: 9
[NeMo I 2023-11-12 08:26:27 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:26:27 data_preprocessing:406] Min: 7 |                  Max: 7 |                  Mean: 7.0 |                  Median: 7.0
[NeMo I 2023-11-12 08:26:27 data_preprocessing:412] 75 percentile: 7.00
[NeMo I 2023-11-12 08:26:27 data_preprocessing:413] 99 percentile: 7.00


100%|██████████| 1/1 [00:00<00:00, 22.31batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "wherever" } tokens { name: "they" } tokens { name: "went" } tokens { name: "the" } tokens { name: "story" } tokens { name: "followed" } tokens { name: "them" }


[NeMo I 2023-11-12 08:26:27 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:26:27 punctuation_capitalization_infer_dataset:127] Max length: 9
[NeMo I 2023-11-12 08:26:27 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:26:27 data_preprocessing:406] Min: 7 |                  Max: 7 |                  Mean: 7.0 |                  Median: 7.0
[NeMo I 2023-11-12 08:26:27 data_preprocessing:412] 75 percentile: 7.00
[NeMo I 2023-11-12 08:26:27 data_preprocessing:413] 99 percentile: 7.00


100%|██████████| 1/1 [00:00<00:00,  9.98batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "they" } tokens { name: "worked" } tokens { name: "in" } tokens { name: "chicago" } tokens { name: "des" } tokens { name: "moines" } tokens { name: "fort" } tokens { name: "wayne" } tokens { name: "but" } tokens { name: "they" } tokens { name: "were" } tokens { name: "always" } tokens { name: "unfortunate" }


[NeMo I 2023-11-12 08:26:27 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:26:27 punctuation_capitalization_infer_dataset:127] Max length: 15
[NeMo I 2023-11-12 08:26:27 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:26:27 data_preprocessing:406] Min: 13 |                  Max: 13 |                  Mean: 13.0 |                  Median: 13.0
[NeMo I 2023-11-12 08:26:27 data_preprocessing:412] 75 percentile: 13.00
[NeMo I 2023-11-12 08:26:27 data_preprocessing:413] 99 percentile: 13.00


100%|██████████| 1/1 [00:00<00:00, 32.93batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "during" } tokens { name: "the" } tokens { name: "auction" } tokens { name: "he" } tokens { name: "went" } tokens { name: "about" } tokens { name: "with" } tokens { name: "his" } tokens { name: "head" } tokens { name: "down" } tokens { name: "and" } tokens { name: "never" } tokens { name: "lifted" } tokens { name: "his" } tokens { name: "eyes" }


[NeMo I 2023-11-12 08:26:27 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:26:27 punctuation_capitalization_infer_dataset:127] Max length: 17
[NeMo I 2023-11-12 08:26:27 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:26:27 data_preprocessing:406] Min: 15 |                  Max: 15 |                  Mean: 15.0 |                  Median: 15.0
[NeMo I 2023-11-12 08:26:27 data_preprocessing:412] 75 percentile: 15.00
[NeMo I 2023-11-12 08:26:27 data_preprocessing:413] 99 percentile: 15.00


100%|██████████| 1/1 [00:00<00:00, 22.51batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "every" } tokens { name: "one" } tokens { name: "said" } tokens { name: "peter" } tokens { name: "kissed" } tokens { name: "the" } tokens { name: "cow" } tokens { name: "before" } tokens { name: "she" } tokens { name: "was" } tokens { name: "led" } tokens { name: "away" } tokens { name: "by" } tokens { name: "her" } tokens { name: "new" } tokens { name: "owner" }


[NeMo I 2023-11-12 08:26:28 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:26:28 punctuation_capitalization_infer_dataset:127] Max length: 18
[NeMo I 2023-11-12 08:26:28 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:26:28 data_preprocessing:406] Min: 16 |                  Max: 16 |                  Mean: 16.0 |                  Median: 16.0
[NeMo I 2023-11-12 08:26:28 data_preprocessing:412] 75 percentile: 16.00
[NeMo I 2023-11-12 08:26:28 data_preprocessing:413] 99 percentile: 16.00


100%|██████████| 1/1 [00:00<00:00, 11.38batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "the" } tokens { name: "loss" } tokens { name: "of" } tokens { name: "his" } tokens { name: "two" } tokens { name: "friends" } tokens { name: "had" } tokens { name: "a" } tokens { name: "depressing" } tokens { name: "effect" } tokens { name: "upon" } tokens { name: "old" } tokens { name: "mister" } tokens { name: "shimerda" }


[NeMo I 2023-11-12 08:26:28 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:26:28 punctuation_capitalization_infer_dataset:127] Max length: 19
[NeMo I 2023-11-12 08:26:28 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:26:28 data_preprocessing:406] Min: 17 |                  Max: 17 |                  Mean: 17.0 |                  Median: 17.0
[NeMo I 2023-11-12 08:26:28 data_preprocessing:412] 75 percentile: 17.00
[NeMo I 2023-11-12 08:26:28 data_preprocessing:413] 99 percentile: 17.00


100%|██████████| 1/1 [00:00<00:00, 14.86batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "when" } tokens { name: "he" } tokens { name: "was" } tokens { name: "out" } tokens { name: "hunting" } tokens { name: "he" } tokens { name: "used" } tokens { name: "to" } tokens { name: "go" } tokens { name: "into" } tokens { name: "the" } tokens { name: "empty" } tokens { name: "log" } tokens { name: "house" } tokens { name: "and" } tokens { name: "sit" } tokens { name: "there" } tokens { name: "brooding" }


[NeMo I 2023-11-12 08:26:28 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:26:28 punctuation_capitalization_infer_dataset:127] Max length: 20
[NeMo I 2023-11-12 08:26:28 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:26:28 data_preprocessing:406] Min: 18 |                  Max: 18 |                  Mean: 18.0 |                  Median: 18.0
[NeMo I 2023-11-12 08:26:28 data_preprocessing:412] 75 percentile: 18.00
[NeMo I 2023-11-12 08:26:28 data_preprocessing:413] 99 percentile: 18.00


100%|██████████| 1/1 [00:00<00:00, 18.05batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "this" } tokens { name: "cabin" } tokens { name: "was" } tokens { name: "his" } tokens { name: "hermitage" } tokens { name: "until" } tokens { name: "the" } tokens { name: "winter" } tokens { name: "snows" } tokens { name: "penned" } tokens { name: "him" } tokens { name: "in" } tokens { name: "his" } tokens { name: "cave" }


[NeMo I 2023-11-12 08:26:28 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:26:28 punctuation_capitalization_infer_dataset:127] Max length: 17
[NeMo I 2023-11-12 08:26:28 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:26:28 data_preprocessing:406] Min: 15 |                  Max: 15 |                  Mean: 15.0 |                  Median: 15.0
[NeMo I 2023-11-12 08:26:28 data_preprocessing:412] 75 percentile: 15.00
[NeMo I 2023-11-12 08:26:28 data_preprocessing:413] 99 percentile: 15.00


100%|██████████| 1/1 [00:00<00:00, 21.62batch/s]


Old path /n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2035/147961/2035-147961-0017.flac
[{'entity_group': 'PER', 'score': 0.8017523, 'word': 'Shimerda', 'start': 7, 'end': 15}]
ner tagged text Mister B-PER Shimerda E-PER went with him.
Emotion Labels ['NEUTRAL']
tagged transcription Mister B-PER Shimerda E-PER went with him.
Old path /n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2035/147961/2035-147961-0032.flac
[]
ner tagged text The first thing either of them noticed was a new sound that broke into the clear air, louder than they had ever heard it before the bell of the monastery of their own village, ringing for early prayers.
Emotion Labels ['NEUTRAL']
tagged transcription The first thing either of them noticed was a new sound that broke into the clear air, louder than they had ever heard it before the bell of the monastery of their own village, ringing for early prayers.
Old path /n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2035/147961/2035-147961-0016

 NeMo-text-processing :: DEBUG    :: tokens { name: "the" } tokens { name: "narrative" } tokens { name: "it" } tokens { name: "may" } tokens { name: "be" } tokens { name: "is" } tokens { name: "woven" } tokens { name: "of" } tokens { name: "so" } tokens { name: "humble" } tokens { name: "a" } tokens { name: "texture" } tokens { name: "as" } tokens { name: "to" } tokens { name: "require" } tokens { name: "this" } tokens { name: "advantage" } tokens { name: "and" } tokens { name: "at" } tokens { name: "the" } tokens { name: "same" } tokens { name: "time" } tokens { name: "to" } tokens { name: "render" } tokens { name: "it" } tokens { name: "the" } tokens { name: "more" } tokens { name: "difficult" } tokens { name: "of" } tokens { name: "attainment" }


[NeMo I 2023-11-12 08:26:54 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:26:54 punctuation_capitalization_infer_dataset:127] Max length: 33
[NeMo I 2023-11-12 08:26:54 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:26:54 data_preprocessing:406] Min: 31 |                  Max: 31 |                  Mean: 31.0 |                  Median: 31.0
[NeMo I 2023-11-12 08:26:54 data_preprocessing:412] 75 percentile: 31.00
[NeMo I 2023-11-12 08:26:54 data_preprocessing:413] 99 percentile: 31.00


100%|██████████| 1/1 [00:00<00:00, 11.73batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "in" } tokens { name: "good" } tokens { name: "faith" } tokens { name: "however" } tokens { name: "he" } tokens { name: "is" } tokens { name: "not" } tokens { name: "sufficiently" } tokens { name: "imaginative" } tokens { name: "to" } tokens { name: "flatter" } tokens { name: "himself" } tokens { name: "with" } tokens { name: "the" } tokens { name: "slightest" } tokens { name: "hope" } tokens { name: "of" } tokens { name: "this" } tokens { name: "kind" }


[NeMo I 2023-11-12 08:26:54 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:26:54 punctuation_capitalization_infer_dataset:127] Max length: 22
[NeMo I 2023-11-12 08:26:54 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:26:54 data_preprocessing:406] Min: 20 |                  Max: 20 |                  Mean: 20.0 |                  Median: 20.0
[NeMo I 2023-11-12 08:26:54 data_preprocessing:412] 75 percentile: 20.00
[NeMo I 2023-11-12 08:26:54 data_preprocessing:413] 99 percentile: 20.00


100%|██████████| 1/1 [00:00<00:00,  8.89batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "the" } tokens { name: "author" } tokens { name: "has" } tokens { name: "considered" } tokens { name: "it" } tokens { name: "hardly" } tokens { name: "worth" } tokens { name: "his" } tokens { name: "while" } tokens { name: "therefore" } tokens { name: "relentlessly" } tokens { name: "to" } tokens { name: "impale" } tokens { name: "the" } tokens { name: "story" } tokens { name: "with" } tokens { name: "its" } tokens { name: "moral" } tokens { name: "as" } tokens { name: "with" } tokens { name: "an" } tokens { name: "iron" } tokens { name: "rod" } tokens { name: "or" } tokens { name: "rather" } tokens { name: "as" } tokens { name: "by" } tokens { name: "sticking" } tokens { name: "a" } tokens { name: "pin" } tokens { name: "through" } tokens { name: "a" } tokens { name: "butterfly" } tokens { name: "thus" } tokens { name: "at" } tokens { name: "once" } tokens { name: "depriving" } tokens 

[NeMo I 2023-11-12 08:26:54 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:26:54 punctuation_capitalization_infer_dataset:127] Max length: 61
[NeMo I 2023-11-12 08:26:54 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:26:54 data_preprocessing:406] Min: 59 |                  Max: 59 |                  Mean: 59.0 |                  Median: 59.0
[NeMo I 2023-11-12 08:26:54 data_preprocessing:412] 75 percentile: 59.00
[NeMo I 2023-11-12 08:26:54 data_preprocessing:413] 99 percentile: 59.00


100%|██████████| 1/1 [00:00<00:00, 13.90batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "if" } tokens { name: "permitted" } tokens { name: "by" } tokens { name: "the" } tokens { name: "historical" } tokens { name: "connection" } tokens { name: "which" } tokens { name: "though" } tokens { name: "slight" } tokens { name: "was" } tokens { name: "essential" } tokens { name: "to" } tokens { name: "his" } tokens { name: "plan" } tokens { name: "the" } tokens { name: "author" } tokens { name: "would" } tokens { name: "very" } tokens { name: "willingly" } tokens { name: "have" } tokens { name: "avoided" } tokens { name: "anything" } tokens { name: "of" } tokens { name: "this" } tokens { name: "nature" }


[NeMo I 2023-11-12 08:26:55 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:26:55 punctuation_capitalization_infer_dataset:127] Max length: 27
[NeMo I 2023-11-12 08:26:55 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:26:55 data_preprocessing:406] Min: 25 |                  Max: 25 |                  Mean: 25.0 |                  Median: 25.0
[NeMo I 2023-11-12 08:26:55 data_preprocessing:412] 75 percentile: 25.00
[NeMo I 2023-11-12 08:26:55 data_preprocessing:413] 99 percentile: 25.00


100%|██████████| 1/1 [00:00<00:00, 14.81batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "he" } tokens { name: "trusts" } tokens { name: "not" } tokens { name: "to" } tokens { name: "be" } tokens { name: "considered" } tokens { name: "as" } tokens { name: "unpardonably" } tokens { name: "offending" } tokens { name: "by" } tokens { name: "laying" } tokens { name: "out" } tokens { name: "a" } tokens { name: "street" } tokens { name: "that" } tokens { name: "infringes" } tokens { name: "upon" } tokens { name: "nobody's" } tokens { name: "private" } tokens { name: "rights" } tokens { name: "and" } tokens { name: "appropriating" } tokens { name: "a" } tokens { name: "lot" } tokens { name: "of" } tokens { name: "land" } tokens { name: "which" } tokens { name: "had" } tokens { name: "no" } tokens { name: "visible" } tokens { name: "owner" } tokens { name: "and" } tokens { name: "building" } tokens { name: "a" } tokens { name: "house" } tokens { name: "of" } tokens { name: "materia

[NeMo I 2023-11-12 08:26:55 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:26:55 punctuation_capitalization_infer_dataset:127] Max length: 60
[NeMo I 2023-11-12 08:26:55 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:26:55 data_preprocessing:406] Min: 58 |                  Max: 58 |                  Mean: 58.0 |                  Median: 58.0
[NeMo I 2023-11-12 08:26:55 data_preprocessing:412] 75 percentile: 58.00
[NeMo I 2023-11-12 08:26:55 data_preprocessing:413] 99 percentile: 58.00


100%|██████████| 1/1 [00:00<00:00, 11.30batch/s]


Old path /n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2086/149214/2086-149214-0001.flac
[{'entity_group': 'MISC', 'score': 0.7886988, 'word': 'good', 'start': 3, 'end': 7}, {'entity_group': 'MISC', 'score': 0.5454882, 'word': 'faith', 'start': 8, 'end': 13}]
ner tagged text In B-MISC good E-MISC B-MISC faith E-MISC, however, he is not sufficiently imaginative to flatter himself with the slightest hope of this kind.
Emotion Labels ['NEUTRAL']
tagged transcription In B-MISC good E-MISC B-MISC faith E-MISC, however, he is not sufficiently imaginative to flatter himself with the slightest hope of this kind.
Old path /n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2086/149214/2086-149214-0004.flac
[]
ner tagged text He trusts not to be considered as unpardonably offending by laying out a street that infringes upon nobody's private rights, and appropriating a lot of land which had no visible owner, and building a house of materials long in use for constructing castles in the

 NeMo-text-processing :: DEBUG    :: tokens { name: "the" } tokens { name: "enclosure" } tokens { name: "had" } tokens { name: "formerly" } tokens { name: "been" } tokens { name: "very" } tokens { name: "extensive" } tokens { name: "but" } tokens { name: "was" } tokens { name: "now" } tokens { name: "contracted" } tokens { name: "within" } tokens { name: "small" } tokens { name: "compass" } tokens { name: "and" } tokens { name: "hemmed" } tokens { name: "about" } tokens { name: "partly" } tokens { name: "by" } tokens { name: "high" } tokens { name: "wooden" } tokens { name: "fences" } tokens { name: "and" } tokens { name: "partly" } tokens { name: "by" } tokens { name: "the" } tokens { name: "outbuildings" } tokens { name: "of" } tokens { name: "houses" } tokens { name: "that" } tokens { name: "stood" } tokens { name: "on" } tokens { name: "another" } tokens { name: "street" }


Emotion Labels ['NEUTRAL']
tagged transcription If permitted by the historical connection, which, though slight was essential to his plan, the author would very willingly have avoided anything of this nature.
ss /n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2086/149220
segments ['/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2086/149220/2086-149220-0004.flac', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2086/149220/2086-149220-0013.flac', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2086/149220/2086-149220-0047.flac', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2086/149220/2086-149220-0007.flac', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2086/149220/2086-149220-0012.flac', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2086/149220/2086-149220-0028.flac', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2086/149220/2086-149220-0025.flac', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2086/149220

100%|██████████| 1/1 [00:00<00:00, 16.48batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "the" } tokens { name: "white" } tokens { name: "double" } tokens { name: "rosebush" } tokens { name: "had" } tokens { name: "evidently" } tokens { name: "been" } tokens { name: "propped" } tokens { name: "up" } tokens { name: "anew" } tokens { name: "against" } tokens { name: "the" } tokens { name: "house" } tokens { name: "since" } tokens { name: "the" } tokens { name: "commencement" } tokens { name: "of" } tokens { name: "the" } tokens { name: "season" } tokens { name: "and" } tokens { name: "a" } tokens { name: "pear" } tokens { name: "tree" } tokens { name: "and" } tokens { name: "three" } tokens { name: "damson" } tokens { name: "trees" } tokens { name: "which" } tokens { name: "except" } tokens { name: "a" } tokens { name: "row" } tokens { name: "of" } tokens { name: "currant" } tokens { name: "bushes" } tokens { name: "constituted" } tokens { name: "the" } tokens { name: "only" 

[NeMo I 2023-11-12 08:27:03 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:03 punctuation_capitalization_infer_dataset:127] Max length: 61
[NeMo I 2023-11-12 08:27:03 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:03 data_preprocessing:406] Min: 59 |                  Max: 59 |                  Mean: 59.0 |                  Median: 59.0
[NeMo I 2023-11-12 08:27:03 data_preprocessing:412] 75 percentile: 59.00
[NeMo I 2023-11-12 08:27:03 data_preprocessing:413] 99 percentile: 59.00


100%|██████████| 1/1 [00:00<00:00, 13.30batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "there" } tokens { name: "were" } tokens { name: "also" } tokens { name: "a" } tokens { name: "few" } tokens { name: "species" } tokens { name: "of" } tokens { name: "antique" } tokens { name: "and" } tokens { name: "hereditary" } tokens { name: "flowers" } tokens { name: "in" } tokens { name: "no" } tokens { name: "very" } tokens { name: "flourishing" } tokens { name: "condition" } tokens { name: "but" } tokens { name: "scrupulously" } tokens { name: "weeded" } tokens { name: "as" } tokens { name: "if" } tokens { name: "some" } tokens { name: "person" } tokens { name: "either" } tokens { name: "out" } tokens { name: "of" } tokens { name: "love" } tokens { name: "or" } tokens { name: "curiosity" } tokens { name: "had" } tokens { name: "been" } tokens { name: "anxious" } tokens { name: "to" } tokens { name: "bring" } tokens { name: "them" } tokens { name: "to" } tokens { name: "such" } t

[NeMo I 2023-11-12 08:27:03 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:03 punctuation_capitalization_infer_dataset:127] Max length: 49
[NeMo I 2023-11-12 08:27:03 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:03 data_preprocessing:406] Min: 47 |                  Max: 47 |                  Mean: 47.0 |                  Median: 47.0
[NeMo I 2023-11-12 08:27:03 data_preprocessing:412] 75 percentile: 47.00
[NeMo I 2023-11-12 08:27:03 data_preprocessing:413] 99 percentile: 47.00


100%|██████████| 1/1 [00:00<00:00, 14.51batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "summer" } tokens { name: "squashes" } tokens { name: "almost" } tokens { name: "in" } tokens { name: "their" } tokens { name: "golden" } tokens { name: "blossom" } tokens { name: "cucumbers" } tokens { name: "now" } tokens { name: "evincing" } tokens { name: "a" } tokens { name: "tendency" } tokens { name: "to" } tokens { name: "spread" } tokens { name: "away" } tokens { name: "from" } tokens { name: "the" } tokens { name: "main" } tokens { name: "stock" } tokens { name: "and" } tokens { name: "ramble" } tokens { name: "far" } tokens { name: "and" } tokens { name: "wide" } tokens { name: "two" } tokens { name: "or" } tokens { name: "three" } tokens { name: "rows" } tokens { name: "of" } tokens { name: "string" } tokens { name: "beans" } tokens { name: "and" } tokens { name: "as" } tokens { name: "many" } tokens { name: "more" } tokens { name: "that" } tokens { name: "were" } tokens { n

[NeMo I 2023-11-12 08:27:03 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:03 punctuation_capitalization_infer_dataset:127] Max length: 64
[NeMo I 2023-11-12 08:27:03 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:03 data_preprocessing:406] Min: 71 |                  Max: 71 |                  Mean: 71.0 |                  Median: 71.0
[NeMo I 2023-11-12 08:27:03 data_preprocessing:412] 75 percentile: 71.00
[NeMo I 2023-11-12 08:27:03 data_preprocessing:413] 99 percentile: 71.00


100%|██████████| 3/3 [00:00<00:00, 13.11batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "phoebe" } tokens { name: "wondered" } tokens { name: "whose" } tokens { name: "care" } tokens { name: "and" } tokens { name: "toil" } tokens { name: "it" } tokens { name: "could" } tokens { name: "have" } tokens { name: "been" } tokens { name: "that" } tokens { name: "had" } tokens { name: "planted" } tokens { name: "these" } tokens { name: "vegetables" } tokens { name: "and" } tokens { name: "kept" } tokens { name: "the" } tokens { name: "soil" } tokens { name: "so" } tokens { name: "clean" } tokens { name: "and" } tokens { name: "orderly" }


[NeMo I 2023-11-12 08:27:04 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:04 punctuation_capitalization_infer_dataset:127] Max length: 26
[NeMo I 2023-11-12 08:27:04 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:04 data_preprocessing:406] Min: 24 |                  Max: 24 |                  Mean: 24.0 |                  Median: 24.0
[NeMo I 2023-11-12 08:27:04 data_preprocessing:412] 75 percentile: 24.00
[NeMo I 2023-11-12 08:27:04 data_preprocessing:413] 99 percentile: 24.00


100%|██████████| 1/1 [00:00<00:00, 15.82batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "bees" } tokens { name: "too" } tokens { name: "strange" } tokens { name: "to" } tokens { name: "say" } tokens { name: "had" } tokens { name: "thought" } tokens { name: "it" } tokens { name: "worth" } tokens { name: "their" } tokens { name: "while" } tokens { name: "to" } tokens { name: "come" } tokens { name: "hither" } tokens { name: "possibly" } tokens { name: "from" } tokens { name: "the" } tokens { name: "range" } tokens { name: "of" } tokens { name: "hives" } tokens { name: "beside" } tokens { name: "some" } tokens { name: "farm" } tokens { name: "house" } tokens { name: "miles" } tokens { name: "away" }


[NeMo I 2023-11-12 08:27:04 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:04 punctuation_capitalization_infer_dataset:127] Max length: 30
[NeMo I 2023-11-12 08:27:04 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:04 data_preprocessing:406] Min: 28 |                  Max: 28 |                  Mean: 28.0 |                  Median: 28.0
[NeMo I 2023-11-12 08:27:04 data_preprocessing:412] 75 percentile: 28.00
[NeMo I 2023-11-12 08:27:04 data_preprocessing:413] 99 percentile: 28.00


100%|██████████| 1/1 [00:00<00:00, 15.94batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "this" } tokens { name: "was" } tokens { name: "a" } tokens { name: "fountain" } tokens { name: "set" } tokens { name: "round" } tokens { name: "with" } tokens { name: "a" } tokens { name: "rim" } tokens { name: "of" } tokens { name: "old" } tokens { name: "mossy" } tokens { name: "stones" } tokens { name: "and" } tokens { name: "paved" } tokens { name: "in" } tokens { name: "its" } tokens { name: "bed" } tokens { name: "with" } tokens { name: "what" } tokens { name: "appeared" } tokens { name: "to" } tokens { name: "be" } tokens { name: "a" } tokens { name: "sort" } tokens { name: "of" } tokens { name: "mosaic" } tokens { name: "work" } tokens { name: "of" } tokens { name: "variously" } tokens { name: "colored" } tokens { name: "pebbles" }


[NeMo I 2023-11-12 08:27:04 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:04 punctuation_capitalization_infer_dataset:127] Max length: 35
[NeMo I 2023-11-12 08:27:04 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:04 data_preprocessing:406] Min: 33 |                  Max: 33 |                  Mean: 33.0 |                  Median: 33.0
[NeMo I 2023-11-12 08:27:04 data_preprocessing:412] 75 percentile: 33.00
[NeMo I 2023-11-12 08:27:04 data_preprocessing:413] 99 percentile: 33.00


100%|██████████| 1/1 [00:00<00:00, 14.89batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "it" } tokens { name: "now" } tokens { name: "contained" } tokens { name: "only" } tokens { name: "chanticleer" } tokens { name: "his" } tokens { name: "two" } tokens { name: "wives" } tokens { name: "and" } tokens { name: "a" } tokens { name: "solitary" } tokens { name: "chicken" }


[NeMo I 2023-11-12 08:27:04 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:04 punctuation_capitalization_infer_dataset:127] Max length: 16
[NeMo I 2023-11-12 08:27:04 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:04 data_preprocessing:406] Min: 14 |                  Max: 14 |                  Mean: 14.0 |                  Median: 14.0
[NeMo I 2023-11-12 08:27:04 data_preprocessing:412] 75 percentile: 14.00
[NeMo I 2023-11-12 08:27:04 data_preprocessing:413] 99 percentile: 14.00


100%|██████████| 1/1 [00:00<00:00, 15.65batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "it" } tokens { name: "was" } tokens { name: "evident" } tokens { name: "that" } tokens { name: "the" } tokens { name: "race" } tokens { name: "had" } tokens { name: "degenerated" } tokens { name: "like" } tokens { name: "many" } tokens { name: "a" } tokens { name: "noble" } tokens { name: "race" } tokens { name: "besides" } tokens { name: "in" } tokens { name: "consequence" } tokens { name: "of" } tokens { name: "too" } tokens { name: "strict" } tokens { name: "a" } tokens { name: "watchfulness" } tokens { name: "to" } tokens { name: "keep" } tokens { name: "it" } tokens { name: "pure" }


[NeMo I 2023-11-12 08:27:05 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:05 punctuation_capitalization_infer_dataset:127] Max length: 30
[NeMo I 2023-11-12 08:27:05 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:05 data_preprocessing:406] Min: 28 |                  Max: 28 |                  Mean: 28.0 |                  Median: 28.0
[NeMo I 2023-11-12 08:27:05 data_preprocessing:412] 75 percentile: 28.00
[NeMo I 2023-11-12 08:27:05 data_preprocessing:413] 99 percentile: 28.00


100%|██████████| 1/1 [00:00<00:00, 11.72batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "these" } tokens { name: "feathered" } tokens { name: "people" } tokens { name: "had" } tokens { name: "existed" } tokens { name: "too" } tokens { name: "long" } tokens { name: "in" } tokens { name: "their" } tokens { name: "distinct" } tokens { name: "variety" } tokens { name: "a" } tokens { name: "fact" } tokens { name: "of" } tokens { name: "which" } tokens { name: "the" } tokens { name: "present" } tokens { name: "representatives" } tokens { name: "judging" } tokens { name: "by" } tokens { name: "their" } tokens { name: "lugubrious" } tokens { name: "deportment" } tokens { name: "seemed" } tokens { name: "to" } tokens { name: "be" } tokens { name: "aware" }


[NeMo I 2023-11-12 08:27:05 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:05 punctuation_capitalization_infer_dataset:127] Max length: 35
[NeMo I 2023-11-12 08:27:05 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:05 data_preprocessing:406] Min: 33 |                  Max: 33 |                  Mean: 33.0 |                  Median: 33.0
[NeMo I 2023-11-12 08:27:05 data_preprocessing:412] 75 percentile: 33.00
[NeMo I 2023-11-12 08:27:05 data_preprocessing:413] 99 percentile: 33.00


100%|██████████| 1/1 [00:00<00:00, 14.25batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "they" } tokens { name: "kept" } tokens { name: "themselves" } tokens { name: "alive" } tokens { name: "unquestionably" } tokens { name: "and" } tokens { name: "laid" } tokens { name: "now" } tokens { name: "and" } tokens { name: "then" } tokens { name: "an" } tokens { name: "egg" } tokens { name: "and" } tokens { name: "hatched" } tokens { name: "a" } tokens { name: "chicken" } tokens { name: "not" } tokens { name: "for" } tokens { name: "any" } tokens { name: "pleasure" } tokens { name: "of" } tokens { name: "their" } tokens { name: "own" } tokens { name: "but" } tokens { name: "that" } tokens { name: "the" } tokens { name: "world" } tokens { name: "might" } tokens { name: "not" } tokens { name: "absolutely" } tokens { name: "lose" } tokens { name: "what" } tokens { name: "had" } tokens { name: "once" } tokens { name: "been" } tokens { name: "so" } tokens { name: "admirable" } tokens 

[NeMo I 2023-11-12 08:27:05 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:05 punctuation_capitalization_infer_dataset:127] Max length: 51
[NeMo I 2023-11-12 08:27:05 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:05 data_preprocessing:406] Min: 49 |                  Max: 49 |                  Mean: 49.0 |                  Median: 49.0
[NeMo I 2023-11-12 08:27:05 data_preprocessing:412] 75 percentile: 49.00
[NeMo I 2023-11-12 08:27:05 data_preprocessing:413] 99 percentile: 49.00


100%|██████████| 1/1 [00:00<00:00, 18.18batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "the" } tokens { name: "distinguishing" } tokens { name: "mark" } tokens { name: "of" } tokens { name: "the" } tokens { name: "hens" } tokens { name: "was" } tokens { name: "a" } tokens { name: "crest" } tokens { name: "of" } tokens { name: "lamentably" } tokens { name: "scanty" } tokens { name: "growth" } tokens { name: "in" } tokens { name: "these" } tokens { name: "latter" } tokens { name: "days" } tokens { name: "but" } tokens { name: "so" } tokens { name: "oddly" } tokens { name: "and" } tokens { name: "wickedly" } tokens { name: "analogous" } tokens { name: "to" } tokens { name: "hepzibah's" } tokens { name: "turban" } tokens { name: "that" } tokens { name: "phoebe" } tokens { name: "to" } tokens { name: "the" } tokens { name: "poignant" } tokens { name: "distress" } tokens { name: "of" } tokens { name: "her" } tokens { name: "conscience" } tokens { name: "but" } tokens { name: "i

[NeMo I 2023-11-12 08:27:06 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:06 punctuation_capitalization_infer_dataset:127] Max length: 64
[NeMo I 2023-11-12 08:27:06 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:06 data_preprocessing:406] Min: 71 |                  Max: 71 |                  Mean: 71.0 |                  Median: 71.0
[NeMo I 2023-11-12 08:27:06 data_preprocessing:412] 75 percentile: 71.00
[NeMo I 2023-11-12 08:27:06 data_preprocessing:413] 99 percentile: 71.00


100%|██████████| 3/3 [00:00<00:00, 12.46batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "the" } tokens { name: "chicken" } tokens { name: "crept" } tokens { name: "through" } tokens { name: "the" } tokens { name: "pales" } tokens { name: "of" } tokens { name: "the" } tokens { name: "coop" } tokens { name: "and" } tokens { name: "ran" } tokens { name: "with" } tokens { name: "some" } tokens { name: "show" } tokens { name: "of" } tokens { name: "liveliness" } tokens { name: "to" } tokens { name: "her" } tokens { name: "feet" } tokens { name: "while" } tokens { name: "chanticleer" } tokens { name: "and" } tokens { name: "the" } tokens { name: "ladies" } tokens { name: "of" } tokens { name: "his" } tokens { name: "household" } tokens { name: "regarded" } tokens { name: "her" } tokens { name: "with" } tokens { name: "queer" } tokens { name: "sidelong" } tokens { name: "glances" } tokens { name: "and" } tokens { name: "then" } tokens { name: "croaked" } tokens { name: "one" } to

[NeMo I 2023-11-12 08:27:06 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:06 punctuation_capitalization_infer_dataset:127] Max length: 57
[NeMo I 2023-11-12 08:27:06 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:06 data_preprocessing:406] Min: 55 |                  Max: 55 |                  Mean: 55.0 |                  Median: 55.0
[NeMo I 2023-11-12 08:27:06 data_preprocessing:412] 75 percentile: 55.00
[NeMo I 2023-11-12 08:27:06 data_preprocessing:413] 99 percentile: 55.00


100%|██████████| 1/1 [00:00<00:00, 17.03batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "so" } tokens { name: "wise" } tokens { name: "as" } tokens { name: "well" } tokens { name: "as" } tokens { name: "antique" } tokens { name: "was" } tokens { name: "their" } tokens { name: "aspect" } tokens { name: "as" } tokens { name: "to" } tokens { name: "give" } tokens { name: "color" } tokens { name: "to" } tokens { name: "the" } tokens { name: "idea" } tokens { name: "not" } tokens { name: "merely" } tokens { name: "that" } tokens { name: "they" } tokens { name: "were" } tokens { name: "the" } tokens { name: "descendants" } tokens { name: "of" } tokens { name: "a" } tokens { name: "time" } tokens { name: "honored" } tokens { name: "race" } tokens { name: "but" } tokens { name: "that" } tokens { name: "they" } tokens { name: "had" } tokens { name: "existed" } tokens { name: "in" } tokens { name: "their" } tokens { name: "individual" } tokens { name: "capacity" } tokens { name: "ev

[NeMo I 2023-11-12 08:27:07 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:07 punctuation_capitalization_infer_dataset:127] Max length: 57
[NeMo I 2023-11-12 08:27:07 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:07 data_preprocessing:406] Min: 55 |                  Max: 55 |                  Mean: 55.0 |                  Median: 55.0
[NeMo I 2023-11-12 08:27:07 data_preprocessing:412] 75 percentile: 55.00
[NeMo I 2023-11-12 08:27:07 data_preprocessing:413] 99 percentile: 55.00


100%|██████████| 1/1 [00:00<00:00,  7.80batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "he" } tokens { name: "held" } tokens { name: "a" } tokens { name: "hoe" } tokens { name: "in" } tokens { name: "his" } tokens { name: "hand" } tokens { name: "and" } tokens { name: "while" } tokens { name: "phoebe" } tokens { name: "was" } tokens { name: "gone" } tokens { name: "in" } tokens { name: "quest" } tokens { name: "of" } tokens { name: "the" } tokens { name: "crumbs" } tokens { name: "had" } tokens { name: "begun" } tokens { name: "to" } tokens { name: "busy" } tokens { name: "himself" } tokens { name: "with" } tokens { name: "drawing" } tokens { name: "up" } tokens { name: "fresh" } tokens { name: "earth" } tokens { name: "about" } tokens { name: "the" } tokens { name: "roots" } tokens { name: "of" } tokens { name: "the" } tokens { name: "tomatoes" }


[NeMo I 2023-11-12 08:27:07 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:07 punctuation_capitalization_infer_dataset:127] Max length: 38
[NeMo I 2023-11-12 08:27:07 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:07 data_preprocessing:406] Min: 36 |                  Max: 36 |                  Mean: 36.0 |                  Median: 36.0
[NeMo I 2023-11-12 08:27:07 data_preprocessing:412] 75 percentile: 36.00
[NeMo I 2023-11-12 08:27:07 data_preprocessing:413] 99 percentile: 36.00


100%|██████████| 1/1 [00:00<00:00, 15.42batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "they" } tokens { name: "have" } tokens { name: "known" } tokens { name: "me" } tokens { name: "much" } tokens { name: "longer" } tokens { name: "but" } tokens { name: "never" } tokens { name: "honor" } tokens { name: "me" } tokens { name: "with" } tokens { name: "any" } tokens { name: "familiarity" } tokens { name: "though" } tokens { name: "hardly" } tokens { name: "a" } tokens { name: "day" } tokens { name: "passes" } tokens { name: "without" } tokens { name: "my" } tokens { name: "bringing" } tokens { name: "them" } tokens { name: "food" }


[NeMo I 2023-11-12 08:27:07 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:07 punctuation_capitalization_infer_dataset:127] Max length: 25
[NeMo I 2023-11-12 08:27:07 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:07 data_preprocessing:406] Min: 23 |                  Max: 23 |                  Mean: 23.0 |                  Median: 23.0
[NeMo I 2023-11-12 08:27:07 data_preprocessing:412] 75 percentile: 23.00
[NeMo I 2023-11-12 08:27:07 data_preprocessing:413] 99 percentile: 23.00


100%|██████████| 1/1 [00:00<00:00, 11.80batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "miss" } tokens { name: "hepzibah" } tokens { name: "i" } tokens { name: "suppose" } tokens { name: "will" } tokens { name: "interweave" } tokens { name: "the" } tokens { name: "fact" } tokens { name: "with" } tokens { name: "her" } tokens { name: "other" } tokens { name: "traditions" } tokens { name: "and" } tokens { name: "set" } tokens { name: "it" } tokens { name: "down" } tokens { name: "that" } tokens { name: "the" } tokens { name: "fowls" } tokens { name: "know" } tokens { name: "you" } tokens { name: "to" } tokens { name: "be" } tokens { name: "a" } tokens { name: "pyncheon" }


[NeMo I 2023-11-12 08:27:07 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:07 punctuation_capitalization_infer_dataset:127] Max length: 36
[NeMo I 2023-11-12 08:27:07 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:07 data_preprocessing:406] Min: 34 |                  Max: 34 |                  Mean: 34.0 |                  Median: 34.0
[NeMo I 2023-11-12 08:27:07 data_preprocessing:412] 75 percentile: 34.00
[NeMo I 2023-11-12 08:27:07 data_preprocessing:413] 99 percentile: 34.00


100%|██████████| 1/1 [00:00<00:00, 12.44batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "ah" } tokens { name: "but" } tokens { name: "these" } tokens { name: "hens" } tokens { name: "answered" } tokens { name: "the" } tokens { name: "young" } tokens { name: "man" } tokens { name: "these" } tokens { name: "hens" } tokens { name: "of" } tokens { name: "aristocratic" } tokens { name: "lineage" } tokens { name: "would" } tokens { name: "scorn" } tokens { name: "to" } tokens { name: "understand" } tokens { name: "the" } tokens { name: "vulgar" } tokens { name: "language" } tokens { name: "of" } tokens { name: "a" } tokens { name: "barn" } tokens { name: "yard" } tokens { name: "fowl" }


[NeMo I 2023-11-12 08:27:08 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:08 punctuation_capitalization_infer_dataset:127] Max length: 32
[NeMo I 2023-11-12 08:27:08 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:08 data_preprocessing:406] Min: 30 |                  Max: 30 |                  Mean: 30.0 |                  Median: 30.0
[NeMo I 2023-11-12 08:27:08 data_preprocessing:412] 75 percentile: 30.00
[NeMo I 2023-11-12 08:27:08 data_preprocessing:413] 99 percentile: 30.00


100%|██████████| 1/1 [00:00<00:00, 14.58batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "i" } tokens { name: "prefer" } tokens { name: "to" } tokens { name: "think" } tokens { name: "and" } tokens { name: "so" } tokens { name: "would" } tokens { name: "miss" } tokens { name: "hepzibah" } tokens { name: "that" } tokens { name: "they" } tokens { name: "recognize" } tokens { name: "the" } tokens { name: "family" } tokens { name: "tone" } tokens { name: "for" } tokens { name: "you" } tokens { name: "are" } tokens { name: "a" } tokens { name: "pyncheon" }


[NeMo I 2023-11-12 08:27:08 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:08 punctuation_capitalization_infer_dataset:127] Max length: 27
[NeMo I 2023-11-12 08:27:08 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:08 data_preprocessing:406] Min: 25 |                  Max: 25 |                  Mean: 25.0 |                  Median: 25.0
[NeMo I 2023-11-12 08:27:08 data_preprocessing:412] 75 percentile: 25.00
[NeMo I 2023-11-12 08:27:08 data_preprocessing:413] 99 percentile: 25.00


100%|██████████| 1/1 [00:00<00:00, 13.79batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "my" } tokens { name: "name" } tokens { name: "is" } tokens { name: "phoebe" } tokens { name: "pyncheon" } tokens { name: "said" } tokens { name: "the" } tokens { name: "girl" } tokens { name: "with" } tokens { name: "a" } tokens { name: "manner" } tokens { name: "of" } tokens { name: "some" } tokens { name: "reserve" } tokens { name: "for" } tokens { name: "she" } tokens { name: "was" } tokens { name: "aware" } tokens { name: "that" } tokens { name: "her" } tokens { name: "new" } tokens { name: "acquaintance" } tokens { name: "could" } tokens { name: "be" } tokens { name: "no" } tokens { name: "other" } tokens { name: "than" } tokens { name: "the" } tokens { name: "daguerreotypist" } tokens { name: "of" } tokens { name: "whose" } tokens { name: "lawless" } tokens { name: "propensities" } tokens { name: "the" } tokens { name: "old" } tokens { name: "maid" } tokens { name: "had" } tokens

[NeMo I 2023-11-12 08:27:08 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:08 punctuation_capitalization_infer_dataset:127] Max length: 55
[NeMo I 2023-11-12 08:27:08 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:08 data_preprocessing:406] Min: 53 |                  Max: 53 |                  Mean: 53.0 |                  Median: 53.0
[NeMo I 2023-11-12 08:27:08 data_preprocessing:412] 75 percentile: 53.00
[NeMo I 2023-11-12 08:27:08 data_preprocessing:413] 99 percentile: 53.00


100%|██████████| 1/1 [00:00<00:00,  9.58batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "i" } tokens { name: "turn" } tokens { name: "up" } tokens { name: "the" } tokens { name: "earth" } tokens { name: "by" } tokens { name: "way" } tokens { name: "of" } tokens { name: "pastime" }


[NeMo I 2023-11-12 08:27:08 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:08 punctuation_capitalization_infer_dataset:127] Max length: 12
[NeMo I 2023-11-12 08:27:08 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:08 data_preprocessing:406] Min: 10 |                  Max: 10 |                  Mean: 10.0 |                  Median: 10.0
[NeMo I 2023-11-12 08:27:08 data_preprocessing:412] 75 percentile: 10.00
[NeMo I 2023-11-12 08:27:08 data_preprocessing:413] 99 percentile: 10.00


100%|██████████| 1/1 [00:00<00:00, 25.20batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "it" } tokens { name: "is" } tokens { name: "like" } tokens { name: "a" } tokens { name: "bandage" } tokens { name: "over" } tokens { name: "one's" } tokens { name: "eyes" } tokens { name: "to" } tokens { name: "come" } tokens { name: "into" } tokens { name: "it" }


[NeMo I 2023-11-12 08:27:08 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:08 punctuation_capitalization_infer_dataset:127] Max length: 16
[NeMo I 2023-11-12 08:27:08 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:08 data_preprocessing:406] Min: 14 |                  Max: 14 |                  Mean: 14.0 |                  Median: 14.0
[NeMo I 2023-11-12 08:27:08 data_preprocessing:412] 75 percentile: 14.00
[NeMo I 2023-11-12 08:27:08 data_preprocessing:413] 99 percentile: 14.00


100%|██████████| 1/1 [00:00<00:00, 17.76batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "if" } tokens { name: "you" } tokens { name: "would" } tokens { name: "permit" } tokens { name: "me" } tokens { name: "said" } tokens { name: "the" } tokens { name: "artist" } tokens { name: "looking" } tokens { name: "at" } tokens { name: "phoebe" } tokens { name: "i" } tokens { name: "should" } tokens { name: "like" } tokens { name: "to" } tokens { name: "try" } tokens { name: "whether" } tokens { name: "the" } tokens { name: "daguerreotype" } tokens { name: "can" } tokens { name: "bring" } tokens { name: "out" } tokens { name: "disagreeable" } tokens { name: "traits" } tokens { name: "on" } tokens { name: "a" } tokens { name: "perfectly" } tokens { name: "amiable" } tokens { name: "face" }


[NeMo I 2023-11-12 08:27:09 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:09 punctuation_capitalization_infer_dataset:127] Max length: 36
[NeMo I 2023-11-12 08:27:09 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:09 data_preprocessing:406] Min: 34 |                  Max: 34 |                  Mean: 34.0 |                  Median: 34.0
[NeMo I 2023-11-12 08:27:09 data_preprocessing:412] 75 percentile: 34.00
[NeMo I 2023-11-12 08:27:09 data_preprocessing:413] 99 percentile: 34.00


100%|██████████| 1/1 [00:00<00:00, 16.12batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "most" } tokens { name: "of" } tokens { name: "my" } tokens { name: "likenesses" } tokens { name: "do" } tokens { name: "look" } tokens { name: "unamiable" } tokens { name: "but" } tokens { name: "the" } tokens { name: "very" } tokens { name: "sufficient" } tokens { name: "reason" } tokens { name: "i" } tokens { name: "fancy" } tokens { name: "is" } tokens { name: "because" } tokens { name: "the" } tokens { name: "originals" } tokens { name: "are" } tokens { name: "so" }


[NeMo I 2023-11-12 08:27:09 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:09 punctuation_capitalization_infer_dataset:127] Max length: 25
[NeMo I 2023-11-12 08:27:09 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:09 data_preprocessing:406] Min: 23 |                  Max: 23 |                  Mean: 23.0 |                  Median: 23.0
[NeMo I 2023-11-12 08:27:09 data_preprocessing:412] 75 percentile: 23.00
[NeMo I 2023-11-12 08:27:09 data_preprocessing:413] 99 percentile: 23.00


100%|██████████| 1/1 [00:00<00:00, 14.86batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "there" } tokens { name: "is" } tokens { name: "a" } tokens { name: "wonderful" } tokens { name: "insight" } tokens { name: "in" } tokens { name: "heaven's" } tokens { name: "broad" } tokens { name: "and" } tokens { name: "simple" } tokens { name: "sunshine" }


[NeMo I 2023-11-12 08:27:09 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:09 punctuation_capitalization_infer_dataset:127] Max length: 15
[NeMo I 2023-11-12 08:27:09 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:09 data_preprocessing:406] Min: 13 |                  Max: 13 |                  Mean: 13.0 |                  Median: 13.0
[NeMo I 2023-11-12 08:27:09 data_preprocessing:412] 75 percentile: 13.00
[NeMo I 2023-11-12 08:27:09 data_preprocessing:413] 99 percentile: 13.00


100%|██████████| 1/1 [00:00<00:00, 19.59batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "while" } tokens { name: "we" } tokens { name: "give" } tokens { name: "it" } tokens { name: "credit" } tokens { name: "only" } tokens { name: "for" } tokens { name: "depicting" } tokens { name: "the" } tokens { name: "merest" } tokens { name: "surface" } tokens { name: "it" } tokens { name: "actually" } tokens { name: "brings" } tokens { name: "out" } tokens { name: "the" } tokens { name: "secret" } tokens { name: "character" } tokens { name: "with" } tokens { name: "a" } tokens { name: "truth" } tokens { name: "that" } tokens { name: "no" } tokens { name: "painter" } tokens { name: "would" } tokens { name: "ever" } tokens { name: "venture" } tokens { name: "upon" } tokens { name: "even" } tokens { name: "could" } tokens { name: "he" } tokens { name: "detect" } tokens { name: "it" }


[NeMo I 2023-11-12 08:27:09 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:09 punctuation_capitalization_infer_dataset:127] Max length: 36
[NeMo I 2023-11-12 08:27:09 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:09 data_preprocessing:406] Min: 34 |                  Max: 34 |                  Mean: 34.0 |                  Median: 34.0
[NeMo I 2023-11-12 08:27:09 data_preprocessing:412] 75 percentile: 34.00
[NeMo I 2023-11-12 08:27:09 data_preprocessing:413] 99 percentile: 34.00


100%|██████████| 1/1 [00:00<00:00, 13.04batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "yet" } tokens { name: "the" } tokens { name: "original" } tokens { name: "wears" } tokens { name: "to" } tokens { name: "common" } tokens { name: "eyes" } tokens { name: "a" } tokens { name: "very" } tokens { name: "different" } tokens { name: "expression" }


[NeMo I 2023-11-12 08:27:10 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:10 punctuation_capitalization_infer_dataset:127] Max length: 13
[NeMo I 2023-11-12 08:27:10 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:10 data_preprocessing:406] Min: 11 |                  Max: 11 |                  Mean: 11.0 |                  Median: 11.0
[NeMo I 2023-11-12 08:27:10 data_preprocessing:412] 75 percentile: 11.00
[NeMo I 2023-11-12 08:27:10 data_preprocessing:413] 99 percentile: 11.00


100%|██████████| 1/1 [00:00<00:00, 18.14batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "he" } tokens { name: "exhibited" } tokens { name: "a" } tokens { name: "daguerreotype" } tokens { name: "miniature" } tokens { name: "in" } tokens { name: "a" } tokens { name: "morocco" } tokens { name: "case" }


[NeMo I 2023-11-12 08:27:10 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:10 punctuation_capitalization_infer_dataset:127] Max length: 14
[NeMo I 2023-11-12 08:27:10 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:10 data_preprocessing:406] Min: 12 |                  Max: 12 |                  Mean: 12.0 |                  Median: 12.0
[NeMo I 2023-11-12 08:27:10 data_preprocessing:412] 75 percentile: 12.00
[NeMo I 2023-11-12 08:27:10 data_preprocessing:413] 99 percentile: 12.00


100%|██████████| 1/1 [00:00<00:00, 18.02batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "phoebe" } tokens { name: "merely" } tokens { name: "glanced" } tokens { name: "at" } tokens { name: "it" } tokens { name: "and" } tokens { name: "gave" } tokens { name: "it" } tokens { name: "back" }


[NeMo I 2023-11-12 08:27:10 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:10 punctuation_capitalization_infer_dataset:127] Max length: 11
[NeMo I 2023-11-12 08:27:10 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:10 data_preprocessing:406] Min: 9 |                  Max: 9 |                  Mean: 9.0 |                  Median: 9.0
[NeMo I 2023-11-12 08:27:10 data_preprocessing:412] 75 percentile: 9.00
[NeMo I 2023-11-12 08:27:10 data_preprocessing:413] 99 percentile: 9.00


100%|██████████| 1/1 [00:00<00:00, 23.32batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "i" } tokens { name: "can" } tokens { name: "assure" } tokens { name: "you" } tokens { name: "that" } tokens { name: "this" } tokens { name: "is" } tokens { name: "a" } tokens { name: "modern" } tokens { name: "face" } tokens { name: "and" } tokens { name: "one" } tokens { name: "which" } tokens { name: "you" } tokens { name: "will" } tokens { name: "very" } tokens { name: "probably" } tokens { name: "meet" }


[NeMo I 2023-11-12 08:27:10 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:10 punctuation_capitalization_infer_dataset:127] Max length: 20
[NeMo I 2023-11-12 08:27:10 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:10 data_preprocessing:406] Min: 18 |                  Max: 18 |                  Mean: 18.0 |                  Median: 18.0
[NeMo I 2023-11-12 08:27:10 data_preprocessing:412] 75 percentile: 18.00
[NeMo I 2023-11-12 08:27:10 data_preprocessing:413] 99 percentile: 18.00


100%|██████████| 1/1 [00:00<00:00, 16.57batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "the" } tokens { name: "sun" } tokens { name: "as" } tokens { name: "you" } tokens { name: "see" } tokens { name: "tells" } tokens { name: "quite" } tokens { name: "another" } tokens { name: "story" } tokens { name: "and" } tokens { name: "will" } tokens { name: "not" } tokens { name: "be" } tokens { name: "coaxed" } tokens { name: "out" } tokens { name: "of" } tokens { name: "it" } tokens { name: "after" } tokens { name: "half" } tokens { name: "a" } tokens { name: "dozen" } tokens { name: "patient" } tokens { name: "attempts" } tokens { name: "on" } tokens { name: "my" } tokens { name: "part" }


[NeMo I 2023-11-12 08:27:10 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:10 punctuation_capitalization_infer_dataset:127] Max length: 29
[NeMo I 2023-11-12 08:27:10 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:10 data_preprocessing:406] Min: 27 |                  Max: 27 |                  Mean: 27.0 |                  Median: 27.0
[NeMo I 2023-11-12 08:27:10 data_preprocessing:412] 75 percentile: 27.00
[NeMo I 2023-11-12 08:27:10 data_preprocessing:413] 99 percentile: 27.00


100%|██████████| 1/1 [00:00<00:00, 16.99batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "here" } tokens { name: "we" } tokens { name: "have" } tokens { name: "the" } tokens { name: "man" } tokens { name: "sly" } tokens { name: "subtle" } tokens { name: "hard" } tokens { name: "imperious" } tokens { name: "and" } tokens { name: "withal" } tokens { name: "cold" } tokens { name: "as" } tokens { name: "ice" } tokens { name: "look" } tokens { name: "at" } tokens { name: "that" } tokens { name: "eye" }


[NeMo I 2023-11-12 08:27:10 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:10 punctuation_capitalization_infer_dataset:127] Max length: 23
[NeMo I 2023-11-12 08:27:10 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:10 data_preprocessing:406] Min: 21 |                  Max: 21 |                  Mean: 21.0 |                  Median: 21.0
[NeMo I 2023-11-12 08:27:10 data_preprocessing:412] 75 percentile: 21.00
[NeMo I 2023-11-12 08:27:10 data_preprocessing:413] 99 percentile: 21.00


100%|██████████| 1/1 [00:00<00:00, 12.27batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "and" } tokens { name: "yet" } tokens { name: "if" } tokens { name: "you" } tokens { name: "could" } tokens { name: "only" } tokens { name: "see" } tokens { name: "the" } tokens { name: "benign" } tokens { name: "smile" } tokens { name: "of" } tokens { name: "the" } tokens { name: "original" }


[NeMo I 2023-11-12 08:27:11 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:11 punctuation_capitalization_infer_dataset:127] Max length: 15
[NeMo I 2023-11-12 08:27:11 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:11 data_preprocessing:406] Min: 13 |                  Max: 13 |                  Mean: 13.0 |                  Median: 13.0
[NeMo I 2023-11-12 08:27:11 data_preprocessing:412] 75 percentile: 13.00
[NeMo I 2023-11-12 08:27:11 data_preprocessing:413] 99 percentile: 13.00


100%|██████████| 1/1 [00:00<00:00, 22.96batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "well" } tokens { name: "i" } tokens { name: "don't" } tokens { name: "wish" } tokens { name: "to" } tokens { name: "see" } tokens { name: "it" } tokens { name: "any" } tokens { name: "more" } tokens { name: "observed" } tokens { name: "phoebe" } tokens { name: "turning" } tokens { name: "away" } tokens { name: "her" } tokens { name: "eyes" } tokens { name: "it" } tokens { name: "is" } tokens { name: "certainly" } tokens { name: "very" } tokens { name: "like" } tokens { name: "the" } tokens { name: "old" } tokens { name: "portrait" }


[NeMo I 2023-11-12 08:27:11 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:11 punctuation_capitalization_infer_dataset:127] Max length: 27
[NeMo I 2023-11-12 08:27:11 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:11 data_preprocessing:406] Min: 25 |                  Max: 25 |                  Mean: 25.0 |                  Median: 25.0
[NeMo I 2023-11-12 08:27:11 data_preprocessing:412] 75 percentile: 25.00
[NeMo I 2023-11-12 08:27:11 data_preprocessing:413] 99 percentile: 25.00


100%|██████████| 1/1 [00:00<00:00, 17.98batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "if" } tokens { name: "the" } tokens { name: "original" } tokens { name: "is" } tokens { name: "still" } tokens { name: "in" } tokens { name: "the" } tokens { name: "world" } tokens { name: "i" } tokens { name: "think" } tokens { name: "he" } tokens { name: "might" } tokens { name: "defy" } tokens { name: "the" } tokens { name: "sun" } tokens { name: "to" } tokens { name: "make" } tokens { name: "him" } tokens { name: "look" } tokens { name: "stern" } tokens { name: "and" } tokens { name: "hard" }


[NeMo I 2023-11-12 08:27:11 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:11 punctuation_capitalization_infer_dataset:127] Max length: 25
[NeMo I 2023-11-12 08:27:11 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:11 data_preprocessing:406] Min: 23 |                  Max: 23 |                  Mean: 23.0 |                  Median: 23.0
[NeMo I 2023-11-12 08:27:11 data_preprocessing:412] 75 percentile: 23.00
[NeMo I 2023-11-12 08:27:11 data_preprocessing:413] 99 percentile: 23.00


100%|██████████| 1/1 [00:00<00:00, 16.00batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "is" } tokens { name: "there" } tokens { name: "nothing" } tokens { name: "wild" } tokens { name: "in" } tokens { name: "the" } tokens { name: "eye" } tokens { name: "continued" } tokens { name: "holgrave" } tokens { name: "so" } tokens { name: "earnestly" } tokens { name: "that" } tokens { name: "it" } tokens { name: "embarrassed" } tokens { name: "phoebe" } tokens { name: "as" } tokens { name: "did" } tokens { name: "also" } tokens { name: "the" } tokens { name: "quiet" } tokens { name: "freedom" } tokens { name: "with" } tokens { name: "which" } tokens { name: "he" } tokens { name: "presumed" } tokens { name: "on" } tokens { name: "their" } tokens { name: "so" } tokens { name: "recent" } tokens { name: "acquaintance" }


[NeMo I 2023-11-12 08:27:11 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:11 punctuation_capitalization_infer_dataset:127] Max length: 35
[NeMo I 2023-11-12 08:27:11 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:11 data_preprocessing:406] Min: 33 |                  Max: 33 |                  Mean: 33.0 |                  Median: 33.0
[NeMo I 2023-11-12 08:27:11 data_preprocessing:412] 75 percentile: 33.00
[NeMo I 2023-11-12 08:27:11 data_preprocessing:413] 99 percentile: 33.00


100%|██████████| 1/1 [00:00<00:00, 16.10batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "it" } tokens { name: "is" } tokens { name: "nonsense" } tokens { name: "said" } tokens { name: "phoebe" } tokens { name: "a" } tokens { name: "little" } tokens { name: "impatiently" } tokens { name: "for" } tokens { name: "us" } tokens { name: "to" } tokens { name: "talk" } tokens { name: "about" } tokens { name: "a" } tokens { name: "picture" } tokens { name: "which" } tokens { name: "you" } tokens { name: "have" } tokens { name: "never" } tokens { name: "seen" }


[NeMo I 2023-11-12 08:27:11 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:11 punctuation_capitalization_infer_dataset:127] Max length: 22
[NeMo I 2023-11-12 08:27:11 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:11 data_preprocessing:406] Min: 20 |                  Max: 20 |                  Mean: 20.0 |                  Median: 20.0
[NeMo I 2023-11-12 08:27:11 data_preprocessing:412] 75 percentile: 20.00
[NeMo I 2023-11-12 08:27:11 data_preprocessing:413] 99 percentile: 20.00


100%|██████████| 1/1 [00:00<00:00, 21.50batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "since" } tokens { name: "you" } tokens { name: "are" } tokens { name: "a" } tokens { name: "friend" } tokens { name: "of" } tokens { name: "my" } tokens { name: "cousin" } tokens { name: "hepzibah's" } tokens { name: "you" } tokens { name: "should" } tokens { name: "ask" } tokens { name: "her" } tokens { name: "to" } tokens { name: "show" } tokens { name: "you" } tokens { name: "the" } tokens { name: "picture" }


[NeMo I 2023-11-12 08:27:12 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:12 punctuation_capitalization_infer_dataset:127] Max length: 25
[NeMo I 2023-11-12 08:27:12 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:12 data_preprocessing:406] Min: 23 |                  Max: 23 |                  Mean: 23.0 |                  Median: 23.0
[NeMo I 2023-11-12 08:27:12 data_preprocessing:412] 75 percentile: 23.00
[NeMo I 2023-11-12 08:27:12 data_preprocessing:413] 99 percentile: 23.00


100%|██████████| 1/1 [00:00<00:00, 12.37batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "so" } tokens { name: "we" } tokens { name: "will" } tokens { name: "be" } tokens { name: "fellow" } tokens { name: "laborers" } tokens { name: "somewhat" } tokens { name: "on" } tokens { name: "the" } tokens { name: "community" } tokens { name: "system" }


[NeMo I 2023-11-12 08:27:12 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:12 punctuation_capitalization_infer_dataset:127] Max length: 13
[NeMo I 2023-11-12 08:27:12 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:12 data_preprocessing:406] Min: 11 |                  Max: 11 |                  Mean: 11.0 |                  Median: 11.0
[NeMo I 2023-11-12 08:27:12 data_preprocessing:412] 75 percentile: 11.00
[NeMo I 2023-11-12 08:27:12 data_preprocessing:413] 99 percentile: 11.00


100%|██████████| 1/1 [00:00<00:00, 19.49batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "she" } tokens { name: "did" } tokens { name: "not" } tokens { name: "altogether" } tokens { name: "like" } tokens { name: "him" }


[NeMo I 2023-11-12 08:27:12 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:12 punctuation_capitalization_infer_dataset:127] Max length: 8
[NeMo I 2023-11-12 08:27:12 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:12 data_preprocessing:406] Min: 6 |                  Max: 6 |                  Mean: 6.0 |                  Median: 6.0
[NeMo I 2023-11-12 08:27:12 data_preprocessing:412] 75 percentile: 6.00
[NeMo I 2023-11-12 08:27:12 data_preprocessing:413] 99 percentile: 6.00


100%|██████████| 1/1 [00:00<00:00, 29.17batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "oh" } tokens { name: "rejoined" } tokens { name: "the" } tokens { name: "daguerreotypist" } tokens { name: "because" } tokens { name: "like" } tokens { name: "an" } tokens { name: "old" } tokens { name: "lady's" } tokens { name: "cup" } tokens { name: "of" } tokens { name: "tea" } tokens { name: "it" } tokens { name: "is" } tokens { name: "water" } tokens { name: "bewitched" }


[NeMo I 2023-11-12 08:27:12 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:12 punctuation_capitalization_infer_dataset:127] Max length: 27
[NeMo I 2023-11-12 08:27:12 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:12 data_preprocessing:406] Min: 25 |                  Max: 25 |                  Mean: 25.0 |                  Median: 25.0
[NeMo I 2023-11-12 08:27:12 data_preprocessing:412] 75 percentile: 25.00
[NeMo I 2023-11-12 08:27:12 data_preprocessing:413] 99 percentile: 25.00


100%|██████████| 1/1 [00:00<00:00, 17.48batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "she" } tokens { name: "was" } tokens { name: "indistinctly" } tokens { name: "aware" } tokens { name: "however" } tokens { name: "that" } tokens { name: "the" } tokens { name: "gaunt" } tokens { name: "figure" } tokens { name: "of" } tokens { name: "the" } tokens { name: "old" } tokens { name: "gentlewoman" } tokens { name: "was" } tokens { name: "sitting" } tokens { name: "in" } tokens { name: "one" } tokens { name: "of" } tokens { name: "the" } tokens { name: "straight" } tokens { name: "backed" } tokens { name: "chairs" } tokens { name: "a" } tokens { name: "little" } tokens { name: "withdrawn" } tokens { name: "from" } tokens { name: "the" } tokens { name: "window" } tokens { name: "the" } tokens { name: "faint" } tokens { name: "gleam" } tokens { name: "of" } tokens { name: "which" } tokens { name: "showed" } tokens { name: "the" } tokens { name: "blanched" } tokens { name: "palen

[NeMo I 2023-11-12 08:27:12 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:12 punctuation_capitalization_infer_dataset:127] Max length: 54
[NeMo I 2023-11-12 08:27:12 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:12 data_preprocessing:406] Min: 52 |                  Max: 52 |                  Mean: 52.0 |                  Median: 52.0
[NeMo I 2023-11-12 08:27:12 data_preprocessing:412] 75 percentile: 52.00
[NeMo I 2023-11-12 08:27:12 data_preprocessing:413] 99 percentile: 52.00


100%|██████████| 1/1 [00:00<00:00, 13.97batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "but" } tokens { name: "put" } tokens { name: "it" } tokens { name: "on" } tokens { name: "the" } tokens { name: "table" } tokens { name: "in" } tokens { name: "the" } tokens { name: "corner" } tokens { name: "of" } tokens { name: "the" } tokens { name: "passage" }


[NeMo I 2023-11-12 08:27:13 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:13 punctuation_capitalization_infer_dataset:127] Max length: 14
[NeMo I 2023-11-12 08:27:13 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:13 data_preprocessing:406] Min: 12 |                  Max: 12 |                  Mean: 12.0 |                  Median: 12.0
[NeMo I 2023-11-12 08:27:13 data_preprocessing:412] 75 percentile: 12.00
[NeMo I 2023-11-12 08:27:13 data_preprocessing:413] 99 percentile: 12.00


100%|██████████| 1/1 [00:00<00:00, 24.49batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "what" } tokens { name: "an" } tokens { name: "instrument" } tokens { name: "is" } tokens { name: "the" } tokens { name: "human" } tokens { name: "voice" }


[NeMo I 2023-11-12 08:27:13 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:13 punctuation_capitalization_infer_dataset:127] Max length: 9
[NeMo I 2023-11-12 08:27:13 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:13 data_preprocessing:406] Min: 7 |                  Max: 7 |                  Mean: 7.0 |                  Median: 7.0
[NeMo I 2023-11-12 08:27:13 data_preprocessing:412] 75 percentile: 7.00
[NeMo I 2023-11-12 08:27:13 data_preprocessing:413] 99 percentile: 7.00


100%|██████████| 1/1 [00:00<00:00, 20.93batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "how" } tokens { name: "wonderfully" } tokens { name: "responsive" } tokens { name: "to" } tokens { name: "every" } tokens { name: "emotion" } tokens { name: "of" } tokens { name: "the" } tokens { name: "human" } tokens { name: "soul" }


[NeMo I 2023-11-12 08:27:13 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:13 punctuation_capitalization_infer_dataset:127] Max length: 13
[NeMo I 2023-11-12 08:27:13 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:13 data_preprocessing:406] Min: 11 |                  Max: 11 |                  Mean: 11.0 |                  Median: 11.0
[NeMo I 2023-11-12 08:27:13 data_preprocessing:412] 75 percentile: 11.00
[NeMo I 2023-11-12 08:27:13 data_preprocessing:413] 99 percentile: 11.00


100%|██████████| 1/1 [00:00<00:00, 21.49batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "fewer" } tokens { name: "words" } tokens { name: "than" } tokens { name: "before" } tokens { name: "but" } tokens { name: "with" } tokens { name: "the" } tokens { name: "same" } tokens { name: "mysterious" } tokens { name: "music" } tokens { name: "in" } tokens { name: "them" }


[NeMo I 2023-11-12 08:27:13 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:13 punctuation_capitalization_infer_dataset:127] Max length: 14
[NeMo I 2023-11-12 08:27:13 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:13 data_preprocessing:406] Min: 12 |                  Max: 12 |                  Mean: 12.0 |                  Median: 12.0
[NeMo I 2023-11-12 08:27:13 data_preprocessing:412] 75 percentile: 12.00
[NeMo I 2023-11-12 08:27:13 data_preprocessing:413] 99 percentile: 12.00


100%|██████████| 1/1 [00:00<00:00, 18.80batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "pray" } tokens { name: "go" } tokens { name: "to" } tokens { name: "bed" } tokens { name: "for" } tokens { name: "i" } tokens { name: "am" } tokens { name: "sure" } tokens { name: "you" } tokens { name: "must" } tokens { name: "need" } tokens { name: "rest" }


[NeMo I 2023-11-12 08:27:13 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:13 punctuation_capitalization_infer_dataset:127] Max length: 14
[NeMo I 2023-11-12 08:27:13 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:13 data_preprocessing:406] Min: 12 |                  Max: 12 |                  Mean: 12.0 |                  Median: 12.0
[NeMo I 2023-11-12 08:27:13 data_preprocessing:412] 75 percentile: 12.00
[NeMo I 2023-11-12 08:27:13 data_preprocessing:413] 99 percentile: 12.00


100%|██████████| 1/1 [00:00<00:00, 17.55batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "i" } tokens { name: "will" } tokens { name: "sit" } tokens { name: "in" } tokens { name: "the" } tokens { name: "parlor" } tokens { name: "awhile" } tokens { name: "and" } tokens { name: "collect" } tokens { name: "my" } tokens { name: "thoughts" }


[NeMo I 2023-11-12 08:27:13 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:13 punctuation_capitalization_infer_dataset:127] Max length: 13
[NeMo I 2023-11-12 08:27:13 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:13 data_preprocessing:406] Min: 11 |                  Max: 11 |                  Mean: 11.0 |                  Median: 11.0
[NeMo I 2023-11-12 08:27:13 data_preprocessing:412] 75 percentile: 11.00
[NeMo I 2023-11-12 08:27:13 data_preprocessing:413] 99 percentile: 11.00


100%|██████████| 1/1 [00:00<00:00, 16.23batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "while" } tokens { name: "thus" } tokens { name: "dismissing" } tokens { name: "her" } tokens { name: "the" } tokens { name: "maiden" } tokens { name: "lady" } tokens { name: "stept" } tokens { name: "forward" } tokens { name: "kissed" } tokens { name: "phoebe" } tokens { name: "and" } tokens { name: "pressed" } tokens { name: "her" } tokens { name: "to" } tokens { name: "her" } tokens { name: "heart" } tokens { name: "which" } tokens { name: "beat" } tokens { name: "against" } tokens { name: "the" } tokens { name: "girl's" } tokens { name: "bosom" } tokens { name: "with" } tokens { name: "a" } tokens { name: "strong" } tokens { name: "high" } tokens { name: "and" } tokens { name: "tumultuous" } tokens { name: "swell" }


[NeMo I 2023-11-12 08:27:13 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:13 punctuation_capitalization_infer_dataset:127] Max length: 39
[NeMo I 2023-11-12 08:27:13 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:13 data_preprocessing:406] Min: 37 |                  Max: 37 |                  Mean: 37.0 |                  Median: 37.0
[NeMo I 2023-11-12 08:27:13 data_preprocessing:412] 75 percentile: 37.00
[NeMo I 2023-11-12 08:27:13 data_preprocessing:413] 99 percentile: 37.00


100%|██████████| 1/1 [00:00<00:00, 15.24batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "at" } tokens { name: "some" } tokens { name: "uncertain" } tokens { name: "period" } tokens { name: "in" } tokens { name: "the" } tokens { name: "depths" } tokens { name: "of" } tokens { name: "night" } tokens { name: "and" } tokens { name: "as" } tokens { name: "it" } tokens { name: "were" } tokens { name: "through" } tokens { name: "the" } tokens { name: "thin" } tokens { name: "veil" } tokens { name: "of" } tokens { name: "a" } tokens { name: "dream" } tokens { name: "she" } tokens { name: "was" } tokens { name: "conscious" } tokens { name: "of" } tokens { name: "a" } tokens { name: "footstep" } tokens { name: "mounting" } tokens { name: "the" } tokens { name: "stairs" } tokens { name: "heavily" } tokens { name: "but" } tokens { name: "not" } tokens { name: "with" } tokens { name: "force" } tokens { name: "and" } tokens { name: "decision" }


[NeMo I 2023-11-12 08:27:14 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:27:14 punctuation_capitalization_infer_dataset:127] Max length: 40
[NeMo I 2023-11-12 08:27:14 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:27:14 data_preprocessing:406] Min: 38 |                  Max: 38 |                  Mean: 38.0 |                  Median: 38.0
[NeMo I 2023-11-12 08:27:14 data_preprocessing:412] 75 percentile: 38.00
[NeMo I 2023-11-12 08:27:14 data_preprocessing:413] 99 percentile: 38.00


100%|██████████| 1/1 [00:00<00:00, 14.28batch/s]


Old path /n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2086/149220/2086-149220-0004.flac
[{'entity_group': 'PER', 'score': 0.9990177, 'word': 'Phoebe', 'start': 0, 'end': 6}]
ner tagged text B-PER Phoebe E-PER wondered whose care and toil it could have been that had planted these vegetables and kept the soil so clean and orderly.
Emotion Labels ['HAPPY']
tagged transcription B-PER Phoebe E-PER wondered whose care and toil it could have been that had planted these vegetables and kept the soil so clean and orderly.
Old path /n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/2086/149220/2086-149220-0013.flac
[{'entity_group': 'LOC', 'score': 0.59396243, 'word': 'House of the', 'start': 211, 'end': 223}, {'entity_group': 'MISC', 'score': 0.49966738, 'word': 'Seven Gables', 'start': 224, 'end': 236}]
ner tagged text So wise as well as antique was their aspect as to give color to the idea, not merely that they were the descendants of a time honored race, but that they had existe

 NeMo-text-processing :: DEBUG    :: tokens { name: "long" } tokens { name: "ago" } tokens { name: "there" } tokens { name: "lived" } tokens { name: "a" } tokens { name: "merchant" } tokens { name: "who" } tokens { name: "had" } tokens { name: "three" } tokens { name: "daughters" }


Emotion Labels ['HAPPY']
tagged transcription This was a fountain set round with a rim of old mossy stones, and paved in its bed with what appeared to be a sort of mosaic work of variously colored pebbles.
ss /n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/7976/110124
segments ['/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/7976/110124/7976-110124-0019.flac', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/7976/110124/7976-110124-0004.flac', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/7976/110124/7976-110124-0015.flac', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/7976/110124/7976-110124-0016.flac', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/7976/110124/7976-110124-0000.flac', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/7976/110124/7976-110124-0008.flac', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/7976/110124/7976-110124-0017.flac', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/7976/110124/79

100%|██████████| 1/1 [00:00<00:00, 28.08batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "every" } tokens { name: "year" } tokens { name: "at" } tokens { name: "a" } tokens { name: "certain" } tokens { name: "day" } tokens { name: "of" } tokens { name: "a" } tokens { name: "certain" } tokens { name: "month" } tokens { name: "he" } tokens { name: "went" } tokens { name: "away" } tokens { name: "to" } tokens { name: "a" } tokens { name: "distant" } tokens { name: "city" } tokens { name: "to" } tokens { name: "collect" } tokens { name: "money" } tokens { name: "on" } tokens { name: "an" } tokens { name: "account" }


[NeMo I 2023-11-12 08:28:05 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:05 punctuation_capitalization_infer_dataset:127] Max length: 25
[NeMo I 2023-11-12 08:28:05 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:05 data_preprocessing:406] Min: 23 |                  Max: 23 |                  Mean: 23.0 |                  Median: 23.0
[NeMo I 2023-11-12 08:28:05 data_preprocessing:412] 75 percentile: 23.00
[NeMo I 2023-11-12 08:28:05 data_preprocessing:413] 99 percentile: 23.00


100%|██████████| 1/1 [00:00<00:00, 16.39batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "how" } tokens { name: "do" } tokens { name: "you" } tokens { name: "know" } tokens { name: "asked" } tokens { name: "their" } tokens { name: "father" } tokens { name: "i" } tokens { name: "am" } tokens { name: "older" } tokens { name: "and" } tokens { name: "wiser" } tokens { name: "than" } tokens { name: "you" } tokens { name: "are" } tokens { name: "and" } tokens { name: "i" } tokens { name: "know" } tokens { name: "that" } tokens { name: "there" } tokens { name: "are" } tokens { name: "many" } tokens { name: "evils" } tokens { name: "which" } tokens { name: "might" } tokens { name: "come" } tokens { name: "upon" } tokens { name: "you" }


[NeMo I 2023-11-12 08:28:06 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:06 punctuation_capitalization_infer_dataset:127] Max length: 32
[NeMo I 2023-11-12 08:28:06 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:06 data_preprocessing:406] Min: 30 |                  Max: 30 |                  Mean: 30.0 |                  Median: 30.0
[NeMo I 2023-11-12 08:28:06 data_preprocessing:412] 75 percentile: 30.00
[NeMo I 2023-11-12 08:28:06 data_preprocessing:413] 99 percentile: 30.00


100%|██████████| 1/1 [00:00<00:00, 13.54batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "when" } tokens { name: "it" } tokens { name: "was" } tokens { name: "evening" } tokens { name: "he" } tokens { name: "led" } tokens { name: "his" } tokens { name: "band" } tokens { name: "into" } tokens { name: "a" } tokens { name: "nearby" } tokens { name: "street" } tokens { name: "and" } tokens { name: "in" } tokens { name: "his" } tokens { name: "disguise" } tokens { name: "approached" } tokens { name: "the" } tokens { name: "merchant's" } tokens { name: "house" } tokens { name: "he" } tokens { name: "knocked" } tokens { name: "at" } tokens { name: "the" } tokens { name: "door" }


[NeMo I 2023-11-12 08:28:06 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:06 punctuation_capitalization_infer_dataset:127] Max length: 29
[NeMo I 2023-11-12 08:28:06 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:06 data_preprocessing:406] Min: 27 |                  Max: 27 |                  Mean: 27.0 |                  Median: 27.0
[NeMo I 2023-11-12 08:28:06 data_preprocessing:412] 75 percentile: 27.00
[NeMo I 2023-11-12 08:28:06 data_preprocessing:413] 99 percentile: 27.00


100%|██████████| 1/1 [00:00<00:00, 16.24batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "have" } tokens { name: "pity" } tokens { name: "upon" } tokens { name: "a" } tokens { name: "poor" } tokens { name: "unfortunate" } tokens { name: "one" } tokens { name: "he" } tokens { name: "called" } tokens { name: "out" }


[NeMo I 2023-11-12 08:28:06 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:06 punctuation_capitalization_infer_dataset:127] Max length: 12
[NeMo I 2023-11-12 08:28:06 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:06 data_preprocessing:406] Min: 10 |                  Max: 10 |                  Mean: 10.0 |                  Median: 10.0
[NeMo I 2023-11-12 08:28:06 data_preprocessing:412] 75 percentile: 10.00
[NeMo I 2023-11-12 08:28:06 data_preprocessing:413] 99 percentile: 10.00


100%|██████████| 1/1 [00:00<00:00, 21.59batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "let" } tokens { name: "me" } tokens { name: "enter" } tokens { name: "i" } tokens { name: "pray" } tokens { name: "you" } tokens { name: "to" } tokens { name: "pass" } tokens { name: "the" } tokens { name: "night" } tokens { name: "under" } tokens { name: "your" } tokens { name: "roof" }


[NeMo I 2023-11-12 08:28:06 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:06 punctuation_capitalization_infer_dataset:127] Max length: 15
[NeMo I 2023-11-12 08:28:06 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:06 data_preprocessing:406] Min: 13 |                  Max: 13 |                  Mean: 13.0 |                  Median: 13.0
[NeMo I 2023-11-12 08:28:06 data_preprocessing:412] 75 percentile: 13.00
[NeMo I 2023-11-12 08:28:06 data_preprocessing:413] 99 percentile: 13.00


100%|██████████| 1/1 [00:00<00:00, 21.24batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "it's" } tokens { name: "surely" } tokens { name: "a" } tokens { name: "terrible" } tokens { name: "storm" } tokens { name: "outside" } tokens { name: "said" } tokens { name: "the" } tokens { name: "merchant's" } tokens { name: "eldest" } tokens { name: "daughter" } tokens { name: "as" } tokens { name: "the" } tokens { name: "wind" } tokens { name: "rattled" } tokens { name: "the" } tokens { name: "tiles" } tokens { name: "of" } tokens { name: "the" } tokens { name: "roof" } tokens { name: "and" } tokens { name: "the" } tokens { name: "rain" } tokens { name: "beat" } tokens { name: "in" } tokens { name: "torrents" } tokens { name: "against" } tokens { name: "the" } tokens { name: "doors" } tokens { name: "and" } tokens { name: "windows" }


[NeMo I 2023-11-12 08:28:06 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:06 punctuation_capitalization_infer_dataset:127] Max length: 38
[NeMo I 2023-11-12 08:28:06 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:06 data_preprocessing:406] Min: 36 |                  Max: 36 |                  Mean: 36.0 |                  Median: 36.0
[NeMo I 2023-11-12 08:28:06 data_preprocessing:412] 75 percentile: 36.00
[NeMo I 2023-11-12 08:28:06 data_preprocessing:413] 99 percentile: 36.00


100%|██████████| 1/1 [00:00<00:00, 11.31batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "he" } tokens { name: "is" } tokens { name: "old" } tokens { name: "as" } tokens { name: "well" } tokens { name: "as" } tokens { name: "poor" } tokens { name: "she" } tokens { name: "said" }


[NeMo I 2023-11-12 08:28:07 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:07 punctuation_capitalization_infer_dataset:127] Max length: 11
[NeMo I 2023-11-12 08:28:07 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:07 data_preprocessing:406] Min: 9 |                  Max: 9 |                  Mean: 9.0 |                  Median: 9.0
[NeMo I 2023-11-12 08:28:07 data_preprocessing:412] 75 percentile: 9.00
[NeMo I 2023-11-12 08:28:07 data_preprocessing:413] 99 percentile: 9.00


100%|██████████| 1/1 [00:00<00:00, 25.21batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "if" } tokens { name: "we" } tokens { name: "decide" } tokens { name: "to" } tokens { name: "show" } tokens { name: "mercy" } tokens { name: "to" } tokens { name: "this" } tokens { name: "poor" } tokens { name: "beggar" } tokens { name: "it" } tokens { name: "is" } tokens { name: "not" } tokens { name: "for" } tokens { name: "you" } tokens { name: "to" } tokens { name: "oppose" } tokens { name: "it" }


[NeMo I 2023-11-12 08:28:07 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:07 punctuation_capitalization_infer_dataset:127] Max length: 21
[NeMo I 2023-11-12 08:28:07 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:07 data_preprocessing:406] Min: 19 |                  Max: 19 |                  Mean: 19.0 |                  Median: 19.0
[NeMo I 2023-11-12 08:28:07 data_preprocessing:412] 75 percentile: 19.00
[NeMo I 2023-11-12 08:28:07 data_preprocessing:413] 99 percentile: 19.00


100%|██████████| 1/1 [00:00<00:00, 16.14batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "bui" } tokens { name: "we" } tokens { name: "should" } tokens { name: "not" } tokens { name: "forget" } tokens { name: "our" } tokens { name: "promise" } tokens { name: "to" } tokens { name: "our" } tokens { name: "father" } tokens { name: "cried" } tokens { name: "the" } tokens { name: "youngest" } tokens { name: "daughter" }


[NeMo I 2023-11-12 08:28:07 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:07 punctuation_capitalization_infer_dataset:127] Max length: 17
[NeMo I 2023-11-12 08:28:07 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:07 data_preprocessing:406] Min: 15 |                  Max: 15 |                  Mean: 15.0 |                  Median: 15.0
[NeMo I 2023-11-12 08:28:07 data_preprocessing:412] 75 percentile: 15.00
[NeMo I 2023-11-12 08:28:07 data_preprocessing:413] 99 percentile: 15.00


100%|██████████| 1/1 [00:00<00:00, 11.08batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "however" } tokens { name: "in" } tokens { name: "spite" } tokens { name: "of" } tokens { name: "all" } tokens { name: "she" } tokens { name: "could" } tokens { name: "say" } tokens { name: "the" } tokens { name: "elder" } tokens { name: "sisters" } tokens { name: "opened" } tokens { name: "the" } tokens { name: "door" } tokens { name: "and" } tokens { name: "admitted" } tokens { name: "the" } tokens { name: "beggar" }


[NeMo I 2023-11-12 08:28:07 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:07 punctuation_capitalization_infer_dataset:127] Max length: 21
[NeMo I 2023-11-12 08:28:07 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:07 data_preprocessing:406] Min: 19 |                  Max: 19 |                  Mean: 19.0 |                  Median: 19.0
[NeMo I 2023-11-12 08:28:07 data_preprocessing:412] 75 percentile: 19.00
[NeMo I 2023-11-12 08:28:07 data_preprocessing:413] 99 percentile: 19.00


100%|██████████| 1/1 [00:00<00:00, 15.77batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "it" } tokens { name: "is" } tokens { name: "a" } tokens { name: "fearful" } tokens { name: "night" } tokens { name: "to" } tokens { name: "send" } tokens { name: "away" } tokens { name: "a" } tokens { name: "beggar" } tokens { name: "said" } tokens { name: "the" } tokens { name: "eldest" } tokens { name: "sister" } tokens { name: "while" } tokens { name: "they" } tokens { name: "were" } tokens { name: "eating" }


[NeMo I 2023-11-12 08:28:07 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:07 punctuation_capitalization_infer_dataset:127] Max length: 21
[NeMo I 2023-11-12 08:28:07 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:07 data_preprocessing:406] Min: 19 |                  Max: 19 |                  Mean: 19.0 |                  Median: 19.0
[NeMo I 2023-11-12 08:28:07 data_preprocessing:412] 75 percentile: 19.00
[NeMo I 2023-11-12 08:28:07 data_preprocessing:413] 99 percentile: 19.00


100%|██████████| 1/1 [00:00<00:00, 15.20batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "while" } tokens { name: "they" } tokens { name: "were" } tokens { name: "talking" } tokens { name: "the" } tokens { name: "beggar" } tokens { name: "had" } tokens { name: "taken" } tokens { name: "the" } tokens { name: "apples" } tokens { name: "which" } tokens { name: "the" } tokens { name: "girls" } tokens { name: "were" } tokens { name: "to" } tokens { name: "eat" } tokens { name: "for" } tokens { name: "dessert" } tokens { name: "and" } tokens { name: "had" } tokens { name: "sprinkled" } tokens { name: "a" } tokens { name: "sleeping" } tokens { name: "powder" } tokens { name: "over" } tokens { name: "them" }


[NeMo I 2023-11-12 08:28:08 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:08 punctuation_capitalization_infer_dataset:127] Max length: 31
[NeMo I 2023-11-12 08:28:08 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:08 data_preprocessing:406] Min: 29 |                  Max: 29 |                  Mean: 29.0 |                  Median: 29.0
[NeMo I 2023-11-12 08:28:08 data_preprocessing:412] 75 percentile: 29.00
[NeMo I 2023-11-12 08:28:08 data_preprocessing:413] 99 percentile: 29.00


100%|██████████| 1/1 [00:00<00:00,  8.40batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "the" } tokens { name: "two" } tokens { name: "eldest" } tokens { name: "ate" } tokens { name: "their" } tokens { name: "apples" } tokens { name: "but" } tokens { name: "the" } tokens { name: "youngest" } tokens { name: "could" } tokens { name: "not" } tokens { name: "eat" } tokens { name: "that" } tokens { name: "night" } tokens { name: "she" } tokens { name: "threw" } tokens { name: "the" } tokens { name: "apple" } tokens { name: "away" }


[NeMo I 2023-11-12 08:28:08 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:08 punctuation_capitalization_infer_dataset:127] Max length: 21
[NeMo I 2023-11-12 08:28:08 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:08 data_preprocessing:406] Min: 19 |                  Max: 19 |                  Mean: 19.0 |                  Median: 19.0
[NeMo I 2023-11-12 08:28:08 data_preprocessing:412] 75 percentile: 19.00
[NeMo I 2023-11-12 08:28:08 data_preprocessing:413] 99 percentile: 19.00


100%|██████████| 1/1 [00:00<00:00, 16.08batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "she" } tokens { name: "did" } tokens { name: "not" } tokens { name: "stir" } tokens { name: "and" } tokens { name: "he" } tokens { name: "knew" } tokens { name: "that" } tokens { name: "the" } tokens { name: "sleeping" } tokens { name: "powder" } tokens { name: "had" } tokens { name: "thoroughly" } tokens { name: "done" } tokens { name: "its" } tokens { name: "work" }


[NeMo I 2023-11-12 08:28:08 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:08 punctuation_capitalization_infer_dataset:127] Max length: 18
[NeMo I 2023-11-12 08:28:08 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:08 data_preprocessing:406] Min: 16 |                  Max: 16 |                  Mean: 16.0 |                  Median: 16.0
[NeMo I 2023-11-12 08:28:08 data_preprocessing:412] 75 percentile: 16.00
[NeMo I 2023-11-12 08:28:08 data_preprocessing:413] 99 percentile: 16.00


100%|██████████| 1/1 [00:00<00:00, 20.67batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "then" } tokens { name: "she" } tokens { name: "heard" } tokens { name: "him" } tokens { name: "go" } tokens { name: "down" } tokens { name: "the" } tokens { name: "stairway" } tokens { name: "and" } tokens { name: "unbolt" } tokens { name: "the" } tokens { name: "heavy" } tokens { name: "doors" } tokens { name: "which" } tokens { name: "led" } tokens { name: "into" } tokens { name: "the" } tokens { name: "store" }


[NeMo I 2023-11-12 08:28:08 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:08 punctuation_capitalization_infer_dataset:127] Max length: 21
[NeMo I 2023-11-12 08:28:08 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:08 data_preprocessing:406] Min: 19 |                  Max: 19 |                  Mean: 19.0 |                  Median: 19.0
[NeMo I 2023-11-12 08:28:08 data_preprocessing:412] 75 percentile: 19.00
[NeMo I 2023-11-12 08:28:08 data_preprocessing:413] 99 percentile: 19.00


100%|██████████| 1/1 [00:00<00:00, 20.70batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "it" } tokens { name: "was" } tokens { name: "the" } tokens { name: "youngest" } tokens { name: "one" } tokens { name: "who" } tokens { name: "deceived" } tokens { name: "me" } tokens { name: "cried" } tokens { name: "the" } tokens { name: "robber" } tokens { name: "chieftain" }


[NeMo I 2023-11-12 08:28:08 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:08 punctuation_capitalization_infer_dataset:127] Max length: 16
[NeMo I 2023-11-12 08:28:08 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:08 data_preprocessing:406] Min: 14 |                  Max: 14 |                  Mean: 14.0 |                  Median: 14.0
[NeMo I 2023-11-12 08:28:08 data_preprocessing:412] 75 percentile: 14.00
[NeMo I 2023-11-12 08:28:08 data_preprocessing:413] 99 percentile: 14.00


100%|██████████| 1/1 [00:00<00:00, 15.19batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "perhaps" } tokens { name: "you" } tokens { name: "can" } tokens { name: "outwit" } tokens { name: "her" } tokens { name: "yet" } tokens { name: "cried" } tokens { name: "another" }


[NeMo I 2023-11-12 08:28:09 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:09 punctuation_capitalization_infer_dataset:127] Max length: 12
[NeMo I 2023-11-12 08:28:09 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:09 data_preprocessing:406] Min: 10 |                  Max: 10 |                  Mean: 10.0 |                  Median: 10.0
[NeMo I 2023-11-12 08:28:09 data_preprocessing:412] 75 percentile: 10.00
[NeMo I 2023-11-12 08:28:09 data_preprocessing:413] 99 percentile: 10.00


100%|██████████| 1/1 [00:00<00:00, 28.35batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "the" } tokens { name: "merchant's" } tokens { name: "daughter" } tokens { name: "at" } tokens { name: "first" } tokens { name: "did" } tokens { name: "not" } tokens { name: "answer" } tokens { name: "but" } tokens { name: "as" } tokens { name: "he" } tokens { name: "kept" } tokens { name: "on" } tokens { name: "calling" } tokens { name: "to" } tokens { name: "her" } tokens { name: "she" } tokens { name: "finally" } tokens { name: "asked" } tokens { name: "him" } tokens { name: "what" } tokens { name: "it" } tokens { name: "was" } tokens { name: "that" } tokens { name: "he" } tokens { name: "wanted" }


[NeMo I 2023-11-12 08:28:09 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:09 punctuation_capitalization_infer_dataset:127] Max length: 30
[NeMo I 2023-11-12 08:28:09 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:09 data_preprocessing:406] Min: 28 |                  Max: 28 |                  Mean: 28.0 |                  Median: 28.0
[NeMo I 2023-11-12 08:28:09 data_preprocessing:412] 75 percentile: 28.00
[NeMo I 2023-11-12 08:28:09 data_preprocessing:413] 99 percentile: 28.00


100%|██████████| 1/1 [00:00<00:00, 17.72batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "i" } tokens { name: "promise" } tokens { name: "you" } tokens { name: "i" } tokens { name: "will" } tokens { name: "do" } tokens { name: "you" } tokens { name: "no" } tokens { name: "harm" }


[NeMo I 2023-11-12 08:28:09 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:09 punctuation_capitalization_infer_dataset:127] Max length: 11
[NeMo I 2023-11-12 08:28:09 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:09 data_preprocessing:406] Min: 9 |                  Max: 9 |                  Mean: 9.0 |                  Median: 9.0
[NeMo I 2023-11-12 08:28:09 data_preprocessing:412] 75 percentile: 9.00
[NeMo I 2023-11-12 08:28:09 data_preprocessing:413] 99 percentile: 9.00


100%|██████████| 1/1 [00:00<00:00, 25.36batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "you" } tokens { name: "shall" } tokens { name: "not" } tokens { name: "come" } tokens { name: "into" } tokens { name: "my" } tokens { name: "father's" } tokens { name: "house" }


[NeMo I 2023-11-12 08:28:09 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:09 punctuation_capitalization_infer_dataset:127] Max length: 12
[NeMo I 2023-11-12 08:28:09 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:09 data_preprocessing:406] Min: 10 |                  Max: 10 |                  Mean: 10.0 |                  Median: 10.0
[NeMo I 2023-11-12 08:28:09 data_preprocessing:412] 75 percentile: 10.00
[NeMo I 2023-11-12 08:28:09 data_preprocessing:413] 99 percentile: 10.00


100%|██████████| 1/1 [00:00<00:00, 28.51batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "pass" } tokens { name: "the" } tokens { name: "charm" } tokens { name: "out" } tokens { name: "to" } tokens { name: "me" } tokens { name: "then" } tokens { name: "said" } tokens { name: "the" } tokens { name: "robber" }


[NeMo I 2023-11-12 08:28:09 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:09 punctuation_capitalization_infer_dataset:127] Max length: 12
[NeMo I 2023-11-12 08:28:09 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:09 data_preprocessing:406] Min: 10 |                  Max: 10 |                  Mean: 10.0 |                  Median: 10.0
[NeMo I 2023-11-12 08:28:09 data_preprocessing:412] 75 percentile: 10.00
[NeMo I 2023-11-12 08:28:09 data_preprocessing:413] 99 percentile: 10.00


100%|██████████| 1/1 [00:00<00:00, 22.07batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "when" } tokens { name: "she" } tokens { name: "returned" } tokens { name: "his" } tokens { name: "hand" } tokens { name: "was" } tokens { name: "sticking" } tokens { name: "through" } tokens { name: "the" } tokens { name: "hole" } tokens { name: "in" } tokens { name: "the" } tokens { name: "door" }


[NeMo I 2023-11-12 08:28:09 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:09 punctuation_capitalization_infer_dataset:127] Max length: 15
[NeMo I 2023-11-12 08:28:09 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:09 data_preprocessing:406] Min: 13 |                  Max: 13 |                  Mean: 13.0 |                  Median: 13.0
[NeMo I 2023-11-12 08:28:09 data_preprocessing:412] 75 percentile: 13.00
[NeMo I 2023-11-12 08:28:09 data_preprocessing:413] 99 percentile: 13.00


100%|██████████| 1/1 [00:00<00:00, 27.72batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "the" } tokens { name: "cries" } tokens { name: "and" } tokens { name: "curses" } tokens { name: "of" } tokens { name: "the" } tokens { name: "robbers" } tokens { name: "filled" } tokens { name: "the" } tokens { name: "air" }


[NeMo I 2023-11-12 08:28:09 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:09 punctuation_capitalization_infer_dataset:127] Max length: 12
[NeMo I 2023-11-12 08:28:09 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:09 data_preprocessing:406] Min: 10 |                  Max: 10 |                  Mean: 10.0 |                  Median: 10.0
[NeMo I 2023-11-12 08:28:09 data_preprocessing:412] 75 percentile: 10.00
[NeMo I 2023-11-12 08:28:09 data_preprocessing:413] 99 percentile: 10.00


100%|██████████| 1/1 [00:00<00:00, 21.77batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "they" } tokens { name: "tried" } tokens { name: "in" } tokens { name: "vain" } tokens { name: "to" } tokens { name: "break" } tokens { name: "down" } tokens { name: "the" } tokens { name: "great" } tokens { name: "doors" }


[NeMo I 2023-11-12 08:28:10 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:10 punctuation_capitalization_infer_dataset:127] Max length: 12
[NeMo I 2023-11-12 08:28:10 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:10 data_preprocessing:406] Min: 10 |                  Max: 10 |                  Mean: 10.0 |                  Median: 10.0
[NeMo I 2023-11-12 08:28:10 data_preprocessing:412] 75 percentile: 10.00
[NeMo I 2023-11-12 08:28:10 data_preprocessing:413] 99 percentile: 10.00


100%|██████████| 1/1 [00:00<00:00, 21.06batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "all" } tokens { name: "my" } tokens { name: "worries" } tokens { name: "about" } tokens { name: "you" } tokens { name: "were" } tokens { name: "foolish" }


[NeMo I 2023-11-12 08:28:10 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:10 punctuation_capitalization_infer_dataset:127] Max length: 9
[NeMo I 2023-11-12 08:28:10 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:10 data_preprocessing:406] Min: 7 |                  Max: 7 |                  Mean: 7.0 |                  Median: 7.0
[NeMo I 2023-11-12 08:28:10 data_preprocessing:412] 75 percentile: 7.00
[NeMo I 2023-11-12 08:28:10 data_preprocessing:413] 99 percentile: 7.00


100%|██████████| 1/1 [00:00<00:00, 16.46batch/s]


Old path /n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/7976/110124/7976-110124-0019.flac
[]
ner tagged text I promise you I will do you no harm.
Emotion Labels ['HAPPY']
tagged transcription I promise you I will do you no harm.
Old path /n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/7976/110124/7976-110124-0004.flac
[]
ner tagged text Have pity upon a poor unfortunate one he called out.
Emotion Labels ['ANGRY']
tagged transcription Have pity upon a poor unfortunate one he called out.
Old path /n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/7976/110124/7976-110124-0015.flac
[]
ner tagged text Then she heard him go down the stairway and unbolt the heavy doors which led into the store.
Emotion Labels ['NEUTRAL']
tagged transcription Then she heard him go down the stairway and unbolt the heavy doors which led into the store.
Old path /n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/7976/110124/7976-110124-0016.flac
[]
ner tagged text It was the youngest one wh

 NeMo-text-processing :: DEBUG    :: tokens { name: "grant" } tokens { name: "was" } tokens { name: "only" } tokens { name: "a" } tokens { name: "few" } tokens { name: "miles" } tokens { name: "away" } tokens { name: "but" } tokens { name: "although" } tokens { name: "commander" } tokens { name: "in" } tokens { name: "chief" } tokens { name: "he" } tokens { name: "knew" } tokens { name: "nothing" } tokens { name: "of" } tokens { name: "the" } tokens { name: "hardest" } tokens { name: "fought" } tokens { name: "battle" } tokens { name: "of" } tokens { name: "the" } tokens { name: "civil" } tokens { name: "war" } tokens { name: "until" } tokens { name: "it" } tokens { name: "was" } tokens { name: "over" }


Emotion Labels ['HAPPY']
tagged transcription The cries and curses of the robbers filled the air.
ss /n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/7976/105575
segments ['/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/7976/105575/7976-105575-0020.flac', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/7976/105575/7976-105575.trans.txt', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/7976/105575/7976-105575-0017.flac', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/7976/105575/7976-105575-0008.flac', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/7976/105575/7976-105575-0025.flac', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/7976/105575/7976-105575-0002.flac', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/7976/105575/7976-105575-0019.flac', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/7976/105575/7976-105575-0013.flac', '/n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/7976/105575/7976-105575-0004.f

100%|██████████| 1/1 [00:00<00:00, 17.77batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "my" } tokens { name: "own" } tokens { name: "regiment" } tokens { name: "was" } tokens { name: "in" } tokens { name: "the" } tokens { name: "advance" }


[NeMo I 2023-11-12 08:28:26 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:26 punctuation_capitalization_infer_dataset:127] Max length: 9
[NeMo I 2023-11-12 08:28:26 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:26 data_preprocessing:406] Min: 7 |                  Max: 7 |                  Mean: 7.0 |                  Median: 7.0
[NeMo I 2023-11-12 08:28:26 data_preprocessing:412] 75 percentile: 7.00
[NeMo I 2023-11-12 08:28:26 data_preprocessing:413] 99 percentile: 7.00


100%|██████████| 1/1 [00:00<00:00, 30.18batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "our" } tokens { name: "brigade" } tokens { name: "was" } tokens { name: "fearfully" } tokens { name: "outnumbered" }


[NeMo I 2023-11-12 08:28:26 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:26 punctuation_capitalization_infer_dataset:127] Max length: 8
[NeMo I 2023-11-12 08:28:26 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:26 data_preprocessing:406] Min: 6 |                  Max: 6 |                  Mean: 6.0 |                  Median: 6.0
[NeMo I 2023-11-12 08:28:26 data_preprocessing:412] 75 percentile: 6.00
[NeMo I 2023-11-12 08:28:26 data_preprocessing:413] 99 percentile: 6.00


100%|██████████| 1/1 [00:00<00:00, 24.78batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "there" } tokens { name: "were" } tokens { name: "no" } tokens { name: "breastworks" } tokens { name: "yet" } tokens { name: "that" } tokens { name: "one" } tokens { name: "little" } tokens { name: "brigade" } tokens { name: "of" } tokens { name: "hamilton's" } tokens { name: "division" } tokens { name: "stood" } tokens { name: "there" } tokens { name: "in" } tokens { name: "the" } tokens { name: "open" } tokens { name: "and" } tokens { name: "repulsed" } tokens { name: "assault" } tokens { name: "after" } tokens { name: "assault" }


[NeMo I 2023-11-12 08:28:26 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:26 punctuation_capitalization_infer_dataset:127] Max length: 27
[NeMo I 2023-11-12 08:28:26 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:26 data_preprocessing:406] Min: 25 |                  Max: 25 |                  Mean: 25.0 |                  Median: 25.0
[NeMo I 2023-11-12 08:28:26 data_preprocessing:412] 75 percentile: 25.00
[NeMo I 2023-11-12 08:28:26 data_preprocessing:413] 99 percentile: 25.00


100%|██████████| 1/1 [00:00<00:00, 16.07batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "not" } tokens { name: "balaklava" } tokens { name: "nor" } tokens { name: "the" } tokens { name: "alma" } tokens { name: "saw" } tokens { name: "such" } tokens { name: "fighting" } tokens { name: "it" } tokens { name: "was" } tokens { name: "a" } tokens { name: "duel" } tokens { name: "to" } tokens { name: "the" } tokens { name: "death" }


[NeMo I 2023-11-12 08:28:27 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:27 punctuation_capitalization_infer_dataset:127] Max length: 19
[NeMo I 2023-11-12 08:28:27 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:27 data_preprocessing:406] Min: 17 |                  Max: 17 |                  Mean: 17.0 |                  Median: 17.0
[NeMo I 2023-11-12 08:28:27 data_preprocessing:412] 75 percentile: 17.00
[NeMo I 2023-11-12 08:28:27 data_preprocessing:413] 99 percentile: 17.00


100%|██████████| 1/1 [00:00<00:00, 15.50batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "no" } tokens { name: "battery" } tokens { name: "in" } tokens { name: "the" } tokens { name: "whole" } tokens { name: "four" } tokens { name: "years" } tokens { name: "war" } tokens { name: "lost" } tokens { name: "so" } tokens { name: "many" } tokens { name: "men" } tokens { name: "in" } tokens { name: "so" } tokens { name: "short" } tokens { name: "a" } tokens { name: "time" }


[NeMo I 2023-11-12 08:28:27 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:27 punctuation_capitalization_infer_dataset:127] Max length: 19
[NeMo I 2023-11-12 08:28:27 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:27 data_preprocessing:406] Min: 17 |                  Max: 17 |                  Mean: 17.0 |                  Median: 17.0
[NeMo I 2023-11-12 08:28:27 data_preprocessing:412] 75 percentile: 17.00
[NeMo I 2023-11-12 08:28:27 data_preprocessing:413] 99 percentile: 17.00


100%|██████████| 1/1 [00:00<00:00, 18.38batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "one" } tokens { name: "daring" } tokens { name: "rebel" } tokens { name: "was" } tokens { name: "shot" } tokens { name: "down" } tokens { name: "and" } tokens { name: "bayoneted" } tokens { name: "clear" } tokens { name: "behind" } tokens { name: "the" } tokens { name: "line" } tokens { name: "of" } tokens { name: "company" } tokens { name: "b" } tokens { name: "where" } tokens { name: "he" } tokens { name: "had" } tokens { name: "broken" } tokens { name: "through" } tokens { name: "to" } tokens { name: "seize" } tokens { name: "the" } tokens { name: "flag" } tokens { name: "of" } tokens { name: "my" } tokens { name: "regiment" }


[NeMo I 2023-11-12 08:28:27 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:27 punctuation_capitalization_infer_dataset:127] Max length: 31
[NeMo I 2023-11-12 08:28:27 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:27 data_preprocessing:406] Min: 29 |                  Max: 29 |                  Mean: 29.0 |                  Median: 29.0
[NeMo I 2023-11-12 08:28:27 data_preprocessing:412] 75 percentile: 29.00
[NeMo I 2023-11-12 08:28:27 data_preprocessing:413] 99 percentile: 29.00


100%|██████████| 1/1 [00:00<00:00, 16.84batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "that" } tokens { name: "night" } tokens { name: "the" } tokens { name: "enemy" } tokens { name: "slipped" } tokens { name: "away" } tokens { name: "leaving" } tokens { name: "hundreds" } tokens { name: "and" } tokens { name: "hundreds" } tokens { name: "of" } tokens { name: "his" } tokens { name: "dead" } tokens { name: "and" } tokens { name: "wounded" } tokens { name: "on" } tokens { name: "the" } tokens { name: "field" }


[NeMo I 2023-11-12 08:28:27 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:27 punctuation_capitalization_infer_dataset:127] Max length: 20
[NeMo I 2023-11-12 08:28:27 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:27 data_preprocessing:406] Min: 18 |                  Max: 18 |                  Mean: 18.0 |                  Median: 18.0
[NeMo I 2023-11-12 08:28:27 data_preprocessing:412] 75 percentile: 18.00
[NeMo I 2023-11-12 08:28:27 data_preprocessing:413] 99 percentile: 18.00


100%|██████████| 1/1 [00:00<00:00, 15.86batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "with" } tokens { name: "a" } tokens { name: "few" } tokens { name: "lanterns" } tokens { name: "our" } tokens { name: "men" } tokens { name: "then" } tokens { name: "went" } tokens { name: "about" } tokens { name: "and" } tokens { name: "tried" } tokens { name: "to" } tokens { name: "gather" } tokens { name: "up" } tokens { name: "the" } tokens { name: "wounded" } tokens { name: "the" } tokens { name: "dead" } tokens { name: "were" } tokens { name: "left" } tokens { name: "till" } tokens { name: "morning" }


[NeMo I 2023-11-12 08:28:27 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:27 punctuation_capitalization_infer_dataset:127] Max length: 24
[NeMo I 2023-11-12 08:28:27 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:27 data_preprocessing:406] Min: 22 |                  Max: 22 |                  Mean: 22.0 |                  Median: 22.0
[NeMo I 2023-11-12 08:28:27 data_preprocessing:412] 75 percentile: 22.00
[NeMo I 2023-11-12 08:28:27 data_preprocessing:413] 99 percentile: 22.00


100%|██████████| 1/1 [00:00<00:00, 18.34batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "it" } tokens { name: "was" } tokens { name: "not" } tokens { name: "a" } tokens { name: "question" } tokens { name: "who" } tokens { name: "was" } tokens { name: "dead" } tokens { name: "or" } tokens { name: "wounded" } tokens { name: "but" } tokens { name: "who" } tokens { name: "was" } tokens { name: "not" }


[NeMo I 2023-11-12 08:28:27 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:27 punctuation_capitalization_infer_dataset:127] Max length: 16
[NeMo I 2023-11-12 08:28:27 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:27 data_preprocessing:406] Min: 14 |                  Max: 14 |                  Mean: 14.0 |                  Median: 14.0
[NeMo I 2023-11-12 08:28:27 data_preprocessing:412] 75 percentile: 14.00
[NeMo I 2023-11-12 08:28:27 data_preprocessing:413] 99 percentile: 14.00


100%|██████████| 1/1 [00:00<00:00, 18.79batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "fifteen" } tokens { name: "officers" } tokens { name: "of" } tokens { name: "our" } tokens { name: "little" } tokens { name: "half" } tokens { name: "regiment" } tokens { name: "were" } tokens { name: "dead" } tokens { name: "or" } tokens { name: "wounded" }


[NeMo I 2023-11-12 08:28:28 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:28 punctuation_capitalization_infer_dataset:127] Max length: 13
[NeMo I 2023-11-12 08:28:28 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:28 data_preprocessing:406] Min: 11 |                  Max: 11 |                  Mean: 11.0 |                  Median: 11.0
[NeMo I 2023-11-12 08:28:28 data_preprocessing:412] 75 percentile: 11.00
[NeMo I 2023-11-12 08:28:28 data_preprocessing:413] 99 percentile: 11.00


100%|██████████| 1/1 [00:00<00:00, 22.54batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "i" } tokens { name: "remained" } tokens { name: "awake" } tokens { name: "all" } tokens { name: "night" } tokens { name: "talking" } tokens { name: "with" } tokens { name: "a" } tokens { name: "comrade" } tokens { name: "who" } tokens { name: "shared" } tokens { name: "my" } tokens { name: "blanket" } tokens { name: "with" } tokens { name: "me" } tokens { name: "poor" } tokens { name: "jimmy" } tokens { name: "king" }


[NeMo I 2023-11-12 08:28:28 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:28 punctuation_capitalization_infer_dataset:127] Max length: 20
[NeMo I 2023-11-12 08:28:28 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:28 data_preprocessing:406] Min: 18 |                  Max: 18 |                  Mean: 18.0 |                  Median: 18.0
[NeMo I 2023-11-12 08:28:28 data_preprocessing:412] 75 percentile: 18.00
[NeMo I 2023-11-12 08:28:28 data_preprocessing:413] 99 percentile: 18.00


100%|██████████| 1/1 [00:00<00:00, 15.04batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "he" } tokens { name: "survived" } tokens { name: "the" } tokens { name: "war" } tokens { name: "only" } tokens { name: "to" } tokens { name: "be" } tokens { name: "murdered" } tokens { name: "later" } tokens { name: "on" } tokens { name: "a" } tokens { name: "plantation" } tokens { name: "in" } tokens { name: "mississippi" }


[NeMo I 2023-11-12 08:28:28 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:28 punctuation_capitalization_infer_dataset:127] Max length: 16
[NeMo I 2023-11-12 08:28:28 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:28 data_preprocessing:406] Min: 14 |                  Max: 14 |                  Mean: 14.0 |                  Median: 14.0
[NeMo I 2023-11-12 08:28:28 data_preprocessing:412] 75 percentile: 14.00
[NeMo I 2023-11-12 08:28:28 data_preprocessing:413] 99 percentile: 14.00


100%|██████████| 1/1 [00:00<00:00, 10.02batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "when" } tokens { name: "morning" } tokens { name: "came" } tokens { name: "the" } tokens { name: "firing" } tokens { name: "opened" } tokens { name: "and" } tokens { name: "for" } tokens { name: "all" } tokens { name: "that" } tokens { name: "day" } tokens { name: "the" } tokens { name: "battle" } tokens { name: "raged" } tokens { name: "fiercely" } tokens { name: "at" } tokens { name: "the" } tokens { name: "left" } tokens { name: "and" } tokens { name: "center" } tokens { name: "left" } tokens { name: "we" } tokens { name: "getting" } tokens { name: "the" } tokens { name: "worst" } tokens { name: "of" } tokens { name: "it" } tokens { name: "too" }


[NeMo I 2023-11-12 08:28:28 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:28 punctuation_capitalization_infer_dataset:127] Max length: 30
[NeMo I 2023-11-12 08:28:28 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:28 data_preprocessing:406] Min: 28 |                  Max: 28 |                  Mean: 28.0 |                  Median: 28.0
[NeMo I 2023-11-12 08:28:28 data_preprocessing:412] 75 percentile: 28.00
[NeMo I 2023-11-12 08:28:28 data_preprocessing:413] 99 percentile: 28.00


100%|██████████| 1/1 [00:00<00:00, 14.63batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "that" } tokens { name: "evening" } tokens { name: "an" } tokens { name: "order" } tokens { name: "came" } tokens { name: "for" } tokens { name: "us" } tokens { name: "hamilton's" } tokens { name: "division" } tokens { name: "to" } tokens { name: "assault" } tokens { name: "the" } tokens { name: "enemy's" } tokens { name: "left" } tokens { name: "flank" } tokens { name: "at" } tokens { name: "midnight" }


[NeMo I 2023-11-12 08:28:29 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:29 punctuation_capitalization_infer_dataset:127] Max length: 23
[NeMo I 2023-11-12 08:28:29 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:29 data_preprocessing:406] Min: 21 |                  Max: 21 |                  Mean: 21.0 |                  Median: 21.0
[NeMo I 2023-11-12 08:28:29 data_preprocessing:412] 75 percentile: 21.00
[NeMo I 2023-11-12 08:28:29 data_preprocessing:413] 99 percentile: 21.00


100%|██████████| 1/1 [00:00<00:00, 15.76batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "under" } tokens { name: "the" } tokens { name: "same" } tokens { name: "quiet" } tokens { name: "moonlight" } tokens { name: "and" } tokens { name: "only" } tokens { name: "six" } tokens { name: "hundred" } tokens { name: "yards" } tokens { name: "away" } tokens { name: "from" } tokens { name: "us" } tokens { name: "also" } tokens { name: "lay" } tokens { name: "the" } tokens { name: "victorious" } tokens { name: "rebel" } tokens { name: "army" }


[NeMo I 2023-11-12 08:28:29 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:29 punctuation_capitalization_infer_dataset:127] Max length: 21
[NeMo I 2023-11-12 08:28:29 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:29 data_preprocessing:406] Min: 19 |                  Max: 19 |                  Mean: 19.0 |                  Median: 19.0
[NeMo I 2023-11-12 08:28:29 data_preprocessing:412] 75 percentile: 19.00
[NeMo I 2023-11-12 08:28:29 data_preprocessing:413] 99 percentile: 19.00


100%|██████████| 1/1 [00:00<00:00, 18.59batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "once" } tokens { name: "in" } tokens { name: "the" } tokens { name: "night" } tokens { name: "i" } tokens { name: "slipped" } tokens { name: "away" } tokens { name: "from" } tokens { name: "the" } tokens { name: "bivouac" } tokens { name: "and" } tokens { name: "hurried" } tokens { name: "to" } tokens { name: "the" } tokens { name: "old" } tokens { name: "tishimingo" } tokens { name: "hotel" } tokens { name: "to" } tokens { name: "see" } tokens { name: "a" } tokens { name: "lieutenant" } tokens { name: "of" } tokens { name: "my" } tokens { name: "company" } tokens { name: "who" } tokens { name: "had" } tokens { name: "been" } tokens { name: "shot" } tokens { name: "through" } tokens { name: "the" } tokens { name: "breast" }


[NeMo I 2023-11-12 08:28:29 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:29 punctuation_capitalization_infer_dataset:127] Max length: 39
[NeMo I 2023-11-12 08:28:29 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:29 data_preprocessing:406] Min: 37 |                  Max: 37 |                  Mean: 37.0 |                  Median: 37.0
[NeMo I 2023-11-12 08:28:29 data_preprocessing:412] 75 percentile: 37.00
[NeMo I 2023-11-12 08:28:29 data_preprocessing:413] 99 percentile: 37.00


100%|██████████| 1/1 [00:00<00:00, 13.60batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "i" } tokens { name: "could" } tokens { name: "not" } tokens { name: "help" } tokens { name: "my" } tokens { name: "friend" }


[NeMo I 2023-11-12 08:28:29 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:29 punctuation_capitalization_infer_dataset:127] Max length: 8
[NeMo I 2023-11-12 08:28:29 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:29 data_preprocessing:406] Min: 6 |                  Max: 6 |                  Mean: 6.0 |                  Median: 6.0
[NeMo I 2023-11-12 08:28:29 data_preprocessing:412] 75 percentile: 6.00
[NeMo I 2023-11-12 08:28:29 data_preprocessing:413] 99 percentile: 6.00


100%|██████████| 1/1 [00:00<00:00, 27.66batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "go" } tokens { name: "back" } tokens { name: "to" } tokens { name: "the" } tokens { name: "regiment" } tokens { name: "he" } tokens { name: "said" } tokens { name: "smiling" } tokens { name: "all" } tokens { name: "will" } tokens { name: "be" } tokens { name: "needed" }


[NeMo I 2023-11-12 08:28:29 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:29 punctuation_capitalization_infer_dataset:127] Max length: 14
[NeMo I 2023-11-12 08:28:29 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:29 data_preprocessing:406] Min: 12 |                  Max: 12 |                  Mean: 12.0 |                  Median: 12.0
[NeMo I 2023-11-12 08:28:29 data_preprocessing:412] 75 percentile: 12.00
[NeMo I 2023-11-12 08:28:29 data_preprocessing:413] 99 percentile: 12.00


100%|██████████| 1/1 [00:00<00:00, 13.73batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "my" } tokens { name: "friend" } tokens { name: "with" } tokens { name: "many" } tokens { name: "others" } tokens { name: "was" } tokens { name: "being" } tokens { name: "carried" } tokens { name: "out" } tokens { name: "to" } tokens { name: "die" } tokens { name: "elsewhere" }


[NeMo I 2023-11-12 08:28:29 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:29 punctuation_capitalization_infer_dataset:127] Max length: 14
[NeMo I 2023-11-12 08:28:29 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:29 data_preprocessing:406] Min: 12 |                  Max: 12 |                  Mean: 12.0 |                  Median: 12.0
[NeMo I 2023-11-12 08:28:29 data_preprocessing:412] 75 percentile: 12.00
[NeMo I 2023-11-12 08:28:29 data_preprocessing:413] 99 percentile: 12.00


100%|██████████| 1/1 [00:00<00:00, 22.72batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "i" } tokens { name: "hastened" } tokens { name: "back" } tokens { name: "to" } tokens { name: "the" } tokens { name: "lines" }


[NeMo I 2023-11-12 08:28:29 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:29 punctuation_capitalization_infer_dataset:127] Max length: 9
[NeMo I 2023-11-12 08:28:29 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:29 data_preprocessing:406] Min: 7 |                  Max: 7 |                  Mean: 7.0 |                  Median: 7.0
[NeMo I 2023-11-12 08:28:29 data_preprocessing:412] 75 percentile: 7.00
[NeMo I 2023-11-12 08:28:29 data_preprocessing:413] 99 percentile: 7.00


100%|██████████| 1/1 [00:00<00:00, 18.70batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "the" } tokens { name: "cloud" } tokens { name: "of" } tokens { name: "rebels" } tokens { name: "we" } tokens { name: "had" } tokens { name: "seen" } tokens { name: "divided" } tokens { name: "itself" } tokens { name: "into" } tokens { name: "three" } tokens { name: "columns" }


[NeMo I 2023-11-12 08:28:30 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:30 punctuation_capitalization_infer_dataset:127] Max length: 14
[NeMo I 2023-11-12 08:28:30 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:30 data_preprocessing:406] Min: 12 |                  Max: 12 |                  Mean: 12.0 |                  Median: 12.0
[NeMo I 2023-11-12 08:28:30 data_preprocessing:412] 75 percentile: 12.00
[NeMo I 2023-11-12 08:28:30 data_preprocessing:413] 99 percentile: 12.00


100%|██████████| 1/1 [00:00<00:00, 20.79batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "a" } tokens { name: "perfect" } tokens { name: "blaze" } tokens { name: "of" } tokens { name: "close" } tokens { name: "range" } tokens { name: "musketry" } tokens { name: "too" } tokens { name: "mowed" } tokens { name: "them" } tokens { name: "down" } tokens { name: "like" } tokens { name: "grass" }


[NeMo I 2023-11-12 08:28:30 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:30 punctuation_capitalization_infer_dataset:127] Max length: 18
[NeMo I 2023-11-12 08:28:30 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:30 data_preprocessing:406] Min: 16 |                  Max: 16 |                  Mean: 16.0 |                  Median: 16.0
[NeMo I 2023-11-12 08:28:30 data_preprocessing:412] 75 percentile: 16.00
[NeMo I 2023-11-12 08:28:30 data_preprocessing:413] 99 percentile: 16.00


100%|██████████| 1/1 [00:00<00:00, 17.53batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "they" } tokens { name: "lay" } tokens { name: "in" } tokens { name: "heaps" } tokens { name: "of" } tokens { name: "dozens" } tokens { name: "even" } tokens { name: "close" } tokens { name: "up" } tokens { name: "to" } tokens { name: "the" } tokens { name: "works" }


[NeMo I 2023-11-12 08:28:30 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:30 punctuation_capitalization_infer_dataset:127] Max length: 15
[NeMo I 2023-11-12 08:28:30 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:30 data_preprocessing:406] Min: 13 |                  Max: 13 |                  Mean: 13.0 |                  Median: 13.0
[NeMo I 2023-11-12 08:28:30 data_preprocessing:412] 75 percentile: 13.00
[NeMo I 2023-11-12 08:28:30 data_preprocessing:413] 99 percentile: 13.00


100%|██████████| 1/1 [00:00<00:00, 26.43batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "that" } tokens { name: "night" } tokens { name: "i" } tokens { name: "stood" } tokens { name: "guard" } tokens { name: "under" } tokens { name: "an" } tokens { name: "oak" } tokens { name: "tree" } tokens { name: "on" } tokens { name: "the" } tokens { name: "battlefield" } tokens { name: "among" } tokens { name: "the" } tokens { name: "unburied" } tokens { name: "dead" }


[NeMo I 2023-11-12 08:28:30 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:30 punctuation_capitalization_infer_dataset:127] Max length: 20
[NeMo I 2023-11-12 08:28:30 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:30 data_preprocessing:406] Min: 18 |                  Max: 18 |                  Mean: 18.0 |                  Median: 18.0
[NeMo I 2023-11-12 08:28:30 data_preprocessing:412] 75 percentile: 18.00
[NeMo I 2023-11-12 08:28:30 data_preprocessing:413] 99 percentile: 18.00


100%|██████████| 1/1 [00:00<00:00, 14.03batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "indeed" } tokens { name: "we" } tokens { name: "of" } tokens { name: "the" } tokens { name: "rank" } tokens { name: "and" } tokens { name: "file" } tokens { name: "had" } tokens { name: "little" } tokens { name: "confidence" } tokens { name: "in" } tokens { name: "grant" } tokens { name: "in" } tokens { name: "those" } tokens { name: "days" }


[NeMo I 2023-11-12 08:28:30 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:30 punctuation_capitalization_infer_dataset:127] Max length: 17
[NeMo I 2023-11-12 08:28:30 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:30 data_preprocessing:406] Min: 15 |                  Max: 15 |                  Mean: 15.0 |                  Median: 15.0
[NeMo I 2023-11-12 08:28:30 data_preprocessing:412] 75 percentile: 15.00
[NeMo I 2023-11-12 08:28:30 data_preprocessing:413] 99 percentile: 15.00


100%|██████████| 1/1 [00:00<00:00, 18.28batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "rosecrans" } tokens { name: "protested" } tokens { name: "it" } tokens { name: "was" } tokens { name: "in" } tokens { name: "vain" }


[NeMo I 2023-11-12 08:28:30 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:30 punctuation_capitalization_infer_dataset:127] Max length: 10
[NeMo I 2023-11-12 08:28:30 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:30 data_preprocessing:406] Min: 8 |                  Max: 8 |                  Mean: 8.0 |                  Median: 8.0
[NeMo I 2023-11-12 08:28:30 data_preprocessing:412] 75 percentile: 8.00
[NeMo I 2023-11-12 08:28:30 data_preprocessing:413] 99 percentile: 8.00


100%|██████████| 1/1 [00:00<00:00, 32.79batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "it" } tokens { name: "required" } tokens { name: "months" } tokens { name: "and" } tokens { name: "great" } tokens { name: "events" } tokens { name: "to" } tokens { name: "make" } tokens { name: "grant" } tokens { name: "the" } tokens { name: "hero" } tokens { name: "of" } tokens { name: "the" } tokens { name: "army" } tokens { name: "which" } tokens { name: "he" } tokens { name: "afterward" } tokens { name: "became" }


[NeMo I 2023-11-12 08:28:31 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:31 punctuation_capitalization_infer_dataset:127] Max length: 20
[NeMo I 2023-11-12 08:28:31 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:31 data_preprocessing:406] Min: 18 |                  Max: 18 |                  Mean: 18.0 |                  Median: 18.0
[NeMo I 2023-11-12 08:28:31 data_preprocessing:412] 75 percentile: 18.00
[NeMo I 2023-11-12 08:28:31 data_preprocessing:413] 99 percentile: 18.00


100%|██████████| 1/1 [00:00<00:00, 12.61batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "for" } tokens { name: "some" } tokens { name: "reason" } tokens { name: "the" } tokens { name: "dead" } tokens { name: "at" } tokens { name: "hatchie" } tokens { name: "bridge" } tokens { name: "were" } tokens { name: "not" } tokens { name: "buried" }


[NeMo I 2023-11-12 08:28:31 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:31 punctuation_capitalization_infer_dataset:127] Max length: 14
[NeMo I 2023-11-12 08:28:31 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:31 data_preprocessing:406] Min: 12 |                  Max: 12 |                  Mean: 12.0 |                  Median: 12.0
[NeMo I 2023-11-12 08:28:31 data_preprocessing:412] 75 percentile: 12.00
[NeMo I 2023-11-12 08:28:31 data_preprocessing:413] 99 percentile: 12.00


100%|██████████| 1/1 [00:00<00:00, 18.22batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "a" } tokens { name: "week" } tokens { name: "after" } tokens { name: "the" } tokens { name: "battle" } tokens { name: "my" } tokens { name: "brother" } tokens { name: "rode" } tokens { name: "by" } tokens { name: "there" } tokens { name: "on" } tokens { name: "a" } tokens { name: "cavalry" } tokens { name: "expedition" } tokens { name: "and" } tokens { name: "made" } tokens { name: "the" } tokens { name: "horrible" } tokens { name: "discovery" } tokens { name: "that" } tokens { name: "hogs" } tokens { name: "were" } tokens { name: "eating" } tokens { name: "up" } tokens { name: "the" } tokens { name: "bodies" } tokens { name: "of" } tokens { name: "our" } tokens { name: "dead" } tokens { name: "heroes" } tokens { name: "that" } tokens { name: "too" } tokens { name: "was" } tokens { name: "war" }


[NeMo I 2023-11-12 08:28:31 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:31 punctuation_capitalization_infer_dataset:127] Max length: 37
[NeMo I 2023-11-12 08:28:31 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:31 data_preprocessing:406] Min: 35 |                  Max: 35 |                  Mean: 35.0 |                  Median: 35.0
[NeMo I 2023-11-12 08:28:31 data_preprocessing:412] 75 percentile: 35.00
[NeMo I 2023-11-12 08:28:31 data_preprocessing:413] 99 percentile: 35.00


100%|██████████| 1/1 [00:00<00:00, 14.86batch/s]


Old path /n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/7976/105575/7976-105575-0020.flac
[{'entity_group': 'PER', 'score': 0.31028217, 'word': 'I', 'start': 0, 'end': 1}]
ner tagged text B-PER I E-PER hastened back to the lines.
Emotion Labels ['ANGRY']
tagged transcription B-PER I E-PER hastened back to the lines.
Old path /n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/7976/105575/7976-105575-0017.flac
[]
ner tagged text I could not help my friend.
Emotion Labels ['NEUTRAL']
tagged transcription I could not help my friend.
Old path /n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/7976/105575/7976-105575-0008.flac
[]
ner tagged text With a few lanterns, our men then went about and tried to gather up the wounded. the dead were left till morning.
Emotion Labels ['HAPPY']
tagged transcription With a few lanterns, our men then went about and tried to gather up the wounded. the dead were left till morning.
Old path /n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clea

 NeMo-text-processing :: DEBUG    :: tokens { name: "he" } tokens { name: "had" } tokens { name: "little" } tokens { name: "enough" } tokens { name: "to" } tokens { name: "break" } tokens { name: "or" } tokens { name: "bite" } tokens { name: "and" } tokens { name: "once" } tokens { name: "when" } tokens { name: "there" } tokens { name: "was" } tokens { name: "a" } tokens { name: "great" } tokens { name: "famine" } tokens { name: "in" } tokens { name: "the" } tokens { name: "land" } tokens { name: "he" } tokens { name: "could" } tokens { name: "hardly" } tokens { name: "procure" } tokens { name: "even" } tokens { name: "his" } tokens { name: "daily" } tokens { name: "bread" } tokens { name: "and" } tokens { name: "as" } tokens { name: "he" } tokens { name: "lay" } tokens { name: "thinking" } tokens { name: "in" } tokens { name: "his" } tokens { name: "bed" } tokens { name: "one" } tokens { name: "night" } tokens { name: "he" } tokens { name: "sighed" } tokens { name: "and" } tokens { na

[NeMo I 2023-11-12 08:28:53 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:53 punctuation_capitalization_infer_dataset:127] Max length: 52
[NeMo I 2023-11-12 08:28:53 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:53 data_preprocessing:406] Min: 50 |                  Max: 50 |                  Mean: 50.0 |                  Median: 50.0
[NeMo I 2023-11-12 08:28:53 data_preprocessing:412] 75 percentile: 50.00
[NeMo I 2023-11-12 08:28:53 data_preprocessing:413] 99 percentile: 50.00


100%|██████████| 1/1 [00:00<00:00, 16.27batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "how" } tokens { name: "can" } tokens { name: "we" } tokens { name: "feed" } tokens { name: "our" } tokens { name: "children" } tokens { name: "when" } tokens { name: "we" } tokens { name: "have" } tokens { name: "no" } tokens { name: "more" } tokens { name: "than" } tokens { name: "we" } tokens { name: "can" } tokens { name: "eat" } tokens { name: "ourselves" }


[NeMo I 2023-11-12 08:28:53 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:53 punctuation_capitalization_infer_dataset:127] Max length: 18
[NeMo I 2023-11-12 08:28:53 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:53 data_preprocessing:406] Min: 16 |                  Max: 16 |                  Mean: 16.0 |                  Median: 16.0
[NeMo I 2023-11-12 08:28:53 data_preprocessing:412] 75 percentile: 16.00
[NeMo I 2023-11-12 08:28:53 data_preprocessing:413] 99 percentile: 16.00


100%|██████████| 1/1 [00:00<00:00, 22.72batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "oh" } tokens { name: "you" } tokens { name: "simpleton" } tokens { name: "said" } tokens { name: "she" } tokens { name: "then" } tokens { name: "we" } tokens { name: "must" } tokens { name: "all" } tokens { name: "four" } tokens { name: "die" } tokens { name: "of" } tokens { name: "hunger" } tokens { name: "you" } tokens { name: "had" } tokens { name: "better" } tokens { name: "plane" } tokens { name: "the" } tokens { name: "coffins" } tokens { name: "for" } tokens { name: "us" }


[NeMo I 2023-11-12 08:28:53 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:53 punctuation_capitalization_infer_dataset:127] Max length: 25
[NeMo I 2023-11-12 08:28:53 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:53 data_preprocessing:406] Min: 23 |                  Max: 23 |                  Mean: 23.0 |                  Median: 23.0
[NeMo I 2023-11-12 08:28:53 data_preprocessing:412] 75 percentile: 23.00
[NeMo I 2023-11-12 08:28:53 data_preprocessing:413] 99 percentile: 23.00


100%|██████████| 1/1 [00:00<00:00, 16.41batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "but" } tokens { name: "she" } tokens { name: "left" } tokens { name: "him" } tokens { name: "no" } tokens { name: "peace" } tokens { name: "till" } tokens { name: "he" } tokens { name: "consented" } tokens { name: "saying" } tokens { name: "ah" } tokens { name: "but" } tokens { name: "i" } tokens { name: "shall" } tokens { name: "miss" } tokens { name: "the" } tokens { name: "poor" } tokens { name: "children" }


[NeMo I 2023-11-12 08:28:53 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:53 punctuation_capitalization_infer_dataset:127] Max length: 21
[NeMo I 2023-11-12 08:28:53 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:53 data_preprocessing:406] Min: 19 |                  Max: 19 |                  Mean: 19.0 |                  Median: 19.0
[NeMo I 2023-11-12 08:28:53 data_preprocessing:412] 75 percentile: 19.00
[NeMo I 2023-11-12 08:28:53 data_preprocessing:413] 99 percentile: 19.00


100%|██████████| 1/1 [00:00<00:00, 16.72batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "and" } tokens { name: "as" } tokens { name: "soon" } tokens { name: "as" } tokens { name: "their" } tokens { name: "parents" } tokens { name: "had" } tokens { name: "gone" } tokens { name: "to" } tokens { name: "sleep" } tokens { name: "he" } tokens { name: "got" } tokens { name: "up" } tokens { name: "put" } tokens { name: "on" } tokens { name: "his" } tokens { name: "coat" } tokens { name: "and" } tokens { name: "unbarring" } tokens { name: "the" } tokens { name: "back" } tokens { name: "door" } tokens { name: "went" } tokens { name: "out" }


[NeMo I 2023-11-12 08:28:53 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:53 punctuation_capitalization_infer_dataset:127] Max length: 28
[NeMo I 2023-11-12 08:28:53 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:53 data_preprocessing:406] Min: 26 |                  Max: 26 |                  Mean: 26.0 |                  Median: 26.0
[NeMo I 2023-11-12 08:28:53 data_preprocessing:412] 75 percentile: 26.00
[NeMo I 2023-11-12 08:28:53 data_preprocessing:413] 99 percentile: 26.00


100%|██████████| 1/1 [00:00<00:00, 15.36batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "ah" } tokens { name: "father" } tokens { name: "said" } tokens { name: "hansel" } tokens { name: "i" } tokens { name: "am" } tokens { name: "looking" } tokens { name: "at" } tokens { name: "my" } tokens { name: "white" } tokens { name: "cat" } tokens { name: "sitting" } tokens { name: "upon" } tokens { name: "the" } tokens { name: "roof" } tokens { name: "of" } tokens { name: "the" } tokens { name: "house" } tokens { name: "and" } tokens { name: "trying" } tokens { name: "to" } tokens { name: "say" } tokens { name: "good" } tokens { name: "bye" }


[NeMo I 2023-11-12 08:28:54 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:54 punctuation_capitalization_infer_dataset:127] Max length: 27
[NeMo I 2023-11-12 08:28:54 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:54 data_preprocessing:406] Min: 25 |                  Max: 25 |                  Mean: 25.0 |                  Median: 25.0
[NeMo I 2023-11-12 08:28:54 data_preprocessing:412] 75 percentile: 25.00
[NeMo I 2023-11-12 08:28:54 data_preprocessing:413] 99 percentile: 25.00


100%|██████████| 1/1 [00:00<00:00, 16.36batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "but" } tokens { name: "in" } tokens { name: "reality" } tokens { name: "hansel" } tokens { name: "was" } tokens { name: "not" } tokens { name: "looking" } tokens { name: "at" } tokens { name: "a" } tokens { name: "cat" } tokens { name: "but" } tokens { name: "every" } tokens { name: "time" } tokens { name: "he" } tokens { name: "stopped" } tokens { name: "he" } tokens { name: "dropped" } tokens { name: "a" } tokens { name: "pebble" } tokens { name: "out" } tokens { name: "of" } tokens { name: "his" } tokens { name: "pocket" } tokens { name: "upon" } tokens { name: "the" } tokens { name: "path" }


[NeMo I 2023-11-12 08:28:54 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:54 punctuation_capitalization_infer_dataset:127] Max length: 30
[NeMo I 2023-11-12 08:28:54 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:54 data_preprocessing:406] Min: 28 |                  Max: 28 |                  Mean: 28.0 |                  Median: 28.0
[NeMo I 2023-11-12 08:28:54 data_preprocessing:412] 75 percentile: 28.00
[NeMo I 2023-11-12 08:28:54 data_preprocessing:413] 99 percentile: 28.00


100%|██████████| 1/1 [00:00<00:00, 17.30batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "but" } tokens { name: "her" } tokens { name: "husband" } tokens { name: "felt" } tokens { name: "heavy" } tokens { name: "at" } tokens { name: "heart" } tokens { name: "and" } tokens { name: "thought" } tokens { name: "it" } tokens { name: "were" } tokens { name: "better" } tokens { name: "to" } tokens { name: "share" } tokens { name: "the" } tokens { name: "last" } tokens { name: "crust" } tokens { name: "with" } tokens { name: "the" } tokens { name: "children" }


[NeMo I 2023-11-12 08:28:54 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:54 punctuation_capitalization_infer_dataset:127] Max length: 22
[NeMo I 2023-11-12 08:28:54 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:54 data_preprocessing:406] Min: 20 |                  Max: 20 |                  Mean: 20.0 |                  Median: 20.0
[NeMo I 2023-11-12 08:28:54 data_preprocessing:412] 75 percentile: 20.00
[NeMo I 2023-11-12 08:28:54 data_preprocessing:413] 99 percentile: 20.00


100%|██████████| 1/1 [00:00<00:00, 18.26batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "early" } tokens { name: "in" } tokens { name: "the" } tokens { name: "morning" } tokens { name: "the" } tokens { name: "stepmother" } tokens { name: "came" } tokens { name: "and" } tokens { name: "pulled" } tokens { name: "them" } tokens { name: "out" } tokens { name: "of" } tokens { name: "bed" } tokens { name: "and" } tokens { name: "gave" } tokens { name: "them" } tokens { name: "each" } tokens { name: "a" } tokens { name: "slice" } tokens { name: "of" } tokens { name: "bread" } tokens { name: "which" } tokens { name: "was" } tokens { name: "still" } tokens { name: "smaller" } tokens { name: "than" } tokens { name: "the" } tokens { name: "former" } tokens { name: "piece" }


[NeMo I 2023-11-12 08:28:54 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:54 punctuation_capitalization_infer_dataset:127] Max length: 31
[NeMo I 2023-11-12 08:28:54 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:54 data_preprocessing:406] Min: 29 |                  Max: 29 |                  Mean: 29.0 |                  Median: 29.0
[NeMo I 2023-11-12 08:28:54 data_preprocessing:412] 75 percentile: 29.00
[NeMo I 2023-11-12 08:28:54 data_preprocessing:413] 99 percentile: 29.00


100%|██████████| 1/1 [00:00<00:00, 13.03batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "we" } tokens { name: "are" } tokens { name: "going" } tokens { name: "into" } tokens { name: "the" } tokens { name: "forest" } tokens { name: "to" } tokens { name: "hew" } tokens { name: "wood" } tokens { name: "and" } tokens { name: "in" } tokens { name: "the" } tokens { name: "evening" } tokens { name: "when" } tokens { name: "we" } tokens { name: "are" } tokens { name: "ready" } tokens { name: "we" } tokens { name: "will" } tokens { name: "come" } tokens { name: "and" } tokens { name: "fetch" } tokens { name: "you" } tokens { name: "again" }


[NeMo I 2023-11-12 08:28:55 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:55 punctuation_capitalization_infer_dataset:127] Max length: 27
[NeMo I 2023-11-12 08:28:55 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:55 data_preprocessing:406] Min: 25 |                  Max: 25 |                  Mean: 25.0 |                  Median: 25.0
[NeMo I 2023-11-12 08:28:55 data_preprocessing:412] 75 percentile: 25.00
[NeMo I 2023-11-12 08:28:55 data_preprocessing:413] 99 percentile: 25.00


100%|██████████| 1/1 [00:00<00:00, 12.12batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "hansel" } tokens { name: "thought" } tokens { name: "the" } tokens { name: "roof" } tokens { name: "tasted" } tokens { name: "very" } tokens { name: "nice" } tokens { name: "and" } tokens { name: "so" } tokens { name: "he" } tokens { name: "tore" } tokens { name: "off" } tokens { name: "a" } tokens { name: "great" } tokens { name: "piece" } tokens { name: "while" } tokens { name: "grethel" } tokens { name: "broke" } tokens { name: "a" } tokens { name: "large" } tokens { name: "round" } tokens { name: "pane" } tokens { name: "out" } tokens { name: "of" } tokens { name: "the" } tokens { name: "window" } tokens { name: "and" } tokens { name: "sat" } tokens { name: "down" } tokens { name: "quite" } tokens { name: "contentedly" }


[NeMo I 2023-11-12 08:28:55 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:55 punctuation_capitalization_infer_dataset:127] Max length: 38
[NeMo I 2023-11-12 08:28:55 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:55 data_preprocessing:406] Min: 36 |                  Max: 36 |                  Mean: 36.0 |                  Median: 36.0
[NeMo I 2023-11-12 08:28:55 data_preprocessing:412] 75 percentile: 36.00
[NeMo I 2023-11-12 08:28:55 data_preprocessing:413] 99 percentile: 36.00


100%|██████████| 1/1 [00:00<00:00, 16.09batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "come" } tokens { name: "in" } tokens { name: "and" } tokens { name: "stop" } tokens { name: "with" } tokens { name: "me" } tokens { name: "and" } tokens { name: "no" } tokens { name: "harm" } tokens { name: "shall" } tokens { name: "come" } tokens { name: "to" } tokens { name: "you" } tokens { name: "and" } tokens { name: "so" } tokens { name: "saying" } tokens { name: "she" } tokens { name: "took" } tokens { name: "them" } tokens { name: "both" } tokens { name: "by" } tokens { name: "the" } tokens { name: "hand" } tokens { name: "and" } tokens { name: "led" } tokens { name: "them" } tokens { name: "into" } tokens { name: "her" } tokens { name: "cottage" }


[NeMo I 2023-11-12 08:28:55 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:55 punctuation_capitalization_infer_dataset:127] Max length: 31
[NeMo I 2023-11-12 08:28:55 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:55 data_preprocessing:406] Min: 29 |                  Max: 29 |                  Mean: 29.0 |                  Median: 29.0
[NeMo I 2023-11-12 08:28:55 data_preprocessing:412] 75 percentile: 29.00
[NeMo I 2023-11-12 08:28:55 data_preprocessing:413] 99 percentile: 29.00


100%|██████████| 1/1 [00:00<00:00, 13.88batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "the" } tokens { name: "old" } tokens { name: "woman" } tokens { name: "behaved" } tokens { name: "very" } tokens { name: "kindly" } tokens { name: "to" } tokens { name: "them" } tokens { name: "but" } tokens { name: "in" } tokens { name: "reality" } tokens { name: "she" } tokens { name: "was" } tokens { name: "a" } tokens { name: "wicked" } tokens { name: "old" } tokens { name: "witch" } tokens { name: "who" } tokens { name: "way" } tokens { name: "laid" } tokens { name: "children" } tokens { name: "and" } tokens { name: "built" } tokens { name: "the" } tokens { name: "breadhouse" } tokens { name: "in" } tokens { name: "order" } tokens { name: "to" } tokens { name: "entice" } tokens { name: "them" } tokens { name: "in" } tokens { name: "but" } tokens { name: "as" } tokens { name: "soon" } tokens { name: "as" } tokens { name: "they" } tokens { name: "were" } tokens { name: "in" } tokens

[NeMo I 2023-11-12 08:28:55 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:55 punctuation_capitalization_infer_dataset:127] Max length: 60
[NeMo I 2023-11-12 08:28:55 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:55 data_preprocessing:406] Min: 58 |                  Max: 58 |                  Mean: 58.0 |                  Median: 58.0
[NeMo I 2023-11-12 08:28:55 data_preprocessing:412] 75 percentile: 58.00
[NeMo I 2023-11-12 08:28:55 data_preprocessing:413] 99 percentile: 58.00


100%|██████████| 1/1 [00:00<00:00, 13.29batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "then" } tokens { name: "she" } tokens { name: "took" } tokens { name: "up" } tokens { name: "hansel" } tokens { name: "with" } tokens { name: "her" } tokens { name: "rough" } tokens { name: "hand" } tokens { name: "and" } tokens { name: "shut" } tokens { name: "him" } tokens { name: "up" } tokens { name: "in" } tokens { name: "a" } tokens { name: "little" } tokens { name: "cage" } tokens { name: "with" } tokens { name: "a" } tokens { name: "lattice" } tokens { name: "door" } tokens { name: "and" } tokens { name: "although" } tokens { name: "he" } tokens { name: "screamed" } tokens { name: "loudly" } tokens { name: "it" } tokens { name: "was" } tokens { name: "of" } tokens { name: "no" } tokens { name: "use" }


[NeMo I 2023-11-12 08:28:56 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:56 punctuation_capitalization_infer_dataset:127] Max length: 34
[NeMo I 2023-11-12 08:28:56 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:56 data_preprocessing:406] Min: 32 |                  Max: 32 |                  Mean: 32.0 |                  Median: 32.0
[NeMo I 2023-11-12 08:28:56 data_preprocessing:412] 75 percentile: 32.00
[NeMo I 2023-11-12 08:28:56 data_preprocessing:413] 99 percentile: 32.00


100%|██████████| 1/1 [00:00<00:00, 17.56batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "grethel" } tokens { name: "began" } tokens { name: "to" } tokens { name: "cry" } tokens { name: "but" } tokens { name: "it" } tokens { name: "was" } tokens { name: "all" } tokens { name: "useless" } tokens { name: "for" } tokens { name: "the" } tokens { name: "old" } tokens { name: "witch" } tokens { name: "made" } tokens { name: "her" } tokens { name: "do" } tokens { name: "as" } tokens { name: "she" } tokens { name: "wanted" }


[NeMo I 2023-11-12 08:28:56 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:56 punctuation_capitalization_infer_dataset:127] Max length: 23
[NeMo I 2023-11-12 08:28:56 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:56 data_preprocessing:406] Min: 21 |                  Max: 21 |                  Mean: 21.0 |                  Median: 21.0
[NeMo I 2023-11-12 08:28:56 data_preprocessing:412] 75 percentile: 21.00
[NeMo I 2023-11-12 08:28:56 data_preprocessing:413] 99 percentile: 21.00


100%|██████████| 1/1 [00:00<00:00, 14.20batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "grethel" } tokens { name: "she" } tokens { name: "cried" } tokens { name: "in" } tokens { name: "a" } tokens { name: "passion" } tokens { name: "get" } tokens { name: "some" } tokens { name: "water" } tokens { name: "quickly" } tokens { name: "be" } tokens { name: "hansel" } tokens { name: "fat" } tokens { name: "or" } tokens { name: "lean" } tokens { name: "this" } tokens { name: "morning" } tokens { name: "i" } tokens { name: "will" } tokens { name: "kill" } tokens { name: "and" } tokens { name: "cook" } tokens { name: "him" }


[NeMo I 2023-11-12 08:28:56 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:56 punctuation_capitalization_infer_dataset:127] Max length: 28
[NeMo I 2023-11-12 08:28:56 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:56 data_preprocessing:406] Min: 26 |                  Max: 26 |                  Mean: 26.0 |                  Median: 26.0
[NeMo I 2023-11-12 08:28:56 data_preprocessing:412] 75 percentile: 26.00
[NeMo I 2023-11-12 08:28:56 data_preprocessing:413] 99 percentile: 26.00


100%|██████████| 1/1 [00:00<00:00, 12.67batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "dear" } tokens { name: "good" } tokens { name: "god" } tokens { name: "help" } tokens { name: "us" } tokens { name: "now" } tokens { name: "she" } tokens { name: "prayed" }


[NeMo I 2023-11-12 08:28:56 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:56 punctuation_capitalization_infer_dataset:127] Max length: 10
[NeMo I 2023-11-12 08:28:56 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:56 data_preprocessing:406] Min: 8 |                  Max: 8 |                  Mean: 8.0 |                  Median: 8.0
[NeMo I 2023-11-12 08:28:56 data_preprocessing:412] 75 percentile: 8.00
[NeMo I 2023-11-12 08:28:56 data_preprocessing:413] 99 percentile: 8.00


100%|██████████| 1/1 [00:00<00:00, 20.66batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "creep" } tokens { name: "in" } tokens { name: "said" } tokens { name: "the" } tokens { name: "witch" } tokens { name: "and" } tokens { name: "see" } tokens { name: "if" } tokens { name: "it" } tokens { name: "is" } tokens { name: "hot" } tokens { name: "enough" } tokens { name: "and" } tokens { name: "then" } tokens { name: "we" } tokens { name: "will" } tokens { name: "put" } tokens { name: "in" } tokens { name: "the" } tokens { name: "bread" } tokens { name: "but" } tokens { name: "she" } tokens { name: "intended" } tokens { name: "when" } tokens { name: "grethel" } tokens { name: "got" } tokens { name: "in" } tokens { name: "to" } tokens { name: "shut" } tokens { name: "up" } tokens { name: "the" } tokens { name: "oven" } tokens { name: "and" } tokens { name: "let" } tokens { name: "her" } tokens { name: "bake" } tokens { name: "so" } tokens { name: "that" } tokens { name: "she" } t

[NeMo I 2023-11-12 08:28:57 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:57 punctuation_capitalization_infer_dataset:127] Max length: 52
[NeMo I 2023-11-12 08:28:57 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:57 data_preprocessing:406] Min: 50 |                  Max: 50 |                  Mean: 50.0 |                  Median: 50.0
[NeMo I 2023-11-12 08:28:57 data_preprocessing:412] 75 percentile: 50.00
[NeMo I 2023-11-12 08:28:57 data_preprocessing:413] 99 percentile: 50.00


100%|██████████| 1/1 [00:00<00:00, 15.03batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "see" } tokens { name: "i" } tokens { name: "could" } tokens { name: "even" } tokens { name: "get" } tokens { name: "in" } tokens { name: "myself" }


[NeMo I 2023-11-12 08:28:57 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:57 punctuation_capitalization_infer_dataset:127] Max length: 9
[NeMo I 2023-11-12 08:28:57 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:57 data_preprocessing:406] Min: 7 |                  Max: 7 |                  Mean: 7.0 |                  Median: 7.0
[NeMo I 2023-11-12 08:28:57 data_preprocessing:412] 75 percentile: 7.00
[NeMo I 2023-11-12 08:28:57 data_preprocessing:413] 99 percentile: 7.00


100%|██████████| 1/1 [00:00<00:00, 24.97batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "and" } tokens { name: "she" } tokens { name: "got" } tokens { name: "up" } tokens { name: "and" } tokens { name: "put" } tokens { name: "her" } tokens { name: "head" } tokens { name: "into" } tokens { name: "the" } tokens { name: "oven" }


[NeMo I 2023-11-12 08:28:57 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:57 punctuation_capitalization_infer_dataset:127] Max length: 13
[NeMo I 2023-11-12 08:28:57 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:57 data_preprocessing:406] Min: 11 |                  Max: 11 |                  Mean: 11.0 |                  Median: 11.0
[NeMo I 2023-11-12 08:28:57 data_preprocessing:412] 75 percentile: 11.00
[NeMo I 2023-11-12 08:28:57 data_preprocessing:413] 99 percentile: 11.00


100%|██████████| 1/1 [00:00<00:00, 22.92batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "and" } tokens { name: "now" } tokens { name: "as" } tokens { name: "there" } tokens { name: "was" } tokens { name: "nothing" } tokens { name: "to" } tokens { name: "fear" } tokens { name: "they" } tokens { name: "went" } tokens { name: "back" } tokens { name: "to" } tokens { name: "the" } tokens { name: "witch's" } tokens { name: "house" } tokens { name: "where" } tokens { name: "in" } tokens { name: "every" } tokens { name: "corner" } tokens { name: "were" } tokens { name: "caskets" } tokens { name: "full" } tokens { name: "of" } tokens { name: "pearls" } tokens { name: "and" } tokens { name: "precious" } tokens { name: "stones" }


[NeMo I 2023-11-12 08:28:57 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:57 punctuation_capitalization_infer_dataset:127] Max length: 32
[NeMo I 2023-11-12 08:28:57 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:57 data_preprocessing:406] Min: 30 |                  Max: 30 |                  Mean: 30.0 |                  Median: 30.0
[NeMo I 2023-11-12 08:28:57 data_preprocessing:412] 75 percentile: 30.00
[NeMo I 2023-11-12 08:28:57 data_preprocessing:413] 99 percentile: 30.00


100%|██████████| 1/1 [00:00<00:00, 15.26batch/s]
 NeMo-text-processing :: DEBUG    :: tokens { name: "then" } tokens { name: "they" } tokens { name: "began" } tokens { name: "to" } tokens { name: "run" } tokens { name: "and" } tokens { name: "rushing" } tokens { name: "into" } tokens { name: "the" } tokens { name: "house" } tokens { name: "they" } tokens { name: "fell" } tokens { name: "upon" } tokens { name: "their" } tokens { name: "father's" } tokens { name: "neck" }


[NeMo I 2023-11-12 08:28:57 punctuation_capitalization_model:1167] Using batch size 1 for inference
[NeMo I 2023-11-12 08:28:57 punctuation_capitalization_infer_dataset:127] Max length: 20
[NeMo I 2023-11-12 08:28:57 data_preprocessing:404] Some stats of the lengths of the sequences:
[NeMo I 2023-11-12 08:28:57 data_preprocessing:406] Min: 18 |                  Max: 18 |                  Mean: 18.0 |                  Median: 18.0
[NeMo I 2023-11-12 08:28:57 data_preprocessing:412] 75 percentile: 18.00
[NeMo I 2023-11-12 08:28:57 data_preprocessing:413] 99 percentile: 18.00


100%|██████████| 1/1 [00:00<00:00, 18.67batch/s]


Old path /n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/7976/110523/7976-110523-0009.flac
[]
ner tagged text We are going into the forest to hew wood, and in the evening when we are ready, we will come and fetch you again.
Emotion Labels ['HAPPY']
tagged transcription We are going into the forest to hew wood, and in the evening when we are ready, we will come and fetch you again.
Old path /n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/7976/110523/7976-110523-0007.flac
[]
ner tagged text But her husband felt heavy at heart and thought it were better to share the last crust with the children.
Emotion Labels ['NEUTRAL']
tagged transcription But her husband felt heavy at heart and thought it were better to share the last crust with the children.
Old path /n/disk1/audio_datasets/EN_libre/LibriSpeech/dev-clean/7976/110523/7976-110523-0011.flac
[]
ner tagged text Come in and stop with me, and no harm shall come to you, and so saying, she took them both by the hand and led them