In [1]:
import tensorflow as tf
import os
from machine_learning.neural_networks import tf_helpers as tfh
import pdb

from pynwb import NWBHDF5IO
import numpy as np
import os
import torch
import soundfile as sf
import scipy.stats
from process_nwb.resample import resample as resample_nwb
from scipy.stats.mstats import zscore
import samplerate

# Creating a list of dictionaries of:
# 
# `ecog_sequence`: ECoG data, clipped to token(-sequence) length
# `text_sequence`: the corresponding text token(-sequence)
# `audio_sequence`: the corresponding audio (MFCC) token sequence (set to an empty list here)
# `phoneme_sequence`: ditto for phonemes--with repeats
#
# Then saving them as tf_records

def transcription_to_array(trial_t0, trial_tF, onset_times, offset_times, transcription, max_length, sampling_rate):
    '''
    Expand a time-aligned transcription into one label per sample.

    Input arguments:
    --------
    trial_t0, trial_tF:
        start and stop time of the trial, in the same units as the onset
        and offset times
    onset_times, offset_times:
        one onset and one offset time per token of the transcription
    transcription:
        an object whose `description` attribute holds the space-separated
        tokens, or None when there is no transcription (e.g. covert trials)
    max_length:
        the number of samples to label
    sampling_rate:
        samples per unit of time; sample k is taken at
        trial_t0 + k/sampling_rate

    Returns:
    --------
    An ndarray of max_length strings: the token that is on at each sample
    time, or 'pau' wherever no token is on.

    Raises:
    --------
    ValueError:
        if more than one token is on at the same sample
    '''

    # if the transcription is missing (e.g. for covert trials)
    if transcription is None:
        return np.full(max_length, 'pau', dtype='<U5')

    # get just the parts of transcript relevant to this trial
    onset_times = np.asarray(onset_times)
    offset_times = np.asarray(offset_times)
    in_trial = (onset_times >= trial_t0) & (offset_times < trial_tF)
    tokens = np.array(transcription.description.split(' '))[in_trial]
    onset_times = onset_times[in_trial]
    offset_times = offset_times[in_trial]

    # active[p, k] is True when token p is on at sample k
    sample_times = trial_t0 + np.arange(max_length)/sampling_rate
    active = (
        (sample_times[None, :] >= onset_times[:, None]) &
        (sample_times[None, :] < offset_times[:, None])
    )

    # no more than one phoneme should be on at once...
    n_overlapping = int(np.sum(active.sum(axis=0) > 1))
    if n_overlapping:
        raise ValueError(
            f'{n_overlapping} sample(s) between {trial_t0} and {trial_tF} '
            'are covered by more than one transcription token'
        )

    # ...but there can be locations with *zero* phonemes; assume 'pau' here.
    # Concatenating (rather than np.insert-ing into `tokens`) lets the string
    # width grow, so 'pau' is not cut short when every token is shorter.
    labels = np.concatenate((['pau'], tokens))
    label_index = (active*np.arange(1, len(labels))[:, None]).sum(axis=0)

    return labels[label_index]

def sentence_tokenize(token_list): # token_type = word_sequence
    '''
    Turn a list of word strings into a list of byte-string tokens.

    Each word is lower-cased, given a trailing underscore, and encoded as
    UTF-8.
    '''
    encoded_tokens = []
    for word in token_list:
        encoded_tokens.append(f'{word.lower()}_'.encode('utf-8'))
    return encoded_tokens

def write_to_Protobuf(path, example_dicts):
    '''
    Write a sequence of examples to disk as a (google) protocol buffer.

    Input arguments:
    --------
    path:
        where to write the TFRecord file
    example_dicts:
        an iterable of dictionaries; each one is converted with
        tfh.make_feature_example and written as one serialized record
    '''
    # The context manager closes the writer even if a conversion fails, so
    # the records already written are flushed and the handle is not leaked.
    with tf.io.TFRecordWriter(path) as writer:
        for example_dict in example_dicts:
            feature_example = tfh.make_feature_example(example_dict)
            writer.write(feature_example.SerializeToString())
            
# sorting function for latent representation filenames, NOT USED FOR THIS
# def custom_sort_key(filename):
#     num_part = int(filename.split('nwb_')[1].split('.wav.pt')[0])
#     return num_part

def resample(
    data, source_to_target_ratio, ZSCORE, resample_method='sinc_best',
    N_channels_max=128
):
    '''
    Resample data with the samplerate library, a group of channels at a time.

    Input arguments:
    --------
    data:
        the data to resample; channels are indexed along the second axis
    source_to_target_ratio:
        the source sampling rate divided by the target sampling rate
    ZSCORE:
        if true, z-score the resampled data before returning them
    resample_method:
        the converter type passed to samplerate.Resampler
    N_channels_max:
        the largest number of channels to resample in one call (capped at
        128)

    Returns:
    --------
    The resampled data, with the channel groups rejoined along the second
    axis; None if data has no channels and ZSCORE is false.
    '''

    ######################
    # If downsampling by an integer, just anti-alias and subsample??
    ######################

    # 128 is the max for the underlying library
    group_width = min(N_channels_max, 128)
    total_channels = data.shape[1]

    resampled_groups = []
    for start in np.arange(0, total_channels, group_width):
        stop = np.min((start+group_width, total_channels))
        converter = samplerate.Resampler(resample_method, channels=stop-start)
        resampled_groups.append(converter.process(
            data[:, start:stop], 1/source_to_target_ratio, end_of_input=True
        ))

    # rejoin the groups along the channel axis
    if not resampled_groups:
        data_mat = None
    elif len(resampled_groups) == 1:
        data_mat = resampled_groups[0]
    else:
        data_mat = np.concatenate(resampled_groups, axis=1)

    return zscore(data_mat) if ZSCORE else data_mat

def downsample(data, rate_source, rate_target, ZSCORE=False):
    '''
    Downsample data from rate_source to rate_target.

    This is a thin wrapper that currently delegates to downsample_NWB; the
    samplerate-based alternative is kept below for reference.
    '''
    # alternative: resample(data, rate_source/rate_target, ZSCORE=ZSCORE)
    downsampled = downsample_NWB(
        data, rate_source, rate_target, ZSCORE=ZSCORE
    )
    return downsampled

def downsample_NWB(data, rate_source, rate_target, ZSCORE=False):
    '''
    Downsample data from rate_source to rate_target using process_nwb.

    Input arguments:
    --------
    data:
        an ndarray of the data to downsample (Nsamples_source, Nchannels)
    rate_source:
        the sampling rate of the input data
    rate_target:
        the sampling rate of the output data
    ZSCORE:
        if true, z-score the downsampled data before returning them

    Returns:
    --------
    X:
        An ndarray (Nsamples_target, Nchannels) of the downsampled data,
        where Nsamples_target = ceil(Nsamples_source*rate_target/rate_source)
    '''

    # Note: zero padding is done in resample

    print("Downsampling signals to %s Hz; please wait..." % rate_target)
    n_source_samples, n_channels = data.shape

    ##############
    # A scaling of 1e6 was said to help with numerical accuracy (is this
    # true??); it is currently switched off.
    scale = 1
    ##############

    # malloc
    n_target_samples = int(np.ceil(n_source_samples*rate_target/rate_source))
    X = np.zeros((n_target_samples, n_channels))

    # One channel at a time, to improve memory usage for long signals
    for ch in range(n_channels):
        X[:, ch] = resample_nwb(data[:, ch]*scale, rate_target, rate_source)
    X = X/scale

    return scipy.stats.mstats.zscore(X) if ZSCORE else X

# Removing bad electrodes

def elec_layout(grid_size, grid_step):
    '''
    Build the 2D array of zero-indexed electrode numbers for the grid.

    The numbers count down from prod(grid_size)-1 to 0, are arranged with
    shape grid_size, and are then transposed.  Every grid_step-th row and
    column of the result is kept.
    '''
    n_electrodes = np.prod(grid_size)
    descending = np.arange(n_electrodes)[::-1]
    full_layout = descending.reshape(grid_size).T

    # now correct for subsampling the grid
    return full_layout[::grid_step, ::grid_step]
    
def good_electrodes(grid_size, bad_electrodes):
    '''
    Return the set of electrodes on the grid that are not marked bad.

    NB!!! bad_electrodes are 1-indexed, but the returned good electrodes
    are zero-indexed!!

    The result is a set, so it carries no order information.  The canonical
    ordering is established with good_channels, since after all the data
    size is (... x Nchannels),  not (... x Nelectrodes).
    '''

    # bad_electrodes = [int(e.strip()) for e in bad_electrodes]
    every_electrode = set(range(np.prod(grid_size)))
    bad_zero_indexed = np.array(bad_electrodes) - 1
    return every_electrode.difference(bad_zero_indexed)
    
def bipolar_to_elec_map(layout):
    '''
    List the pairs of neighbouring electrodes in a 2D electrode layout.

    The layout is walked row by row.  At each position the electrode is
    paired first with its neighbour in the next column (if there is one)
    and then with its neighbour in the next row (if there is one).

    Returns an ndarray with one (electrode, neighbour) pair per row.
    '''
    # print('WARNING!!!!  MAKING UP bipolar_to_elec_map!!!')
    n_rows, n_cols = layout.shape[:2]
    pairs = []
    for row, col in np.ndindex(n_rows, n_cols):
        here = layout[row, col]
        if col + 1 < n_cols:
            pairs.append((here, layout[row, col+1]))
        if row + 1 < n_rows:
            pairs.append((here, layout[row+1, col]))
    return np.array(pairs)
    
def good_channels(elec_layout, bipolar_to_elec_map, good_electrodes):
    '''
    Return the indices of the bipolar channels built only from good
    electrodes.

    Pseudo-channels, constructed (on the fly) from the physical electrodes.
    Only bipolar referencing is implemented here: a channel is one row of
    bipolar_to_elec_map, and it is kept when every electrode in that row is
    in good_electrodes.

    Input arguments:
    --------
    elec_layout:
        accepted for interface compatibility; not read by this function
    bipolar_to_elec_map:
        an iterable of electrode pairs, one per bipolar channel
    good_electrodes:
        a container of the (zero-indexed) good electrodes

    Returns:
    --------
    A list of channel indices into bipolar_to_elec_map, in increasing order.

    NB!!: The *order* of these channels matters--it determines the order of
    the input data.  Here it follows the row order of bipolar_to_elec_map.
    '''
    return [
        ch for ch, elec_pair in enumerate(bipolar_to_elec_map)
        if all(e in good_electrodes for e in elec_pair)
    ]

2024-03-11 06:24:48.238312: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-11 06:24:48.238342: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-11 06:24:48.239392: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-03-11 06:24:48.244231: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.




In [2]:
import matplotlib.pyplot as plt
from scipy.signal import butter, lfilter, filtfilt, hilbert
from scipy.fft import fft, ifft

# Accumulates the examples of every block processed below.
all_example_dict = []
patient = 'EFC403'

# Recording blocks to process for this patient.
blocks = [
    3, 4, 6, 7, 9, 10, 12, 13, 15, 17, 18, 19, 20, 21, 22, 27,
    28, 30, 33, 35, 38, 39, 40, 42, 44, 46, 48, 50, 52, 53, 54, 55,
    56, 59, 60, 61, 62, 63, 64, 65, 70, 73, 74, 75, 76, 77, 83, 92,
    93, 94, 95, 97, 98, 99, 100, 101, 108, 109, 110, 111, 112, 113, 114, 115,
]

# Earlier block selections, kept for reference
# (change this for what tf_record you're making):
# [4, 5, 6, 7, 8, 9, 13, 14, 15, 16, 17, 18, 19, 25, 26, 27, 33, 34, 35, 44, 48, 49, 59, 60, 58, 45, 46, 47]
# [3, 23, 72]
# [4, 6, 8, 12, 17, 18, 20, 32, 34, 41, 57, 61, 66, 69, 73, 77, 83, 87]
# [4, 41, 57, 61, 66, 69, 73, 77, 83, 87]
# [3,4,6,8,10,12,14,15,19,23,28,30,38,40,42,46,57,61,72]

# Bad electrodes (1-indexed): 129 through 149 and 161 through 181.
_bad_electrodes = [*range(129, 150), *range(161, 182)]

# [129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 
#                          139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 
#                          149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 
#                          159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 
#                          169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 
#                          179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 
#                          189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 
#                          199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 
#                          209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 
#                          219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 
#                          229, 230, 231, 232, 233, 234, 235, 236, 237, 238,
#                          239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 
#                          249, 250, 251, 252, 253, 254, 255, 256]


# [1, 2, 33, 50, 54, 64, 128, 129, 193, 194, 256] # [1,2,63,64,65,127,143,193,194,195,196,235,239,243,252,254,255,256]
    #[1, 2, 33, 50, 54, 64, 128, 129, 193, 194, 256]
    #[1,2,63,64,65,127,143,193,194,195,196,235,239,243,252,254,255,256]
# bad_electrodes = [i - 1 for i in _bad_electrodes]
# good_electrodes = [x for x in list(np.arange(256)) if x not in bad_electrodes]

# Electrode grid geometry: 16 x 16, with no spatial subsampling.
grid_size, grid_step = np.array([16, 16]), 1

# Layout first, then the bipolar pairs derived from it.
_elec_layout = elec_layout(grid_size, grid_step)
_bipolar_to_elec_map = bipolar_to_elec_map(_elec_layout)

# Zero-indexed electrodes that survive the removal of _bad_electrodes.
_good_electrodes = good_electrodes(grid_size, _bad_electrodes)

# print(_bad_electrodes)
# print(_good_electrodes)

# Bipolar channels whose electrodes are all good.
_good_channels = good_channels(
    _elec_layout, _bipolar_to_elec_map, _good_electrodes
)

# print(len(_good_channels))
# Settings shared by every block; hoisted out of the loop because they never
# change from one block to the next.
token_type = 'word_sequence'

max_seconds_dict = {
    'phoneme': 0.2,
    'word': 1.0,
    'word_sequence': 6.25,
    'word_piece_sequence': 6.25,
    'phoneme_sequence': 6.25,
    'trial': 6.25
}

makin_sr = 101.7 # 200

# The longest phoneme labelling produced for one trial, in samples.
max_seconds = max_seconds_dict.get(token_type)
max_samples = int(np.floor(makin_sr*max_seconds))

for block in blocks:

    tfrecord_path = f'/home/bayuan/Documents/fall23/ecog2vec/wav2vec_tfrecords/ecog2txt/word_sequence/tf_records_403_orig/{patient}_B{block}.tfrecord' # CHANGE PER SUBJECT

    nwb_filepath = folder_path = f"/NWB/{patient}/{patient}_B{block}.nwb"

    # Keep every read from `nwbfile` inside the `with`, so that the file is
    # still open when it happens; the file is closed as soon as the block's
    # examples have been assembled instead of staying open for the whole run.
    with NWBHDF5IO(nwb_filepath, load_namespaces=True, mode='r') as io:
        nwbfile = io.read()
        electrical_series = nwbfile.acquisition['ElectricalSeries']

        electrode_table = electrical_series.electrodes.table[:]

        # columns of the recording that belong to the grid
        indices = np.where(np.logical_or(
            electrode_table['group_name'] == 'L256GridElectrode electrodes',
            electrode_table['group_name'] == 'Grid electrodes'
        ))[0]
        # R256GridElectrode electrodes

        raw_data = electrical_series.data[:, indices]

        # keep only the good electrodes, in increasing order
        raw_data = raw_data[:, sorted(_good_electrodes)]

        # The filter outputs are written back into raw_data in place.  If
        # the stored samples are not floating point, that assignment would
        # truncate them, so convert first.
        if not np.issubdtype(raw_data.dtype, np.floating):
            raw_data = raw_data.astype(np.float64)

        nwb_sr = electrical_series.rate

        # 70-200 Hz band-pass, with the edges normalized by the Nyquist rate
        w_l = 70 / (nwb_sr / 2) # Normalize the frequency
        w_h = 200 / (nwb_sr / 2)
        b, a = butter(5, [w_l,w_h], 'band')

        for ch in range(raw_data.shape[1]):
            raw_data[:,ch] = filtfilt(b, a, raw_data[:,ch])

            #analytic amp
            raw_data[:,ch] = np.abs(hilbert(raw_data[:,ch]))

        high_gamma = raw_data

        phoneme_transcriptions = nwbfile.processing['behavior'].data_interfaces['BehavioralEpochs'].interval_series #['phoneme transcription'].timestamps[:]

        if 'phoneme transcription' in phoneme_transcriptions:
            print(f'Phoneme transcription for block {block} exists.')
            phoneme_transcript = phoneme_transcriptions['phoneme transcription']
            # data == 1 marks the onsets and data == -1 the offsets
            phoneme_onset_times = phoneme_transcript.timestamps[
                phoneme_transcript.data[()] == 1]
            phoneme_offset_times = phoneme_transcript.timestamps[
                phoneme_transcript.data[()] == -1]
        else:
            phoneme_transcript = None
            phoneme_onset_times = None
            phoneme_offset_times = None

        example_dicts = []

        high_gamma = downsample(high_gamma, nwb_sr, makin_sr, ZSCORE=True)

        for index, trial in enumerate(nwbfile.trials):
            t0 = float(trial.iloc[0].start_time)
            tF = float(trial.iloc[0].stop_time)

            i0 = np.rint(makin_sr*t0).astype(int)
            iF = np.rint(makin_sr*tF).astype(int)

            # ECOG (C) SEQUENCE
            c = high_gamma[i0:iF,:]

            print(c.shape)
            nsamples = c.shape[0]

            # TEXT SEQUENCE
            speech_string = trial['transcription'].values[0]
            text_sequence = sentence_tokenize(speech_string.split(' '))

            # AUDIO SEQUENCE
            audio_sequence = []

            # PHONEME SEQUENCE
            M = iF - i0
            max_length = min(M, max_samples)

            phoneme_array = transcription_to_array(
                t0, tF, phoneme_onset_times, phoneme_offset_times,
                phoneme_transcript, max_length, makin_sr
            )

            phoneme_sequence = [ph.encode('utf-8') for ph in phoneme_array]

            # The ECoG window is not clipped to max_samples, so make the
            # labels the same length as the window: cut off any excess, or
            # pad by repeating the last label.
            if len(phoneme_sequence) > nsamples:
                phoneme_sequence = phoneme_sequence[:nsamples]
            elif len(phoneme_sequence) < nsamples:
                pad = phoneme_sequence[-1] if phoneme_sequence else b'pau'
                phoneme_sequence.extend(
                    [pad]*(nsamples - len(phoneme_sequence))
                )

            print('\n------------------------')
            print(f'For sentence {index}: ')
            print(c[0:5,0:5])
            print(f'Latent representation shape: {c.shape} (should be [samples, nchannel])')
            print(text_sequence)
            print(f'Audio sequence: {audio_sequence}')
            print(f'Length of phoneme sequence: {len(phoneme_sequence)}')
            print(phoneme_sequence)
            print('------------------------\n')

            example_dicts.append({
                'ecog_sequence': c,
                'text_sequence': text_sequence,
                'audio_sequence': audio_sequence,
                'phoneme_sequence': phoneme_sequence,
            })

    all_example_dict.extend(example_dicts)
    print(len(example_dicts))
    print(len(all_example_dict))
    write_to_Protobuf(tfrecord_path, example_dicts)

print(len(all_example_dict))


  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 3 exists.
Downsampling signals to 101.7 Hz; please wait...
(248, 214)

------------------------
For sentence 0: 
[[ 0.22171723  0.55805213  1.773877   -0.40281356  0.9100834 ]
 [ 0.2134003  -0.12646927 -0.19319663  0.33022219  0.76221152]
 [-0.80818572 -0.48961807  0.05786537  2.00256761 -0.00338057]
 [-0.41198161 -0.59507076 -0.69685591 -0.89069729  1.74252249]
 [-0.04106124  0.89912314  0.0676038   0.06475192  0.16839942]]
Latent representation shape: (248, 214) (should be [samples, nchannel])
[b'coconut_', b'cream_', b'pie_', b'makes_', b'a_', b'nice_', b'dessert_']
Audio sequence: []
Length of phoneme sequence: 248
[b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'ow', b'ow', b'ow', b'ow', b'ow', b'ow', b'ow', b'ow', b'ow', b'ow', b'ow', b'ow', b'k', b'k', b'k', b'ax', b'ax', b'ax', b'ax', b'ax', b'ax', b'ax', b'ax', b'ax', b'ax', b'ax', b'ax', b'ax', b'ax', b'ax', b'ax', b'ax', b'ax', b'ax', b'ax', b'n', b'n', b'n', 

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 4 exists.
Downsampling signals to 101.7 Hz; please wait...
(197, 214)

------------------------
For sentence 0: 
[[-0.93781565 -1.11237207  1.31704505  1.33657644 -0.68471512]
 [-0.2536624  -0.50520259 -1.05251373 -0.17271652 -0.70549279]
 [-0.31462073  0.25752186 -0.95691903 -0.9529565  -0.04990929]
 [-0.2640519  -0.31818023 -0.41221172  0.401283    0.04211006]
 [-0.40037498 -0.38460657  0.66057139  0.35919413 -0.47270281]]
Latent representation shape: (197, 214) (should be [samples, nchannel])
[b'part_', b'of_', b'the_', b'cake_', b'was_', b'eaten_', b'by_', b'the_', b'dog_']
Audio sequence: []
Length of phoneme sequence: 197
[b'pau', b'pau', b'pau', b'aa', b'aa', b'aa', b'aa', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b't', b't', b'ah', b'ah', b'v', b'v', b'v', b'v', b'dh', b'dh', b'dh', b'dh', b'dh', b'dh', b'dh', b'dh', b'ax', b'ax', b'ax', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 6 exists.
Downsampling signals to 101.7 Hz; please wait...
(202, 214)

------------------------
For sentence 0: 
[[-0.35740876 -0.81118126 -0.32688487  0.73331951  0.41746873]
 [-1.08956508 -0.18995905 -0.97312341  1.25146918 -1.04582142]
 [-1.09581439  0.06852681 -0.68482859 -1.29685199 -0.46970164]
 [-0.23660922  0.49164439  1.41910275 -0.70723548 -0.75096051]
 [ 0.478884    1.04867576  1.31501731  0.41397442  2.30400672]]
Latent representation shape: (202, 214) (should be [samples, nchannel])
[b'doctors_', b'prescribe_', b'drugs_', b'too_', b'freely_']
Audio sequence: []
Length of phoneme sequence: 202
[b'pau', b'pau', b'pau', b'd', b'd', b'd', b'aa', b'aa', b'aa', b'aa', b'aa', b'aa', b'aa', b'aa', b'aa', b'aa', b'aa', b'aa', b'aa', b'k', b'k', b'k', b't', b't', b'er', b'er', b'er', b'er', b'z', b'z', b'z', b'z', b'z', b'z', b'z', b'z', b'z', b'z', b'z', b'z', b'p', b'p', b'r', b'r', b'r', b'r', b'ax', b'ax', b's', b's', b's', b's', b's', b's', b's',

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 7 exists.
Downsampling signals to 101.7 Hz; please wait...
(265, 214)

------------------------
For sentence 0: 
[[ 0.29336704 -0.63721155 -0.68109304 -1.25115867  0.49242839]
 [ 0.20787207 -0.42778416 -0.53377293  1.04453736  0.03570399]
 [ 0.22816406  0.08106519  0.00392751  0.34329781 -0.95825091]
 [ 1.38622413 -0.28327054  0.02401797  0.37298545  0.75394642]
 [-0.47639114  0.38931579  1.35246307  0.12972078  0.21265916]]
Latent representation shape: (265, 214) (should be [samples, nchannel])
[b'part_', b'of_', b'the_', b'cake_', b'was_', b'eaten_', b'by_', b'the_', b'dog_']
Audio sequence: []
Length of phoneme sequence: 265
[b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 9 exists.
Downsampling signals to 101.7 Hz; please wait...
(377, 214)

------------------------
For sentence 0: 
[[-0.03264222 -0.01696766 -0.59868345  2.7867852   0.30827417]
 [-0.01191072  0.03525553  0.79959667  0.29453725 -0.26920331]
 [-0.02264419  0.82921599 -0.46067849  0.59483664 -0.55494088]
 [-0.02143564 -0.60701425 -0.42122192 -0.20496679 -1.04808108]
 [-0.02968082 -0.16355583 -0.50495243 -0.56917868 -0.78885993]]
Latent representation shape: (377, 214) (should be [samples, nchannel])
[b'tradition_', b'requires_', b'parental_', b'approval_', b'for_', b'under_', b'age_', b'marriage_']
Audio sequence: []
Length of phoneme sequence: 377
[b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 10 exists.
Downsampling signals to 101.7 Hz; please wait...
(178, 214)

------------------------
For sentence 0: 
[[-0.32011289 -1.30585466 -1.04558268  0.33061601 -1.16201482]
 [ 0.34801152 -0.58098934 -0.37313052 -0.57997192  0.59641416]
 [-0.42328512 -1.54732036 -1.1507244  -0.9296157  -0.43059181]
 [-0.17920244 -0.00856648  1.01905462  0.47690108  0.37153521]
 [ 0.28993143  0.3139358   1.51551123  1.58226611 -1.22722993]]
Latent representation shape: (178, 214) (should be [samples, nchannel])
[b'part_', b'of_', b'the_', b'cake_', b'was_', b'eaten_', b'by_', b'the_', b'dog_']
Audio sequence: []
Length of phoneme sequence: 178
[b'pau', b'pau', b'pau', b'aa', b'aa', b'aa', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b't', b't', b'ah', b'ah', b'ah', b'v', b'v', b'v', b'v', b'v', b'v', b'v', b'dh', b'dh', b'dh', b'ax', b'ax', b'ax', b'ax', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'k',

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 12 exists.
Downsampling signals to 101.7 Hz; please wait...
(244, 214)

------------------------
For sentence 0: 
[[-0.91086036 -1.03568165 -0.01668584  0.83472029  0.37831073]
 [-0.13550849 -0.87012055  1.04846506  1.15802733  0.44850757]
 [-0.7216975  -1.08292962  0.02201981 -0.13031312  0.41707978]
 [-1.07337107 -0.12590796 -0.13793138  0.0503189  -0.46396498]
 [-0.07101128  0.37926436 -1.35339579  0.90626661 -0.00770491]]
Latent representation shape: (244, 214) (should be [samples, nchannel])
[b'her_', b'experiments_', b'positive_', b'outcome_', b'was_', b'unexpected_']
Audio sequence: []
Length of phoneme sequence: 244
[b'hh', b'hh', b'hh', b'er', b'er', b'er', b'er', b'er', b'er', b'er', b'er', b'er', b'er', b'er', b'ih', b'ih', b'k', b'k', b's', b's', b's', b's', b's', b's', b's', b's', b's', b's', b'p', b'p', b'p', b'p', b'eh', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'ax',

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 13 exists.
Downsampling signals to 101.7 Hz; please wait...
(275, 214)

------------------------
For sentence 0: 
[[ 0.14264375  0.4853765  -0.89502136  1.29172091  0.12816533]
 [ 0.95269746 -0.84625721  1.13791896  0.22324892 -0.27955235]
 [-0.667536    0.23010503  0.91875379  1.67872174 -1.13283741]
 [-0.3583789  -0.84504429 -0.1293067  -0.47107187 -0.9930629 ]
 [ 0.88989096 -0.23831774 -0.17647174 -0.42820175 -0.74941724]]
Latent representation shape: (275, 214) (should be [samples, nchannel])
[b'part_', b'of_', b'the_', b'cake_', b'was_', b'eaten_', b'by_', b'the_', b'dog_']
Audio sequence: []
Length of phoneme sequence: 275
[b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', 

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 15 exists.
Downsampling signals to 101.7 Hz; please wait...
(223, 214)

------------------------
For sentence 0: 
[[-0.41191443 -0.33649126 -0.02815055  0.06151974 -0.28485208]
 [-0.94959319 -0.57806092 -0.82878312 -0.85953072  0.20052484]
 [-0.38391979 -0.46625572  0.35300792  0.21137583  0.81455193]
 [-0.20801225 -0.33965316  0.81865614  0.69670726  0.36916332]
 [-1.01942642 -0.15953186 -0.17534601  0.82730734  0.45508576]]
Latent representation shape: (223, 214) (should be [samples, nchannel])
[b'michael_', b'colored_', b'the_', b'bedroom_', b'wall_', b'with_', b'crayons_']
Audio sequence: []
Length of phoneme sequence: 223
[b'pau', b'pau', b'pau', b'm', b'm', b'm', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'k', b'k', b'ax', b'ax', b'ax', b'ax', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'ah', b'l', b'l', b'l', b

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 17 exists.
Downsampling signals to 101.7 Hz; please wait...
(203, 214)

------------------------
For sentence 0: 
[[-0.1301644   0.03076495 -0.36155579  0.45523926 -0.86593427]
 [-0.02269949 -0.46365757 -0.48979485 -0.09205357 -0.3274758 ]
 [-0.16538419  0.70298463  0.12874765 -0.44067663  0.56569166]
 [-0.17930877 -0.19179032 -1.31446702 -0.97569002 -0.17093764]
 [-0.08182965 -0.33346259  0.38099139 -0.16576428 -0.56488276]]
Latent representation shape: (203, 214) (should be [samples, nchannel])
[b'the_', b'rose_', b'corsage_', b'smelled_', b'sweet_']
Audio sequence: []
Length of phoneme sequence: 203
[b'pau', b'pau', b'pau', b'pau', b'ax', b'ax', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'ow', b'ow', b'ow', b'ow', b'ow', b'ow', b'ow', b'ow', b'ow', b'ow', b'ow', b'ow', b'z', b'z', b'z', b'z', b'z', b'z', b'z', b'z', b'z', b'k', b'k', b'k', b'k', b'k', b'k', b'ao', b'ao', b'r', b'r

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 18 exists.
Downsampling signals to 101.7 Hz; please wait...
(411, 214)

------------------------
For sentence 0: 
[[-0.24031478  0.70419581  1.56535144  0.86185513 -0.15320205]
 [-0.06666325  0.54820594 -0.67865091 -0.42126731  1.46711831]
 [-0.17120104  0.56316811 -1.53697823 -0.74190128  0.43558132]
 [-0.01205237  0.22313766  0.34121669 -0.39309953 -0.10255765]
 [-0.31409977  0.47474723 -0.71696128 -0.98895087 -0.18434029]]
Latent representation shape: (411, 214) (should be [samples, nchannel])
[b'i_', b"don't_", b'hah_', b'i_', b'know_', b'i_', b"didn't_", b'meet_', b'her_', b'early_', b'enough_']
Audio sequence: []
Length of phoneme sequence: 411
[b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pa

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 19 exists.
Downsampling signals to 101.7 Hz; please wait...
(169, 214)

------------------------
For sentence 0: 
[[ 0.59762168 -0.53793511 -0.32019649  1.04703941  0.00387131]
 [ 0.20570583  0.60284208 -0.29219409 -0.48571727 -0.02486658]
 [-0.23968409 -0.00341054  0.42149022  0.45675932  0.2780528 ]
 [-0.18740538  0.02578807 -1.33774089 -0.46222047 -0.53780114]
 [-0.12329137 -0.30980546 -1.24819423 -1.41474693 -1.11770302]]
Latent representation shape: (169, 214) (should be [samples, nchannel])
[b'part_', b'of_', b'the_', b'cake_', b'was_', b'eaten_', b'by_', b'the_', b'dog_']
Audio sequence: []
Length of phoneme sequence: 169
[b'pau', b'pau', b'aa', b'aa', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b't', b'ah', b'ah', b'v', b'v', b'v', b'dh', b'dh', b'dh', b'dh', b'dh', b'dh', b'dh', b'ax', b'ax', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'ey', b'ey', b'ey', b'ey', b'e

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 20 exists.
Downsampling signals to 101.7 Hz; please wait...
(262, 214)

------------------------
For sentence 0: 
[[ 0.1973696  -0.42108048 -0.55591205 -1.12216246  0.57389101]
 [ 0.15713756 -0.32441375 -0.46954831 -0.51360557  0.06392186]
 [ 0.07470862 -0.3742123  -0.06533078  0.04843577 -0.36751734]
 [-0.36085064 -0.10934806 -0.51307388 -0.31495116 -0.62886646]
 [-0.30923467 -0.05096227  0.41491047 -0.86612333 -0.11391606]]
Latent representation shape: (262, 214) (should be [samples, nchannel])
[b'part_', b'of_', b'the_', b'cake_', b'was_', b'eaten_', b'by_', b'the_', b'dog_']
Audio sequence: []
Length of phoneme sequence: 262
[b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', 

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 21 exists.
Downsampling signals to 101.7 Hz; please wait...
(286, 214)

------------------------
For sentence 0: 
[[-1.07223631 -0.32189934 -0.80873192  0.80388504 -0.61933871]
 [-0.03487048  0.29195879 -0.46985815 -0.56657405  0.05215803]
 [-0.64339645  0.39827836 -0.32856341 -0.65470462 -0.12242337]
 [-0.7980274   1.32640244  0.76375377 -1.04626298 -0.5260627 ]
 [ 1.38098664 -0.16739188  0.51633834 -0.6753096   0.54925807]]
Latent representation shape: (286, 214) (should be [samples, nchannel])
[b'part_', b'of_', b'the_', b'cake_', b'was_', b'eaten_', b'by_', b'the_', b'dog_']
Audio sequence: []
Length of phoneme sequence: 286
[b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 22 exists.
Downsampling signals to 101.7 Hz; please wait...
(342, 214)

------------------------
For sentence 0: 
[[ 0.28756811 -1.99412038 -0.60099899  0.98631589 -0.11410233]
 [ 0.21280267 -1.47104763  1.00487986  1.29632803 -0.29527707]
 [-0.64201851  1.28659765 -0.847812    0.39207763 -0.34345585]
 [ 0.09153146 -1.37997126 -1.00839275  0.51460264 -0.37096815]
 [-0.56670871  2.17440432  0.09036993  1.46462319 -0.41232832]]
Latent representation shape: (342, 214) (should be [samples, nchannel])
[b'lori_', b'costume_', b'needed_', b'black_', b'gloves_', b'to_', b'be_', b'completely_', b'elegant_', b'-_', b'ahem_']
Audio sequence: []
Length of phoneme sequence: 342
[b'pau', b'pau', b'pau', b'l', b'l', b'l', b'l', b'ao', b'ao', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'iy', b'iy', b'iy', b'z', b'z', b'z', b'z', b'z', b'z', b'z', b'z', b'z', b'z', b'k', b'k', b'k', b'k', b'k', b'k', b'aa', b'aa', b'aa', b'aa', b'aa', b'aa', b'aa', b'aa', b'aa

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 27 exists.
Downsampling signals to 101.7 Hz; please wait...
(246, 214)

------------------------
For sentence 0: 
[[-0.13296916 -0.07968339  0.57417975  0.93716663  0.0684909 ]
 [-1.3658792  -0.51645389 -0.13387398 -0.10129548  0.86519465]
 [-0.73264031 -0.3184487   0.77334701  0.33202102 -0.31117856]
 [-0.2552379   0.51461874 -0.22261159 -0.37505314  0.09007012]
 [-0.30393141 -0.97929109 -0.97618487 -0.44511879 -0.62703661]]
Latent representation shape: (246, 214) (should be [samples, nchannel])
[b"we'll_", b'serve_', b'rhubarb_', b'pie_', b'after_', b'rachel_', b'talk_']
Audio sequence: []
Length of phoneme sequence: 246
[b'pau', b'pau', b'w', b'w', b'w', b'w', b'iy', b'iy', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b's', b's', b's', b's', b's', b's', b's', b's', b's', b's', b's', b's', b's', b's', b's', b'er', b'er', b'er', b'er', b'er', b'er', b'er', b'er', b'er', b'er', b'er', b'er', b'er', b'er', b'er', b'er', b'er', b'er', b'er', b'er'

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 28 exists.
Downsampling signals to 101.7 Hz; please wait...
(213, 214)

------------------------
For sentence 0: 
[[-9.49955605e-01  4.94946513e-01  2.03735709e+00  1.07606717e-01
   3.08885998e-02]
 [-4.44412774e-01 -1.35385279e+00  1.27905878e+00  7.11119966e-01
   7.72091301e-01]
 [-7.31977043e-01  5.60583341e-01 -2.56243800e-01  8.29807011e-04
   1.28855717e+00]
 [-6.79948693e-01  7.66315218e-01 -1.80574086e-01  1.82825107e+00
   4.01434941e-01]
 [-1.44979393e+00 -1.28306165e+00 -1.07366740e+00  1.91530380e+00
   1.24954975e-01]]
Latent representation shape: (213, 214) (should be [samples, nchannel])
[b'several_', b'adults_', b'and_', b'kids_', b'are_', b'in_', b'the_', b'room_']
Audio sequence: []
Length of phoneme sequence: 213
[b's', b's', b's', b's', b's', b's', b's', b's', b's', b's', b's', b's', b's', b'eh', b'eh', b'v', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'ax', b'ax', b'l', b'l', b'l', b'l

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 30 exists.
Downsampling signals to 101.7 Hz; please wait...
(120, 214)

------------------------
For sentence 0: 
[[ 0.59114074  0.10318198  1.35432403  1.24186956 -0.70150105]
 [-1.80755464  0.29095676 -0.8401408   0.35123078 -0.81901585]
 [-1.33672611  0.10172357 -0.31636582 -0.39983724  0.33782691]
 [-1.58544379 -1.02882636 -0.22101324  1.48992785  0.36002113]
 [-2.71562685 -1.57224691  1.90212022  3.42351992 -1.01025719]]
Latent representation shape: (120, 214) (should be [samples, nchannel])
[b'this_', b'was_', b'easy_', b'for_', b'us_']
Audio sequence: []
Length of phoneme sequence: 120
[b'pau', b'pau', b'pau', b'pau', b'ih', b'ih', b'ih', b'ih', b'ih', b's', b's', b's', b's', b's', b's', b's', b's', b's', b's', b's', b'w', b'aa', b'aa', b'z', b'z', b'z', b'z', b'z', b'z', b'z', b'z', b'z', b'z', b'z', b'z', b'z', b'iy', b'iy', b'iy', b'iy', b'iy', b'iy', b'iy', b'iy', b'iy', b'iy', b'iy', b'iy', b'iy', b'z', b'z', b'z', b'z', b'z', b'z', b'z', b'z

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 33 exists.
Downsampling signals to 101.7 Hz; please wait...
(137, 214)

------------------------
For sentence 0: 
[[-0.32174221 -0.53665009  0.00585737 -0.63238389 -0.10259649]
 [-0.54495893 -0.23369787 -0.25414308  0.30590827 -0.64440229]
 [ 0.68448536  0.52873452 -0.13634457 -0.3600208   0.18705096]
 [ 1.03337571  0.19769796  0.72899648 -0.76840507  0.65676211]
 [-0.52023656 -0.95718886 -0.74848177  0.68777166 -0.374915  ]]
Latent representation shape: (137, 214) (should be [samples, nchannel])
[b'did_', b'you_', b'eat_', b'lunch_', b'yesterday_']
Audio sequence: []
Length of phoneme sequence: 137
[b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'ih', b'ih', b'ih', b'ih', b'ih', b'd', b'd', b'd', b'd', b'd', b'd', b'd', b'd', b'y', b'y', b'y', b'uw', b'uw', b'uw', b'uw', b'uw', b'uw', b'uw', b'iy', b'iy', b'iy', b'iy', b'iy', b'iy', b'iy', b'iy', b'iy', b'iy', b'iy', b'iy', b'iy', b'iy', b'iy', b't', b't', b't', b't', b't', b't', b'l', b'l', b'l', b'l

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 35 exists.
Downsampling signals to 101.7 Hz; please wait...
(171, 214)

------------------------
For sentence 0: 
[[-1.13748853  0.11848467 -0.70898209  1.56671596 -0.10023455]
 [-0.09408244  0.66939769 -0.2122985   0.35581241  0.1592308 ]
 [-0.79755534  1.81368309  1.32201458 -0.28040421 -0.86123424]
 [ 0.25094226  0.06812116  0.97521524  0.70558544  0.13740074]
 [-0.33228604  0.41103397 -0.02579577  0.284266    0.11301183]]
Latent representation shape: (171, 214) (should be [samples, nchannel])
[b'part_', b'of_', b'the_', b'cake_', b'was_', b'eaten_', b'by_', b'the_', b'dog_']
Audio sequence: []
Length of phoneme sequence: 171
[b'p', b'p', b'p', b'p', b'aa', b'aa', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b't', b't', b'ah', b'ah', b'v', b'v', b'v', b'v', b'dh', b'dh', b'dh', b'dh', b'dh', b'ax', b'ax', b'ax', b'ax', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'ey', b'ey', b'ey', b'ey', b

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 38 exists.
Downsampling signals to 101.7 Hz; please wait...
(312, 214)

------------------------
For sentence 0: 
[[-0.11413511  1.75510017 -0.89172144  0.81658368 -0.56231468]
 [-0.17769817 -0.73422578 -0.36464687 -0.60410749 -0.38999324]
 [-0.26055769 -1.28241229 -0.12031013 -0.44383156  0.40486116]
 [-0.24010057  2.34949383 -0.57823256  0.44185834  0.52224058]
 [-0.23848929  0.02000277 -1.03824058  0.29130802 -0.39959432]]
Latent representation shape: (312, 214) (should be [samples, nchannel])
[b'tradition_', b'requires_', b'parental_', b'approval_', b'for_', b'under_', b'age_', b'marriage_']
Audio sequence: []
Length of phoneme sequence: 312
[b't', b't', b't', b't', b't', b't', b'r', b'r', b'ax', b'ax', b'd', b'd', b'd', b'd', b'd', b'd', b'ih', b'ih', b'ih', b'ih', b'ih', b'ih', b'ih', b'sh', b'sh', b'sh', b'sh', b'sh', b'sh', b'sh', b'sh', b'sh', b'sh', b'sh', b'sh', b'ax', b'ax', b'ax', b'ax', b'ax', b'ax', b'ax', b'n', b'n', b'n', b'n', b'n', b'n

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 39 exists.
Downsampling signals to 101.7 Hz; please wait...
(311, 214)

------------------------
For sentence 0: 
[[ 0.17237436 -0.78985327  1.07164477  2.06478003  1.69703537]
 [ 0.09818381 -1.98777619 -1.56342568  1.27504522 -0.88471095]
 [ 0.1710889  -1.19816623 -0.25730592 -0.78394068 -0.72351119]
 [ 0.31943376 -0.72968189 -0.88695046  2.10284435  1.77356724]
 [-0.47747333 -0.2331533  -0.06515724  2.46745893  2.04188093]]
Latent representation shape: (311, 214) (should be [samples, nchannel])
[b'part_', b'of_', b'the_', b'cake_', b'was_', b'eaten_', b'by_', b'the_', b'dog_']
Audio sequence: []
Length of phoneme sequence: 311
[b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 40 exists.
Downsampling signals to 101.7 Hz; please wait...
(332, 214)

------------------------
For sentence 0: 
[[ 0.63722944 -0.91799136 -0.65424568  0.15211409  0.44254751]
 [ 0.75626333  0.19778798 -0.01846143  0.13624566 -0.95320042]
 [ 0.67116821 -0.93296545 -0.1242405   0.16592912  0.83825585]
 [ 0.6923432  -0.328309   -0.58222592 -0.34928891  0.25982893]
 [ 0.70276037 -0.41251705  0.32093168  0.27801857  1.56959203]]
Latent representation shape: (332, 214) (should be [samples, nchannel])
[b'as_', b'a_', b'precaution_', b'the_', b'outlaws_', b'uh_', b'bought_', b'gunpowder_', b'for_', b'their_', b'stronghold_']
Audio sequence: []
Length of phoneme sequence: 332
[b'ae', b'ae', b'ae', b'ae', b'ae', b'ae', b'ae', b'ae', b'z', b'z', b'z', b'z', b'z', b'z', b'z', b'z', b'z', b'z', b'z', b'ax', b'p', b'p', b'p', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'iy', b'iy', b'iy', b'k', b'k', b'k', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', 

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 42 exists.
Downsampling signals to 101.7 Hz; please wait...
(233, 214)

------------------------
For sentence 0: 
[[ 0.35573936 -0.44709314  0.33778799 -0.53602721  0.39502116]
 [ 0.1038284   0.80186749 -0.61909464  0.8017579   0.45726744]
 [ 1.99099925 -1.82506578  0.10072725  0.47301573  0.52100683]
 [ 1.38892059 -1.6060081   0.6479806  -0.54436197  0.01568154]
 [-0.57811054  0.69364369 -0.04999171  0.30734214 -0.49260709]]
Latent representation shape: (233, 214) (should be [samples, nchannel])
[b'jeff_', b'toy_', b'go_', b'kart_', b'never_', b'worked_']
Audio sequence: []
Length of phoneme sequence: 233
[b'pau', b'pau', b'jh', b'jh', b'jh', b'jh', b'jh', b'jh', b'jh', b'jh', b'eh', b'eh', b'eh', b'eh', b'eh', b'eh', b'eh', b'eh', b'eh', b'eh', b'eh', b'f', b'f', b's', b's', b's', b's', b's', b's', b's', b's', b's', b's', b's', b's', b's', b's', b's', b's', b's', b's', b's', b's', b's', b's', b's', b's', b's', b's', b's', b's', b's', b's', b's', b's', 

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 44 exists.
Downsampling signals to 101.7 Hz; please wait...
(297, 214)

------------------------
For sentence 0: 
[[ 0.15557111  0.55340435  0.02339291  1.85974771  0.05907485]
 [-0.78449725  1.12084896  0.44600449  2.20187844  0.81552475]
 [ 1.32990023 -0.68388546 -0.64591404  0.58317351 -0.73284846]
 [-0.10691107 -0.78688041  0.75499633 -0.09173101 -0.11927062]
 [-0.55846851 -0.19796813  0.17995682  0.60696455  0.15419525]]
Latent representation shape: (297, 214) (should be [samples, nchannel])
[b'alice_', b'ability_', b'to_', b'work_', b'without_', b'supervision_', b'is_', b'noteworthy_']
Audio sequence: []
Length of phoneme sequence: 297
[b'pau', b'pau', b'pau', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'ax', b's', b's', b's', b's', b's', b's', b's', b's', b's', b's', b's', b's', b's', b'ax', b'ax', b'ax', b'ax', b'ax', b'ax', b'ax', b's', b's', b's', b's', b'ax', b'ax', b'ax', b'ax', b'ax', b'ax

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 46 exists.
Downsampling signals to 101.7 Hz; please wait...
(292, 214)

------------------------
For sentence 0: 
[[-0.02377951  1.44221503 -0.585441   -0.68333736  0.0525748 ]
 [-0.07766098 -0.57400537  0.23645404  1.56887517  1.20860656]
 [-0.55998354  0.60975515 -0.7156267  -0.21337678  0.49044779]
 [-2.0269999  -2.51646597 -1.69812585 -0.76285356 -1.44541543]
 [-1.45676873  0.24187958 -0.6848779  -0.82650667 -0.61850262]]
Latent representation shape: (292, 214) (should be [samples, nchannel])
[b'alice_', b'says_', b'the_', b'ability_', b'to_', b'work_', b'without_', b'supervision_', b'is_', b'noteworthy_']
Audio sequence: []
Length of phoneme sequence: 292
[b'ae', b'ae', b'ae', b'ae', b'ae', b'ae', b'ae', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'ih', b's', b's', b's', b's', b's', b's', b's', b's', b's', b's', b's', b's', b'eh', b'z', b'z', b'z', b'z', b'z', b'z', b'z', b'z', b'z', b'z', b'dh', b'dh',

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 48 exists.
Downsampling signals to 101.7 Hz; please wait...
(219, 214)

------------------------
For sentence 0: 
[[-1.23889442 -0.82748512 -1.0369387  -0.04111233 -1.25889987]
 [ 0.33326584  0.57607011 -0.02739603 -0.89719428 -0.66077803]
 [ 1.33747614  0.68319634 -1.53021351  0.4835631   0.39760142]
 [-0.8359601   0.9502648   0.05950637  0.05952997  1.23040418]
 [ 0.6293539  -0.96393483 -0.41540357  0.06582482  1.04433476]]
Latent representation shape: (219, 214) (should be [samples, nchannel])
[b'cottage_', b'cheese_', b'with_', b'chives_', b'is_', b'delicious_']
Audio sequence: []
Length of phoneme sequence: 219
[b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'aa', b'aa', b'aa', b'aa', b'aa', b'aa', b'aa', b'aa', b'aa', b'aa', b'aa', b't', b't', b't', b't', b'ax', b'ax', b'jh', b'jh', b'jh', b'jh', b'jh', b'jh', b'jh', b'jh', b'ch', b'ch', b'ch', b'ch', b'ch', b'ch', b'ch', b'ch', b'ch', b'ch', b'ch', b'ch', b'ch', b'ch', b'ch', b'ch', b'ch', b'ch', b'ch

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 50 exists.
Downsampling signals to 101.7 Hz; please wait...
(321, 214)

------------------------
For sentence 0: 
[[-0.51145578 -0.25647485  0.61950936  1.58782537 -0.25555428]
 [ 0.24971227 -0.00348718  0.3943471  -0.35629888  0.44011981]
 [-0.53180994 -1.82929345 -0.77470905 -0.56304657  0.03302896]
 [-0.48764272 -1.67895954 -0.70169943 -0.7073701  -0.22362874]
 [-2.30421036  0.43633922 -0.67448874 -0.4304553   0.71741121]]
Latent representation shape: (321, 214) (should be [samples, nchannel])
[b'growing_', b'well_', b'kept_', b'gardens_', b'is_', b'very_', b'time_', b'consuming_']
Audio sequence: []
Length of phoneme sequence: 321
[b'pau', b'pau', b'g', b'g', b'g', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'ow', b'ow', b'ow', b'ow', b'ow', b'ow', b'ow', b'ow', b'ow', b'ow', b'ow', b'ow', b'ow', b'ow', b'ow', b'ow', b'ow', b'ow', b'ih', b'ih', b'ih', b'ih', b'ih', b'ng', b'ng', b'ng', b'ng', b'ng', b'w', b'w', b'w', b'w', b'w', b'w', b'w', b'w'

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 52 exists.
Downsampling signals to 101.7 Hz; please wait...
(176, 214)

------------------------
For sentence 0: 
[[ 0.86383866  1.08811024 -0.56668987 -1.50185989  0.28427757]
 [ 0.99131637 -1.47250568 -0.41669163  0.08910224  0.39660588]
 [-0.26116544 -0.61880811 -0.48713703  0.32108262  1.13478455]
 [-0.98597807 -0.64878624 -0.34461723  0.99974583 -0.54715972]
 [ 0.43019491  0.68701875 -0.52884145  1.78286258 -1.39351019]]
Latent representation shape: (176, 214) (should be [samples, nchannel])
[b'the_', b'firemen_', b'are_', b'coming_', b'to_', b'the_', b'rescue_']
Audio sequence: []
Length of phoneme sequence: 176
[b'pau', b'dh', b'dh', b'dh', b'dh', b'dh', b'ax', b'ax', b'ax', b'ax', b'ax', b'ax', b'f', b'f', b'f', b'f', b'f', b'f', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'r', b'r', b'r', b'r', b'r', b'm', b'm', b'ax', b'ax', b'ax', b'n', b'n'

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 53 exists.
Downsampling signals to 101.7 Hz; please wait...
(213, 214)

------------------------
For sentence 0: 
[[-0.46942837 -0.09019641 -0.6812084   0.28059779 -1.32119313]
 [-0.92415002 -1.37940723 -0.8029339  -0.80469145 -1.56492239]
 [ 1.04371551 -0.16952862 -0.23430596 -0.74584858 -0.33634524]
 [ 0.32009468 -0.91028017 -0.86177972 -0.25646496  0.06990122]
 [ 0.06896321  0.82720053 -1.52327586 -0.47863695 -1.17194809]]
Latent representation shape: (213, 214) (should be [samples, nchannel])
[b'the_', b'firemen_', b'are_', b'coming_', b'to_', b'the_', b'rescue_']
Audio sequence: []
Length of phoneme sequence: 213
[b'pau', b'pau', b'dh', b'dh', b'dh', b'dh', b'ax', b'ax', b'ax', b'ax', b'ax', b'ax', b'f', b'f', b'f', b'f', b'f', b'f', b'f', b'f', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 54 exists.
Downsampling signals to 101.7 Hz; please wait...
(211, 214)

------------------------
For sentence 0: 
[[-0.39622818  1.0649384   0.34472826 -1.03732178 -0.5295999 ]
 [-1.19034857 -1.89723818  0.11163016  0.17361347  0.03117632]
 [-0.11023247  0.55327544 -0.0455035   0.15771958 -1.11576745]
 [ 0.95489853  1.65759069 -0.52153575 -0.35582365  0.39288193]
 [-2.40096587 -0.05444235 -1.31048892 -0.13341008 -0.39777782]]
Latent representation shape: (211, 214) (should be [samples, nchannel])
[b'while_', b'falling_', b'the_', b'boy_', b'grabs_', b'a_', b'cookie_']
Audio sequence: []
Length of phoneme sequence: 211
[b'pau', b'pau', b'w', b'w', b'w', b'w', b'w', b'w', b'w', b'w', b'w', b'ay', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'f', b'f', b'f', b'ao', b'ao', b'ao', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'ih', b'

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 55 exists.
Downsampling signals to 101.7 Hz; please wait...
(211, 214)

------------------------
For sentence 0: 
[[ 0.96750489  0.78843374  0.19944895  1.36542212 -0.72349423]
 [-1.68915768 -1.38458684  0.03837561  0.45849061 -0.47280318]
 [-0.30101661  0.15121462  1.59700113  1.34830869  0.03865391]
 [-2.77828789 -2.890388    0.32707948 -0.15164133 -0.81400045]
 [ 0.73968398 -0.23581904  0.34275844 -0.52851923 -0.20274175]]
Latent representation shape: (211, 214) (should be [samples, nchannel])
[b'while_', b'falling_', b'the_', b'boy_', b'grabs_', b'a_', b'cookie_']
Audio sequence: []
Length of phoneme sequence: 211
[b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'ay', b'ay', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'f', b'f', b'f', b'ao', b'ao', b'ao', b'ao', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 56 exists.
Downsampling signals to 101.7 Hz; please wait...
(225, 214)

------------------------
For sentence 0: 
[[ 0.42255449  0.69364067 -0.02736198  2.72276476  0.77194387]
 [-0.34404989  1.24538041 -0.11098681  0.98572682 -1.13891749]
 [-0.07903083 -0.60855101 -0.20382254  0.74802444 -1.15029689]
 [ 0.22506325 -0.99444383 -0.70450723 -1.21996048 -0.48430173]
 [-0.64437935  0.30700487 -0.21235382 -0.39202363 -0.93869822]]
Latent representation shape: (225, 214) (should be [samples, nchannel])
[b'part_', b'of_', b'the_', b'cake_', b'was_', b'eaten_', b'by_', b'the_', b'dog_']
Audio sequence: []
Length of phoneme sequence: 225
[b'pau', b'pau', b'pau', b'pau', b'aa', b'aa', b'aa', b'aa', b'aa', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b't', b't', b'ah', b'ah', b'ah', b'ah', b'v', b'v', b'v', b'v', b'dh', b'dh', b'dh', b'dh', b'dh', b'dh', b'dh', b'dh', b'ax', b'ax', b'ax', b'ax', b'k', b'k', b

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 59 exists.
Downsampling signals to 101.7 Hz; please wait...
(272, 214)

------------------------
For sentence 0: 
[[ 0.0971455   0.95631154 -1.29126977 -0.18366994  2.47983818]
 [ 0.11170553  0.29770968 -0.38484556  1.53205809  0.83314525]
 [-0.32763316  0.15062355 -0.01506323 -0.18788699  2.20312345]
 [-0.88863946  0.21451113 -0.05097288 -0.43804753 -0.58954308]
 [-0.58518327 -1.02981008  1.31141337 -0.0345869  -0.20828979]]
Latent representation shape: (272, 214) (should be [samples, nchannel])
[b'part_', b'of_', b'the_', b'cake_', b'was_', b'eaten_', b'by_', b'the_', b'dog_']
Audio sequence: []
Length of phoneme sequence: 272
[b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'p', 

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 60 exists.
Downsampling signals to 101.7 Hz; please wait...
(221, 214)

------------------------
For sentence 0: 
[[-0.15766836  1.16511574  0.13006223  0.52150539  1.34355553]
 [ 0.23419526  0.96409272  1.66942165 -0.68095427  1.27724385]
 [-0.84744692 -0.78538572  0.4451599  -0.76598307 -0.19657694]
 [-0.39287303  0.14751602 -0.0442403  -1.4275461  -0.86645179]
 [-0.28430017  0.23650251 -0.11286783 -0.00496159 -1.3849281 ]]
Latent representation shape: (221, 214) (should be [samples, nchannel])
[b'while_', b'falling_', b'the_', b'boy_', b'grabs_', b'a_', b'cookie_']
Audio sequence: []
Length of phoneme sequence: 221
[b'pau', b'pau', b'w', b'w', b'w', b'w', b'w', b'w', b'w', b'w', b'ay', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'f', b'f', b'f', b'f', b'f', b'f', b'ae', b'ae', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'ih', b'i

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 61 exists.
Downsampling signals to 101.7 Hz; please wait...
(202, 214)

------------------------
For sentence 0: 
[[ 0.34153806 -1.28623144 -1.78209005  0.87773367  0.7895579 ]
 [-0.31161715 -1.17221379 -0.20388374  2.35983267  0.24572872]
 [ 1.93338956  0.59439874  0.03138331  1.18290165 -0.69784211]
 [ 0.69910472  0.65283266 -0.24335663  3.03132134  0.19387312]
 [-1.24507261 -0.18493043 -0.58770953  1.6985745  -1.20351234]]
Latent representation shape: (202, 214) (should be [samples, nchannel])
[b'the_', b'firemen_', b'are_', b'coming_', b'to_', b'the_', b'rescue_']
Audio sequence: []
Length of phoneme sequence: 202
[b'pau', b'pau', b'dh', b'dh', b'ax', b'ax', b'ax', b'ax', b'f', b'f', b'f', b'f', b'f', b'f', b'f', b'f', b'f', b'f', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'm', b'm', b'ax', b'ax', b'n', b'n', b'n', b'n', b'n', b'n'

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 62 exists.
Downsampling signals to 101.7 Hz; please wait...
(205, 214)

------------------------
For sentence 0: 
[[ 1.34129272  0.55077291  1.69343457 -0.21448317  0.80677944]
 [ 0.35261223 -0.7017962   0.73874622  0.04075686  0.9248406 ]
 [ 0.205088   -0.74926036 -0.32461255  0.00606691  0.76374545]
 [-0.07933548 -0.36660617 -1.39517612  0.67813123 -0.02966218]
 [ 0.57296214  1.32193198 -0.26564249  0.31048603 -0.55193461]]
Latent representation shape: (205, 214) (should be [samples, nchannel])
[b'part_', b'of_', b'the_', b'cake_', b'was_', b'eaten_', b'by_', b'the_', b'dog_']
Audio sequence: []
Length of phoneme sequence: 205
[b'pau', b'pau', b'p', b'p', b'p', b'p', b'p', b'p', b'p', b'aa', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b't', b't', b'ah', b'ah', b'ah', b'ah', b'ah', b'ah', b'ah', b'ah', b'v', b'v', b'v', b'v', b'v', b'dh', b'dh', b'dh', b'dh', b'ax', b'ax', b'ax', b'k', b'k', b'k'

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 63 exists.
Downsampling signals to 101.7 Hz; please wait...
(172, 214)

------------------------
For sentence 0: 
[[ 6.36309938e-01  9.53324191e-02 -9.43085346e-01 -4.20318001e-01
  -6.80178402e-01]
 [ 4.17622102e-01 -2.17421133e-01 -1.09475917e+00  1.82664399e-01
   9.59561275e-02]
 [ 1.90090022e+00  5.47771284e-02 -1.28218562e+00  1.50957283e+00
   1.20838254e+00]
 [ 1.00716198e+01 -1.44345934e-02 -3.06667355e-03  5.51415044e-01
   4.69029984e-01]
 [ 5.47363789e+00 -3.12305744e-01  3.48557946e-01  2.65167983e-01
  -5.20821749e-01]]
Latent representation shape: (172, 214) (should be [samples, nchannel])
[b'part_', b'of_', b'the_', b'cake_', b'was_', b'eaten_', b'by_', b'the_', b'dog_']
Audio sequence: []
Length of phoneme sequence: 172
[b'pau', b'pau', b'p', b'p', b'p', b'aa', b'aa', b'aa', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b't', b't', b't', b'ah', b'v', b'v', b'v', b'v', b'dh', b'dh', b'dh', b'dh', b'dh', b'ax', b'ax', b'k', b

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 64 exists.
Downsampling signals to 101.7 Hz; please wait...
(174, 214)

------------------------
For sentence 0: 
[[ 0.22708106 -0.09880038  0.42851376  0.34088113 -0.10246891]
 [ 0.52852698  0.08211416  0.53072331  0.06613796  1.19170678]
 [-0.0842009  -0.22308845 -0.4482189  -0.11330057  2.26437221]
 [ 0.56392663 -1.03689788  1.36804098 -0.36117777  0.58382447]
 [ 0.00641524 -0.47076891  0.08253409 -0.5444534   0.93100172]]
Latent representation shape: (174, 214) (should be [samples, nchannel])
[b'part_', b'of_', b'the_', b'cake_', b'was_', b'eaten_', b'by_', b'the_', b'dog_']
Audio sequence: []
Length of phoneme sequence: 174
[b'pau', b'pau', b'aa', b'aa', b'aa', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b't', b't', b'ah', b'ah', b'ah', b'v', b'dh', b'dh', b'dh', b'dh', b'dh', b'dh', b'dh', b'dh', b'dh', b'ax', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'ey', b'ey', b'ey', b

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 65 exists.
Downsampling signals to 101.7 Hz; please wait...
(195, 214)

------------------------
For sentence 0: 
[[-0.02376032 -0.64736456  0.27100887 -0.07800324 -0.54176297]
 [-0.02727151  0.09235679 -0.26645209 -0.05426636  0.01121683]
 [-0.03888579 -0.40021823 -0.8436769  -0.09410688  0.71945258]
 [-0.02276378 -0.18998136 -0.76729227 -0.03114368 -0.42975041]
 [-0.03443482  0.1139045  -0.56540593 -0.0806931  -0.5414761 ]]
Latent representation shape: (195, 214) (should be [samples, nchannel])
[b'although_', b'always_', b'alone_', b'we_', b'survive_']
Audio sequence: []
Length of phoneme sequence: 195
[b'pau', b'ao', b'ao', b'ao', b'ao', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'dh', b'ow', b'ow', b'ow', b'ow', b'ow', b'ow', b'ow', b'ow', b'ow', b'ow', b'ow', b'ow', b'ow', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 70 exists.
Downsampling signals to 101.7 Hz; please wait...
(182, 214)

------------------------
For sentence 0: 
[[ 0.36602983 -1.4268681   1.24317572  0.37042305 -0.60639264]
 [ 0.77281467 -0.50610483 -1.20927704 -0.70074812  0.20253643]
 [ 0.2092567  -0.79146441 -0.16209618  0.93586694  0.32375935]
 [ 0.42579932 -0.86699313  0.43602156  1.04917224 -0.01127042]
 [ 0.63691565 -0.6561043   0.92045134 -0.31732743  0.32597017]]
Latent representation shape: (182, 214) (should be [samples, nchannel])
[b'part_', b'of_', b'the_', b'cake_', b'was_', b'eaten_', b'by_', b'the_', b'dog_']
Audio sequence: []
Length of phoneme sequence: 182
[b'pau', b'pau', b'pau', b'aa', b'aa', b'aa', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b't', b't', b't', b'ah', b'ah', b'v', b'v', b'dh', b'dh', b'dh', b'dh', b'dh', b'dh', b'dh', b'ax', b'ax', b'ax', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'ey', b'

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 73 exists.
Downsampling signals to 101.7 Hz; please wait...
(202, 214)

------------------------
For sentence 0: 
[[-1.15892869  0.99537744  0.5353445  -0.42915642  2.5541394 ]
 [-0.29328869 -2.03489766  1.03971528 -0.22846425  0.9344959 ]
 [ 0.22213068 -0.674518   -0.56946787  0.46609351 -0.36967459]
 [-0.3227471  -1.85105232 -0.62680139  0.24408714  0.27741601]
 [ 1.35091171  0.01622267  0.62107036  1.03074092 -1.02563333]]
Latent representation shape: (202, 214) (should be [samples, nchannel])
[b'while_', b'falling_', b'the_', b'boy_', b'grabs_', b'a_', b'cookie_']
Audio sequence: []
Length of phoneme sequence: 202
[b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'ay', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'f', b'f', b'f', b'f', b'f', b'f', b'f', b'f', b'ao', b'ao', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', 

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 74 exists.
Downsampling signals to 101.7 Hz; please wait...
(205, 214)

------------------------
For sentence 0: 
[[ 0.5267097   1.33780368 -0.92545334 -0.96778596  1.33161248]
 [ 0.58054595 -0.42513506  1.23566938  1.87951035  0.70644949]
 [ 0.24819844 -2.05530591 -0.27046497  0.69103343 -0.6010678 ]
 [-0.07918322 -1.01344882 -0.02402226 -0.75559797 -0.37452451]
 [ 0.15388923  0.27681102 -0.2863364   0.49654387 -0.64255732]]
Latent representation shape: (205, 214) (should be [samples, nchannel])
[b'while_', b'falling_', b'the_', b'boy_', b'grabs_', b'a_', b'cookie_']
Audio sequence: []
Length of phoneme sequence: 205
[b'pau', b'pau', b'pau', b'pau', b'w', b'w', b'w', b'w', b'w', b'w', b'w', b'w', b'w', b'w', b'w', b'w', b'w', b'ay', b'l', b'l', b'l', b'l', b'l', b'l', b'f', b'f', b'f', b'f', b'f', b'f', b'f', b'f', b'ao', b'ao', b'ao', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l',

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 75 exists.
Downsampling signals to 101.7 Hz; please wait...
(205, 214)

------------------------
For sentence 0: 
[[-0.86148074 -0.41905521 -0.12468153  0.29385578  0.19096922]
 [ 1.92826354  1.72038072 -0.62478179 -0.34212137 -0.49677661]
 [ 2.3589551   1.91992208 -0.58488904  0.59855465 -0.10818872]
 [ 1.24224394  0.06732622  0.26424282 -0.5672688  -0.75052767]
 [-1.90614794 -1.14886326 -0.07718739  0.2192654  -0.04641718]]
Latent representation shape: (205, 214) (should be [samples, nchannel])
[b'the_', b'firemen_', b'are_', b'coming_', b'to_', b'the_', b'rescue_']
Audio sequence: []
Length of phoneme sequence: 205
[b'pau', b'pau', b'pau', b'dh', b'dh', b'dh', b'dh', b'dh', b'ax', b'ax', b'ax', b'ax', b'ax', b'ax', b'ax', b'f', b'f', b'f', b'f', b'f', b'f', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'm', b'ax',

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 76 exists.
Downsampling signals to 101.7 Hz; please wait...
(219, 214)

------------------------
For sentence 0: 
[[-6.51526280e-01  1.19670393e-01 -3.83324836e-01 -7.46710563e-01
  -1.14241008e-01]
 [-1.17082316e+00 -4.51373853e-01 -1.00930235e+00 -1.05175878e+00
   1.02183079e+00]
 [ 1.08030994e+00  9.89392430e-01  5.17665547e-01 -1.15184111e-01
   8.23492685e-01]
 [-8.50702123e-02  1.10091486e-01  2.33214027e+00  1.72153366e+00
  -3.75664608e-01]
 [-2.12943503e-03  1.01057434e-01 -5.27241528e-01  8.35697860e-02
  -3.07165681e-01]]
Latent representation shape: (219, 214) (should be [samples, nchannel])
[b'part_', b'of_', b'the_', b'cake_', b'was_', b'eaten_', b'by_', b'the_', b'dog_']
Audio sequence: []
Length of phoneme sequence: 219
[b'pau', b'pau', b'pau', b'pau', b'aa', b'aa', b'aa', b'aa', b'aa', b'aa', b'aa', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b't', b't', b'ah', b'ah',

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 77 exists.
Downsampling signals to 101.7 Hz; please wait...
(202, 214)

------------------------
For sentence 0: 
[[ 0.05177581 -0.81719964 -1.50542061  0.41330146 -0.33096448]
 [-0.98659809  0.33288671  5.80347349 -0.24038078  0.79420017]
 [-0.94208022 -0.58099475  8.15733594  4.23422485 -1.03592181]
 [-0.84014665 -0.79606464  4.98292503  2.64988348 -0.38702789]
 [-1.3066976  -0.18670086 -0.24682995 -0.79508602 -0.33455785]]
Latent representation shape: (202, 214) (should be [samples, nchannel])
[b'the_', b'firemen_', b'are_', b'coming_', b'to_', b'the_', b'rescue_']
Audio sequence: []
Length of phoneme sequence: 202
[b'pau', b'pau', b'pau', b'dh', b'dh', b'dh', b'ax', b'ax', b'ax', b'ax', b'ax', b'f', b'f', b'f', b'f', b'f', b'f', b'f', b'f', b'f', b'f', b'f', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'm', b'a

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 83 exists.
Downsampling signals to 101.7 Hz; please wait...
(176, 214)

------------------------
For sentence 0: 
[[ 0.91052147  0.05382627 -0.91084605 -0.03589884 -0.95562243]
 [ 0.90123809 -0.41377107  2.26605055  0.0099673   0.10792706]
 [-0.02734018 -0.30558194  0.01146548 -0.08422377  0.33575096]
 [ 0.84516583  0.09922607  0.86410216 -0.01368794  0.92657148]
 [ 0.54465849  0.59100487  1.32882345 -0.09344875  2.2640502 ]]
Latent representation shape: (176, 214) (should be [samples, nchannel])
[b'part_', b'of_', b'the_', b'cake_', b'was_', b'eaten_', b'by_', b'the_', b'dog_']
Audio sequence: []
Length of phoneme sequence: 176
[b'pau', b'pau', b'pau', b'pau', b'p', b'p', b'aa', b'aa', b'aa', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b't', b't', b'ah', b'ah', b'v', b'v', b'dh', b'dh', b'ax', b'ax', b'ax', b'ax', b'ax', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'k'

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 92 exists.
Downsampling signals to 101.7 Hz; please wait...
(179, 214)

------------------------
For sentence 0: 
[[-0.67787119 -0.35771384  0.21902375  0.13441458 -0.6886965 ]
 [-1.20193986 -1.74308819 -0.49872187 -0.12312695 -0.70839283]
 [ 0.76175608 -0.0220933   0.9020262   0.572033    0.53935357]
 [-0.15601523 -1.45117994  0.22722188 -0.25124784 -0.61896656]
 [ 0.69458143 -1.10548841 -0.19830174 -0.79152811  0.91292448]]
Latent representation shape: (179, 214) (should be [samples, nchannel])
[b'while_', b'falling_', b'the_', b'boy_', b'grabs_', b'a_', b'cookie_']
Audio sequence: []
Length of phoneme sequence: 179
[b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'ay', b'l', b'l', b'l', b'l', b'l', b'l', b'f', b'f', b'f', b'f', b'f', b'f', b'f', b'f', b'f', b'ao', b'ao', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 93 exists.
Downsampling signals to 101.7 Hz; please wait...
(167, 214)

------------------------
For sentence 0: 
[[-1.16288798  0.37853356  0.91915394  0.08158628 -0.81164354]
 [-1.06968973 -1.28830901 -0.26516499 -0.75046755 -0.05870604]
 [ 1.43536894 -0.05979105 -0.46844236 -0.16014494 -0.58385158]
 [ 1.14046681 -1.46589913  0.71604646 -0.33842694 -0.18013777]
 [ 0.3026269   0.51908166 -0.34511621  0.50421876  0.2684899 ]]
Latent representation shape: (167, 214) (should be [samples, nchannel])
[b'the_', b'firemen_', b'are_', b'coming_', b'to_', b'the_', b'rescue_']
Audio sequence: []
Length of phoneme sequence: 167
[b'pau', b'pau', b'dh', b'dh', b'ax', b'ax', b'ax', b'ax', b'ax', b'f', b'f', b'f', b'f', b'f', b'f', b'f', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'm', b'm', b'ax', b'n', b'n', b'n', b'n', b'n', b'n', b'n', b'n', b'n', b'

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 94 exists.
Downsampling signals to 101.7 Hz; please wait...
(173, 214)

------------------------
For sentence 0: 
[[-0.24080679 -0.57363237  1.98438442  0.76379883  0.47509279]
 [-0.00408634 -0.97270972 -0.04592348 -0.22763423 -0.78791286]
 [-1.3805527  -0.0033586   0.00851269  1.10518583 -0.29995528]
 [ 0.45979068 -1.16023231  1.15200259  0.16563014  1.11865901]
 [-0.2943037   0.29306516  0.07128174 -0.01056916 -0.03613902]]
Latent representation shape: (173, 214) (should be [samples, nchannel])
[b'the_', b'firemen_', b'are_', b'coming_', b'to_', b'the_', b'rescue_']
Audio sequence: []
Length of phoneme sequence: 173
[b'dh', b'dh', b'dh', b'dh', b'ax', b'ax', b'ax', b'ax', b'pau', b'pau', b'pau', b'pau', b'pau', b'f', b'f', b'f', b'f', b'f', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'm', b'm', b'm', b'ax', b'ax', b'ax', b'ax',

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 95 exists.
Downsampling signals to 101.7 Hz; please wait...
(207, 214)

------------------------
For sentence 0: 
[[-0.20283762 -0.07743061  0.43312349 -0.78651783  1.33327613]
 [-1.47284013  2.17805042 -0.88485699 -0.42688612  3.08531137]
 [ 1.09264548  1.00132463 -0.07699299  0.16851917  1.98056197]
 [ 0.07475757 -0.57619187 -0.89967835 -0.30488796  0.63860243]
 [ 0.34896879 -0.45555047 -0.17857516  0.402321    0.44349856]]
Latent representation shape: (207, 214) (should be [samples, nchannel])
[b'part_', b'of_', b'the_', b'cake_', b'was_', b'eaten_', b'by_', b'the_', b'dog_']
Audio sequence: []
Length of phoneme sequence: 207
[b'pau', b'pau', b'p', b'p', b'p', b'p', b'p', b'p', b'aa', b'aa', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b't', b'ah', b'ah', b'ah', b'v', b'v', b'v', b'v', b'v', b'v', b'dh', b'dh', b'dh', b'dh', b'dh', b'ax', b'ax', b'ax', b'k', b'k', b'k', b'k', b'k', b'k', b

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 97 exists.
Downsampling signals to 101.7 Hz; please wait...
(187, 214)

------------------------
For sentence 0: 
[[-0.73535153 -0.03112504 -0.37052764 -0.17462066  1.79100502]
 [ 0.30666731 -0.4328696   1.08753184 -0.8507759  -0.03268264]
 [ 0.01377521  0.34316133  0.04762123 -1.53853025 -0.16368132]
 [ 0.02870938  0.96620256  0.8721543  -0.01312553 -1.01240891]
 [-0.20637103  1.37783814  0.05275709 -0.18758202  1.75935404]]
Latent representation shape: (187, 214) (should be [samples, nchannel])
[b'while_', b'falling_', b'the_', b'boy_', b'grabs_', b'a_', b'cookie_']
Audio sequence: []
Length of phoneme sequence: 187
[b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'ay', b'l', b'l', b'l', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'f', b'f', b'f', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 98 exists.
Downsampling signals to 101.7 Hz; please wait...
(242, 214)

------------------------
For sentence 0: 
[[ 0.31307505  0.20949681 -0.80007547 -0.0016795  -0.21680095]
 [-0.5390217  -0.43190031 -1.26948287  0.61149969  1.47020015]
 [-0.33544042  1.42354626  0.87946701  0.54571405  1.09620448]
 [-1.11013328 -0.16255547  0.3061319  -0.71862424  1.47291969]
 [-0.59574188 -0.21554854 -0.28776248 -1.27062646  1.02098641]]
Latent representation shape: (242, 214) (should be [samples, nchannel])
[b'a_', b'dog_', b'is_', b'barking_', b'at_', b'the_', b'man_', b'in_', b'the_', b'tree_']
Audio sequence: []
Length of phoneme sequence: 242
[b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'd', b'd', b'd', b'd', b'd', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'g', b'g', b'g', b'ih', b'ih'

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 99 exists.
Downsampling signals to 101.7 Hz; please wait...
(213, 214)

------------------------
For sentence 0: 
[[-0.36006045  1.11721467  1.38695045  0.75292907  1.18719656]
 [ 0.09199895  0.89980813 -0.06395995  0.64645278 -0.17067586]
 [ 0.8863236  -1.96735073  0.08512167  0.60619488 -0.81684226]
 [-0.10697771 -0.23421526  2.02135309  0.64339637 -0.79540377]
 [-0.83271815  2.16593729  0.1580597   2.64884134 -0.49127615]]
Latent representation shape: (213, 214) (should be [samples, nchannel])
[b'water_', b'is_', b'overflowing_', b'from_', b'the_', b'sink_']
Audio sequence: []
Length of phoneme sequence: 213
[b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b't', b'er', b'er', b'er', b'er', b'er', b'er', b'er', b'er', b'er', b'er', b'er', b'er', b'er', b'er', b'er', b'er', b'ih', b'ih', b'ih', 

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 100 exists.
Downsampling signals to 101.7 Hz; please wait...
(242, 214)

------------------------
For sentence 0: 
[[ 1.65199801  1.78715443 -0.03320785  0.28083486 -0.74000188]
 [ 1.21473098 -0.3927196   1.36692289  0.53013086 -0.18440017]
 [-0.08447657  0.58650307 -0.14103434 -0.75318823  0.10067113]
 [-0.00277911 -0.05282742 -1.00088897 -0.05962392  0.47824809]
 [ 0.00729972 -0.33612221 -0.83213993 -0.77001162 -0.50500999]]
Latent representation shape: (242, 214) (should be [samples, nchannel])
[b'uh_', b'part_', b'of_', b'the_', b'cake_', b'was_', b'eaten_', b'by_', b'the_', b'dog_']
Audio sequence: []
Length of phoneme sequence: 242
[b'ah', b'ah', b'ah', b'ah', b'ah', b'ah', b'ah', b'ah', b'ah', b'ah', b'ah', b'ah', b'ah', b'ah', b'ah', b'ah', b'ah', b'ah', b'ah', b'ah', b'ah', b'ah', b'ah', b'ah', b'ah', b'ah', b'ah', b'ah', b'ah', b'ah', b'ah', b'ah', b'ah', b'ah', b'ah', b'ah', b'ah', b'ah', b'ah', b'ah', b'ah', b'p', b'p', b'p', b'p', b'p', b'p'

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 101 exists.
Downsampling signals to 101.7 Hz; please wait...
(197, 214)

------------------------
For sentence 0: 
[[ 0.19578547  0.26887366 -0.23403827 -0.42376854 -0.17816345]
 [-1.0261129   0.02024735  0.91201678  0.91514282  0.26813368]
 [-0.0554694  -0.7753868   1.63534733 -0.0573691  -0.17344416]
 [-0.42232813 -0.29096679  1.09223073 -0.43029426  1.29660282]
 [-1.07356186  0.52932704  0.20375316 -0.55060688 -0.43133204]]
Latent representation shape: (197, 214) (should be [samples, nchannel])
[b'part_', b'of_', b'the_', b'cake_', b'was_', b'eaten_', b'by_', b'the_', b'dog_']
Audio sequence: []
Length of phoneme sequence: 197
[b'pau', b'pau', b'p', b'p', b'p', b'aa', b'aa', b'aa', b'aa', b'aa', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b't', b't', b'ah', b'v', b'v', b'v', b'v', b'v', b'pau', b'pau', b'pau', b'pau', b'pau', b'dh', b'dh', b'ax', b'ax', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'k',

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 108 exists.
Downsampling signals to 101.7 Hz; please wait...
(211, 214)

------------------------
For sentence 0: 
[[-0.61297165 -0.46727397 -0.70087131 -0.5991345  -0.50988324]
 [-0.60011164 -0.62185656  1.27681507 -0.4314712  -0.92752688]
 [-0.76687226  0.48533851 -1.07168796 -0.45442423 -1.00653395]
 [-0.59365917 -1.14220176 -0.56261409 -0.4812386  -0.49200679]
 [-0.55318428 -0.1595358   0.01831087 -0.43382908  0.66464524]]
Latent representation shape: (211, 214) (should be [samples, nchannel])
[b'part_', b'of_', b'the_', b'cake_', b'was_', b'eaten_', b'by_', b'the_', b'dog_']
Audio sequence: []
Length of phoneme sequence: 211
[b'p', b'p', b'p', b'p', b'p', b'aa', b'aa', b'aa', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b't', b't', b'ah', b'ah', b'ah', b'ah', b'v', b'v', b'v', b'v', b'v', b'v', b'v', b'v', b'v', b'dh', b'ax', b'ax', b'ax', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'k', 

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 109 exists.
Downsampling signals to 101.7 Hz; please wait...
(254, 214)

------------------------
For sentence 0: 
[[ 0.70543119 -0.47780879  1.07383503 -0.04149201 -0.1230665 ]
 [-0.10063942  0.93617339 -0.61935003 -0.08366561 -0.31004105]
 [-1.07646696 -0.89583995 -0.07591583 -0.02857258 -0.95662945]
 [ 0.28925964 -0.03011316 -0.05126126 -0.05218206  0.3078664 ]
 [-0.61983609 -0.76301223  0.09267884 -0.09101689 -0.85298514]]
Latent representation shape: (254, 214) (should be [samples, nchannel])
[b'part_', b'of_', b'the_', b'cake_', b'was_', b'eaten_', b'by_', b'the_', b'dog_']
Audio sequence: []
Length of phoneme sequence: 254
[b'pau', b'aa', b'aa', b'aa', b'aa', b'aa', b'aa', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b't', b'ah', b'ah', b'ah', b'ah', b'ah', b'ah', b'ah', b'ah', b'ah', b'v', b'v', b'v', b'v', b'v', b'v', b'v', b'dh', b'dh', b'dh', b'dh', b'ax', b'ax', b'ax', b'k', b'k',

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 110 exists.
Downsampling signals to 101.7 Hz; please wait...
(178, 214)

------------------------
For sentence 0: 
[[ 1.12825643  0.5722287   1.18392769  0.12702569 -0.71693066]
 [ 1.28547897  0.82354191 -0.40188094 -0.12461238 -0.43685084]
 [-0.99245767  1.30833755  0.01297835 -0.00612624 -0.3581324 ]
 [-0.71770217  1.0151792   0.37561825 -0.13969339 -0.68170711]
 [-0.68647932 -0.23849366 -0.50629206 -0.02683799 -0.29951016]]
Latent representation shape: (178, 214) (should be [samples, nchannel])
[b'the_', b'firemen_', b'are_', b'coming_', b'to_', b'the_', b'rescue_']
Audio sequence: []
Length of phoneme sequence: 178
[b'pau', b'pau', b'dh', b'dh', b'ax', b'ax', b'ax', b'ax', b'ax', b'f', b'f', b'f', b'f', b'f', b'f', b'f', b'f', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'ay', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'm', b'm', b'ax', b'n', b'n', b'aa', b'aa', b'aa', b'aa', b'aa', b'aa', b'

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 111 exists.
Downsampling signals to 101.7 Hz; please wait...
(188, 214)

------------------------
For sentence 0: 
[[ 4.84181313e-01  2.08199222e+00 -5.40322972e-01 -1.66489802e-01
  -2.94578348e-01]
 [-1.72079436e-01 -1.09853421e-01  7.88629680e-02 -1.37718746e-01
   5.15386812e-01]
 [ 4.66200616e-01  1.02345497e+00  3.31990766e+00 -1.86861626e-01
   3.51979293e-01]
 [ 1.40134941e-01  3.10798798e+00  2.77137986e-01 -1.40126616e-01
  -1.32192510e+00]
 [-3.06244885e-01 -6.79807306e-01  7.11435234e-01 -1.60584850e-01
   2.08370449e-03]]
Latent representation shape: (188, 214) (should be [samples, nchannel])
[b'while_', b'falling_', b'the_', b'boy_', b'grabs_', b'a_', b'cookie_']
Audio sequence: []
Length of phoneme sequence: 188
[b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'ay', b'ay', b'l', b'f', b'f', b'f', b'f', b'f', b'f'

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 112 exists.
Downsampling signals to 101.7 Hz; please wait...
(200, 214)

------------------------
For sentence 0: 
[[-0.44341889 -0.44142264 -0.99302765 -0.12473678  2.05129502]
 [-0.18212832 -1.09866588 -0.56350538 -0.1195142   3.09781308]
 [-0.68469495  0.40659029 -1.55853688 -0.10987142  0.20729424]
 [-0.18415339  0.86412892  0.01396846 -0.11489124  0.34925406]
 [ 0.45657905 -0.79398152  2.24262115 -0.11988181 -0.43533089]]
Latent representation shape: (200, 214) (should be [samples, nchannel])
[b'part_', b'of_', b'the_', b'cake_', b'was_', b'eaten_', b'by_', b'the_', b'dog_']
Audio sequence: []
Length of phoneme sequence: 200
[b'pau', b'pau', b'pau', b'aa', b'aa', b'aa', b'aa', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b'r', b't', b't', b'ah', b'v', b'v', b'v', b'v', b'v', b'dh', b'dh', b'ax', b'ax', b'ax', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'k', b'ey', b'ey', b'ey', b'ey

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 113 exists.
Downsampling signals to 101.7 Hz; please wait...
(183, 214)

------------------------
For sentence 0: 
[[ 2.67692677  1.46204889  0.03319829 -0.04864118  0.2531564 ]
 [ 0.78060384 -0.65127653 -0.84715966 -0.02737277 -0.74099513]
 [-0.37675714 -0.82734106 -0.64295462 -0.05567038 -0.22220592]
 [ 0.69155896  0.91393638  0.53783711 -0.03097598  0.55297484]
 [ 0.35470332 -0.6062062   1.43599021 -0.05630138 -0.17985966]]
Latent representation shape: (183, 214) (should be [samples, nchannel])
[b'while_', b'falling_', b'the_', b'boy_', b'grabs_', b'a_', b'cookie_']
Audio sequence: []
Length of phoneme sequence: 183
[b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'ay', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'f', b'f', b'f', b'f', b'f', b'f', b'ao', b'ao', b'ao', b'ao', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 114 exists.
Downsampling signals to 101.7 Hz; please wait...
(185, 214)

------------------------
For sentence 0: 
[[-0.11575077 -1.1497878  -0.52806964 -0.11036315  0.04414395]
 [-0.0392771  -0.1235964   0.62004747 -0.08861924 -0.92007914]
 [ 0.22017311  0.5101905   0.8515937  -0.11546724  0.58498108]
 [ 0.21022731  1.75277633  0.1711094  -0.10298338  1.22349926]
 [ 0.93189159 -0.02910567  1.43093386 -0.11877499  0.30755808]]
Latent representation shape: (185, 214) (should be [samples, nchannel])
[b'while_', b'falling_', b'the_', b'boy_', b'grabs_', b'a_', b'cookie_']
Audio sequence: []
Length of phoneme sequence: 185
[b'w', b'w', b'w', b'w', b'w', b'w', b'w', b'w', b'w', b'ay', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'f', b'f', b'f', b'f', b'f', b'f', b'f', b'ao', b'ao', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'ih', b'ih', b'ih'

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Phoneme transcription for block 115 exists.
Downsampling signals to 101.7 Hz; please wait...
(181, 214)

------------------------
For sentence 0: 
[[-0.42352699 -1.46304704 -1.03524411 -0.11510974 -1.0099619 ]
 [ 0.58753764 -0.26799196  0.1730179  -0.10763705 -0.59602263]
 [-0.3781208  -0.22809991  0.49035204 -0.10530593 -0.49308917]
 [-0.31552593 -0.23906713 -1.28238471 -0.10516713 -0.69748389]
 [ 0.27166109  1.25730326 -0.90660601 -0.10758943  1.14763952]]
Latent representation shape: (181, 214) (should be [samples, nchannel])
[b'while_', b'falling_', b'the_', b'boy_', b'grabs_', b'a_', b'cookie_']
Audio sequence: []
Length of phoneme sequence: 181
[b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'pau', b'ay', b'ay', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'f', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'ao', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b'l', b