# Batches v2

In [90]:
import tensorflow as tf

def process_path(paths, labels):
    features = []
    # load waveforms in memory
    for path in paths:
        raw_audio = tf.io.read_file(path)
        waveform, sr = tf.audio.decode_wav(raw_audio)
        features.append(waveform)

    tf.print(features[1], len(features[0]), len(features[1]))
    features = tf.keras.preprocessing.sequence.pad_sequences(features, dtype='float', padding='post', truncating='post')
    tf.print(features[1], len(features[0]), len(features[1]))
    return features, labels

def tf_function_wrapper(features, labels):
    return tf.py_function(func=process_path, inp=[features['filename'], labels], Tout=tf.int32), labels

librivox_paths_ds = tf.data.experimental.make_csv_dataset(
    "librivox-test-clean-wav.csv", batch_size=4,
    shuffle=False, label_name="transcript")

librivox_ds = librivox_paths_ds.map(tf_function_wrapper)

for feature_batch, labels in librivox_ds.take(1):
    tf.print(feature_batch[0])

    

[[-0.00778198242]
 [-0.00921630859]
 [-0.00909423828]
 ...
 [-0.00262451172]
 [-0.00671386719]
 [-0.0113830566]] 97200 98480
array([[-0.00778198],
       [-0.00921631],
       [-0.00909424],
       ...,
       [-0.00262451],
       [-0.00671387],
       [-0.01138306]]) 98480 98480
[[0]
 [0]
 [0]
 ...
 [0]
 [0]
 [0]]


In [51]:
for feature_batch in librivox_ds.take(1):
    print(feature_batch)
    for key in feature_batch.numpy():
        print(key)

[[-0.00106811523]
 [-0.000671386719]
 [-0.000823974609]
 ...
 [-0.00183105469]
 [-0.00204467773]
 [-0.00119018555]]
[[-0.00100708008]
 [-0.00103759766]
 [-0.00103759766]
 ...
 [-0.00149536133]
 [-0.00140380859]
 [-0.00134277344]]
[[0.0022277832]
 [0.00152587891]
 [0.000732421875]
 ...
 [0.000152587891]
 [-0.000396728516]
 [0.00180053711]]
[[-0.000823974609]
 [-0.000885009766]
 [-0.000854492188]
 ...
 [-0.000366210938]
 [-0.000457763672]
 [-0.000549316406]]
tf.Tensor(
[[[0]
  [0]
  [0]
  ...
  [0]
  [0]
  [0]]

 [[0]
  [0]
  [0]
  ...
  [0]
  [0]
  [0]]

 [[0]
  [0]
  [0]
  ...
  [0]
  [0]
  [0]]

 [[0]
  [0]
  [0]
  ...
  [0]
  [0]
  [0]]], shape=(4, 213920, 1), dtype=int32)
[[0]
 [0]
 [0]
 ...
 [0]
 [0]
 [0]]
[[0]
 [0]
 [0]
 ...
 [0]
 [0]
 [0]]
[[0]
 [0]
 [0]
 ...
 [0]
 [0]
 [0]]
[[0]
 [0]
 [0]
 ...
 [0]
 [0]
 [0]]


In [91]:
import tensorflow as tf
t = tf.constant([[1, 2, 3], [4, 5, 6]])
paddings = tf.constant([[0, 0], [0, 2]])
# 'constant_values' is 0.
# rank of 't' is 2.
a = tf.pad(t, paddings, "CONSTANT")  # [[0, 0, 0, 0, 0, 0, 0],
                                 #  [0, 0, 1, 2, 3, 0, 0],
                                 #  [0, 0, 4, 5, 6, 0, 0],
                                 #  [0, 0, 0, 0, 0, 0, 0]]
a

<tf.Tensor: shape=(2, 5), dtype=int32, numpy=
array([[1, 2, 3, 0, 0],
       [4, 5, 6, 0, 0]], dtype=int32)>

In [94]:
from soundfile import read
path = '/Users/gabriel.t.nishimura/projects/masters/ctc/data_dir/LibriSpeech/test-clean-new/61-70970-0000.wav'
read(path)

(array([ 0.00073242,  0.00082397,  0.0017395 , ..., -0.00631714,
        -0.00570679, -0.00668335]),
 16000)

In [102]:
import tensorflow as tf
raw_audio = tf.io.read_file(path)
waveform = tf.audio.decode_wav(raw_audio)
transwav = tf.transpose(waveform[0])
stfts = tf.signal.stft(transwav, frame_length=1024, frame_step=256, fft_length=1024)
stfts

<tf.Tensor: shape=(1, 376, 513), dtype=complex64, numpy=
array([[[-6.1453873e-01+0.0000000e+00j,  3.9627367e-01-2.5253747e-02j,
         -1.2849604e-01-9.8455995e-02j, ...,
          6.5933913e-04-5.0569773e-03j,  3.5126954e-03+3.7779622e-03j,
         -5.3518414e-03+0.0000000e+00j],
        [-6.2469614e-01+0.0000000e+00j,  2.0468718e-01+4.9309013e-03j,
          6.7919180e-02-1.4873762e-01j, ...,
         -1.1299253e-03+6.2555000e-03j,  3.6285445e-03-3.7918477e-03j,
         -5.4290295e-03+0.0000000e+00j],
        [-7.4047655e-01+0.0000000e+00j,  4.1499108e-01-1.4053226e-01j,
         -1.8156120e-01+1.0911730e-01j, ...,
         -3.7077069e-04-3.1395219e-03j, -1.3211370e-04+5.2835792e-04j,
         -2.2668242e-03+0.0000000e+00j],
        ...,
        [-1.7547101e+00+0.0000000e+00j,  1.0129362e+00-2.3802727e-02j,
         -4.9253292e-02-2.9792801e-01j, ...,
          4.9935952e-03+2.9322207e-03j, -1.4759898e-03-1.3403771e-03j,
          1.4561415e-03+0.0000000e+00j],
        [-1.624615

In [15]:
from soundfile import read
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
from librosa.feature import mfcc, melspectrogram

batch_size=4
frame_length=320
hop_length=160
n_mels=40
mfcc_features=26
shuffle=True

def read_and_aggregate(features):
    sr = 0
    batch_spectograms = []
    max_length = 0
    for index, value in enumerate(features.numpy()):
        # open sound file
        spectogram_frames, sr = read(value.decode('UTF-8'))
        if len(spectogram_frames) > max_length:
            max_length = len(spectogram_frames)

        # add it to the array
        batch_spectograms.append(spectogram_frames)
    
    return batch_spectograms, sr, max_length

def inner_test(features, labels):
    x_data_raw, sr, max_length = read_and_aggregate(features)

    # converting into tensor
    new_features = []
    padded = None
    for spectogram in x_data_raw:
        pre_tensor = np.asarray(spectogram).astype(np.float32)
        new_features.append(tf.convert_to_tensor(pre_tensor,
                                                 dtype=np.float32))
        paddings = tf.constant([[0, 0,], [0, max_length]])
        padded = tf.pad(new_features, paddings, "CONSTANT")
        print(padded)
        print(len(labels))

    return paddings, labels

def test(features, labels):
    return tf.py_function(func=inner_test, inp=[features['filename'], labels], Tout=tf.float32), labels

for feature_batch, label_batch in librivox_ds.map(test).take(1):
    for key in feature_batch.numpy():
        print(key)


[ 0.          0.00039673 -0.00018311 ...  0.00167847  0.00222778
  0.00228882]
tf.Tensor(
[[ 0.          0.00039673 -0.00018311 ...  0.          0.
   0.        ]], shape=(1, 224560), dtype=float32)
4


UnknownError: InvalidArgumentError: Shapes of all inputs must match: values[0].shape = [41760] != values[1].shape = [47920] [Op:Pack] name: packed
Traceback (most recent call last):

  File "/Users/gabriel.t.nishimura/miniconda3/envs/usp/lib/python3.6/site-packages/tensorflow_core/python/ops/script_ops.py", line 234, in __call__
    return func(device, token, args)

  File "/Users/gabriel.t.nishimura/miniconda3/envs/usp/lib/python3.6/site-packages/tensorflow_core/python/ops/script_ops.py", line 123, in __call__
    ret = self._func(*args)

  File "<ipython-input-15-747ac09c10a5>", line 40, in inner_test
    padded = tf.pad(new_features, paddings, "CONSTANT")

  File "/Users/gabriel.t.nishimura/miniconda3/envs/usp/lib/python3.6/site-packages/tensorflow_core/python/ops/array_ops.py", line 2873, in pad_v2
    return pad(tensor, paddings, mode, name, constant_values)

  File "/Users/gabriel.t.nishimura/miniconda3/envs/usp/lib/python3.6/site-packages/tensorflow_core/python/ops/array_ops.py", line 2939, in pad
    result = gen_array_ops.pad(tensor, paddings, name=name)

  File "/Users/gabriel.t.nishimura/miniconda3/envs/usp/lib/python3.6/site-packages/tensorflow_core/python/ops/gen_array_ops.py", line 5784, in pad
    input, paddings, name=name, ctx=_ctx)

  File "/Users/gabriel.t.nishimura/miniconda3/envs/usp/lib/python3.6/site-packages/tensorflow_core/python/ops/gen_array_ops.py", line 5806, in pad_eager_fallback
    _attr_T, (input,) = _execute.args_to_matching_eager([input], ctx)

  File "/Users/gabriel.t.nishimura/miniconda3/envs/usp/lib/python3.6/site-packages/tensorflow_core/python/eager/execute.py", line 263, in args_to_matching_eager
    t, dtype, preferred_dtype=default_dtype, ctx=ctx))

  File "/Users/gabriel.t.nishimura/miniconda3/envs/usp/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py", line 1314, in convert_to_tensor
    ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)

  File "/Users/gabriel.t.nishimura/miniconda3/envs/usp/lib/python3.6/site-packages/tensorflow_core/python/ops/array_ops.py", line 1368, in _autopacking_conversion_function
    return _autopacking_helper(v, dtype, name or "packed")

  File "/Users/gabriel.t.nishimura/miniconda3/envs/usp/lib/python3.6/site-packages/tensorflow_core/python/ops/array_ops.py", line 1274, in _autopacking_helper
    return gen_array_ops.pack(list_or_tuple, name=name)

  File "/Users/gabriel.t.nishimura/miniconda3/envs/usp/lib/python3.6/site-packages/tensorflow_core/python/ops/gen_array_ops.py", line 5693, in pack
    _ops.raise_from_not_ok_status(e, name)

  File "/Users/gabriel.t.nishimura/miniconda3/envs/usp/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py", line 6606, in raise_from_not_ok_status
    six.raise_from(core._status_to_exception(e.code, message), None)

  File "<string>", line 3, in raise_from

tensorflow.python.framework.errors_impl.InvalidArgumentError: Shapes of all inputs must match: values[0].shape = [41760] != values[1].shape = [47920] [Op:Pack] name: packed


	 [[{{node EagerPyFunc}}]]