In [2]:
#  Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
# ==============================================================================
"""Using this class, we can extract features from the text and audio input."""
!pip install tensorflow==1.12.0
import tensorflow as tf
print(tf.__version__)
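# TensorFlow is pinned to 1.12.0 above because the model does not support
# TensorFlow 2. NOTE: if the version printed here is not 1.12.0, the previously
# loaded package is still in use; restart the runtime and re-run this cell.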

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import __future__
# The future statements above (absolute_import, division and print_function)
# are required so the code behaves the same on Python 2 and Python 3.

import codecs
import numpy as np

def spectrogram_features(samples, samplerate, stridems=10.0,
                                windowms=20.0, freqmax=None, eps=1e-14):
  """Compute the log power spectrogram of a waveform.

  The waveform is cut into overlapping frames of `windowms` milliseconds taken
  every `stridems` milliseconds. Each frame is weighted with a Hann window,
  transformed with a real FFT, converted to scaled power and then to log scale.

  Args:
    samples: waveform amplitudes; expected to be a 1-D array or sequence (it
      is converted with `np.asarray`).
    samplerate: number of samples per second of the waveform.
    stridems: frame stride, in milliseconds.
    windowms: frame length, in milliseconds.
    freqmax: highest frequency to keep. Defaults to the Nyquist frequency
      (`samplerate / 2`).
    eps: small constant added before the logarithm to avoid `log(0)`.

  Returns:
    A 2-D array of shape (number of frames, number of kept frequency bins)
    holding `log(power + eps)`.

  Raises:
    ValueError: if `freqmax` exceeds the Nyquist frequency, if the stride is
      longer than the window, if the stride is shorter than one sample, or if
      `samples` is shorter than one window.
  """
  # Accept plain sequences as well as arrays; `.strides` is needed below.
  samples = np.asarray(samples)

  # samplerate / 2 is the Nyquist frequency.
  nyquist = samplerate / 2
  if freqmax is None:
    freqmax = nyquist

  if freqmax > nyquist:
    raise ValueError("freqmax must not be greater than half of sample rate.")

  if stridems > windowms:
    raise ValueError("Stride size must not be greater than window size.")

  # Convert the millisecond durations into sample counts.
  stride_size = int(0.001 * samplerate * stridems)
  window_size = int(0.001 * samplerate * windowms)

  # A zero-sample stride would otherwise surface as a ZeroDivisionError below.
  if stride_size < 1:
    raise ValueError("Stride must span at least one sample.")

  # Too-short input would otherwise fail with an obscure shape/index error.
  if len(samples) < window_size:
    raise ValueError("samples must contain at least one full window.")

  # Drop the trailing samples that do not fill a complete stride.
  truncate_size = (len(samples) - window_size) % stride_size
  samples = samples[:len(samples) - truncate_size]

  # Extract strided windows: one column per frame, without copying the data.
  num_frames = (len(samples) - window_size) // stride_size + 1
  nshape = (window_size, num_frames)
  nstrides = (samples.strides[0], samples.strides[0] * stride_size)
  windows = np.lib.stride_tricks.as_strided(
      samples, shape=nshape, strides=nstrides)
  # Sanity-check the stride arithmetic. The second frame only exists when
  # there is more than one frame, so the check is skipped otherwise.
  if num_frames > 1:
    assert np.all(
        windows[:, 1] == samples[stride_size:(stride_size + window_size)])

  # Window weighting, squared Fast Fourier Transform (fft), scaling.
  # The Hann weighting tapers each frame to limit edge discontinuities.
  weighting = np.hanning(window_size)[:, None]
  fft = np.fft.rfft(windows * weighting, axis=0)
  fft = np.absolute(fft)
  fft = fft**2
  scale = np.sum(weighting**2) * samplerate
  # Interior bins are doubled; the first and last bins are scaled only.
  fft[1:-1, :] *= (2.0 / scale)
  fft[[0, -1], :] /= scale

  # Frequency (in Hz) represented by each FFT bin.
  freqs = float(samplerate) / window_size * np.arange(fft.shape[0])

  # Keep the bins up to and including freqmax, then move to log scale.
  ind = np.where(freqs <= freqmax)[0][-1] + 1
  specgram = np.log(fft[:ind, :] + eps)
  return np.transpose(specgram, (1, 0))


class Audio_Featurizer(object):
  """Holds the settings used to extract spectrogram features from audio."""

  def __init__(self,
               samplerate=16000,
               windowms=20.0,
               stridems=10.0):
    """Store the spectrogram settings on the instance.

    Args:
      samplerate: sample rate of the input waveform.
      windowms: length of a spectrogram frame, in ms.
      stridems: frame stride, in ms.
    """
    self.samplerate, self.windowms, self.stridems = (
        samplerate, windowms, stridems)


def label_feature(text, token_to_index):
  """Map each character of `text` to its integer index.

  Leading and trailing whitespace is removed and the text is lower-cased
  before the per-character lookup in `token_to_index`.
  """
  normalized = text.strip().lower()
  indices = []
  for char in normalized:
    indices.append(token_to_index[char])
  return indices


class TextFeaturizer(object):
  """Extract text feature based on char-level granularity.

  By looking up the vocabulary table, each input string (one line of transcript)
  will be converted to a sequence of integer indexes.

  Attributes set by the constructor:
    token_to_index: dict mapping each vocabulary token to its index.
    index_to_token: dict mapping each index back to its token.
    speech_labels: all tokens concatenated in index order.
  """

  def __init__(self, vocab_file):
    """Build the lookup tables from a UTF-8 vocabulary file.

    Each line of the file is one token; lines starting with "#" are comments
    and are skipped. Tokens are numbered from 0 in file order.

    Args:
      vocab_file: path of the vocabulary file.
    """
    with codecs.open(vocab_file, "r", "utf-8") as fin:
      lines = fin.readlines()

    self.token_to_index = {}
    self.index_to_token = {}
    tokens = []

    for line in lines:
      # Remove only the line terminator. Slicing off the last character would
      # corrupt a final line that has no trailing newline and would leave the
      # "\r" of CRLF files in place. Other whitespace is kept because a
      # single space can itself be a vocabulary token.
      token = line.rstrip("\r\n")

      if token.startswith("#"):
        # Skip from reading comment line.
        continue

      index = len(tokens)
      self.token_to_index[token] = index
      self.index_to_token[index] = token
      tokens.append(token)

    self.speech_labels = "".join(tokens)

Collecting tensorflow==1.12.0
[?25l  Downloading https://files.pythonhosted.org/packages/22/cc/ca70b78087015d21c5f3f93694107f34ebccb3be9624385a911d4b52ecef/tensorflow-1.12.0-cp36-cp36m-manylinux1_x86_64.whl (83.1MB)
[K     |████████████████████████████████| 83.1MB 36kB/s 
Collecting tensorboard<1.13.0,>=1.12.0
[?25l  Downloading https://files.pythonhosted.org/packages/07/53/8d32ce9471c18f8d99028b7cef2e5b39ea8765bd7ef250ca05b490880971/tensorboard-1.12.2-py3-none-any.whl (3.0MB)
[K     |████████████████████████████████| 3.1MB 34.9MB/s 
Installing collected packages: tensorboard, tensorflow
  Found existing installation: tensorboard 1.15.0
    Uninstalling tensorboard-1.15.0:
      Successfully uninstalled tensorboard-1.15.0
  Found existing installation: tensorflow 1.15.2
    Uninstalling tensorflow-1.15.2:
      Successfully uninstalled tensorflow-1.15.2
Successfully installed tensorboard-1.12.2 tensorflow-1.12.0


1.15.2
