In [1]:
!pip install tensorflow==1.14
!pip install -U -q keras
!pip install -U -q pandas
!pip install -U -q numpy

Collecting tensorflow==1.14
[?25l  Downloading https://files.pythonhosted.org/packages/de/f0/96fb2e0412ae9692dbf400e5b04432885f677ad6241c088ccc5fe7724d69/tensorflow-1.14.0-cp36-cp36m-manylinux1_x86_64.whl (109.2MB)
[K     |████████████████████████████████| 109.2MB 69kB/s 
Collecting tensorboard<1.15.0,>=1.14.0
[?25l  Downloading https://files.pythonhosted.org/packages/91/2d/2ed263449a078cd9c8a9ba50ebd50123adf1f8cfbea1492f9084169b89d9/tensorboard-1.14.0-py3-none-any.whl (3.1MB)
[K     |████████████████████████████████| 3.2MB 32.8MB/s 
[?25hCollecting tensorflow-estimator<1.15.0rc0,>=1.14.0rc0
[?25l  Downloading https://files.pythonhosted.org/packages/3c/d5/21860a5b11caf0678fbc8319341b0ae21a07156911132e0e71bffed0510d/tensorflow_estimator-1.14.0-py2.py3-none-any.whl (488kB)
[K     |████████████████████████████████| 491kB 52.0MB/s 
Installing collected packages: tensorboard, tensorflow-estimator, tensorflow
  Found existing installation: tensorboard 1.15.0
    Uninstalling tensorboa

In [2]:
# Download the DataSet file from public google drive folder
# The direct download link was generated via https://www.gdirect.link/

# https://stackoverflow.com/a/49576950/289970
import os
import urllib.request
urllib.request.urlretrieve('https://drive.google.com/uc?export=download&id=1s6MbI8_Dw9jBp7LLwWycvfs_-kOqisET', 'DataSet.zip')
!ls

DataSet.zip  sample_data


In [3]:
# Unzip the dataset and remove the zip file
!unzip DataSet.zip
!rm DataSet.zip

Archive:  DataSet.zip
   creating: DataSet/
  inflating: DataSet/.DS_Store       
  inflating: __MACOSX/DataSet/._.DS_Store  
  inflating: DataSet/class_labels_indices.csv  
   creating: DataSet/train/
   creating: DataSet/val/
  inflating: DataSet/train/3b.tfrecord  
  inflating: __MACOSX/DataSet/train/._3b.tfrecord  
  inflating: DataSet/train/3C.tfrecord  
  inflating: __MACOSX/DataSet/train/._3C.tfrecord  
  inflating: DataSet/train/4e.tfrecord  
  inflating: __MACOSX/DataSet/train/._4e.tfrecord  
  inflating: DataSet/train/4D.tfrecord  
  inflating: __MACOSX/DataSet/train/._4D.tfrecord  
  inflating: DataSet/train/0_.tfrecord  
  inflating: __MACOSX/DataSet/train/._0_.tfrecord  
  inflating: DataSet/train/2Q.tfrecord  
  inflating: __MACOSX/DataSet/train/._2Q.tfrecord  
  inflating: DataSet/train/2p.tfrecord  
  inflating: __MACOSX/DataSet/train/._2p.tfrecord  
  inflating: DataSet/train/0L.tfrecord  
  inflating: __MACOSX/DataSet/train/._0L.tfrecord  
  inflating: DataSet/train/0

In [4]:
!ls

DataSet  __MACOSX  sample_data


In [0]:
def fetch_model():
  new_model = Sequential()
  new_model.add(BatchNormalization(input_shape=(10, 128)))  # The input shape excludes batch
  new_model.add(Flatten())
  new_model.add(Dense(2048, activation="relu"))
  new_model.add(Dense(audioset_label_count, activation="sigmoid"))
  new_model.compile("adam", loss="binary_crossentropy", metrics=["accuracy"])
  return new_model


def parser(record, training=True, total_label_count=528):
  context_features = {
    "start_time_seconds": tf.FixedLenFeature([], dtype=tf.float32),
    "end_time_seconds": tf.FixedLenFeature([], dtype=tf.float32),
    "video_id": tf.FixedLenFeature([], dtype=tf.string),
  }
  sequence_features = {
    "audio_embedding": tf.FixedLenSequenceFeature([], dtype=tf.string)
  }

  # In training mode labels will be returned, otherwise they won't be
  if training:
    context_features["labels"] = tf.VarLenFeature(tf.int64)

  context_parsed, sequence_parsed = tf.parse_single_sequence_example(record, context_features, sequence_features)

  x = sequence_parsed['audio_embedding']
  if training:
    y = tf.sparse_to_dense(context_parsed["labels"].values, [total_label_count], 1)
    return x, y
  else:
    return x

    
def make_dataset_provider(tf_records, repeats=1000, num_parallel_calls=12, batch_size=32, total_label_count=100):
  def my_parser(record): return parser(record, total_label_count=total_label_count)

  dataset = tf.data.TFRecordDataset(tf_records)
  dataset = dataset.map(map_func=my_parser, num_parallel_calls=num_parallel_calls)
  dataset = dataset.repeat(repeats)

  dataset = dataset.shuffle(buffer_size=1000)
  dataset = dataset.batch(batch_size)
  d_iter = dataset.make_one_shot_iterator()
  return d_iter


def data_generator(tf_records, batch_size=1, repeats=1000, num_parallel_calls=12, total_label_count=528):
  """
  :return: Data in shape (batch_size, n_frames=10, 128 features - 1 byte each)
  """
  tf_provider = make_dataset_provider(tf_records, repeats=repeats, num_parallel_calls=num_parallel_calls, batch_size=batch_size,
                                      total_label_count=total_label_count)
  sess = tf.compat.v1.Session()

  next_el = tf_provider.get_next()
  max_frames = 10
  while True:
    try:
      raw_x, y = sess.run(next_el)  # returns (batch_size, n_frames, 128)
      x = []
      for entry in raw_x:
        n_frames = entry.shape[0]  # Entry has a shape (n_frames, )
        audio_frame = []
        for i_frame in range(n_frames):
          frame = np.frombuffer(entry[i_frame], np.uint8).astype(np.float32)
          # print("trigger appending a frame of size {}".format(len(float_frames)))
          audio_frame.append(frame)

        if n_frames < max_frames:
          pad = [np.zeros([128], np.float32) for i in range(max_frames-n_frames)]
          audio_frame += pad

        x.append(audio_frame)

      # print("audio_frame.shape=({}, {}, {}). y.shape()={}".format(len(x), len(x[0]), len(x[0][0]), len(y)))
      for i in range(len(x)):
        if len(x[i]) != 10:
          print("ERROR-1")
        for j in range(len(x[i])):
          if len(x[i][j]) != 128:
            print("ERROR-2")

      yield np.array(x), np.array(y)
    except tf.errors.OutOfRangeError:
      print("Iterations exhausted")
      break

In [6]:
from keras.models import Sequential
from keras.layers import Dense, BatchNormalization, Flatten

import tensorflow as tf
import os
import pandas as pd
import numpy as np

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [7]:
dirname = os.path.abspath('')
bal_train_folder = os.path.join(dirname, 'DataSet/train')
eval_folder = os.path.join(dirname, 'DataSet/val')
print('Train folder: ', bal_train_folder)
print('Eval folder: ', eval_folder)

def get_record(folder, file_names):
  files_glob = []
  for name in file_names:
    files_glob.append("{}/{}".format(folder, name))

  return files_glob


def get_bal_record(file_names):
  return get_record(bal_train_folder, file_names)


def get_eval_record(file_names):
  return get_record(eval_folder, file_names)

Train folder:  /content/DataSet/train
Eval folder:  /content/DataSet/val


In [11]:
train_data = []
eval_data = []
batch_size = 3
audioset_label_count = 528

for file in os.listdir(bal_train_folder):
  if file.endswith(".tfrecord"):
    train_data.append(os.path.join(bal_train_folder, file))

for file in os.listdir(eval_folder):
  if file.endswith(".tfrecord"):
    eval_data.append(os.path.join(eval_folder, file))

print('Total training file records: ', len(train_data))
print('Total eval file records: ', len(eval_data))

Total training file records:  219
Total eval file records:  764


In [12]:
train_generator = data_generator(train_data, batch_size=batch_size, num_parallel_calls=1)
validation_generator = data_generator(eval_data, batch_size=batch_size, num_parallel_calls=1)
model = fetch_model()
model.summary()

model.fit_generator(train_generator,  
                    steps_per_epoch=len(train_data),
                    epochs=10,
                    validation_data=validation_generator,
                    validation_steps=10)

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
batch_normalization_2 (Batch (None, 10, 128)           512       
_________________________________________________________________
flatten_2 (Flatten)          (None, 1280)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 2048)              2623488   
_________________________________________________________________
dense_4 (Dense)              (None, 528)               1081872   
Total params: 3,705,872
Trainable params: 3,705,616
Non-trainable params: 256
_________________________________________________________________
Epoch 1/10
 20/219 [=>............................] - ETA: 16s - loss: 0.1337 - accuracy: 0.9518

InvalidArgumentError: ignored

In [0]:

# pip show tensorflow

Name: tensorflow
Version: 1.14.0
Summary: TensorFlow is an open source machine learning framework for everyone.
Home-page: https://www.tensorflow.org/
Author: Google Inc.
Author-email: packages@tensorflow.org
License: Apache 2.0
Location: /usr/local/lib/python3.6/dist-packages
Requires: wheel, tensorflow-estimator, grpcio, numpy, termcolor, keras-applications, google-pasta, astor, protobuf, keras-preprocessing, six, absl-py, tensorboard, wrapt, gast
Required-by: stable-baselines, magenta, fancyimpute
