In [1]:
# Install (or upgrade, via -U) the cufflinks package with pip; -qq asks pip for minimal output.
# NOTE(review): no version is pinned, so a re-run may pick up a different release.
!pip install -qq -U cufflinks

[?25l[K     |████                            | 10kB 13.9MB/s eta 0:00:01[K     |████████                        | 20kB 2.2MB/s eta 0:00:01[K     |████████████                    | 30kB 3.2MB/s eta 0:00:01[K     |████████████████                | 40kB 2.1MB/s eta 0:00:01[K     |████████████████████▏           | 51kB 2.6MB/s eta 0:00:01[K     |████████████████████████▏       | 61kB 3.1MB/s eta 0:00:01[K     |████████████████████████████▏   | 71kB 3.6MB/s eta 0:00:01[K     |████████████████████████████████| 81kB 3.8MB/s 
[?25h  Building wheel for cufflinks (setup.py) ... [?25l[?25hdone


In [0]:
import tensorflow as tf

In [0]:
# Start from a fresh checkout: remove any existing ./fenwicks directory, then
# clone the fenwicks repository (imported further down as `fw`) from GitHub.
if tf.io.gfile.exists('./fenwicks'):
  tf.io.gfile.rmtree('./fenwicks')
!git clone -q https://github.com/fenwickslab/fenwicks.git

In [4]:
from IPython.display import Audio
from scipy.io import wavfile
import fenwicks as fw
import os
import functools
import numpy as np

In [5]:
# Notebook-wide settings. The trailing `#@param` comments are Colab form
# annotations; keep each one on the same line as the value it describes.

# Storage root (a gs:// path) and project name, used together to derive the
# data and working directories.
ROOT_DIR = 'gs://gs_colab'
PROJECT = 'tutorial6'
# Training batch size; also the divisor used to compute steps per epoch.
BATCH_SIZE = 128 #@param ["128", "256", "512"] {type:"raw"}
# Number of passes over the training set (multiplied by steps per epoch).
EPOCHS = 24 #@param {type:"slider", min:0, max:100, step:1}
# Learning rate handed to the one-cycle schedule.
LEARNING_RATE = 0.001 #@param ["0.001", "0.01", "0.1"] {type:"raw"}
# Fraction of the total training steps used as warm-up steps.
WARMUP = 0.1 #@param {type:"slider", min:0, max:0.5, step:0.05}

In [6]:
# Prepare Google Cloud Storage access for this Colab session.
# NOTE(review): presumably this performs the authentication needed for the gs:// paths used below — confirm in fenwicks.
fw.colab_utils.setup_gcs()

In [7]:
# Derive the project's data directory and working directory from ROOT_DIR and PROJECT.
# NOTE(review): the training log further down shows a timestamped model directory under gs://gs_colab/work/tutorial6,
# which is presumably what work_dir resolves to — confirm in fenwicks.
data_dir, work_dir = fw.io.get_project_dirs(ROOT_DIR, PROJECT)

In [8]:
# Fetch the Speech Commands v0.01 archive (see the download message below) and keep the
# returned path as the local dataset directory.
# NOTE(review): './speech001' is presumably the extraction target — confirm in fenwicks.
data_dir_local = fw.datasets.untar_data(fw.datasets.URLs.SPEECH_CMD_001, './speech001')

Downloading data from http://download.tensorflow.org/data/speech_commands_v0.01.tar.gz


In [9]:
# Pick one recording from the dataset's 'happy' folder as a running example.
example_audio_fn = os.path.join(data_dir_local, 'happy/012c8314_nohash_0.wav')
# Leaving the Audio object as the cell's last expression renders an inline player.
Audio(example_audio_fn)

In [10]:
# Sample rate (samples per second) used for slicing and writing the silence clips.
SAMPLE_RATE = 16000
# Dataset sub-folder that holds the background-noise recordings.
NOISE_DIR = '_background_noise_'
# Output paths (under data_dir) handed to the train/validation TFRecord split below.
train_fn = os.path.join(data_dir, 'speech001_images_train')
valid_fn = os.path.join(data_dir, 'speech001_images_valid')

In [11]:
def gen_silence(num_segment=400, num_synth=500):
  """Populate `<data_dir_local>/silence/` with 'silence' example clips.

  Two kinds of clips are written, all at SAMPLE_RATE:

  * For every .wav file in the background-noise folder, up to `num_segment`
    windows of SAMPLE_RATE samples are cut at a stride of 200 samples. Each
    window is scaled by a random gain in [0, 1.5] (the gain is 0 whenever the
    random draw falls below 0.25) and is written only if its maximum sample
    is non-zero.
  * `num_synth` clips produced by `fw.audio.gen_synth_silence`.

  The output directory is recreated from scratch on every call.

  Args:
    num_segment: windows attempted per background-noise file.
    num_synth: number of synthesized clips to write.
  """
  HOP = 200  # stride, in samples, between consecutive windows
  int16_info = np.iinfo(np.int16)

  noise_dir = os.path.join(data_dir_local, NOISE_DIR)
  out_dir = os.path.join(data_dir_local, 'silence/')

  fw.io.create_clean_dir(out_dir)

  for filename in fw.io.enum_files(noise_dir, 'wav'):
    _, samples = wavfile.read(filename)
    base_name = os.path.basename(filename)
    for i in range(num_segment):
      start = i * HOP
      segment = samples[start: start + SAMPLE_RATE]
      if segment.size == 0:
        # The recording is shorter than this offset; later offsets are empty
        # too, and calling .max() on an empty array would raise.
        break
      gain = max(0, 2 * (np.random.random() - 0.25))
      # Gains above 1 can push samples past the int16 range. Saturate before
      # casting so loud samples clip instead of wrapping around.
      data = np.clip(segment * gain, int16_info.min, int16_info.max).astype('int16')
      if data.max() != 0:
        wavfile.write(os.path.join(out_dir, f'segment_{i}_{base_name}'), SAMPLE_RATE, data)

  for i in range(num_synth):
    d = fw.audio.gen_synth_silence(sr = SAMPLE_RATE, n_rand = 4600)
    wavfile.write(os.path.join(out_dir, f'new_synthesized_{i}.wav'), SAMPLE_RATE, d)

In [12]:
# Generate the clips for the 'silence' folder.
# NOTE(review): the "Chunk (non-data) not understood" message below is presumably a
# scipy.io.wavfile warning raised while reading the noise recordings — confirm.
gen_silence()


Chunk (non-data) not understood, skipping it.



In [13]:
# Run the feature extractor on the sample clip as a sanity check. The displayed
# shape, (40, 101), matches the height and width given to the TFRecord parser later on.
x_example = fw.audio.read_logmelspectrogram(example_audio_fn)
x_example.shape

(40, 101)

The following step runs the audio-to-image extractor on the CPU. On Google Colab this sometimes takes a very long time (around 3 hours), because Colab limits CPU usage. If you are lucky, it can also finish quickly.

In [14]:
# Run the log-mel extractor over the dataset's .wav files (the background-noise
# folder is excluded) and split the result into the train / validation outputs
# at train_fn and valid_fn.
paths_train, paths_valid, y_train, y_valid, labels = fw.data.data_dir_tfrecord_split(
  data_dir_local,
  train_fn,
  valid_fn,
  extractor=fw.audio.read_logmelspectrogram,
  file_ext='wav',
  exclude_dirs=[NOISE_DIR],
)

# Dataset sizes used later for the model head and the training schedule.
n_classes = len(labels)
n_train = len(y_train)
n_valid = len(y_valid)

INFO:tensorflow:Output file already exists. Skipping.
INFO:tensorflow:Output file already exists. Skipping.


In [15]:
# Pie chart built from the training labels, to inspect how the examples are spread over the classes.
# NOTE(review): w=450 is presumably the chart width — confirm in fenwicks.
fw.plt.plot_counts_pie(y_train, labels, w=450)

In [16]:
def build_nn(c=16, kernel_size=(2,5), c_dense=256, drop_rate=0.5):
  """Assemble the classifier network as a `fw.Sequential` model.

  Layer order: four `ConvBlk` stages (each with convs=2) whose first argument
  is c, 2c, 4c and 8c; a `GlobalPools2D` layer; two `DenseBN` layers built
  from `c_dense` and `drop_rate`; and a `Classifier` head sized by the
  notebook-level `n_classes`.

  Args:
    c: base value passed to the first ConvBlk; later stages use multiples of it.
    kernel_size: kernel size forwarded to every ConvBlk.
    c_dense: first argument of each DenseBN layer.
    drop_rate: drop rate forwarded to each DenseBN layer.

  Returns:
    The populated `fw.Sequential` model.
  """
  net = fw.Sequential()

  # Convolutional stages: the multiplier doubles from one stage to the next.
  for multiplier in (1, 2, 4, 8):
    net.add(fw.layers.ConvBlk(c * multiplier, convs=2, kernel_size=kernel_size))

  net.add(fw.layers.GlobalPools2D())

  # Two identical dense layers ahead of the classifier head.
  for _ in range(2):
    net.add(fw.layers.DenseBN(c_dense, drop_rate=drop_rate))

  net.add(fw.layers.Classifier(n_classes))
  return net

In [17]:
# Number of full batches per epoch; floor division leaves out a trailing partial batch.
steps_per_epoch = n_train // BATCH_SIZE
# Total optimizer steps across all epochs.
total_steps = steps_per_epoch * EPOCHS
# WARMUP is a fraction of the total steps; int() truncates to a whole step count.
warmup_steps = int(total_steps * WARMUP)

In [18]:
# Despite the short alias, this is TensorFlow's cosine decay *with restarts*.
cosine_decay = tf.train.cosine_decay_restarts
# One-cycle learning-rate schedule built from the rate, the step counts and the decay function.
# NOTE(review): presumably it ramps up over warmup_steps and then applies the decay — confirm in fenwicks.
lr_func = fw.train.one_cycle_lr(LEARNING_RATE, total_steps, warmup_steps, cosine_decay)
# Plot the schedule over the whole training run.
fw.plt.plot_lr_func(lr_func, total_steps)

Instructions for updating:
Colocations handled automatically by placer.


In [19]:
# Adam optimizer factory driven by the learning-rate schedule.
opt_func = fw.train.adam_optimizer(lr_func)
# Classifier model function combining the network builder with the optimizer; passed to the estimator below.
model_func = fw.train.get_clf_model_func(build_nn, opt_func)

In [20]:
def parser(x):
  """Parse one serialized example with fenwicks' numpy-image parser, using the sizes 40, 101, 1."""
  return fw.data.tfexample_numpy_image_parser(x, 40, 101, 1)


def train_input_func(params):
  """Build the training dataset from `train_fn`, batched by `params['batch_size']`."""
  return fw.data.tfrecord_ds(train_fn, parser, params['batch_size'], training=True)


def valid_input_func(params):
  """Build the validation dataset from `valid_fn`, batched by `params['batch_size']`."""
  return fw.data.tfrecord_ds(valid_fn, parser, params['batch_size'], training=False)

In [22]:
# Preview what the validation input pipeline yields. The parser above uses a channel count of 1,
# so each item is passed through gray2rgb (with normalize=True) before being displayed.
# NOTE(review): h_inch / w_inch are presumably the figure size in inches — confirm in fenwicks.
fw.anim.show_input_func(valid_input_func, h_inch=1, w_inch=2.5, converter=functools.partial(fw.image.gray2rgb, normalize=True))

In [23]:
# Build the TPU estimator with work_dir as its output location. The validation batch size is set
# to n_valid, i.e. the size of the whole validation set.
est = fw.train.get_tpu_estimator(steps_per_epoch, model_func, work_dir, trn_bs=BATCH_SIZE, val_bs=n_valid)
# Train for the full schedule (steps_per_epoch * EPOCHS steps).
est.train(train_input_func, steps=total_steps)

INFO:tensorflow:Using config: {'_model_dir': 'gs://gs_colab/work/tutorial6/2019-05-24-11:14:18', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
cluster_def {
  job {
    name: "worker"
    tasks {
      key: 0
      value: "10.114.6.74:8470"
    }
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': None, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fc9b9d4ca20>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': 'grpc://10.114.6.74:8470', '_evaluation_master': 'grpc://10.114.6.74:8470', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1, '_tpu_config': TPUConfig(iterations_per_loop=418, num_shards=None, 

<tensorflow.contrib.tpu.python.tpu.tpu_estimator.TPUEstimator at 0x7fc9b9c1a1d0>

In [24]:
# Evaluate for a single step. The estimator was created with val_bs=n_valid, so that one
# step is presumably one batch holding the entire validation set.
result = est.evaluate(valid_input_func, steps=1)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-05-24T11:16:15Z
INFO:tensorflow:TPU job name worker
INFO:tensorflow:Graph was finalized.
Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from gs://gs_colab/work/tutorial6/2019-05-24-11:14:18/model.ckpt-10032
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Init TPU system
INFO:tensorflow:Initialized TPU in 8 seconds
INFO:tensorflow:Starting infeed thread controller.
INFO:tensorflow:Starting outfeed thread controller.
INFO:tensorflow:Initialized dataset iterators in 0 seconds
INFO:tensorflow:Enqueue next (1) batch(es) of data to infeed.
INFO:tensorflow:Dequeue next (1) batch(es) of data from outfeed.
INFO:tensorflow:Evaluation [1/1]
INFO:tensorflow:Stop infeed thread controller
INFO:tensorflow:Shutting down InfeedController thread.
INFO:tensorflow:

In [25]:
# Report the metrics returned by est.evaluate on valid_input_func.
# NOTE(review): the label says "Test", but these are validation-set metrics.
# NOTE(review): the format spec ': .2f' contains the space sign flag, which is why the
# printed values carry a leading space after '='.
print(f'Test results: accuracy={result["accuracy"] * 100: .2f}%, loss={result["loss"]: .2f}.')

Test results: accuracy= 97.28%, loss= 0.09.


In [26]:
# Reset work_dir to a clean directory now that the run is finished.
# NOTE(review): this presumably deletes the checkpoints and logs written there during training —
# confirm, and skip this cell if they are still needed.
fw.io.create_clean_dir(work_dir)