# Embed Audio

This notebook provides a single-machine workflow for embedding raw audio files.
This notebook is ideal for a single machine with a GPU for accelerated embedding.

For a parallelized workflow, try `inference/embed.py`, which uses a Beam pipeline.


In [None]:
#@title Imports. { vertical-output: true }

# Global imports
import collections
import os
import numpy as np
import tensorflow as tf
from etils import epath
import matplotlib.pyplot as plt
import tqdm

# Colab form toggle: leave True to let TensorFlow use any GPU it can see;
# set to False to restrict TensorFlow to the CPU.
# NOTE(review): this sits before the chirp imports, presumably so the device
# restriction is applied before those modules touch TensorFlow - confirm.
use_tf_gpu = True #@param
if not use_tf_gpu:
  # Hand TensorFlow an empty list of visible GPU devices.
  tf.config.experimental.set_visible_devices([], "GPU")

from chirp import config_utils
from chirp.configs import config_globals
from chirp.inference import embed_lib
from chirp.inference import tf_examples

In [None]:
#@title Configuration. { vertical-output: true }

# Builds the `config` object used by the cells below: load a named base
# configuration, resolve it, then override paths and sharding parameters.
# Lines ending in `#@param` are exposed as editable Colab form fields.

# Name of base configuration file in `chirp/inference/configs`
config_key = 'raw_soundscapes' #@param
config = embed_lib.get_config(config_key)
config = config_utils.parse_config(config, config_globals.get_globals())

# Here we adjust the input and output targets.
# `output_dir` is where results are written; `source_file_patterns` is a
# list of patterns selecting the input audio. Both defaults are placeholders:
# the pattern list holds a single empty string and must be filled in.
# NOTE(review): patterns are presumably glob-style - confirm against
# `embed_lib.create_source_infos`.
config.output_dir = '/tmp/testrun' #@param
config.source_file_patterns = [''] #@param

# Location of Perch model
# Empty by default; must be set to the model's path before running.
model_path = '' #@param
config.embed_fn_config.model_config.model_path = model_path

# Embedding windows are broken up into groups, typically one minute in length.
# This lets us limit input size to the model, track progress and
# recover from failures more easily.
config.shard_len_s = 60 #@param
config.num_shards_per_file = 10 #@param

# Number of parent directories to include in the filename.
config.embed_fn_config.file_id_depth = 1

# Number of TF Record files to create.
tf_record_shards = 10 #@param

In [None]:
#@title Set up. { vertical-output: true }

# Prepares everything the embedding run needs: the output directory, the
# list of source infos to process, and a loaded, warmed-up embedding function.
# `%time` is an IPython line magic that reports how long the statement took.

# Create output directory and write the configuration.
output_dir = epath.Path(config.output_dir)
output_dir.mkdir(exist_ok=True, parents=True)
# NOTE(review): presumably skips or validates the write when a config is
# already present in `output_dir` - confirm in `embed_lib`.
embed_lib.maybe_write_config(config, output_dir)

# Create SourceInfos.
# Built from the source file patterns plus the per-file shard count and
# shard length (in seconds) chosen in the configuration.
source_infos = embed_lib.create_source_infos(
    config.source_file_patterns,
    config.num_shards_per_file,
    config.shard_len_s)
print(f'Found {len(source_infos)} source infos.')

# Set up the embedding function, including loading models.
embed_fn = embed_lib.EmbedFn(**config.embed_fn_config)
print('\n\nLoading model(s)...')
%time embed_fn.setup()

print('\n\nTest-run of model...')
# We run the test twice - the first run optimizes the execution, and
# subsequent runs will be full-speed.
window_size_s = config.embed_fn_config.model_config.window_size_s
sr = config.embed_fn_config.model_config.sample_rate
# Dummy input: one model window of silence (sr * window_size_s zero samples).
z = np.zeros([int(sr * window_size_s)])
print('    Cold-start timing:')
%time unused = embed_fn.embedding_model.embed(z)
print('    Typical run timing:')
%time unused = embed_fn.embedding_model.embed(z)


In [None]:
#@title Run embedding. { vertical-output: true }

# Embeds every source info, writes the serialized examples to sharded
# TFRecord files in `output_dir`, then reads one record back as a sanity check.

embed_fn.min_audio_s = 1.0
# NOTE(review): `record_file` is not used anywhere in this cell; it is kept
# only in case cells outside this view read it - confirm and remove if not.
record_file = (output_dir / 'embeddings.tfrecord').as_posix()
succ, fail = 0, 0
# The writer class is reached through the `tf_examples` module imported at the
# top of the notebook; the bare class name is never imported, so using it
# unqualified raises NameError.
with tf_examples.EmbeddingsTFRecordMultiWriter(
    output_dir=output_dir, num_files=tf_record_shards) as file_writer:
  for source_info in tqdm.tqdm(source_infos):
    examples = embed_fn.process(source_info=source_info)
    # `process` returning None is counted as a failure for this source info;
    # skip it and keep going rather than aborting the whole run.
    if examples is None:
      fail += 1
      continue
    for example in examples:
      file_writer.write(example.SerializeToString())
    succ += 1
  file_writer.flush()
print(f'\n\nSuccessfully processed {succ} source_infos, failed {fail} times.')

# Sanity check: parse the first record from the files just written and show
# its source filename and embedding shape.
fns = list(output_dir.glob('embeddings-*'))
ds = tf.data.TFRecordDataset(fns)
parser = tf_examples.get_example_parser()
ds = ds.map(parser)
for ex in ds.as_numpy_iterator():
  print(ex['filename'])
  print(ex['embedding'].shape)
  break