In [1]:
import boto3
from pathlib import Path
import subprocess as sp

In [2]:
# --- Local storage ---
# Root directory that receives the per-user, per-recording WAV segments.
DATA_DIR = "/media/Extra_Drive/fushigi"
# Directory created ahead of the embedding-extraction step.
NNDATA_DIR = "/media/Extra_Drive/fushigi_nn_data"

# --- S3 source ---
BUCKET = "fushigi-audio"
# Only object keys whose first path component is listed here are processed.
USERS = ["dane"]

# --- ffmpeg segmentation ---
FFMPEG_PATH = "/usr/bin/ffmpeg"
SEGMENT_LEN = 5  # seconds
SAMPLE_RATE_WATCH = 8000  # passed to ffmpeg as the input-side -ar value
SAMPLE_RATE_OUT = 8000    # passed to ffmpeg as the output-side -ar value

In [3]:
# Make the named AWS credentials profile the default for everything created
# through the module-level boto3 helpers below.
boto3.setup_default_session(profile_name="hlnetworks")

# Resource handle: used to list the bucket's objects.
s3_resource = boto3.resource("s3")
# Client handle: used to generate presigned download URLs.
s3_client = boto3.client("s3")

For each user: create their directory, create a subdirectory for each PCM file, and use ffmpeg to generate a WAV file for each segment.

In [4]:
# Group the bucket's .pcm object keys by user, where the user is the first
# '/'-separated component of the key. Users not listed in USERS are ignored.
user2keys = {}
bucket = s3_resource.Bucket(BUCKET)
for obj in bucket.objects.all():
    if obj.key.endswith('.pcm'):
        user = obj.key.split('/', 1)[0]
        if user in USERS:
            # setdefault creates the user's list on first sight.
            user2keys.setdefault(user, []).append(obj.key)

In [25]:
def split_s3_audio(url, recording_path):
    """Split one raw PCM recording into fixed-length WAV segments with ffmpeg.

    The command that is run is equivalent to::

        ffmpeg -ar <SAMPLE_RATE_WATCH> -f s16le -i <url> -c copy -f segment
               -segment_time <SEGMENT_LEN> -ar <SAMPLE_RATE_OUT> -sample_fmt s16
               <recording_path>/out%05d.wav

    Args:
        url: Input handed to ffmpeg's ``-i`` option (the caller passes a
            presigned S3 URL).
        recording_path: ``pathlib.Path`` of the directory that receives the
            ``outNNNNN.wav`` segments. It is not created here.

    Returns:
        None. The outcome is reported on stdout: a confirmation line on
        success, or ffmpeg's exit code and decoded stderr on failure.
    """
    # ffmpeg expands %05d itself, producing out00000.wav, out00001.wav, ...
    audio_filepath = str(recording_path/'out%05d.wav')
    audio_dl_args = [
        FFMPEG_PATH,
        '-ar', str(SAMPLE_RATE_WATCH),
        '-f', 's16le',
        '-i', url,
        '-c', 'copy',
        '-f', 'segment',
        '-segment_time', str(SEGMENT_LEN),
        '-ar', str(SAMPLE_RATE_OUT),
        '-sample_fmt', 's16',
        audio_filepath
    ]

    # sp.run waits for completion and collects both pipes, like
    # Popen(...).communicate() did.
    proc = sp.run(audio_dl_args, stdout=sp.PIPE, stderr=sp.PIPE)
    if proc.returncode != 0:
        # Decode stderr so the log is readable text rather than a bytes repr,
        # and say which recording failed.
        print("ffmpeg failed (exit code %d) for %s" % (proc.returncode, audio_filepath))
        print(proc.stderr.decode('utf-8', errors='replace'))
    else:
        print("Downloaded audio to " + audio_filepath)

In [26]:
# create dir for user and fnames
def get_fname(key):
    """Return the last '/'-separated component of an S3 object key."""
    _head, _sep, fname = key.rpartition('/')
    return fname

def get_signed_url(s3, bucket, key=None):
    """Return a presigned ``get_object`` URL for an object in ``bucket``.

    Args:
        s3: Object exposing ``generate_presigned_url`` (the boto3 S3 client
            in this notebook).
        bucket: Name of the bucket that holds the object.
        key: Key of the object to sign. Pass it explicitly. It is optional
            only for backward compatibility: the original two-argument form
            read a module-level variable named ``key``, and that lookup is
            kept as a fallback when no key is given.

    Returns:
        Whatever ``s3.generate_presigned_url`` returns (the presigned URL).

    Raises:
        ValueError: If no key is passed and no module-level ``key`` exists.
    """
    if key is None:
        # Legacy behaviour: fall back to the notebook-global `key`.
        key = globals().get('key')
        if key is None:
            raise ValueError('get_signed_url: no object key given')
    return s3.generate_presigned_url('get_object',
                                     {'Bucket': bucket, 'Key': key})

# Mirror each user's recordings locally: one directory per user and, inside
# it, one sub-directory per .pcm object filled with WAV segments.
datapath = Path(DATA_DIR)
# parents=True so a fresh machine without the parent directories still works.
datapath.mkdir(parents=True, exist_ok=True)
for user in USERS:
    user_dir = datapath/user
    user_dir.mkdir(exist_ok=True)
    # .get(): a user with no .pcm objects has no entry in user2keys, and
    # indexing would raise KeyError.
    for key in user2keys.get(user, []):
        fname = get_fname(key)
        audio_path = user_dir/fname
        audio_path.mkdir(exist_ok=True)
        # NOTE: called without an object key, get_signed_url resolves it from
        # the module-level `key` bound by this loop, so the loop variable must
        # keep that name.
        url = get_signed_url(s3_client, BUCKET)
        split_s3_audio(url, audio_path)

Downloaded audio to /media/Extra_Drive/fushigi/dane/recording_07_32_50_20190211.pcm/out%05d.wav
Downloaded audio to /media/Extra_Drive/fushigi/dane/recording_07_38_18_20190211.pcm/out%05d.wav
Downloaded audio to /media/Extra_Drive/fushigi/dane/recording_07_43_46_20190211.pcm/out%05d.wav
Downloaded audio to /media/Extra_Drive/fushigi/dane/recording_07_49_14_20190211.pcm/out%05d.wav
Downloaded audio to /media/Extra_Drive/fushigi/dane/recording_07_54_42_20190211.pcm/out%05d.wav
Downloaded audio to /media/Extra_Drive/fushigi/dane/recording_08_00_10_20190211.pcm/out%05d.wav
Downloaded audio to /media/Extra_Drive/fushigi/dane/recording_08_05_38_20190211.pcm/out%05d.wav
Downloaded audio to /media/Extra_Drive/fushigi/dane/recording_08_11_06_20190211.pcm/out%05d.wav
Downloaded audio to /media/Extra_Drive/fushigi/dane/recording_08_16_34_20190211.pcm/out%05d.wav
Downloaded audio to /media/Extra_Drive/fushigi/dane/recording_08_22_02_20190211.pcm/out%05d.wav
Downloaded audio to /media/Extra_Drive/f

In [29]:
import sys
import os
import soundfile
import librosa

import tensorflow as tf
import vggish_input
import vggish_params
import vggish_postprocess
import vggish_slim
slim = tf.contrib.slim

import numpy as np
import h5py

from collections import Counter

SyntaxError: invalid syntax (<ipython-input-29-6d3ffb03e853>, line 8)

In [None]:
def read_audio(path):
    """Read an audio file with ``soundfile`` and return ``(samples, sample_rate)``.

    The data is returned exactly as ``soundfile.read`` yields it: no channel
    averaging and no resampling is applied here.
    """
    samples, sample_rate = soundfile.read(path)
    return samples, sample_rate

In [None]:
"""Extract log mel spectrogram features. 
"""

# Arguments & parameters
#mel_bins = vggish_params.NUM_BANDS
#sample_rate = vggish_params.SAMPLE_RATE
#input_len = vggish_params.NUM_FRAMES
#embedding_size = vggish_params.EMBEDDING_SIZE

'''You may modify the EXAMPLE_HOP_SECONDS in vggish_params.py to change the 
hop size. '''

# Paths to the pretrained VGGish checkpoint and its PCA parameters.
checkpoint_path = '../vggish_model.ckpt'
pca_params_path = '../vggish_pca_params.npz'
# Legacy (misspelled) name, kept so code that still refers to it keeps working.
pcm_params_path = pca_params_path

# Fail early, with download instructions, if either file is missing.
# FileNotFoundError is a subclass of Exception, so existing handlers still match.
if not os.path.isfile(checkpoint_path):
    raise FileNotFoundError('Please download vggish_model.ckpt from '
        'https://storage.googleapis.com/audioset/vggish_model.ckpt '
        'and put it in the root of this codebase. ')

if not os.path.isfile(pca_params_path):
    raise FileNotFoundError('Please download vggish_pca_params.npz from '
        'https://storage.googleapis.com/audioset/vggish_pca_params.npz '
        'and put it in the root of this codebase. ')

# Load model: build the VGGish graph in inference mode and restore its weights.
sess = tf.Session()

vggish_slim.define_vggish_slim(training=False)
vggish_slim.load_vggish_slim_checkpoint(sess, checkpoint_path)
# Graph endpoints used later to feed features and fetch embeddings.
features_tensor = sess.graph.get_tensor_by_name(vggish_params.INPUT_TENSOR_NAME)
embedding_tensor = sess.graph.get_tensor_by_name(vggish_params.OUTPUT_TENSOR_NAME)

# Post-processor built from the PCA parameter file.
pproc = vggish_postprocess.Postprocessor(pca_params_path)

In [None]:
# Read audio and map to embeddings.
# outputs[i] is the post-processed embedding batch for clip_ids[i]; errors
# collects the paths of the WAV files that could not be processed.
outputs = []
clip_ids = []
errors = []

nndata_path = Path(NNDATA_DIR)
# The keyword is `exist_ok`; the previous `exists_ok` spelling raised TypeError.
nndata_path.mkdir(parents=True, exist_ok=True)

for user in USERS:
    # (audio length, log-mel shape, embedding shape) of this user's most
    # recent successfully processed clip, for the summary printed below.
    last_summary = None

    # .get(): a user with no .pcm objects has no entry in user2keys.
    for key in user2keys.get(user, []):
        pcm_fname = get_fname(key)
        audio_dir = datapath/user/pcm_fname  # (<-- a directory of wav files)
        # sorted(): os.listdir returns entries in arbitrary order; sorting
        # makes the order of clip_ids/outputs reproducible across runs.
        for wav_fname in sorted(os.listdir(audio_dir)):
            audio_path = os.path.join(audio_dir, wav_fname)
            try:
                audio, sample_rate = read_audio(audio_path)

                # Extract log mel feature
                logmel = vggish_input.waveform_to_examples(audio, sample_rate)

                # Extract embedding feature
                [embedding_batch] = sess.run([embedding_tensor], feed_dict={features_tensor: logmel})

                # PCA
                postprocessed_batch = pproc.postprocess(embedding_batch)
            except Exception as exc:
                # Best effort: record the failure and move on. Catching
                # Exception (not a bare except) lets Ctrl-C interrupt the run.
                print('error processing: ', audio_path, repr(exc))
                errors.append(audio_path)
            else:
                clip_id = "%s_%s_%s" % (user, pcm_fname, wav_fname)
                clip_ids.append(clip_id)
                outputs.append(postprocessed_batch)
                last_summary = (len(audio), logmel.shape, postprocessed_batch.shape)

    # Per-user summary; guarded so a user with no processed clips does not
    # raise NameError on variables that were never assigned.
    if last_summary is None:
        print('No clips processed for user: {}'.format(user))
    else:
        print('Audio length: {}'.format(last_summary[0]))
        print('Log mel shape: {}'.format(last_summary[1]))
        print('Embedding feature shape: {}'.format(last_summary[2]))