In [1]:
import pandas as pd
import numpy as np
import six
import soundfile
import tensorflow.compat.v1 as tf
import os
from tqdm.notebook import tqdm


from models.vggish import vggish_input, vggish_params, vggish_slim

In [2]:
# Locations used by the rest of the notebook, relative to the working directory.
# Input: one "<video_id>.wav" file per video is read from here.
AUDIO_DIR = "training_set/Audio"
# Output: one "<video_id>.csv" file of VGGish embeddings per video is written here.
VGGISH_EMBEDDING_DIR = "training_set/Features/VGGish"

In [3]:
# Make sure the output directory for the embedding files exists.
# `os.makedirs` also creates any missing parent directories (plain `os.mkdir`
# raises FileNotFoundError when the parent is absent), and `exist_ok=True`
# turns the call into a no-op when the directory is already there, so this
# cell is safe to re-run and has no check-then-create race.
os.makedirs(VGGISH_EMBEDDING_DIR, exist_ok=True)

In [4]:
# Load the score table and key it by video id; the index values are what the
# later cells use to derive per-video file names.
data = (
    pd.read_csv("training_set/scores_v2.csv")
    .set_index("video_id")
)

In [5]:
# Build two parallel lists, both in the order of `data.index`:
# the audio file to read and the embedding file to write for each video.
wav_filenames = ["{}/{}.wav".format(AUDIO_DIR, vid) for vid in data.index]
embedding_filenames = ["{}/{}.csv".format(VGGISH_EMBEDDING_DIR, vid) for vid in data.index]

In [6]:
# Compute VGGish embeddings for every audio file that does not have an
# embedding file yet, writing one output file per video.
# `lengths` collects the number of embedding rows produced for each file
# processed in THIS run (files skipped below contribute nothing).
lengths = []
with tf.Graph().as_default(), tf.Session() as sess:
    # Define the VGGish graph with training disabled and restore its weights
    # from the checkpoint file.
    vggish_slim.define_vggish_slim(training=False)
    vggish_slim.load_vggish_slim_checkpoint(sess, "models/vggish/vggish_model.ckpt")

    # Look up the graph's input placeholder and embedding output by name.
    features_tensor = sess.graph.get_tensor_by_name(vggish_params.INPUT_TENSOR_NAME)
    embedding_tensor = sess.graph.get_tensor_by_name(vggish_params.OUTPUT_TENSOR_NAME)

    for wav_filename, embedding_filename in tqdm(list(zip(wav_filenames, embedding_filenames))):
        # Skip videos with no audio file, and videos already embedded in an
        # earlier run (this makes the cell resumable).
        if not os.path.exists(wav_filename) or os.path.exists(embedding_filename):
            continue

        # Convert the wav file into the model's input examples, then run them
        # through the network in a single batch.
        examples_batch = vggish_input.wavfile_to_examples(wav_filename)
        embedding_batch = sess.run(embedding_tensor,
                                   feed_dict={features_tensor: examples_batch})

        lengths.append(embedding_batch.shape[0])
        # NOTE: np.savetxt uses a single space as its default delimiter, so the
        # ".csv" files written here are space-separated, not comma-separated.
        np.savetxt(embedding_filename, embedding_batch)

Instructions for updating:
Please use `layer.__call__` method instead.
Instructions for updating:
Use keras.layers.Flatten instead.
INFO:tensorflow:Restoring parameters from models/vggish/vggish_model.ckpt


HBox(children=(FloatProgress(value=0.0, max=590.0), HTML(value='')))




In [7]:
# Report how many embedding rows were produced per video.
# `lengths` only has entries for files embedded by the extraction loop in this
# session, so it is empty when every embedding already existed on disk; the
# guard avoids calling the reductions on an empty list in that case.
if lengths:
    print("Max number of embedding for a video:", np.max(lengths))
    print("Min number of embedding for a video:", np.min(lengths))
    # NumPy has no `np.avg` (it raised AttributeError); `np.mean` computes the
    # arithmetic mean.
    print("Avg number of embedding for a video:", np.mean(lengths))