In [1]:
import numpy as np
import os
import json
import tensorflow as tf
import matplotlib.pyplot as plt
import keras.backend as K
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout
from VGGish.vggish import VGGish
from VGGish import vggish_params
from VGGish.vggish_input import wavfile_to_examples
from DataGenerator import ClassificationDataGenerator, FingerprintingDataGenerator
from ModelLogger import ModelLogger
import time
# Seed NumPy's global random generator so NumPy-based randomness in this notebook is repeatable.
np.random.seed(42)
# Bare last expression of the cell: displays the installed TensorFlow version string.
tf.version.VERSION

Using TensorFlow backend.


'2.1.0'

In [29]:
def get_spectrograms(file_path, hop_length=1):
    """Load a WAV file as a batch of spectrogram examples with a trailing axis of size 1.

    Args:
        file_path: Path of the WAV file, passed through to ``wavfile_to_examples``.
        hop_length: Forwarded unchanged to ``wavfile_to_examples`` as ``hop_length``.

    Returns:
        The array produced by ``wavfile_to_examples`` with one extra axis of
        length 1 appended as the last dimension.
    """
    examples = wavfile_to_examples(file_path, hop_length=hop_length)
    # Append a singleton last axis (presumably the channel axis the models expect -- confirm).
    return np.expand_dims(examples, axis=-1)

In [30]:
def get_fingerprint(file_path, encoder, chorus_model, hop_length=1,
                    chorus_confidence_threshold=0.40):
    """Compute a single fingerprint vector for an audio file.

    The file is converted to spectrograms, frames flagged as chorus by
    ``is_chorus`` are dropped, the remaining frames are encoded, and the
    encodings are averaged over the frame axis.

    Args:
        file_path: Path of the WAV file to fingerprint.
        encoder: Model exposing ``predict``; maps spectrograms to embeddings.
        chorus_model: Model exposing ``predict``; used by ``is_chorus``.
        hop_length: Forwarded to ``get_spectrograms``.
        chorus_confidence_threshold: Score above which a frame is treated as
            chorus. Defaults to 0.40, the value ``is_chorus`` previously
            applied implicitly, so existing calls behave the same.

    Returns:
        The element-wise average (over axis 0) of the encoder outputs for all
        non-chorus frames.

    Raises:
        ValueError: If no non-chorus frame remains, since averaging zero
            embeddings would otherwise yield a NaN fingerprint.
    """
    spectrograms = get_spectrograms(file_path, hop_length)
    keep_mask = ~is_chorus(spectrograms, chorus_model, chorus_confidence_threshold)
    if not keep_mask.any():
        raise ValueError(
            "No non-chorus frames found in {!r}; cannot compute a fingerprint".format(file_path)
        )
    embeddings = encoder.predict(spectrograms[keep_mask])
    # Locals are released when the function returns, so no explicit `del` is needed.
    return np.average(embeddings, axis=0)

In [2]:
def get_similarity(encoder, similarity_model, spectrograms, fingerprint):
    """Score every spectrogram against one reference fingerprint.

    Args:
        encoder: Model exposing ``predict``; applied to ``spectrograms``.
        similarity_model: Model exposing ``predict``; called with the list
            ``[encoded_spectrograms, repeated_fingerprints]``.
        spectrograms: Array whose first dimension indexes the input frames.
        fingerprint: Reference fingerprint compared against every frame.

    Returns:
        Whatever ``similarity_model.predict`` returns for the paired inputs.
    """
    frame_count = spectrograms.shape[0]
    embeddings = encoder.predict(spectrograms)
    # One copy of the fingerprint per input frame, so both model inputs line up row by row.
    repeated_fp = np.array([fingerprint for _ in range(frame_count)])
    return similarity_model.predict([embeddings, repeated_fp])

In [None]:
def is_chorus(spectrograms, chorus_model, CHORUS_CONFIDENCE_THRESHOLD = 0.40):
    """Flag which spectrograms the chorus model scores above a threshold.

    Args:
        spectrograms: Batch of inputs handed to ``chorus_model.predict``.
        chorus_model: Model exposing ``predict``; expected to yield one score
            per spectrogram.
        CHORUS_CONFIDENCE_THRESHOLD: A frame is flagged when its score is
            strictly greater than this value.

    Returns:
        A 1-D boolean NumPy array with one entry per prediction. The dtype is
        boolean even when there are no predictions, so callers can always
        apply ``~mask``.

    Raises:
        ValueError: If the model returns more than one score per spectrogram.
    """
    scores = np.asarray(chorus_model.predict(spectrograms))
    if scores.ndim > 1:
        # Collapse (n, 1) to (n,); reshape raises if there is more than one score per row.
        scores = scores.reshape(scores.shape[0])
    return scores > CHORUS_CONFIDENCE_THRESHOLD

In [1]:
def custom_similarity(similarities, SIMILARITY_CONFIDENCE_THRESHHOLD = 0.4):
    """Return the fraction of similarity scores strictly above a threshold.

    Args:
        similarities: Sized iterable of similarity scores.
        SIMILARITY_CONFIDENCE_THRESHHOLD: Cut-off a score must exceed to be
            counted. The spelling of the parameter name is kept as-is so
            existing keyword callers keep working.

    Returns:
        Number of scores greater than the threshold divided by the total
        number of scores, or 0.0 when ``similarities`` is empty.
    """
    total = len(similarities)
    if total == 0:
        # No scores means no matches; avoid dividing by zero.
        return 0.0
    # Compare against the argument itself rather than a similarly named global.
    hits = sum(1 for s in similarities if s > SIMILARITY_CONFIDENCE_THRESHHOLD)
    return hits / total

In [26]:
# Location and file names of the trained models used in this notebook.
MODELS_FOLDER = os.path.join('.', 'TrainedModels')
CHORUS_MODEL_NAME = 'binary_VGGish_Chorus_v0.1.h5'
ENCODER_MODEL_NAME = 'ENCODER_fingerprinting_VGGish_16_v0.1_flatten_6.h5'
SIMILARITY_MODEL_NAME = 'SIMILARITY_fingerprinting_VGGish_16_v0.1_flatten_6.h5'

# Decision thresholds applied to the chorus and similarity model outputs.
CHORUS_CONFIDENCE_THRESHOLD = 0.40
SIMILARITY_CONFIDENCE_THRESHOLD = 0.70

# Load the three models in the same order as before: chorus, encoder, similarity.
chorus_model, encoder, fp_similarity_model = [
    tf.keras.models.load_model(os.path.join(MODELS_FOLDER, model_file))
    for model_file in (CHORUS_MODEL_NAME, ENCODER_MODEL_NAME, SIMILARITY_MODEL_NAME)
]



In [28]:
# Recording whose fingerprint serves as the reference for the comparison below.
file_for_fp = os.path.join('.', 'Data', 'Kumar', 'Akhana Jyoti Jalao.wav')
fp = get_fingerprint(
    file_path=file_for_fp,
    encoder=encoder,
    chorus_model=chorus_model,
)

In [24]:
# Time the full pipeline: spectrogram extraction, chorus filtering and similarity scoring.
start = time.time()
input_song = os.path.join('.','Data','Emma','Vanamali Vasudeva.wav')
specs = get_spectrograms(input_song)
chorus_mask = is_chorus(specs, chorus_model, CHORUS_CONFIDENCE_THRESHOLD)
# Keep only the frames not flagged as chorus before comparing against the fingerprint.
non_chorus_specs = specs[~chorus_mask]
similarities = get_similarity(encoder, fp_similarity_model, non_chorus_specs, fp)
end = time.time()

# Summary statistics over the per-frame similarity scores.
med = np.median(similarities)
# Use the constant as spelled in the configuration cell; the misspelled
# name previously used here is never defined and fails on a fresh kernel.
custom_metric = custom_similarity(similarities, SIMILARITY_CONFIDENCE_THRESHOLD)
avg = np.average(similarities)
print("Median -", med)
print("Average -", avg)
print("Custom -", custom_metric)
print("Time Taken -", end-start)

Median - 0.048038762
Average - 0.1246605
Custom - 0.02
Time Taken - 28.216921091079712
