# Set-Up

In [None]:
!pip install audiosegment
!pip install soundfile 
!pip install fastdtw
!pip install scikit-bio
!pip uninstall future
!pip install future==0.13.1

In [None]:
#Connecting Drive to save model checkpoints during training and to use custom data, uncomment if needed

import os
from google.colab import drive
drive.mount('/content/drive')
%cd drive/"My Drive"/"Audio-Style-Transfer"

In [None]:
import os, time
import librosa
import librosa.display
import math 
import skbio
import itertools

import soundfile as sf
import tensorflow as tf
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt 
import pandas as pd

from scipy.spatial.distance import euclidean, cosine
from fastdtw import fastdtw
from glob import glob
from nltk import ngrams
from shutil import copyfile

# Read audio

In [None]:
# Hyperparameters

# hop size (window size = 6*hop)
hop = 192
# sampling rate
sr = 16000
# reference values to normalize data
min_level_db = -100
ref_level_db = 20

# length of the time axis of the split spectrograms fed to the generator
shape = 64
# length of the vector generated by the siamese network
vec_len = 128
# batch size
bs = 16
# constant for the siamese loss
delta = 2

In [None]:
def audio_array(path, sr=16000):
    """
    Load the A.wav / AB.wav pair found in every directory matched by a glob pattern.

    path: glob pattern selecting the sample directories, e.g. "Notifications/*"
    sr: sampling rate handed to librosa.load for both files

    Returns (x, paths): x holds one np.stack([a, ab]) array per directory and
    paths is the sorted list of matched directories, in the same order as x.
    """
    sample_dirs = sorted(glob(path))
    pairs = []
    for sample_dir in sample_dirs:
        a, _ = librosa.load(sample_dir+'/A.wav', sr=sr)
        ab, _ = librosa.load(sample_dir+'/AB.wav', sr=sr)
        # np.stack requires both waveforms to have the same number of samples
        pairs.append(np.stack([a, ab]))
    return pairs, sample_dirs

In [None]:
# Load the A/AB waveform pair of every sample directory in the notifications set.
samples, paths = audio_array(
    path="Evaluation/guitar-transfer/Notifications/*",
)

# Melody preservation evaluation 

## Pitch similarity score

Source : https://julian-urbano.info/files/publications/009-using-shape-music-compute-similarity-between-symbolic-musical-pieces.pdf

In [None]:
def compute_spans(pitches):
    """
    Describe a pitch track by the frequency spans of its sliding 4-frame windows.

    pitches : pitches[f, t] contains instantaneous frequency at bin f, time t,
        and takes value 0 at bins of non-maximal magnitude.

    Returns an array with one row [b-a, c-a, d-a] per window (a, b, c, d) of
    four consecutive per-frame maximum frequencies. The result is empty when
    the track has fewer than four frames.
    """
    n = 4  # window length
    max_freqs = np.max(pitches, axis=0)
    # A track of T frames holds T - n + 1 windows; the former bound of T - n
    # silently dropped the last one.
    n_windows = max_freqs.shape[0] - n + 1
    # Offsets of the three later frames relative to the first frame of the window.
    f_spans = [max_freqs[t + 1:t + n] - max_freqs[t] for t in range(n_windows)]
    return np.array(f_spans)

def local_matching(source_spans, target_spans):
    """
    Mean normalized local-alignment score between paired source and target spans.

    Each (source, target) pair is encoded as two skbio Protein sequences, one
    letter per distinct span value, and aligned with
    skbio.alignment.local_pairwise_align_ssw. Substituting two letters scores
    100 when their values are equal and decreases linearly to 0 at the largest
    absolute difference between any two of the (0-padded) values of the pair.
    Every alignment score is divided by 100 * len(source sequence); the mean
    over all pairs is returned.
    """
    alphabet = ['A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V', 'B', 'Z', 'X', '*']
    normalized_scores = []
    for src, tgt in zip(source_spans, target_spans):
        # distinct values of both spans in ascending order (np.unique already sorts)
        distinct = sorted(np.unique(np.concatenate([np.unique(src), np.unique(tgt)])))
        # pad with 0 so that every letter of the alphabet is assigned a value
        values = distinct + [0] * (len(alphabet) - len(distinct))
        # two-way mapping between letters and values; when a value occurs
        # several times (the 0 padding), its last letter wins
        letter_to_value = dict(zip(alphabet, values))
        value_to_letter = dict(zip(values, alphabet))
        # encode both spans as sequences for alignment
        src_seq = skbio.sequence.Protein(''.join(value_to_letter[v] for v in src))
        tgt_seq = skbio.sequence.Protein(''.join(value_to_letter[v] for v in tgt))
        # normalization constant: widest gap between any two values
        widest_gap = np.max([abs(v1 - v2) for v1 in values for v2 in values])
        # scoring matrix
        if widest_gap == 0: # in this case source and target spans are identical
            substitution_matrix = {a1: {a2: 0 for a2 in letter_to_value} for a1 in letter_to_value}
        else:
            substitution_matrix = {}
            for a1, v1 in letter_to_value.items():
                substitution_matrix[a1] = {
                    a2: int(100*(widest_gap - abs(v1 - v2))/widest_gap)
                    for a2, v2 in letter_to_value.items()
                }
        _, score, _ = skbio.alignment.local_pairwise_align_ssw(
            sequence1=src_seq,
            sequence2=tgt_seq,
            substitution_matrix=substitution_matrix,
            score_size=1,
            gap_open_penalty=255,
            gap_extend_penalty=255,
        )
        normalized_scores.append(score / (100 * len(src_seq)))
    return np.mean(normalized_scores)

def pitch_similarity_score(source_wv, stylized_wv):
    """
    Score how well the stylized waveform keeps the pitch contour of the source.

    Both waveforms go through librosa's piptrack (using the notebook-level `sr`
    and `hop`), their pitch tracks are turned into spans by compute_spans, and
    the two span sequences are compared with local_matching, whose result is
    returned.
    """
    spans = []
    for waveform in (source_wv, stylized_wv):
        # pitches[f, t] contains instantaneous frequency at bin f, time t, and
        # takes value 0 at bins of non-maximal magnitude.
        pitches, _ = librosa.core.piptrack(y=waveform, sr=sr, n_fft=6*hop, hop_length=hop, fmin=0, fmax=20000.0)
        spans.append(compute_spans(pitches))
    source_spans, target_spans = spans
    return local_matching(source_spans, target_spans)

# Rhythm preservation evaluation

## Cosine similarity of rhythmic envelopes

From http://dafx.de/paper-archive/2018/papers/DAFx2018_paper_48.pdf

In [None]:
def half_wave_rectifier(x):
    """Return x where it is positive and 0 where it is negative."""
    magnitude = abs(x)
    # x + |x| equals 2x for positive x and 0 for negative x
    return 0.5 * (x + magnitude)

def compute_spectral_flux(spec): # (hop, shape) : (freq, time)
    """
    Half-wave rectified change in magnitude between consecutive frames.

    spec: 2-D array indexed as spec[freq, time].

    Returns a flat 1-D array with one value per (frame transition, frequency
    bin), ordered frame by frame: for each t >= 1, the rectified value of
    |spec[f, t]| - |spec[f, t-1]| for every bin f.
    """
    n_frames = spec.shape[1]
    flux = []
    for t in range(1, n_frames):
        previous_frame, current_frame = spec[:, t - 1], spec[:, t]
        for before, after in zip(previous_frame, current_frame):
            flux.append(half_wave_rectifier(abs(after) - abs(before)))
    return np.array(flux)

def to_spec(wav):
    """
    Convert a waveform into a dB-scaled magnitude spectrogram.

    The STFT uses a window of 6*hop samples and a hop of `hop` samples
    (notebook-level constant); decibels are taken relative to the maximum
    magnitude of the spectrogram.
    """
    stft = librosa.stft(wav, n_fft=6*hop, hop_length=hop)
    magnitude = np.abs(stft)
    return librosa.amplitude_to_db(magnitude, ref=np.max)

def cosine_similarity_spectral_flux_score(source_spec, stylized_spec):
    """
    Cosine similarity between the spectral-flux vectors of two signals.

    source_spec, stylized_spec: despite their names, both arguments are passed
        to to_spec first, so they are expected to be waveforms rather than
        spectrograms.

    Returns 1 minus the scipy cosine distance between the two flux vectors.
    Both flux vectors must have the same length, otherwise scipy raises.
    """
    R_source = compute_spectral_flux(to_spec(source_spec))
    R_stylized = compute_spectral_flux(to_spec(stylized_spec))
    # scipy.spatial.distance.cosine is the cosine *distance* (1 - similarity);
    # convert it back so the returned value matches the function's name.
    return 1 - cosine(R_source, R_stylized)

# Run evaluation 

In [None]:
# Start timestamp; not read in this cell, kept in case timing is reported elsewhere.
t0 = time.time()

# One record per sample: [name, pitch similarity, spectral-flux cosine score].
x = []
for sample, path in zip(samples, paths):
    print(path)
    a, ab = sample[0], sample[1]
    pitch_sim_score = pitch_similarity_score(a, ab)
    cosine_spectral_flux_score = cosine_similarity_spectral_flux_score(a, ab)
    # Every record must carry exactly one value per entry of `columns` below.
    # The former extra field referenced an undefined name and raised NameError.
    x += [[path, pitch_sim_score, cosine_spectral_flux_score]]

columns = ["name", "pitch_similarity_score", "cosine_spectral_flux_score"]
scores_csv = "dataset/scores/notifications_transfer_objective_evaluations.csv"
# Create the output directory first so the results are not lost to a missing
# folder after the whole evaluation has run.
os.makedirs(os.path.dirname(scores_csv), exist_ok=True)
pd.DataFrame.from_records(x, columns=columns).to_csv(scores_csv)
