# Evaluate the Audio (Outputs)

Mainly for the Lyra (model 1) and Pegasus (model 2) models.

In [None]:
import numpy as np
import librosa
import librosa.display
import scipy.signal
from sklearn.metrics import pairwise_distances
import matplotlib.pyplot as plt

## Evaluation Function

In [None]:
# load audio file
def load_audio(filename):
    """Read an audio file with librosa and return ``(samples, sample_rate)``.

    ``sr=None`` asks librosa to keep the file's native sampling rate
    rather than resampling to its default rate.
    """
    samples, native_rate = librosa.load(filename, sr=None)
    return samples, native_rate

# calculate SNR
def calculate_snr(signal, noise):
    """Return the signal-to-noise ratio in decibels.

    The ratio is ``10 * log10(mean(signal**2) / mean(noise**2))``.

    Args:
        signal: Samples of the signal (array-like of numbers).
        noise: Samples of the noise (array-like of numbers).

    Returns:
        The SNR in dB. ``inf`` when the noise power is zero and the signal
        power is positive, ``-inf`` when only the signal power is zero,
        and ``nan`` when both powers are zero.
    """
    # Work in float64: squaring integer PCM data (e.g. int16) in its own
    # dtype silently overflows, and plain Python lists do not support `**`.
    signal = np.asarray(signal, dtype=np.float64)
    noise = np.asarray(noise, dtype=np.float64)

    signal_power = np.mean(signal**2)
    noise_power = np.mean(noise**2)

    # Handle the degenerate cases explicitly instead of relying on
    # division-by-zero / log-of-zero runtime warnings.
    if noise_power == 0:
        return float("inf") if signal_power > 0 else float("nan")
    if signal_power == 0:
        return float("-inf")

    return 10 * np.log10(signal_power / noise_power)

# calculate a THD-style ratio (non-harmonic residual relative to the whole signal)
def calculate_thd(signal, sr):
    """Return the share of the signal that is not in its harmonic component.

    The harmonic component is extracted with ``librosa.effects.harmonic``;
    the result is the L2 norm of ``signal - harmonics`` divided by the L2
    norm of ``signal``.
    NOTE(review): this is a THD-like proxy, not the classical
    harmonics-over-fundamental definition of THD - confirm it is the
    intended metric.

    Args:
        signal: Audio samples (array-like).
        sr: Sample rate. Currently unused; kept so existing callers keep
            working.

    Returns:
        The residual-to-signal ratio, or ``0.0`` for a silent or empty
        signal (where the ratio would otherwise be 0/0 = NaN).
    """
    signal = np.asarray(signal)
    total_energy = np.sum(signal**2)

    # A silent (or empty) signal has no residual; skip the decomposition
    # and avoid the 0/0 division.
    if total_energy == 0:
        return 0.0

    harmonics = librosa.effects.harmonic(signal)
    residual = signal - harmonics
    return np.sqrt(np.sum(residual**2)) / np.sqrt(total_energy)

# calculate MFCC
def calculate_mfcc(signal, sr, n_mfcc=13):
    """Return the MFCC matrix of ``signal`` as computed by librosa.

    Args:
        signal: Audio samples.
        sr: Sample rate of ``signal``.
        n_mfcc: Number of coefficients to request (13 by default).
    """
    return librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=n_mfcc)

# Calculate the "resolution" of the audio signal (sum of absolute sample-to-sample differences)
def calculate_resolution(signal):
    """Return the sum of absolute differences between consecutive samples."""
    step_sizes = np.abs(np.diff(signal))
    return step_sizes.sum()

# Calculate the diversity of generated audio (distance metrics based on MFCC)
def calculate_diversity(audio_signals, sr):
    """Return the mean pairwise distance between flattened MFCCs.

    Each signal is converted to MFCCs with ``calculate_mfcc`` (all at the
    same ``sr``), flattened into a vector, and compared with every other
    vector through ``pairwise_distances``.

    NOTE(review): the mean is taken over the full distance matrix, so the
    zero self-distances on the diagonal are included - confirm that is
    intended. Presumably all signals must produce MFCC matrices of the same
    size for the vectors to be comparable.
    """
    feature_vectors = [
        calculate_mfcc(samples, sr).flatten() for samples in audio_signals
    ]
    distance_matrix = pairwise_distances(feature_vectors)
    return np.mean(distance_matrix)

## Load the Audio Files

In [None]:
# Paths of the generated clips: three samples per model.
model1_output1, model2_output1 = (
    "models outputs/model1_1.wav",
    "models outputs/model2_1.wav",
)
model1_output2, model2_output2 = (
    "models outputs/model1_2.wav",
    "models outputs/model2_2.wav",
)
model1_output3, model2_output3 = (
    "models outputs/model1_3.wav",
    "models outputs/model2_3.wav",
)

# Load every clip, keeping the samples and the sample rate side by side.
(model1_signal1, model1_sr1), (model2_signal1, model2_sr1) = (
    load_audio(model1_output1),
    load_audio(model2_output1),
)
(model1_signal2, model1_sr2), (model2_signal2, model2_sr2) = (
    load_audio(model1_output2),
    load_audio(model2_output2),
)
(model1_signal3, model1_sr3), (model2_signal3, model2_sr3) = (
    load_audio(model1_output3),
    load_audio(model2_output3),
)

### Evaluation for Audio1

In [None]:
# THD-style ratio of the first clip, for model 1 and model 2
thd1, thd2 = (
    calculate_thd(model1_signal1, model1_sr1),
    calculate_thd(model2_signal1, model2_sr1),
)
print(f'THD (model 1): {thd1}')
print(f'THD (model 2): {thd2}')

# resolution of the first clip, for model 1 and model 2
resolution1, resolution2 = (
    calculate_resolution(model1_signal1),
    calculate_resolution(model2_signal1),
)
print(f'Resolution (model 1): {resolution1}')
print(f'Resolution (model 2): {resolution2}')

THD (model 1): 0.11003703624010086
THD (model 2): 0.11261150985956192
Resolution (model 1): 17528.90234375
Resolution (model 2): 17222.611328125


### Evaluation for Audio2

In [None]:
# THD-style ratio of the second clip, for model 1 and model 2
thd1, thd2 = (
    calculate_thd(model1_signal2, model1_sr2),
    calculate_thd(model2_signal2, model2_sr2),
)
print(f'THD (model 1): {thd1}')
print(f'THD (model 2): {thd2}')

# resolution of the second clip, for model 1 and model 2
resolution1, resolution2 = (
    calculate_resolution(model1_signal2),
    calculate_resolution(model2_signal2),
)
print(f'Resolution (model 1): {resolution1}')
print(f'Resolution (model 2): {resolution2}')

THD (model 1): 0.11390731483697891
THD (model 2): 0.1219075471162796
Resolution (model 1): 18634.3828125
Resolution (model 2): 19353.29296875


### Evaluation for Audio3

In [None]:
# THD-style ratio of the third clip, for model 1 and model 2
thd1, thd2 = (
    calculate_thd(model1_signal3, model1_sr3),
    calculate_thd(model2_signal3, model2_sr3),
)
print(f'THD (model 1): {thd1}')
print(f'THD (model 2): {thd2}')

# resolution of the third clip, for model 1 and model 2
resolution1, resolution2 = (
    calculate_resolution(model1_signal3),
    calculate_resolution(model2_signal3),
)
print(f'Resolution (model 1): {resolution1}')
print(f'Resolution (model 2): {resolution2}')

THD (model 1): 0.10915417969226837
THD (model 2): 0.11625958234071732
Resolution (model 1): 18693.884765625
Resolution (model 2): 19340.333984375


## Diversity Evaluation

### Model Lyra

In [None]:
# calculate diversity
model1_audio_files = [model1_output1, model1_output2, model1_output3]

# Load each file exactly once, keeping (samples, sample_rate) pairs, instead
# of reading the first file a second time just to get its sample rate.
loaded_clips = [load_audio(file) for file in model1_audio_files]
audio_signals = [samples for samples, _ in loaded_clips]
# The first clip's sample rate is used for all MFCCs.
# NOTE(review): assumes every clip shares this rate - confirm.
sr = loaded_clips[0][1]

diversity = calculate_diversity(audio_signals, sr)
print(f'Diversity: {diversity}')

Diversity: 363.7644348144531


### Model Pegasus

In [None]:
# calculate diversity
# These are the model 2 (Pegasus) clips, so they get their own name rather
# than overwriting the model 1 list.
model2_audio_files = [model2_output1, model2_output2, model2_output3]

# Load each file exactly once, keeping (samples, sample_rate) pairs, instead
# of reading the first file a second time just to get its sample rate.
loaded_clips = [load_audio(file) for file in model2_audio_files]
audio_signals = [samples for samples, _ in loaded_clips]
# The first clip's sample rate is used for all MFCCs.
# NOTE(review): assumes every clip shares this rate - confirm.
sr = loaded_clips[0][1]

diversity = calculate_diversity(audio_signals, sr)
print(f'Diversity: {diversity}')

Diversity: 621.8104858398438
