In [None]:
import import_ipynb
import cnn_genre_classifier_spectrograms as mgr
import IPython.display as ipd
import librosa
import numpy as np
import torch
from captum.attr import IntegratedGradients

In [None]:
# Load the saved feature set and unpack its splits into notebook-level names.
# torch.load unpickles the file, so only point it at trusted data.
dataset = torch.load("./data/feature_sets/spectro10_normalized.pth")

# Inputs and labels for the train / validation / test splits.
X_train, X_validation, X_test = (
    dataset[key] for key in ("X_train", "X_validation", "X_test")
)
y_train, y_validation, y_test = (
    dataset[key] for key in ("y_train", "y_validation", "y_test")
)

# Scaling bounds stored with the data; mgr.unscale_input takes them later to
# undo the scaling of a sample.
scale_min, scale_max = dataset["scale_min"], dataset["scale_max"]

In [None]:
# Evaluate the saved model on the test split and plot its confusion matrix.
model = mgr.load_model("./data/saved_models/model-spectro10-epoch15-batch64_512-norm.pth")
model_predictions = model.get_predictions(X_test)
matrix = mgr.create_confusion_matrix(model_predictions, y_test)
mgr.plot_confusion_matrix(matrix)

# Per-class counts derived from the confusion matrix.
# NOTE(review): using column sums for false positives and row sums for false
# negatives assumes rows are true labels and columns are predictions — confirm
# against mgr.create_confusion_matrix.
matrix = matrix.numpy()
true_pos = np.diag(matrix)
false_pos = np.sum(matrix, axis=0) - true_pos
false_neg = np.sum(matrix, axis=1) - true_pos

# A class with a zero denominator (never predicted / never present) would
# otherwise produce NaN and a RuntimeWarning; report 0.0 for it instead.
predicted_totals = true_pos + false_pos
actual_totals = true_pos + false_neg
precision = np.divide(
    true_pos,
    predicted_totals,
    out=np.zeros(true_pos.shape, dtype=float),
    where=predicted_totals != 0,
)
recall = np.divide(
    true_pos,
    actual_totals,
    out=np.zeros(true_pos.shape, dtype=float),
    where=actual_totals != 0,
)

In [None]:
# Print every test-set index whose arg-max prediction is class 9 while the
# label in y_test is class 1.
predicted_labels = model_predictions.argmax(dim=1).type(torch.LongTensor).tolist()
true_labels = y_test.type(torch.LongTensor).tolist()
for index, (predicted, actual) in enumerate(zip(predicted_labels, true_labels)):
    if predicted != 9 or actual != 1:
        continue
    print(index)

In [None]:
# Test sample under investigation, named once instead of repeating the index.
sample_index = 1787
sample = X_test[sample_index]

# Undo the dataset scaling on channel 0, convert dB to amplitude, and estimate
# a waveform from the magnitude spectrogram with Griffin-Lim.
X_db = mgr.unscale_input(sample[0], scale_min, scale_max).numpy()
X_amplitude = librosa.db_to_amplitude(X_db)
reconstruction = librosa.griffinlim(X_amplitude, hop_length=512, win_length=1024)

# Reload the model on CPU in eval mode and classify the single sample.
# Inference needs no autograd graph, so run the forward pass under no_grad;
# the printed logits then no longer carry a grad_fn.
model = mgr.load_model("./data/saved_models/model-spectro10-epoch15-batch64_512-norm.pth").cpu().eval()
with torch.no_grad():
    output = model(sample.unsqueeze(0))
prediction = torch.argmax(output)
print(output, prediction)

# Last expression of the cell: audio player for the reconstructed waveform.
ipd.Audio(reconstruction, rate=22050)

In [None]:
# Guided Backpropagation saliency for test sample 1787.
# The model is reloaded rather than reused from earlier cells.
# NOTE(review): presumably because mgr.GuidedBackprop modifies the model it
# wraps (e.g. registers hooks) — confirm.
model = mgr.load_model("./data/saved_models/model-spectro10-epoch15-batch64_512-norm.pth").cpu().eval()
gbp = mgr.GuidedBackprop(model)

# NOTE(review): this was 5, while the Integrated Gradients cell attributes
# towards class 9 and the misclassification scan selects samples predicted as
# class 9. Aligned to 9 so both saliency maps explain the same class; restore
# 5 if a different class was intended here. Assumes the second argument of
# generate_gradients is the target class index — confirm.
target_class = 9
guided_grads = gbp.generate_gradients(X_test[1787].unsqueeze(0), target_class)
pos_sal_gbp, _ = mgr.get_positive_negative_saliency(guided_grads)

# Weight the amplitude spectrogram by the positive saliency and estimate a
# waveform from the masked result.
X_masked_gbp = X_amplitude * pos_sal_gbp
reconstruction_gbp = librosa.griffinlim(X_masked_gbp, hop_length=512, win_length=1024)

# From here on X_masked_gbp holds dB values (for plotting), not amplitudes.
X_masked_gbp = librosa.amplitude_to_db(X_masked_gbp)

In [None]:
# Plot the first saliency map returned by mgr.get_positive_negative_saliency
# for the Guided Backprop gradients.
# NOTE(review): 22050 and 512 match the audio rate and hop_length used elsewhere
# in this notebook; presumably sample rate and hop length — confirm against
# mgr.plot_spectrogram.
mgr.plot_spectrogram(pos_sal_gbp, 22050, 512)

In [None]:
# Plot the unscaled spectrogram of test sample 1787 (X_db, the array that is
# fed to librosa.db_to_amplitude) for comparison with the saliency plots.
mgr.plot_spectrogram(X_db, 22050, 512)

In [None]:
# Plot the GBP-masked spectrogram; X_masked_gbp was converted to dB with
# librosa.amplitude_to_db at the end of the Guided Backprop cell.
mgr.plot_spectrogram(X_masked_gbp, 22050, 512, title="GBP masked spectrum")

In [None]:
# Audio player for the Griffin-Lim reconstruction of the GBP-masked spectrogram.
ipd.Audio(reconstruction_gbp, rate=22050)
# Optional WAV export, left disabled. `sf` is not imported in this notebook
# (presumably `import soundfile as sf` — confirm before enabling).
# sf.write("./sample_masked_gbp.wav", reconstruction_gbp, 22050)

In [None]:
# Integrated Gradients attribution for test sample 1787 towards class 9.
# The model is reloaded on CPU in eval mode rather than reused from the cells above.
model = mgr.load_model("./data/saved_models/model-spectro10-epoch15-batch64_512-norm.pth").cpu().eval()
ig_input = X_test[1787].unsqueeze(0)

# All-zero baseline taking shape, dtype and device from the input itself
# (previously hard-coded as 1 x 1 x 513 x 130), so the cell keeps working if the
# spectrogram dimensions change.
baseline = torch.zeros_like(ig_input)
ig = IntegratedGradients(model)
integrated_grads = ig.attribute(ig_input, baselines=baseline, target=9).squeeze().numpy()
pos_sal_ig, _ = mgr.get_positive_negative_saliency(integrated_grads)

# Weight the amplitude spectrogram by the positive saliency and estimate a
# waveform from the masked result.
X_masked_ig = X_amplitude * pos_sal_ig
reconstruction_ig = librosa.griffinlim(X_masked_ig, hop_length=512, win_length=1024)

# From here on X_masked_ig holds dB values (for plotting), not amplitudes.
X_masked_ig = librosa.amplitude_to_db(X_masked_ig)

In [None]:
# Plot the first saliency map returned by mgr.get_positive_negative_saliency
# for the Integrated Gradients attributions.
# NOTE(review): 22050 and 512 match the audio rate and hop_length used elsewhere
# in this notebook; presumably sample rate and hop length — confirm against
# mgr.plot_spectrogram.
mgr.plot_spectrogram(pos_sal_ig, 22050, 512)

In [None]:
# Same plot of the unscaled spectrogram (X_db) as in the Guided Backprop
# section, repeated here — presumably for comparison with the IG plots.
mgr.plot_spectrogram(X_db, 22050, 512)

In [None]:
# Plot the IG-masked spectrogram; X_masked_ig was converted to dB with
# librosa.amplitude_to_db at the end of the Integrated Gradients cell.
mgr.plot_spectrogram(X_masked_ig, 22050, 512, title="IG masked spectrum")

In [None]:
# Audio player for the Griffin-Lim reconstruction of the IG-masked spectrogram.
ipd.Audio(reconstruction_ig, rate=22050)
# Optional WAV export, left disabled. `sf` is not imported in this notebook
# (presumably `import soundfile as sf` — confirm before enabling).
# sf.write("./sample_masked_ig.wav", reconstruction_ig, 22050)