In [None]:
import os

import import_ipynb  # keep before the notebook-module import below: enables importing .ipynb files
import cnn_genre_classifier_spectrograms as mgr
import IPython.display as ipd
import librosa
import numpy as np
import torch
from pytorch_model_summary import summary

In [None]:
# Location of the file holding the spectrograms and genre labels of every
# processed segment. It is only referenced by the commented-out
# mgr.load_data call below, which builds a fresh split instead of loading one.
DATA_PATH = "/data/shared/GTZAN-DATASET/spectrograms_10segments.npy"
# X_train, X_validation, X_test, y_train, y_validation, y_test = mgr.load_data(DATA_PATH, test_size=0.10, validation_size=0.10)

# Restore a previously saved train/validation/test split
# (presumably pre-shuffled, judging by the file name).
dataset = torch.load("./data/shuffled_sets_1.pth")
X_train, X_validation, X_test = (
    dataset[key] for key in ('X_train', 'X_validation', 'X_test')
)
y_train, y_validation, y_test = (
    dataset[key] for key in ('y_train', 'y_validation', 'y_test')
)

In [None]:
# load existing or create new model

# model = mgr.new_model()
model = mgr.load_model("./saved_models/model-spectro10-epoch15-batch512.pth")

# Build the dummy input on the device that holds the model's weights, so the
# summary also works on CPU-only machines and with a CPU model (a hard-coded
# .cuda() fails in both cases). Its spatial size is taken from the training data.
model_device = next(model.parameters()).device
dummy_input = torch.rand(
    1, 1, np.shape(X_train)[2], np.shape(X_train)[3], device=model_device
)
print(summary(model, dummy_input))

In [None]:
# training loop
# Fits `model` on the training split for 15 epochs with a batch size of 512,
# handing the validation split to fit() via validation_data. The value
# returned by fit() is kept as `history`.
# NOTE(review): `model` comes from load_model in the previous cell, so this
# presumably continues training a saved checkpoint — confirm that is intended.

history = model.fit(
    X_train, y_train,
    validation_data=(X_validation, y_validation),
    epochs=15,
    batch_size=512,
    log=True)

In [None]:
# plot results

# NOTE: this rebinds `history` to the copy stored on disk, replacing whatever
# the training cell returned; a later torch.save(history, ...) therefore
# writes this loaded copy back.
history = torch.load("./saved_models/model-spectro10-epoch15-batch512_history.pth")
mgr.plot_history(history)

# best value of each recorded metric over the whole history
print("Max_train_acc:", max(history['acc']), "  Min_train_loss:", min(history['loss']))
print("Max_val_acc:", max(history['val_acc']), "  Min_val_loss:", min(history['val_loss']))
    
# evaluate the current model on the test split
# NOTE(review): out=True presumably makes test() print its results — confirm in mgr
t_acc, t_loss = model.test(X_test, y_test, out=True)

In [None]:
# rename model
# model_name is used further down to build the history file name
# (and presumably by mgr.save_model for the checkpoint name — TODO confirm).

model.model_name = 'model-spectro10-epoch15-batch512'

In [None]:
# save model
# Hands the model and the target directory to mgr.save_model.
# NOTE(review): the file name is presumably derived from model.model_name — confirm in mgr.

mgr.save_model(model, "./saved_models")

In [None]:
# Store the training history in ./saved_models, in a file named after the model.

history_path = "./saved_models/" + model.model_name + "_history.pth"
torch.save(history, history_path)

In [None]:
# create and plot confusion matrix
# NOTE(review): predictions and labels both come from the training split
# (X_train / y_train), so the matrix describes the fit on data the model has
# seen; use X_test / y_test if it is meant to show generalization.

model_predictions = model.get_predictions(X_train)
matrix = mgr.create_confusion_matrix(model_predictions, y_train)
mgr.plot_confusion_matrix(matrix)

In [None]:
# Reload the saved checkpoint, move it to the CPU and switch it to eval mode
# for the single-sample cells below, whose input tensors are created on the CPU.
model = mgr.load_model("./saved_models/model-spectro10-epoch15-batch512.pth").cpu().eval()

In [None]:
# load and process specific musical sample, print spectrogram

file_path = "/data/shared/GTZAN-DATASET/genres_dataset/rock/rock.00090.wav"

# decode the file at a 22050 Hz sample rate
signal, sample_rate = librosa.load(file_path, sr=22050)

# STFT of the first 66150 samples (3 s at 22050 Hz). With n_fft=1024 and
# hop_length=512 this yields 513 frequency bins x 130 frames, the size the
# next cell copies into its input tensor (provided the file holds at least
# 66150 samples).
S_signal = librosa.stft(signal[0:66150], n_fft=1024, hop_length=512)

# squared magnitude -> power spectrogram
Y_signal = np.abs(S_signal) ** 2

# power -> decibels
Y_log_signal = librosa.power_to_db(Y_signal)

# 512 is the hop length used for the STFT above
mgr.plot_spectrogram(Y_log_signal, sample_rate, 512)

# last expression of the cell: rendered by the notebook as an audio player
ipd.Audio(file_path)

In [None]:
# create variables for GBP out of loaded sample

# batch of one single-channel 513 x 130 input, filled with the log-power
# spectrogram computed in the previous cell
sample_signal = torch.ones(1, 1, 513, 130)
sample_signal[0][0] = torch.tensor(Y_log_signal)
# forward pass; the index of the largest output value is kept as the class
# that the attribution cells below explain
output = model(sample_signal)
sample_prediction = torch.argmax(output)
print(output, sample_prediction)
    # mgr.plot_spectrogram(sample_signal[0][0].numpy(), 22050, 512)

In [None]:
# Find the most "neutral" constant input: among the inputs whose elements all
# equal i (i = 0..99), pick the one whose highest output value is the lowest.
#
# The probe tensors get their own names so this loop does not overwrite
# `sample_signal` / `output` from the sample cell; the guided-backprop and
# Integrated Gradients cells further down still need the real spectrogram.

# Start from +inf so the first probe always becomes the running minimum,
# also when the model's output values are larger than 1.
minimum = torch.tensor(float('inf'))
min_position = 0

with torch.no_grad():  # inference only, no autograd graph needed
    for i in range(100):
        probe_signal = torch.ones(1, 1, 513, 130) * i
        probe_output = model(probe_signal)
        prediction = torch.argmax(probe_output)
        value = probe_output[0][prediction]
        if value < minimum:
            minimum = value
            min_position = i

print(minimum, min_position)

In [None]:
# generate guided gradients and pos/neg saliency
# Explains class `sample_prediction` for the input `sample_signal`.
# NOTE(review): both names are read from the kernel state — make sure no cell
# run in between has reassigned `sample_signal`, otherwise a different input
# is explained.

gbp = mgr.GuidedBackprop(model)
guided_grads = gbp.generate_gradients(sample_signal, sample_prediction)
pos_sal, neg_sal = mgr.get_positive_negative_saliency(guided_grads)

In [None]:
# Plot the first entry of the guided gradients with the same sample rate
# (22050) and hop length (512) that were used for the sample spectrogram.
mgr.plot_spectrogram(guided_grads[0], 22050, 512)

In [None]:
from PIL import Image

def save_gradient_images(gradient, file_name):
    """Min-max normalise a gradient map and save it as ./results/<file_name>.jpg.

    Args:
        gradient: array of gradient values; it is passed on to save_image
            after being rescaled to the range 0-1.
        file_name: name of the output file, without extension.
    """
    # exist_ok avoids the race between a separate exists() check and the creation
    os.makedirs('./results', exist_ok=True)
    # normalize between 0-1
    gradient = gradient - gradient.min()
    peak = gradient.max()
    if peak > 0:
        # Out-of-place division, so integer arrays work as well. A constant
        # map (peak == 0) is left as all zeros instead of becoming NaN.
        gradient = gradient / peak
    # save image
    path_to_file = os.path.join('./results', file_name + '.jpg')
    save_image(gradient, path_to_file)

    
def save_image(im, path):
    """Write an image to ``path``.

    NumPy input is first run through format_np_output and wrapped in a PIL
    image; any other object is expected to offer a ``save(path)`` method
    itself (e.g. a PIL image) and is saved as-is.
    """
    came_as_numpy = isinstance(im, (np.ndarray, np.generic))
    image = Image.fromarray(format_np_output(im)) if came_as_numpy else im
    image.save(path)
    

def format_np_output(np_arr):
    """Bring a NumPy array into a layout and dtype that PIL can save.

    3-D channel-first input with one channel is expanded to three channels,
    and three-channel input is moved to channel-last order. Input of any
    other rank has no channel axis and keeps its layout. Arrays whose
    maximum is <= 1 are scaled by 255 and cast to uint8.

    Args:
        np_arr: NumPy array holding the image data.

    Returns:
        The reformatted array.
    """
    # Only 3-D input has a leading channel axis. Without this guard a 2-D
    # map that happens to have 1 or 3 rows is mistaken for channel-first
    # data and the transpose below raises.
    if np_arr.ndim == 3:
        # repeat first channel and convert 1xWxH to 3xWxH
        if np_arr.shape[0] == 1:
            np_arr = np.repeat(np_arr, 3, axis=0)
        # convert to WxHx3 in order to make it saveable by PIL
        if np_arr.shape[0] == 3:
            np_arr = np_arr.transpose(1, 2, 0)
    # multiply with 255 and change type to make it saveable by PIL
    if np.max(np_arr) <= 1:
        np_arr = (np_arr*255).astype(np.uint8)
    return np_arr

In [None]:
# Write the guided-backprop gradients and both saliency maps to ./results.
for grads_to_save, image_name in (
    (guided_grads, 'test1' + '_guided_bp'),
    (pos_sal, 'test2' + '_pos_sal'),
    (neg_sal, 'test3' + '_neg_sal'),
):
    save_gradient_images(grads_to_save, image_name)

In [None]:
from captum.attr import IntegratedGradients

In [None]:
# Load the epoch30-batch128 checkpoint on the CPU in eval mode; from here on
# `model` refers to this checkpoint.
# NOTE(review): `sample_prediction`, used below as the attribution target, was
# computed with whichever model was loaded when the sample cell ran — confirm
# that it is still the class to explain for this checkpoint.
model = mgr.load_model("./saved_models/model-spectro10-epoch30-batch128.pth").cpu().eval()

In [None]:
# all-zero reference input with the same (1, 1, 513, 130) size as sample_signal
baseline = torch.zeros(1, 1, 513, 130)

In [None]:
# display the model's output for the all-zero baseline
model(baseline)

In [None]:
# set up Captum's Integrated Gradients for the currently loaded model
ig = IntegratedGradients(model)

In [None]:
# Attribute the output for class `sample_prediction` to the elements of
# `sample_signal`, measured against `baseline`; return_convergence_delta=True
# additionally returns the convergence delta that is printed below.
# NOTE(review): `sample_signal` is read from the kernel state — make sure no
# earlier cell has reassigned it to something other than the spectrogram tensor.
attributions, delta = ig.attribute(sample_signal, baseline, target=sample_prediction, return_convergence_delta=True)
print('IG Attributions:', attributions)
print('Convergence Delta:', delta)

In [None]:
# Plot the attribution map of the single sample / single channel.
# `attributions` is a torch tensor, so it is converted to a NumPy array first:
# the spectrogram plotted from librosa output is a NumPy array, and the
# commented-out sample plot calls .numpy() before plotting as well.
mgr.plot_spectrogram(attributions[0][0].detach().cpu().numpy(), 22050, 512)