In [None]:
import tensorflow as tf
import numpy as np
import scipy
from scipy import misc
import glob
from PIL import Image
import os
import matplotlib.pyplot as plt
import librosa
from keras import layers
from keras.layers import (Input, Add, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, 
                          Conv2D, AveragePooling2D, MaxPooling2D, GlobalMaxPooling2D)
from keras.models import Model, load_model
from keras.preprocessing import image
from keras.utils import layer_utils
import pydot
from IPython.display import SVG, Audio
from keras.layers import Dropout, GlobalAveragePooling2D
from keras.utils.vis_utils import model_to_dot
from keras.utils import plot_model
from keras.optimizers import Adam, Adadelta
from keras.initializers import glorot_uniform
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
from pydub import AudioSegment
import shutil
import keras.backend as K
from keras.preprocessing.image import ImageDataGenerator
import random
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, plot_confusion_matrix
from HelperFunctions import *

# Enable memory growth on every GPU TensorFlow can see.
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpu_devices:
    tf.config.experimental.set_memory_growth(gpu, True)

In [None]:
# Set the seed value for experiment reproducibility.
# Python's built-in `random` module is seeded as well, because this notebook
# uses random.shuffle to pick the files that make up the test split.
seed = 42
random.seed(seed)
tf.random.set_seed(seed)
np.random.seed(seed)

In [None]:
# Genre labels; each label is also the name of a sub-directory under the
# train and test folders.
genres = 'blues classical country disco hiphop jazz metal pop reggae rock'
genres = genres.split()

directory = "content/spectrograms3sec/train/"
test_directory = "content/spectrograms3sec/test/"
test_files_per_genre = 200  # images moved from train to test for each genre

for g in genres:
  genre_train_dir = os.path.join(directory, g)
  genre_test_dir = os.path.join(test_directory, g)

  # Create the destination first so the emptiness check below cannot fail
  # when the test folder for this genre does not exist yet.
  os.makedirs(genre_test_dir, exist_ok=True)

  # Split only once: a non-empty test folder means files were already moved.
  if len(os.listdir(genre_test_dir)) == 0:
    # os.listdir returns entries in arbitrary order; sort before shuffling so
    # the selection is repeatable when the RNG is seeded.
    filenames = sorted(os.listdir(genre_train_dir))
    random.shuffle(filenames)
    test_files = filenames[:test_files_per_genre]

    for f in test_files:
      shutil.move(os.path.join(genre_train_dir, f), genre_test_dir)


In [None]:
# Training and validation batches are read straight from the genre folders:
# pixel values are multiplied by 1/255, images are resized to 128x128 RGBA,
# labels are one-hot ('categorical') and each batch holds 64 images.
train_dir = "content/spectrograms3sec/train/"
train_datagen = ImageDataGenerator(rescale=1./255)
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(128,128),
    color_mode="rgba",
    class_mode='categorical',
    batch_size=64,
)

validation_dir = "content/spectrograms3sec/test/"
vali_datagen = ImageDataGenerator(rescale=1./255)
vali_generator = vali_datagen.flow_from_directory(
    validation_dir,
    target_size=(128,128),
    color_mode='rgba',
    class_mode='categorical',
    batch_size=64,
)

In [None]:
def GenreModel(input_shape = (128,128, 4),classes=10):
  """Build the genre-classification CNN.

  The network is three Conv2D -> MaxPooling2D -> Dropout stages (32, 64 and
  128 filters), followed by Flatten, a Dense layer of `classes` units,
  Dropout, and a final softmax Dense layer of `classes` units.

  Args:
    input_shape: shape of one input image, passed to keras `Input`.
    classes: number of output classes (size of the softmax layer).

  Returns:
    An uncompiled keras `Model` named 'GenreModel'.
  """
  X_input = Input(input_shape)

  # Each stage must consume the output of the previous one. The earlier
  # version fed X_input into every Conv2D, which silently dropped the first
  # two stages from the graph.
  # NOTE(review): the conv layers have no activation (linear); confirm that
  # this is intended.
  X = X_input
  for filters in (32, 64, 128):
    X = Conv2D(filters, kernel_size=(3,3))(X)
    X = MaxPooling2D((2,2))(X)
    X = Dropout(rate=0.3)(X)

  X = Flatten()(X)

  X = Dense(classes)(X)

  X = Dropout(rate=0.3)(X)

  X = Dense(classes, activation='softmax')(X)

  model = Model(inputs=X_input,outputs=X,name='GenreModel')

  return model

In [None]:
# Optional checkpointing, currently disabled:
# checkpoint_path = "saved_cnn3/cp.ckpt"
# checkpoint_dir = os.path.dirname(checkpoint_path)
# cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
#                                                 save_weights_only=False,
#                                                 verbose=1)

# Build the network for 128x128 RGBA inputs and 10 classes, then compile it
# with a default Adadelta optimizer, categorical cross-entropy loss and
# accuracy as the reported metric.
model = GenreModel(input_shape=(128,128, 4), classes=10)
opt = Adadelta()
model.compile(
    optimizer=opt,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer summary of the compiled model.
model.summary()

In [8]:
# `fresh_model` is True only when the model was trained in this session,
# i.e. when `model_history` holds a training History that can be plotted.
fresh_model = False
model_history = None

if os.path.exists("saved/saved_cnn5_3"):
    # load_model returns a Model, not a History, so it must replace `model`;
    # otherwise the following cells would evaluate the untrained network.
    # `load_model` is imported from keras.models at the top of the notebook
    # (the bare name `keras` is never bound).
    model = load_model("saved/saved_cnn5_3")
else:
    model_history = model.fit(train_generator,epochs=80,validation_data=vali_generator)
    model.save("saved/saved_cnn5_3")
    fresh_model = True




In [None]:
# Training curves exist only when the model was fitted in this session.
# The duck-typed check (a `history` dict plus an `epoch` attribute) keeps the
# cell from crashing when `model_history` is not a training History.
if (fresh_model
        and isinstance(getattr(model_history, 'history', None), dict)
        and hasattr(model_history, 'epoch')):
    metrics = model_history.history
    epochs_run = model_history.epoch

    plt.figure(figsize=(16,6))

    # Left panel: training vs. validation loss, both plotted against the epoch index.
    plt.subplot(1,2,1)
    plt.plot(epochs_run, metrics['loss'])
    plt.plot(epochs_run, metrics['val_loss'])
    plt.legend(['loss', 'val_loss'])
    plt.ylim([0, max(plt.ylim())])
    plt.xlabel('Epoch')
    plt.ylabel('Loss [CrossEntropy]')

    # Right panel: training vs. validation accuracy in percent.
    plt.subplot(1,2,2)
    plt.plot(epochs_run, 100*np.array(metrics['accuracy']))
    plt.plot(epochs_run, 100*np.array(metrics['val_accuracy']))
    plt.legend(['accuracy', 'val_accuracy'])
    plt.ylim([0, 100])
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy [%]')

In [None]:
# Evaluate on the held-out generator. The generator already yields batches
# (64 images each), so `batch_size` must not be passed to evaluate().
test_loss, test_acc = model.evaluate(vali_generator)
print("The test Loss is :", test_loss)
print("\nThe Best test Accuracy is :", test_acc*100)

In [None]:
class estimator:
  """Thin classifier-style wrapper around an object exposing `predict`.

  Instances carry `_estimator_type = 'classifier'` and a `classes_` list, and
  `predict` turns the wrapped model's output into class indices.
  """
  _estimator_type = ''
  classes_=[]

  def __init__(self, model, classes):
    """Keep the wrapped model and its class labels; tag the instance as a classifier."""
    self.classes_ = classes
    self._estimator_type = 'classifier'
    self.model = model

  def predict(self, X):
    """Return the argmax along axis 1 of the wrapped model's prediction for X."""
    return self.model.predict(X).argmax(axis=1)

# Wrap the network so it exposes the classifier-style interface defined above.
classifier = estimator(model, genres)
figsize = (12,12)

# Fetch every validation batch once, split the (images, labels) pairs, stack
# them into single arrays and convert the one-hot labels to class indices.
batches = [vali_generator[i] for i in range(len(vali_generator))]
x, y = zip(*batches)
x_val = np.vstack(x)
y_val = np.argmax(np.vstack(y), axis=1)

In [None]:
# Confusion matrix of the wrapped model on the stacked validation data,
# normalised with normalize='true' and drawn on a dedicated axes.
cm_fig, cm_ax = plt.subplots(figsize=figsize)
ConfusionMatrixDisplay.from_estimator(
    classifier,
    x_val,
    y_val,
    display_labels=genres,
    normalize='true',
    cmap='Blues',
    ax=cm_ax,
)

In [None]:
import pathlib
input_dir = pathlib.Path("input/")
audio_file_name = "chopin.wav"
sample_dir = "input/samples/"
audio_seg_dir = "input/audio_segments/"
spec_dir = "input/mfccs_segments/"

# Create each working directory independently, so a partially present tree
# (e.g. "input/" exists but "input/samples/" does not) is repaired as well.
# spec_dir is deliberately not created here, matching the previous behaviour.
# NOTE(review): presumably audiosToGraph creates spec_dir itself — confirm.
for required_dir in (input_dir, sample_dir, audio_seg_dir):
    os.makedirs(required_dir, exist_ok=True)

sample_path = os.path.join(sample_dir, audio_file_name)
if not os.path.isfile(sample_path):
    # Fail with an actionable message instead of a decoder error from librosa.
    raise FileNotFoundError(
        f"Audio sample not found: place '{audio_file_name}' in '{sample_dir}' and re-run this cell."
    )

sample, sample_sr = librosa.load(sample_path)
sample_duration = int(librosa.get_duration(y=sample, sr=sample_sr))
Audio(sample, rate=sample_sr)

In [None]:
# chopAudio is not defined in this notebook (presumably it comes from the
# HelperFunctions star import — confirm). It receives the path of the sample
# file and the audio-segment directory.
chopAudio(os.path.join(sample_dir, audio_file_name), audio_seg_dir)

In [None]:
# audiosToGraph is not defined in this notebook (presumably it comes from the
# HelperFunctions star import — confirm). It receives the audio-segment
# directory, the image directory and type="mfcc".
# NOTE(review): the model here is fed from "spectrograms3sec" folders, while this
# call requests "mfcc" — verify the two representations are meant to match.
audiosToGraph(audio_seg_dir, spec_dir,type="mfcc")

In [None]:
# Score every rendered segment image with the model.
# The target size is read from the model, so the images always match the
# network input (the previous hard-coded 26x65 does not fit a 128x128 model).
input_height, input_width = model.input_shape[1:3]

segment_images = []
for af in sorted(os.listdir(spec_dir)):
    # Only PNG renderings are model inputs; skip anything else in the folder.
    if not af.lower().endswith('.png'):
        continue
    image_data = load_img(os.path.join(spec_dir, af), color_mode='rgba',
                          target_size=(input_height, input_width))
    # Named `segment_array` so the imported keras `image` module is not shadowed.
    segment_array = img_to_array(image_data)
    segment_images.append(segment_array)

if segment_images:
    # One batched forward pass; pixel values are scaled by 1/255, the same
    # rescaling the data generators apply.
    segment_batch = np.stack(segment_images) / 255
    # One probability vector per segment, in the sorted file order.
    predictions = list(model.predict(segment_batch))
else:
    predictions = []

In [None]:
# Average the per-segment class probabilities and report the most likely genre.
if len(predictions) == 0:
    # Without this guard the averaging fails with an opaque TypeError.
    raise ValueError("No segment predictions available; run the prediction cell on at least one segment image first.")

# Kept as a list of per-class means, the same container type as before.
avg_preds = list(np.mean(np.array(predictions), axis=0))
predicted_label = np.argmax(avg_preds)

print("The Predicted Label was: " + genres[predicted_label])

In [None]:
# Bar chart of the averaged class probabilities, one bar per genre.
fig, ax = plt.subplots(figsize=(30,10))
ax.bar(genres, np.array(avg_preds))
ax.set_title("Inference Results")
plt.show()