# Imports

In [None]:
import os
import gc
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import TimeDistributed, Input, Conv1D, GlobalMaxPooling1D, MaxPooling1D, Bidirectional, BatchNormalization, Dropout, Add, LSTM, Dense
from tensorflow.keras.callbacks import CSVLogger, ModelCheckpoint, EarlyStopping
!pip install tensorflow_addons
import tensorflow_addons as tfa
from sklearn.model_selection import KFold
from keras import backend as k
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pathlib import Path
import pathlib
import os
import urllib.request
from zipfile import ZipFile 
import warnings
import librosa
import sklearn
from sklearn.model_selection import train_test_split
from keras.models import load_model

plt.style.use('seaborn');


import itertools
plt.style.use('seaborn');

print(tf.__version__)
print(tf.test.gpu_device_name())

!nvidia-smi
warnings.filterwarnings('ignore')


# Data preparation

### Load mp3 files

In [3]:
!mkdir audiofiles
!mkdir labelled_data
!wget --no-check-certificate -r "http://cvml.unige.ch/databases/DEAM/DEAM_audio.zip" -O "audiofiles/Input.zip" 

will be placed in the single file you specified.

--2021-06-24 13:43:20--  http://cvml.unige.ch/databases/DEAM/DEAM_audio.zip
Resolving cvml.unige.ch (cvml.unige.ch)... 129.194.10.44
Connecting to cvml.unige.ch (cvml.unige.ch)|129.194.10.44|:80... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://cvml.unige.ch/databases/DEAM/DEAM_audio.zip [following]
--2021-06-24 13:43:20--  https://cvml.unige.ch/databases/DEAM/DEAM_audio.zip
Connecting to cvml.unige.ch (cvml.unige.ch)|129.194.10.44|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1343203527 (1.2G) [application/zip]
Saving to: ‘audiofiles/Input.zip’


2021-06-24 13:44:07 (27.5 MB/s) - ‘audiofiles/Input.zip’ saved [1343203527/1343203527]

FINISHED --2021-06-24 13:44:07--
Total wall clock time: 48s
Downloaded: 1 files, 1.2G in 47s (27.5 MB/s)


In [4]:
# Unpack only the .mp3 members of the downloaded archive into ./audiofiles.
zipname = "audiofiles/Input.zip"
with ZipFile(zipname, 'r') as zpfile:
  files = zpfile.namelist()
  mp3_members = [member for member in files if member.endswith('.mp3')]
  zpfile.extractall('audiofiles', members=mp3_members)

## load annotations
Per-song annotations over time. The first line of each file (the header) gives the time at which each annotation was taken, in the format `sample_xxxxxms`.

In [5]:
!wget --no-check-certificate -r "https://drive.google.com/uc?export=download&id=1JA5dpdmP8TEh4sOfW40GcyNlD_t0K2EM" -O "labelled_data/arousal.csv"
!wget --no-check-certificate -r "https://drive.google.com/uc?export=download&id=1_PVma3Eb4YleUHQgmk6Ekjs5k7Pr6uWx" -O "labelled_data/valence.csv"


will be placed in the single file you specified.

--2021-06-24 13:44:18--  https://drive.google.com/uc?export=download&id=1JA5dpdmP8TEh4sOfW40GcyNlD_t0K2EM
Resolving drive.google.com (drive.google.com)... 172.217.164.174, 2607:f8b0:4004:815::200e
Connecting to drive.google.com (drive.google.com)|172.217.164.174|:443... connected.
HTTP request sent, awaiting response... 302 Moved Temporarily
Location: https://doc-0o-38-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/qj2m08bnhg87tl5r7v8r9ikhm1fdecop/1624542225000/02572735245738743972/*/1JA5dpdmP8TEh4sOfW40GcyNlD_t0K2EM?e=download [following]
--2021-06-24 13:44:20--  https://doc-0o-38-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/qj2m08bnhg87tl5r7v8r9ikhm1fdecop/1624542225000/02572735245738743972/*/1JA5dpdmP8TEh4sOfW40GcyNlD_t0K2EM?e=download
Resolving doc-0o-38-docs.googleusercontent.com (doc-0o-38-docs.googleusercontent.com)... 142.250.73.193, 2607:f8b0:4004:829::2001
Connecting to d

In [6]:
# Annotation tables; load_files looks rows up through their `song_id` column.
arousal, valence = (
    pd.read_csv(csv_path)
    for csv_path in ('labelled_data/arousal.csv', 'labelled_data/valence.csv')
)

## Data preprocessing

Some files appeared to be corrupted (they are missing samples), so we remove them.

In [None]:
# IDs of the tracks that are removed from the dataset.
black_list = ['435', '990', '146',
              '272', '1273', '1200',
              '1174',
              '1493', '1789', ]
for r in black_list:
  mp3_path = os.path.join('audiofiles/MEMD_audio', r + '.mp3')
  # Skip files that are already gone so the cell can be re-run without raising
  # FileNotFoundError.
  if os.path.exists(mp3_path):
    os.remove(mp3_path)

We create a list of all the paths to the mp3 files

In [None]:
# glob() yields files in arbitrary filesystem order; sorting makes the dataset
# order (and therefore the unshuffled K-fold splits) reproducible between runs.
audio_paths = sorted(str(file) for file in Path().glob('./audiofiles/MEMD_audio/*.mp3'))
SR = 44100 # sampling rate, 1/s
SEGMENT_DURATION = 0.5 # length of one buffer, s
N_BUFFERS = 60 # buffers per song
BUF_LENGTH = int(SEGMENT_DURATION * SR) # samples per buffer


def load_files(data, labels, paths, unique_ids):
    """Decode every audio file and fill `data` / `labels` in place.

    For each path, the samples between second 15 and second 45 are normalised
    with `librosa.util.normalize` and cut into N_BUFFERS consecutive buffers of
    BUF_LENGTH samples. Row ``i * N_BUFFERS + j`` of `data` receives buffer ``j``
    of file ``i``; the same row of `labels` receives the matching
    ``[arousal, valence]`` pair. Both are stored as column vectors.

    Args:
        data: preallocated array, written in place (one row per buffer).
        labels: preallocated array, written in place (one row per buffer).
        paths: audio file paths. The file name without extension must be the
            numeric song id, optionally followed by ``-<suffix>``.
        unique_ids: number of leading entries of `paths` whose file name is
            exactly the song id; for later entries the id is the part of the
            name before the first ``-``.

    Raises:
        IndexError: if a song id has no row in the annotation tables.
    """
    for i, path in enumerate(paths):
        audio, _ = librosa.load(path, sr=SR)
        audio = audio[15*SR:45*SR]
        # The normaliser lives in `librosa.util`; `librosa.utils` does not exist.
        audio = librosa.util.normalize(audio)

        # Take the id from the file name itself so the result does not depend on
        # how many directories precede it.
        stem = Path(path).stem
        song_id = int(stem if i < unique_ids else stem.split('-')[0])

        # Look the annotation rows up once per song instead of once per buffer.
        valence_row = valence[valence['song_id'] == song_id].values[0]
        arousal_row = arousal[arousal['song_id'] == song_id].values[0]

        for buf in range(N_BUFFERS):
            row = i*N_BUFFERS + buf
            # Annotation `buf` is read from column buf + 1 (column 0 is
            # presumably song_id — verify against the CSV header).
            a = arousal_row[buf + 1]
            v = valence_row[buf + 1]
            labels[row] = np.array([a, v]).reshape(-1, 1)
            data[row] = audio[buf*BUF_LENGTH:(buf + 1)*BUF_LENGTH].reshape(-1, 1)

In [None]:
# Every file contributes N_BUFFERS rows.
data_size = len(audio_paths) * N_BUFFERS

# librosa.load returns float32 by default, so storing X as float32 loses nothing
# and halves the memory of this (very large) array.
X = np.empty((data_size, BUF_LENGTH, 1), dtype=np.float32)
y = np.empty((data_size, 2, 1))

# The original passed `N_FILES`, which is never defined. audio_paths only holds
# original tracks (no augmented copies), so every entry is a "unique id" file.
load_files(X, y, audio_paths, len(audio_paths))

We then save the data to `.npy` files so this step does not have to be repeated: the bottleneck is decoding the .mp3 files. We suggest creating these `.npy` files once, uploading them to your Drive, and loading them directly in Colab from here on (in place of the .mp3 files).

In [None]:
# np.save does not create missing folders, so make sure the target exists.
os.makedirs('numpy_data', exist_ok=True)
np.save('numpy_data/X', X)
np.save('numpy_data/y', y)

## Load directly data from .npy files

In [None]:
# Restore the cached feature and label arrays from their .npy files.
X, y = (np.load(npy_file) for npy_file in ('numpy_data/X.npy', 'numpy_data/y.npy'))

## Data augmentation (not applied but implemented)

In [None]:
def noiser(sample, noise_level=0.075):
  """Return `sample` with zero-mean Gaussian noise added.

  Args:
    sample: array-like signal.
    noise_level: factor applied to unit-variance Gaussian noise before it is
      added (defaults to the previously hard-coded 0.075).

  Returns:
    A new array with the same shape as `sample`; the input is not modified.
  """
  # Draw noise with the full shape of `sample`: sizing it with len(sample) only
  # fits 1-D input and silently broadcasts e.g. (n, 1) + (n,) into (n, n).
  noise = np.random.normal(0, 1, np.shape(sample)) * noise_level
  return sample + noise

In [None]:
def pitch_shifter(sample, semitones, sr=44100):
  """Shift the pitch of `sample` by `semitones` using librosa.

  Args:
    sample: audio time series.
    semitones: number of steps to shift by (negative values shift down).
    sr: sampling rate of `sample`; defaults to the previously hard-coded 44100.

  Returns:
    Whatever `librosa.effects.pitch_shift` returns for these arguments.
  """
  # Pass sr / n_steps by keyword: they are keyword-only in recent librosa
  # releases, and keywords are accepted by older releases as well.
  return librosa.effects.pitch_shift(sample, sr=sr, n_steps=semitones)

In [None]:
def data_augmenter(songs):
  """Write a noisy and a pitch-shifted copy of every song as WAV files.

  For each path in `songs` the audio is loaded at 44100 Hz and two files are
  written to the current working directory: ``<id>-n.wav`` (output of `noiser`)
  and ``<id>-ps.wav`` (output of `pitch_shifter` with a random shift between -2
  and 2 semitones), where ``<id>`` is the file name up to its first dot.

  NOTE(review): `sf` is not imported anywhere in the visible notebook. The calls
  match ``soundfile.write(file, data, samplerate)``, so ``import soundfile as sf``
  is presumably missing from the import cell — confirm and add it there.

  Args:
    songs: iterable of audio file paths.
  """
  for s in songs:
    # Load the song currently being iterated (the original read an undefined
    # `path` variable here).
    audio, _ = librosa.load(s, sr=44100)

    noise = noiser(audio)
    # randint's upper bound is exclusive, so this draws from {-2, ..., 2} exactly
    # like the deprecated np.random.random_integers(-2, 2).
    shift = pitch_shifter(audio, np.random.randint(-2, 3))
    song_id = Path(s).stem.split('.')[0]
    sf.write(song_id+'-n'+'.wav', noise, 44100)
    sf.write(song_id+'-ps'+'.wav', shift, 44100)


# Create model


In [None]:
def get_model(batch_size):
  """Build the (uncompiled) two-branch Conv1D + stacked BiLSTM regressor.

  Two convolutional branches read the same (22050, 1) input: one with kernel 32 /
  stride 8 followed by max pooling of 8, the other with kernel 128 / stride 32
  followed by max pooling of 2. Their outputs are summed, passed through dropout,
  two bidirectional LSTMs (32 units per direction), dropout again, and a final
  2-unit dense layer.

  Args:
    batch_size: batch size given to the input layer (may be None).

  Returns:
    A `tf.keras.Model` mapping the input to 2 output values.
  """
  def l2_penalty():
    # A fresh L2 regulariser (factor 0.01) for each convolution.
    return tf.keras.regularizers.l2(0.01)

  waveform = Input(shape=(22050, 1), batch_size=batch_size, name='Input')

  # Branch 1: short kernel, small stride, strong pooling.
  branch_a = Conv1D(filters=8, kernel_size=32, strides=8, activation='relu',
                    padding='causal', kernel_regularizer=l2_penalty())(waveform)
  branch_a = BatchNormalization(name='BatchNorm1')(branch_a)
  branch_a = MaxPooling1D(pool_size=8, padding='same')(branch_a)

  # Branch 2: long kernel, large stride, light pooling.
  branch_b = Conv1D(filters=8, kernel_size=128, strides=32, activation='relu',
                    padding='causal', kernel_regularizer=l2_penalty())(waveform)
  branch_b = BatchNormalization(name='BatchNorm2')(branch_b)
  branch_b = MaxPooling1D(pool_size=2, padding='same')(branch_b)

  # Merge the branches and model the sequence with two stacked BiLSTMs.
  merged = Add(name='Add')([branch_a, branch_b])
  merged = Dropout(0.2, name='Dropout1')(merged)
  sequence = Bidirectional(LSTM(32, return_sequences=True), name='BiLSTM1')(merged)

  summary = Bidirectional(LSTM(32, return_sequences=False), name='BiLSTM2')(sequence)
  summary = Dropout(0.2)(summary)

  prediction = Dense(2)(summary)
  return tf.keras.Model(inputs=waveform, outputs=prediction)

In [None]:
# Build the network without fixing the batch size.
model = get_model(batch_size=None)

We created the model with a batch size of `None`; now we compile it with the Adam optimizer. We can also plot it.

In [None]:
# Adam optimiser, MSE loss, RMSE reported as the metric.
opt = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(
    optimizer=opt,
    loss="mse",
    metrics=[tf.keras.metrics.RootMeanSquaredError()],
)

In [None]:
# Draw the layer graph, annotated with tensor shapes.
tf.keras.utils.plot_model(model=model, show_shapes=True)

# Model training

First, we define a plotting function to visualise how the RMSE and the loss evolve during training.

In [None]:
def plot_history(history):
  """Plot the training RMSE and loss curves side by side.

  Args:
    history: object returned by `model.fit`. Its `history` dict must contain
      'loss' and the RMSE metric under the default name of
      `tf.keras.metrics.RootMeanSquaredError`.
  """
  rmse_key = tf.keras.metrics.RootMeanSquaredError().name
  panels = [(rmse_key, 'Model RMSE', 'RMSE'), ('loss', 'Model loss', 'Loss')]

  plt.figure(figsize=(15,5))
  for position, (key, title, ylabel) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(history.history[key], c='r')
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel('Epochs')
    # Pass a list: ('Training') is just a string, which legend() iterates
    # character by character and therefore labels the curve "T".
    plt.legend(['Training'])


Keras calls the scheduler at the start of each epoch; at epochs 10 and 30 it divides the learning rate by 10 to help convergence.

In [None]:
def scheduler(epoch, lr):
  """Learning-rate schedule: scale `lr` by 0.1 at epochs 10 and 30.

  Args:
    epoch: index of the epoch about to run.
    lr: current learning rate.

  Returns:
    `lr * 0.1` when `epoch` is 10 or 30, otherwise `lr` unchanged.
  """
  decay_epochs = (10, 30)
  return lr * 0.1 if epoch in decay_epochs else lr

Let's also create a function that prints a summary of the results of the training process (per-fold scores and their averages).

In [None]:
def print_results(r2, rmse):
  """Print the per-fold and average R2 / RMSE scores as a text report.

  Args:
    r2: sequence with one R2 score per fold.
    rmse: sequence with one RMSE per fold (same length as `r2`).
  """
  rule = '------------------------------------------------------------------------'
  lines = [rule, 'Score Per fold:']
  for i, fold_r2 in enumerate(r2):
    lines.append(rule)
    # RMSE is an absolute error, not a percentage, so no '%' suffix is printed.
    lines.append(f'> Fold {i+1} - R2: {fold_r2} - Rmse: {rmse[i]}')
  lines.append(rule)
  lines.append('Average scores for all folds:')
  lines.append(f'> R2: {np.mean(r2)}')
  lines.append(f'> Rmse: {np.mean(rmse)} (+- {np.std(rmse)})')
  lines.append(rule)
  print('\n'.join(lines))

In [None]:
# Training hyper-parameters.
EPOCHS = 60
N_SONGS_PER_BATCH = 8
BATCH_SIZE = N_SONGS_PER_BATCH * 60
PATIENCE = 20
LEARNING_RATE = 0.0001
N_FOLDS = 10

# Per-fold CSV logs are written here; make sure the folder exists before the
# CSVLogger callback tries to open a file inside it.
output_folder = 'results'
os.makedirs(output_folder, exist_ok=True)
run_idx = 1

# Callbacks shared by every fold: learning-rate schedule and early stopping on
# the training loss.
scheduler_callback = tf.keras.callbacks.LearningRateScheduler(scheduler)
early_stop = EarlyStopping('loss', patience=PATIENCE, restore_best_weights=True)

# Unshuffled folds: each test fold is a contiguous slice of the dataset.
kfold = KFold(n_splits=N_FOLDS, shuffle=False) 
# Per-fold scores collected by the training loop.
r2=[]
rmse=[]

for idx_train, idx_test in kfold.split(X, y):
    # Number of whole batches per epoch.
    STEPS = len(idx_train) // BATCH_SIZE
    print('Run index : ' + str(run_idx))

    # One CSV log file per fold.
    log_file_name = 'Run_' + str(run_idx)
    run_idx+=1
    csv_logger = CSVLogger(os.path.join(output_folder, log_file_name), append=False)

    callbacks = [csv_logger, early_stop, scheduler_callback]

    # Start every fold from a freshly initialised model.
    # NOTE(review): the original called `get_model2`, which is not defined in the
    # visible notebook; `get_model` is the only builder defined here — confirm
    # this is the intended architecture.
    model = get_model(None)
    opt = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
    model.compile(optimizer=opt, loss="mse", metrics=[ tf.keras.metrics.RootMeanSquaredError() ])

    # Train on the training folds.
    history = model.fit(X[idx_train], y[idx_train], 
    epochs=EPOCHS, 
    steps_per_epoch=STEPS, 
    batch_size=BATCH_SIZE, 
    callbacks=callbacks, 
    verbose=1)

    # Evaluate on the held-out fold: scores[1] is the RMSE metric.
    scores = model.evaluate(X[idx_test], y[idx_test], verbose=2)
    plot_history(history)
    rmse.append(scores[1])
    metric = tfa.metrics.r_square.RSquare(y_shape=(2,))
    # Drop only the trailing size-1 axis of the labels; a bare squeeze would also
    # remove the batch axis if a fold ever held a single sample.
    metric.update_state(tf.squeeze(y[idx_test], axis=-1), model.predict(X[idx_test]))
    result = metric.result()
    r2.append(result.numpy())

    # Release the memory held by this fold's model before building the next one.
    gc.collect()
    k.clear_session();


print_results(r2, rmse)


# Model save

After having tuned the hyperparameters of the network using also validation data, we trained the model on the whole dataset and then saved it using the ModelCheckpoint callback from Keras API.

In [None]:
# Train a freshly initialised model on the whole dataset.
# NOTE(review): the original reused `model` as left by the last cross-validation
# fold, which had already been trained on 9/10 of the data; rebuilding matches
# the description above — confirm this is the intended behaviour.
model = get_model(None)
opt = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
model.compile(optimizer=opt, loss="mse", metrics=[ tf.keras.metrics.RootMeanSquaredError() ])

# Keep the weights of the epoch with the lowest training loss. The loss is the
# MSE and is logged under the key 'loss' (there is no 'mse' key), and lower is
# better, hence mode='min'.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath='./checkpoint',
    save_weights_only=True,
    monitor='loss',
    mode='min',
    save_best_only=True)

# Whole batches per epoch for the full dataset (not the value left over from the
# last cross-validation fold).
STEPS = len(X) // BATCH_SIZE

history = model.fit(X, y, 
epochs=EPOCHS, 
steps_per_epoch=STEPS, 
batch_size=BATCH_SIZE, 
callbacks=[checkpoint], 
verbose=1)


# Export the model as left by the final epoch, without optimizer state.
model.save('best_model.hdf5', include_optimizer=False)