In [None]:
# Every Run

# Mount Google Drive at /content/gdrive. Later cells read and write through the
# relative prefix 'gdrive/My Drive/...' - presumably the working directory is
# /content; confirm before running outside Colab.
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
# Unzip files
!mkdir ravdess_data
#https://smartlaboratory.org/ravdess/
!unzip 'gdrive/My Drive/FYP Code/Audio_Speech_Actors_01-24.zip' -d ravdess_data
!rm -r ravdess_data/A*/03-01-02*
!rm -r ravdess_data/audio*

In [None]:
# Every Run


# Library Imports
import os
import librosa
import librosa.display
import numpy as np
import random
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
import IPython.display as ipd
from IPython.display import Audio



# Constants

# Sampling rate (Hz) passed to librosa when loading, playing and analysing clips
SAMPLE_RATE = 2 * 22050

# Emotion names indexed by the numeric label parsed from each file name.
# Index 0 is an empty placeholder so that labels 1..8 can be used directly.
EMOTIONS = [
    "",
    "Neutral",
    "Calm",
    "Happy",
    "Sad",
    "Angry",
    "Fearful",
    "Disgust",
    "Surprised",
]

In [None]:
# Load every RAVDESS clip into one fixed-width array (one row per file),
# together with the emotion label and actor id parsed from each file name.

N_CLIPS = 52*24                        # rows pre-allocated for the dataset
CLIP_SAMPLES = int(2.5*SAMPLE_RATE)    # 2.5 s at SAMPLE_RATE -> 110250 samples

X_arr = np.zeros([N_CLIPS, CLIP_SAMPLES])
labels = []
actors = []

# os.listdir() returns entries in arbitrary order. Sorting makes the row order
# identical on every run, so arrays saved by other cells (which are matched to
# the labels purely by row index) stay aligned.
i = 0
for folder in sorted(os.listdir('ravdess_data/')):
  folder_path = os.path.join('ravdess_data', folder)
  for file in sorted(os.listdir(folder_path)):
    X, sample_rate = librosa.load(os.path.join(folder_path, file), res_type='kaiser_fast',duration=2.5,sr=SAMPLE_RATE,offset=0.5)
    # Clips shorter than 2.5 s leave the tail of the row zero-padded
    X_arr[i,:X.shape[0]] = X

    # File names look like 03-01-05-01-02-01-12.wav:
    # characters 6-7 hold the emotion code, characters 18-19 the actor id
    labels.append(int(file[6:8]))
    actors.append(int(file[18:20]))

    i += 1

# Fail loudly instead of saving zero rows that have no matching label
assert i == N_CLIPS, f'expected {N_CLIPS} clips, loaded {i}'

labels = np.array(labels)
actors = np.array(actors)

np.save('gdrive/My Drive/FYP Code/Save Data/ravdess_AudioData.npy',X_arr)
np.save('gdrive/My Drive/FYP Code/Save Data/ravdess_Labels.npy',labels)
np.save('gdrive/My Drive/FYP Code/Save Data/ravdess_Actors.npy',actors)

In [None]:
# Load audio data

# Read back the three arrays written by the raw-loading cell, in the same order
ravdess_AudioData, ravdess_Labels, ravdess_Actors = [
    np.load(npy_path)
    for npy_path in (
        'gdrive/My Drive/FYP Code/Save Data/ravdess_AudioData.npy',
        'gdrive/My Drive/FYP Code/Save Data/ravdess_Labels.npy',
        'gdrive/My Drive/FYP Code/Save Data/ravdess_Actors.npy',
    )
]

In [None]:
# Test Load

# Pick one random row, print its index / label code / emotion name / actor id,
# then play it back.
idx = random.randrange(52*24)
print(
    idx,
    ravdess_Labels[idx],
    EMOTIONS[ravdess_Labels[idx]],
    ravdess_Actors[idx],
)

Audio(ravdess_AudioData[idx], rate=SAMPLE_RATE, autoplay=True)


# Label codes:
# 01 = neutral, 02 = calm, 03 = happy, 04 = sad, 05 = angry, 06 = fearful, 07 = disgust, 08 = surprised

In [None]:
def addNoise(X):
  """Return a copy of X with zero-mean Gaussian noise added.

  The noise standard deviation is the mean absolute value of X, and one
  sample is drawn per element of X from NumPy's global random state.
  X itself is not modified.
  """
  noise_std = np.abs(X).mean()
  return X + np.random.normal(0, noise_std, X.shape)

In [None]:
# Adding Noise

# Build a noise-augmented copy of every clean clip and save it to Drive.
# Reads ravdess_AudioData (the array loaded from disk) so the cell works on a
# fresh kernel; the row count comes from the array instead of a literal.
# X_arr is deliberately rebound so the previous large array can be released.
X_arr = np.zeros(ravdess_AudioData.shape)

for i, clip in enumerate(ravdess_AudioData):
  X_arr[i] = addNoise(clip)

np.save('gdrive/My Drive/FYP Code/Save Data/ravdess_AudioDataWN.npy',X_arr)

In [None]:
# Reload the noise-augmented clips from the ravdess_AudioDataWN.npy file on Drive
ravdess_AudioDataWN = np.load('gdrive/My Drive/FYP Code/Save Data/ravdess_AudioDataWN.npy')

In [None]:
# Spot-check: print the metadata of one random row and play its noisy version
idx = random.randrange(52*24)
print(
    idx,
    ravdess_Labels[idx],
    EMOTIONS[ravdess_Labels[idx]],
    ravdess_Actors[idx],
)

Audio(ravdess_AudioDataWN[idx], rate=SAMPLE_RATE, autoplay=True)

In [None]:
def mixAudio(fgn,bgn,fgnRatio):
  """Overlay two clips into one 110250-sample signal.

  Each clip is divided by its own maximum sample value and then weighted:
  the foreground by fgnRatio, the background by (1 - fgnRatio). Clips
  shorter than 110250 samples leave the tail of the output at zero.

  Parameters:
    fgn: 1-D array, foreground clip.
    bgn: 1-D array, background clip.
    fgnRatio: weight of the foreground; the background gets 1 - fgnRatio.

  Returns:
    1-D float array of length 110250.

  A clip whose maximum is 0 (e.g. pure silence / zero padding) or that is
  empty contributes nothing, instead of dividing by zero and filling the
  output with NaN.
  """
  # NOTE(review): the scale is the signed maximum, not max(|x|) - confirm that
  # this is the intended peak normalisation.
  output = np.zeros(110250)
  for clip, weight in ((fgn, fgnRatio), (bgn, 1-fgnRatio)):
    # np.max runs in C; the builtin max() iterates the array element by element
    peak = np.max(clip) if clip.shape[0] else 0
    if peak == 0:
      continue
    output[:clip.shape[0]] += weight/peak * clip

  return output


In [None]:
# Preview: mix two randomly chosen clean clips (90 % foreground) and play the result.
# Uses ravdess_AudioData, the array loaded from disk, so the cell runs on a fresh kernel.
temp = mixAudio(ravdess_AudioData[random.randint(0,52*24-1)],ravdess_AudioData[random.randint(0,52*24-1)],0.9)
Audio(temp, rate = SAMPLE_RATE, autoplay = True)

In [None]:
# Mixed Audio

# Build a copy of the dataset in which every clean clip is overlaid with a
# randomly chosen background clip, then save it to Drive.
# Reads ravdess_AudioData (the array loaded from disk) so the cell works on a
# fresh kernel.
X_arr = np.zeros(ravdess_AudioData.shape)

for i, clip in enumerate(ravdess_AudioData):
  bg_clip = ravdess_AudioData[random.randrange(ravdess_AudioData.shape[0])]
  # mixAudio requires the foreground ratio; 0.9 matches the preview cell.
  # NOTE(review): confirm 0.9 is the ratio intended for the saved dataset.
  X_arr[i] = mixAudio(clip, bg_clip, 0.9)

np.save('gdrive/My Drive/FYP Code/Save Data/ravdess_AudioDataMA.npy',X_arr)

In [None]:
# Reload the mixed-audio clips from the ravdess_AudioDataMA.npy file on Drive
ravdess_AudioDataMA = np.load('gdrive/My Drive/FYP Code/Save Data/ravdess_AudioDataMA.npy')

In [None]:
# Spot-check: print the metadata of one random row and play its mixed version.
# Uses the ravdess_* arrays loaded from disk (same names as the other
# spot-check cells) so the cell runs on a fresh kernel.
idx = random.randint(0,52*24-1)
print(idx,ravdess_Labels[idx],EMOTIONS[ravdess_Labels[idx]],ravdess_Actors[idx])

Audio(ravdess_AudioDataMA[idx], rate = SAMPLE_RATE, autoplay = True)

In [None]:
# Generate MFCCs
def extractMFCCs(audioData):
  """Compute 13 MFCCs for every row of audioData.

  Parameters:
    audioData: 2-D array with one audio clip per row.

  Returns:
    Array of shape (number of rows, 13, 216). Each clip's MFCC matrix is
    written into the top-left corner of a zero-filled 13 x 216 slot, so any
    frames the clip does not produce stay at zero.
  """
  n_coeffs, n_frames = 13, 216
  sr = np.array(SAMPLE_RATE)  # same value for every clip, so built once
  features = np.zeros([audioData.shape[0], n_coeffs, n_frames])

  for row, clip in enumerate(audioData):
    coeffs = librosa.feature.mfcc(y=clip, sr=sr, n_mfcc=n_coeffs)
    features[row, :coeffs.shape[0], :coeffs.shape[1]] = coeffs

  return features

In [None]:
# MFCC features for the clean clips
s_arr = extractMFCCs(ravdess_AudioData)

In [None]:
# Save the current s_arr as the clean-clip MFCC file; run right after the clean extraction cell
np.save('gdrive/My Drive/FYP Code/Save Data/ravdess_mfccs.npy',s_arr)

In [None]:
# MFCC features for the noise-augmented clips (rebinds s_arr)
s_arr = extractMFCCs(ravdess_AudioDataWN)

In [None]:
# Save the current s_arr as the noise-augmented MFCC file; run right after the WN extraction cell
np.save('gdrive/My Drive/FYP Code/Save Data/ravdess_mfccsWN.npy',s_arr)

In [None]:
# MFCC features for the mixed-audio clips (rebinds s_arr)
s_arr = extractMFCCs(ravdess_AudioDataMA)

In [None]:
# Save the current s_arr as the mixed-audio MFCC file; run right after the MA extraction cell
np.save('gdrive/My Drive/FYP Code/Save Data/ravdess_mfccsMA.npy',s_arr)

In [None]:
# Waveform of clean clip 2
librosa.display.waveshow(ravdess_AudioData[2], sr = SAMPLE_RATE)

In [None]:
# Waveform of clean clip 102 scaled to one tenth of its amplitude
librosa.display.waveshow(0.1*ravdess_AudioData[102], sr = SAMPLE_RATE)

In [None]:
# Waveform of noise-augmented clip 2
librosa.display.waveshow(ravdess_AudioDataWN[2], sr = SAMPLE_RATE)

In [None]:
# Waveform of clip 2 mixed with clean clip 102 at a 0.7 foreground ratio.
# NOTE(review): the foreground here is row 2 of the already-mixed array
# (ravdess_AudioDataMA), so it is mixed a second time - confirm whether
# ravdess_AudioData[2] was intended.
librosa.display.waveshow(mixAudio(ravdess_AudioDataMA[2],ravdess_AudioData[102],0.7), sr = SAMPLE_RATE)

In [None]:
# Demo of addNoise on a constant signal of ones: the mean absolute value is 1,
# so the added Gaussian noise has standard deviation 1.
temp = np.ones(110250)
librosa.display.waveshow(addNoise(temp), sr = SAMPLE_RATE)