In [76]:
import numpy as np
import librosa
import tensorflow as tf
from audiomentations import AddGaussianNoise, TimeStretch, PitchShift
import shutil
import os
import soundfile as sf

In [77]:
num_mfcc_list = [26, 40]
n_fft_list = [800, 1024, 2048]

## SAVEE

In [78]:
import random
dataset_path = 'SAVEEE'
data_list = os.listdir(dataset_path)
train_files = []
test_files = []

labels = {'d': 0, 'h': 1, 's': 2,
          'n': 3, 'f': 4, 'a': 5}

train_data_sentiment_path = []
train_data_sentiment_value = []
train_data_sentiment_encoded_value = []

test_data_sentiment_path = []
test_data_sentiment_value = []
test_data_sentiment_encoded_value = []

with open('random_state_savee.txt', 'r') as file:
    random_state = eval(file.read())

random.setstate(random_state)

# import random
random.seed(1234)
for file_path in data_list:
    if random.random() < 0.8:
        train_files.append(file_path)
    else:
        test_files.append(file_path)

for file in train_files:
    file_path = os.path.join(dataset_path, file)
    # print(file_path)
    train_data_sentiment_path.append(file_path)
    train_data_sentiment_value.append(file[3])
    train_data_sentiment_encoded_value.append(labels[file[3]])

for file in test_files:
    file_path = os.path.join(dataset_path, file)
    # print(file_path)
    test_data_sentiment_path.append(file_path)
    test_data_sentiment_value.append(file[3])
    test_data_sentiment_encoded_value.append(labels[file[3]])

In [79]:
def turn_into_saved_dataset_SAVEE(num_mfcc,n_fft,hop_length,SAMPLE_RATE = 22050):
    # num_mfcc = 40
    # # SAMPLE_RATE = librosa.get_samplerate(train_data_sentiment_path[0])
    # SAMPLE_RATE = 22050
    # n_fft = 2048
    # hop_length = 512

    train_data = {
        "labels": [],
        "mfcc": []
    }

    test_data = {
        "labels": [],
        "mfcc": []
    }

    for path, value in zip(train_data_sentiment_path, train_data_sentiment_encoded_value):
        # print(path)
        signal, sample_rate = librosa.load(path, sr=SAMPLE_RATE)
        mfcc = librosa.feature.mfcc(
            y=signal, sr=SAMPLE_RATE, n_mfcc=num_mfcc, n_fft=n_fft, hop_length=hop_length)
        mfcc = mfcc.T
        
        train_data['labels'].append(value)
        train_data["mfcc"].append(np.asarray(mfcc))

    for path, value in zip(test_data_sentiment_path, test_data_sentiment_encoded_value):
        # print(path)
        signal, sample_rate = librosa.load(path, sr=SAMPLE_RATE)
        mfcc = librosa.feature.mfcc(
            y=signal, sr=SAMPLE_RATE, n_mfcc=num_mfcc, n_fft=n_fft, hop_length=hop_length)
        mfcc = mfcc.T
        
        test_data['labels'].append(value)
        test_data["mfcc"].append(np.asarray(mfcc))

    processed_data_value = np.asarray(train_data['mfcc'])
    processed_data_target = np.asarray(train_data["labels"])
    processed_test_value = np.asarray(test_data['mfcc'])
    processed_test_target = np.asarray(test_data["labels"])
    maxLength = max(len(x) for x in processed_data_value)

    padded_data_value = tf.keras.preprocessing.sequence.pad_sequences(
        processed_data_value, maxlen=maxLength ,dtype="float32")
    padded_test_value = tf.keras.preprocessing.sequence.pad_sequences(
        processed_test_value, maxlen=maxLength ,dtype="float32")

    np.save('preprocess_dataset/SAVEE/{}-{}-{}_data.npy'.format(num_mfcc,n_fft,hop_length), padded_data_value)
    np.save('preprocess_dataset/SAVEE/{}-{}-{}_data_target.npy'.format(num_mfcc,n_fft,hop_length), processed_data_target)
    np.save('preprocess_dataset/SAVEE/{}-{}-{}_test.npy'.format(num_mfcc,n_fft,hop_length), padded_test_value)
    np.save('preprocess_dataset/SAVEE/{}-{}-{}_test_target.npy'.format(num_mfcc,n_fft,hop_length), processed_test_target)

In [80]:
for x in num_mfcc_list:
    for y in n_fft_list:
        firsthop = int(y/4)
        secondhop = int(y/2)
        turn_into_saved_dataset_SAVEE(num_mfcc=x, n_fft=y, hop_length=firsthop)
        turn_into_saved_dataset_SAVEE(num_mfcc=x, n_fft=y, hop_length=secondhop)

  processed_data_value = np.asarray(train_data['mfcc'])
  processed_test_value = np.asarray(test_data['mfcc'])


## RAVDESS

In [83]:
data_list = []
train_files = []
test_files = []

folder_file = os.listdir("ravdess_dataset/audio_speech_actors_01-24")
for x in folder_file:
    item_file = os.listdir("ravdess_dataset/audio_speech_actors_01-24/{}".format(x))
    for y in item_file:
        data_list.append("ravdess_dataset/audio_speech_actors_01-24/{}/{}".format(x,y))
        
train_data_sentiment_path = []
train_data_sentiment_value = []
train_data_sentiment_encoded_value = []

test_data_sentiment_path = []
test_data_sentiment_value = []
test_data_sentiment_encoded_value = []

with open('random_state_savee.txt', 'r') as file:
    random_state = eval(file.read())

random.setstate(random_state)
random.seed(1234)
for file_path in data_list:
    if random.random() < 0.8:
        train_files.append(file_path)
    else:
        test_files.append(file_path)

for file in train_files:
    sentiment_code = file[-18:-16]
    if sentiment_code == '05':
        train_data_sentiment_path.append(file)
        train_data_sentiment_value.append('angry')  
        train_data_sentiment_encoded_value.append(5)        
    elif sentiment_code == '07':
        train_data_sentiment_path.append(file)
        train_data_sentiment_value.append('disgust')  
        train_data_sentiment_encoded_value.append(0)
    elif sentiment_code == '06':
        train_data_sentiment_path.append(file)
        train_data_sentiment_value.append('fear')  
        train_data_sentiment_encoded_value.append(4)
    elif sentiment_code == '03':
        train_data_sentiment_path.append(file)
        train_data_sentiment_value.append('happy')  
        train_data_sentiment_encoded_value.append(1)
    elif sentiment_code == '01':
        train_data_sentiment_path.append(file)
        train_data_sentiment_value.append('neutral')  
        train_data_sentiment_encoded_value.append(3)
    elif sentiment_code == '04':
        train_data_sentiment_path.append(file)
        train_data_sentiment_value.append('sad')  
        train_data_sentiment_encoded_value.append(2)
    else:
        pass

for file in test_files:
    sentiment_code = file[-18:-16]
    if sentiment_code == '05':
        test_data_sentiment_path.append(file)
        test_data_sentiment_value.append('angry')  
        test_data_sentiment_encoded_value.append(5)        
    elif sentiment_code == '07':
        test_data_sentiment_path.append(file)
        test_data_sentiment_value.append('disgust')  
        test_data_sentiment_encoded_value.append(0)
    elif sentiment_code == '06':
        test_data_sentiment_path.append(file)
        test_data_sentiment_value.append('fear')  
        test_data_sentiment_encoded_value.append(4)
    elif sentiment_code == '03':
        test_data_sentiment_path.append(file)
        test_data_sentiment_value.append('happy')  
        test_data_sentiment_encoded_value.append(1)
    elif sentiment_code == '01':
        test_data_sentiment_path.append(file)
        test_data_sentiment_value.append('neutral')  
        test_data_sentiment_encoded_value.append(3)
    elif sentiment_code == '04':
        test_data_sentiment_path.append(file)
        test_data_sentiment_value.append('sad')  
        test_data_sentiment_encoded_value.append(2)
    else:
        pass

In [84]:
def turn_into_saved_dataset_RAVDESS(num_mfcc,n_fft,hop_length,SAMPLE_RATE = 48000):

    train_data = {
        "labels": [],
        "mfcc": []
    }

    test_data = {
        "labels": [],
        "mfcc": []
    }

    for path, value in zip(train_data_sentiment_path, train_data_sentiment_encoded_value):
        signal, sample_rate = librosa.load(path)
        mfcc = librosa.feature.mfcc(
            y=signal, sr=SAMPLE_RATE, n_mfcc=num_mfcc, n_fft=n_fft, hop_length=hop_length)
        mfcc = mfcc.T
        
        train_data['labels'].append(value)
        train_data["mfcc"].append(np.asarray(mfcc))

    for path, value in zip(test_data_sentiment_path, test_data_sentiment_encoded_value):
        signal, sample_rate = librosa.load(path)
        mfcc = librosa.feature.mfcc(
            y=signal, sr=SAMPLE_RATE, n_mfcc=num_mfcc, n_fft=n_fft, hop_length=hop_length)
        mfcc = mfcc.T
        
        test_data['labels'].append(value)
        test_data["mfcc"].append(np.asarray(mfcc))



    processed_data_value = np.asarray(train_data['mfcc'])
    processed_data_target = np.asarray(train_data["labels"])
    processed_test_value = np.asarray(test_data['mfcc'])
    processed_test_target = np.asarray(test_data["labels"])

    maxLength = max(len(x) for x in processed_data_value)

    padded_data_value = tf.keras.preprocessing.sequence.pad_sequences(
        processed_data_value, maxlen=maxLength ,dtype="float32")
    padded_test_value = tf.keras.preprocessing.sequence.pad_sequences(
        processed_test_value, maxlen=maxLength ,dtype="float32")
    

    np.save('preprocess_dataset/RAVDESS/{}-{}-{}_data.npy'.format(num_mfcc,n_fft,hop_length), padded_data_value)
    np.save('preprocess_dataset/RAVDESS/{}-{}-{}_data_target.npy'.format(num_mfcc,n_fft,hop_length), processed_data_target)
    np.save('preprocess_dataset/RAVDESS/{}-{}-{}_test.npy'.format(num_mfcc,n_fft,hop_length), padded_test_value)
    np.save('preprocess_dataset/RAVDESS/{}-{}-{}_test_target.npy'.format(num_mfcc,n_fft,hop_length), processed_test_target)

In [85]:
for x in num_mfcc_list:
    for y in n_fft_list:
        firsthop = int(y/4)
        secondhop = int(y/2)
        turn_into_saved_dataset_RAVDESS(num_mfcc=x, n_fft=y, hop_length=firsthop)
        turn_into_saved_dataset_RAVDESS(num_mfcc=x, n_fft=y, hop_length=secondhop)

  processed_data_value = np.asarray(train_data['mfcc'])
  processed_test_value = np.asarray(test_data['mfcc'])


## CREMA-D

In [86]:
import math
import pandas as pd
def load_to_dataframe(train_folder_path, test_folder_path):
    """
    Loads train and test data from the specified file folders, and returns them as pandas DataFrame objects.
    :param train_folder_path: The path to the folder containing the train data files.
    :type train_folder_path: str
    :param test_folder_path: The path to the folder containing the test data files.
    :type test_folder_path: str
    :return: A two pandas DataFrame objects, containing the train and test data, respectively.
    :rtype: pandas.core.frame.DataFrame, pandas.core.frame.DataFrame
    """
    train_path = 'dataset/train'
    test_path = 'dataset/test'
    train_dir_list = os.listdir(train_path)
    test_dir_list = os.listdir(test_path)

    train_sentiment_value = []
    test_sentiment_value = []
    train_file_path = []
    test_file_path = []

    for file in train_dir_list:
        train_file_path.append(train_path + '/' + file)
        sentiment_code = file.split('_')
        if sentiment_code[2] == 'ANG':
            train_sentiment_value.append('angry')
        elif sentiment_code[2] == 'DIS':
            train_sentiment_value.append('disgust')
        elif sentiment_code[2] == 'FEA':
            train_sentiment_value.append('fear')
        elif sentiment_code[2] == 'HAP':
            train_sentiment_value.append('happy')
        elif sentiment_code[2] == 'NEU':
            train_sentiment_value.append('neutral')
        elif sentiment_code[2] == 'SAD':
            train_sentiment_value.append('sad')
        else:
            train_sentiment_value.append('unknown')

    for file in test_dir_list:
        test_file_path.append(test_path + '/' + file)
        sentiment_code = file.split('_')
        if sentiment_code[2] == 'ANG':
            test_sentiment_value.append('angry')
        elif sentiment_code[2] == 'DIS':
            test_sentiment_value.append('disgust')
        elif sentiment_code[2] == 'FEA':
            test_sentiment_value.append('fear')
        elif sentiment_code[2] == 'HAP':
            test_sentiment_value.append('happy')
        elif sentiment_code[2] == 'NEU':
            test_sentiment_value.append('neutral')
        elif sentiment_code[2] == 'SAD':
            test_sentiment_value.append('sad')
        else:
            test_sentiment_value.append('unknown')

    train_sentiment_df = pd.DataFrame(
        {"File_Path": train_file_path, "Target": train_sentiment_value})

    test_sentiment_df = pd.DataFrame(
        {"File_Path": test_file_path, "Target": test_sentiment_value})

    return train_sentiment_df, test_sentiment_df

In [87]:
def turn_into_data_for_model(train_df, test_df, num_mfcc, n_fft, hop_length, SAMPLE_RATE = 16000):
    # Set Variable for MFCC

    train_data = {
        "labels": [],
        "mfcc": []
    }

    test_data = {
        "labels": [],
        "mfcc": []
    }

    # Encode Categories
    labels = {'disgust': 0, 'happy': 1, 'sad': 2,
              'neutral': 3, 'fear': 4, 'angry': 5}
    train_df_encoded = train_df.replace({'Target': labels}, inplace=False)
    test_df_encoded = test_df.replace({'Target': labels}, inplace=False)

    for item, row in train_df.iterrows():
        train_data['labels'].append(train_df_encoded.iloc[item, 1])
        signal, sample_rate = librosa.load(
            train_df_encoded.iloc[item, 0], sr=SAMPLE_RATE)
        mfcc = librosa.feature.mfcc(
            y=signal, sr=SAMPLE_RATE, n_mfcc=num_mfcc, n_fft=n_fft, hop_length=hop_length)
        mfcc = mfcc.T
        train_data["mfcc"].append(np.asarray(mfcc))

        if item % 300 == 0:
            print("Train Size:" + str(math.floor(item)))

    for item, row in test_df.iterrows():
        test_data['labels'].append(test_df_encoded.iloc[item, 1])
        signal, sample_rate = librosa.load(
            test_df_encoded.iloc[item, 0], sr=SAMPLE_RATE)
        mfcc = librosa.feature.mfcc(
            y=signal, sr=SAMPLE_RATE, n_mfcc=num_mfcc, n_fft=n_fft, hop_length=hop_length)
        mfcc = mfcc.T
        test_data["mfcc"].append(np.asarray(mfcc))
        if item % 300 == 0:
            print("Test Size:" + str(math.floor(item)))


    processed_data_value = np.asarray(train_data['mfcc'])
    processed_data_target = np.asarray(train_data["labels"])
    processed_test_value = np.asarray(test_data['mfcc'])
    processed_test_target = np.asarray(test_data["labels"])

    maxLength = max(len(x) for x in processed_data_value)

    padded_data_value = tf.keras.preprocessing.sequence.pad_sequences(
        processed_data_value, maxlen=maxLength ,dtype="float32")
    padded_test_value = tf.keras.preprocessing.sequence.pad_sequences(
        processed_test_value, maxlen=maxLength ,dtype="float32")

    # print(test_data_value.shape)
    np.save('preprocess_dataset/CREMA-D/{}-{}-{}_data.npy'.format(num_mfcc,n_fft,hop_length), padded_data_value)
    np.save('preprocess_dataset/CREMA-D/{}-{}-{}_data_target.npy'.format(num_mfcc,n_fft,hop_length), processed_data_target)
    np.save('preprocess_dataset/CREMA-D/{}-{}-{}_test.npy'.format(num_mfcc,n_fft,hop_length), padded_test_value)
    np.save('preprocess_dataset/CREMA-D/{}-{}-{}_test_target.npy'.format(num_mfcc,n_fft,hop_length), processed_test_target)
    # return train_data_value, train_data_target, test_data_value, test_data_target

In [88]:
train_df, test_df = load_to_dataframe('dataset/train', 'dataset/test')


In [89]:
for x in num_mfcc_list:
    for y in n_fft_list:
        firsthop = int(y/4)
        secondhop = int(y/2)
        turn_into_data_for_model(train_df, test_df, num_mfcc=x, n_fft=y, hop_length=firsthop)
        turn_into_data_for_model(train_df, test_df, num_mfcc=x, n_fft=y, hop_length=secondhop)

Train Size:0
Train Size:300
Train Size:600
Train Size:900
Train Size:1200
Train Size:1500
Train Size:1800
Train Size:2100
Train Size:2400
Train Size:2700
Train Size:3000
Train Size:3300
Train Size:3600
Train Size:3900
Train Size:4200
Train Size:4500
Train Size:4800
Train Size:5100
Train Size:5400
Train Size:5700
Test Size:0
Test Size:300
Test Size:600
Test Size:900
Test Size:1200


  processed_data_value = np.asarray(train_data['mfcc'])
  processed_test_value = np.asarray(test_data['mfcc'])


Train Size:0
Train Size:300
Train Size:600
Train Size:900
Train Size:1200
Train Size:1500
Train Size:1800
Train Size:2100
Train Size:2400
Train Size:2700
Train Size:3000
Train Size:3300
Train Size:3600
Train Size:3900
Train Size:4200
Train Size:4500
Train Size:4800
Train Size:5100
Train Size:5400
Train Size:5700
Test Size:0
Test Size:300
Test Size:600
Test Size:900
Test Size:1200
Train Size:0
Train Size:300
Train Size:600
Train Size:900
Train Size:1200
Train Size:1500
Train Size:1800
Train Size:2100
Train Size:2400
Train Size:2700
Train Size:3000
Train Size:3300
Train Size:3600
Train Size:3900
Train Size:4200
Train Size:4500
Train Size:4800
Train Size:5100
Train Size:5400
Train Size:5700
Test Size:0
Test Size:300
Test Size:600
Test Size:900
Test Size:1200
Train Size:0
Train Size:300
Train Size:600
Train Size:900
Train Size:1200
Train Size:1500
Train Size:1800
Train Size:2100
Train Size:2400
Train Size:2700
Train Size:3000
Train Size:3300
Train Size:3600
Train Size:3900
Train Size:4200


In [91]:
x = np.load("preprocess_dataset/SAVEE/26-2048-512_data.npy")
print(x.shape)
y = np.load("preprocess_dataset/RAVDESS/26-2048-512_data.npy")
print(y.shape)
z = np.load("preprocess_dataset/CREMA-D/26-2048-512_data.npy")
print(z.shape)


(328, 308, 26)
(847, 228, 26)
(5953, 156, 26)


In [None]:
x.shape

(328, 788, 13)

In [None]:
train_data_value = np.load(
    "preprocessed_dataset_lama/CREMA-D/40-1024-512_data.npy")
train_data_target = np.load(
    "preprocessed_dataset_lama/CREMA-D/40-1024-512_data.npy")
test_data_value = np.load(
    "preprocessed_dataset_lama/CREMA-D/40-1024-512_data.npy")
test_data_target = np.load(
    "preprocessed_dataset_lama/CREMA-D/40-1024-512_data.npy")

In [None]:
train_data_value.shape

(5953, 156, 40)

In [None]:
test_data_value.shape

(5953, 156, 40)

In [None]:
train_data_target

array([[[ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
        [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
        [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
        ...,
        [-5.24512207e+02,  1.12205383e+02,  3.52632217e+01, ...,
         -4.86970520e+00, -3.55292130e+00, -1.82775474e+00],
        [-5.27892639e+02,  1.08647430e+02,  2.48086662e+01, ...,
         -6.29737377e+00, -1.24458599e+00,  8.30532551e-01],
        [-5.63413208e+02,  1.07229431e+02,  2.64484749e+01, ...,
         -4.20998049e+00,  1.06432319e+00, -3.00609899e+00]],

       [[ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
        [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
          0.00000000e+00,  0.00000000e