In [1]:
import os
import csv
import json
import sys
import numpy as np
import random

import librosa

In [2]:
# Root folder of the merged TESS + RAVDESS recordings. save_mfcc() walks this
# tree and matches emotion names against the directory paths beneath it.
sound_emotion_path = "../../databases/TESS_and_RAVDESS/"

# process_sound() assigns every segment to a split with random.random().
# Seeding the stdlib RNG here makes the train/validation/test partition
# repeatable: re-running this cell resets both the containers and the RNG.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)


def _empty_split():
    """Return a fresh container for one dataset split (independent lists)."""
    return {"mfcc": [], "label": [], "emotion": []}


# Parallel lists per split: mfcc[i], label[i] and emotion[i] describe the same
# one-second audio segment.
train_data = _empty_split()
validation_data = _empty_split()
test_data = _empty_split()

# Placeholders for a global MFCC value range. Nothing in this notebook updates
# them at the moment (the tracking and normalization code is commented out),
# so they stay at 0.
max_value = 0
min_value = 0

In [3]:
def _select_split(draw):
    """Map a uniform draw in [0, 1) to a split container: 80% train, 10% validation, 10% test."""
    if draw < 0.8:
        return train_data
    if draw < 0.9:
        return validation_data
    return test_data


def process_sound(filename, dirpath, emotion, label, sample_rate, n_mfcc, n_fft, hop_length):
    """Cut one audio file into one-second segments and store their mean MFCCs.

    The file is loaded (resampled to ``sample_rate``), split into consecutive,
    non-overlapping windows of ``sample_rate`` samples, and each window is
    reduced to a single vector of ``n_mfcc`` values by averaging the MFCC
    matrix over its time frames. Every window is then appended, together with
    ``label`` and ``emotion``, to exactly one of the module-level containers
    ``train_data`` / ``validation_data`` / ``test_data``.

    Args:
        filename: Name of the audio file inside ``dirpath``.
        dirpath: Directory that contains the file.
        emotion: Emotion group name stored alongside each segment.
        label: Integer class id stored alongside each segment.
        sample_rate: Sampling rate used for loading; also the segment length
            in samples (i.e. one second of audio).
        n_mfcc: Number of MFCC coefficients per segment.
        n_fft: FFT window size forwarded to ``librosa.feature.mfcc``.
        hop_length: Hop length forwarded to ``librosa.feature.mfcc``.

    Returns:
        None. Results are appended to the module-level split dictionaries.
    """
    file_path = os.path.join(dirpath, filename)
    # librosa resamples to `sample_rate`, so signal.size / sample_rate is the
    # duration in seconds; the returned rate is therefore not needed.
    signal, _ = librosa.load(file_path, sr=sample_rate)

    # Only whole seconds are kept so every sample covers the same duration;
    # a trailing partial second is dropped.
    n_segments = signal.size // sample_rate
    for index in range(n_segments):
        start = index * sample_rate
        segment = signal[start:start + sample_rate]

        # The audio must be passed as `y=`: it is keyword-only in
        # librosa >= 0.10, and the keyword form also works on older releases.
        coefficients = librosa.feature.mfcc(
            y=segment,
            sr=sample_rate,
            n_mfcc=n_mfcc,
            n_fft=n_fft,
            hop_length=hop_length,
        )
        # The MFCC matrix is (n_mfcc, frames); average over frames.
        mfcc = np.mean(coefficients, axis=1)

        # NOTE(review): the split is drawn per segment, so segments of one
        # recording can end up in different splits. Split per file instead if
        # train/test leakage matters for the downstream model.
        split = _select_split(random.random())
        split["mfcc"].append(mfcc.tolist())
        split["label"].append(label)
        split["emotion"].append(emotion)

In [4]:
def save_mfcc(dataset_path, n_mfcc=13, n_fft=1024, hop_length=256, sample_rate=22050):
    """Walk ``dataset_path`` and extract MFCC features from every emotion folder.

    Each directory below the root whose path contains a known emotion name has
    all of its files passed to ``process_sound`` with the matching emotion
    group and label. Directories that match no emotion name are skipped.

    Label scheme (several source emotions are merged into one class):
        0 = anger_disgust  (Angry, Disgusted)
        1 = joy            (Happy)
        2 = neutral        (Neutral)
        3 = sadness        (Sad)
        4 = surprised_fear (Suprised/Surprised, Fearful)

    Args:
        dataset_path: Root directory of the dataset.
        n_mfcc: Number of MFCC coefficients per segment.
        n_fft: FFT window size used for the MFCC computation.
        hop_length: Hop length used for the MFCC computation.
        sample_rate: Sampling rate the audio is loaded at.

    Returns:
        None. ``process_sound`` stores the features as a side effect.
    """
    # (path marker, emotion group, label). Checked in order, first match wins,
    # mirroring the precedence of the original if/elif chain.
    emotion_by_marker = (
        ("Angry", "anger_disgust", 0),
        ("Disgusted", "anger_disgust", 0),
        ("Suprised", "surprised_fear", 4),   # misspelled marker the original code matched
        ("Surprised", "surprised_fear", 4),  # also accept the correct spelling
        ("Fearful", "surprised_fear", 4),
        ("Happy", "joy", 1),
        ("Sad", "sadness", 3),
        ("Neutral", "neutral", 2),
    )

    root = os.fspath(dataset_path)
    for dirpath, dirnames, filenames in os.walk(root):
        # Sorting in place makes os.walk visit sub-folders in a stable order,
        # so a seeded RNG yields the same split on every run.
        dirnames.sort()
        print(dirpath)

        # Skip the root itself. Compare by value against the argument: the
        # previous identity check against a global only worked by accident.
        if dirpath == root:
            continue

        match = next((entry for entry in emotion_by_marker if entry[0] in dirpath), None)
        if match is None:
            continue

        _, emotion, label = match
        for filename in sorted(filenames):
            process_sound(filename, dirpath, emotion, label, sample_rate, n_mfcc, n_fft, hop_length)

save_mfcc(sound_emotion_path)

01
 -1.2940418e+01 -1.0183540e+01 -1.9061569e+01 -1.5116079e+01
 -1.0944970e+01  6.6474643e+00 -1.5926621e+00  1.2520579e+01
 -4.1049094e+00]
[-443.37802     57.33926     -1.4616545   41.151913    -4.919964
  -17.642279   -10.442412    -9.719176   -16.03972     -6.217303
   -8.926466    -5.0812273   -6.316037 ]
[-450.6265      28.99702     -0.5175898   -1.251082    -9.128572
   -5.5417185   -8.367446   -15.69403     -6.6406174   12.18183
   -2.3563216   16.175245    -2.8234756]
[-409.00372     37.685      -10.74387     -3.4439886  -14.558423
   -5.8637695  -12.978472   -23.616945    -6.107032     5.22296
  -11.028014    12.378847    -8.881022 ]
[-447.02344     21.632975    -9.837893    -4.698463    -1.4689939
   15.21125      7.557931     5.7124577   -6.46706     12.336068
  -10.164551     4.762744     4.005967 ]
[-424.02847     18.427505    -6.821861    -3.0758538  -16.958303
   -6.227956    -8.848422   -17.050512    -2.530123    12.770686
   -5.6656504   13.606583    -8.2931795]
[-42

In [5]:
# Show the global MFCC range trackers (maximum first, then minimum).
# Nothing in this notebook currently updates them, so both print as 0.
print(f"{max_value} {min_value}")

0 0


In [6]:
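# NOTE: min-max normalization to [-1, 1] is currently disabled. The JSON files
# written below therefore contain the raw mean MFCC values despite the "_norm"
# suffix. Re-enabling this requires max_value/min_value to be tracked first
# (both are still 0, which would make the denominator zero).
# nparray_list = np.array(train_data["mfcc"])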
# normalized_nparray = 2*((nparray_list-min_value)/(max_value-min_value))-1
# train_data["mfcc"] = normalized_nparray.tolist()
# print("--------done-----------")
# nparray_list = np.array(validation_data["mfcc"])
# normalized_nparray = 2*((nparray_list-min_value)/(max_value-min_value))-1
# validation_data["mfcc"] = normalized_nparray.tolist()
# print("--------done-----------")
# nparray_list = np.array(test_data["mfcc"])
# normalized_nparray = 2*((nparray_list-min_value)/(max_value-min_value))-1
# test_data["mfcc"] = normalized_nparray.tolist()
# print("--------done-----------")

In [7]:
# Persist each split to its own JSON file, in train -> validation -> test order.
split_outputs = (
    ("./tess_ravdess_train_norm.json", train_data),
    ("./tess_ravdess_validation_norm.json", validation_data),
    ("./tess_ravdess_test_norm.json", test_data),
)
for output_path, split_payload in split_outputs:
    with open(output_path, "w") as fp:
        json.dump(split_payload, fp, indent=4)