In [None]:
import pandas as pd
import numpy as np

import os
import sys
from tqdm import tqdm

import librosa
import librosa.display
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split

import warnings
# Silence every warning category unless the interpreter was started with
# explicit -W options (sys.warnoptions is empty when none were given).
if not sys.warnoptions:
    warnings.simplefilter("ignore")
# DeprecationWarning is suppressed unconditionally, even when -W options are present.
warnings.filterwarnings("ignore", category=DeprecationWarning) 

In [None]:
# Root folders of the four speech-emotion corpora. Every path keeps its
# trailing slash because later cells build file names by plain string
# concatenation (e.g. `Ravdess + dir`).
Ravdess, Crema, Tess, Savee = (
    "./datasets/RAVDESS/audio_speech_actors_01-24/",
    "./datasets/CREMA-D/AudioWAV/",
    "./datasets/TESS/TESS Toronto emotional speech set data/",
    "./datasets/SAVEE/ALL/",
)

In [None]:
# Build the RAVDESS index: one row per recording holding its emotion label
# and the path of the audio file.
ravdess_directory_list = os.listdir(Ravdess)

file_emotion = []
file_path = []
# `actor_dir` instead of `dir`, so the builtin `dir()` is not shadowed for
# the rest of the kernel session.
for actor_dir in ravdess_directory_list:
    for file in os.listdir(Ravdess + actor_dir):
        # File names are dash-separated numeric fields; the third field is
        # the code that is mapped to an emotion name below.
        part = file.split('.')[0].split('-')
        file_emotion.append(int(part[2]))
        file_path.append(Ravdess + actor_dir + '/' + file)

emotion_df = pd.DataFrame(file_emotion, columns=['Emotions'])

path_df = pd.DataFrame(file_path, columns=['Path'])
Ravdess_df = pd.concat([emotion_df, path_df], axis=1)

# Assign the mapped column back instead of calling `.replace(..., inplace=True)`
# on `Ravdess_df.Emotions`: that chained in-place call may operate on a
# temporary object and leaves the frame untouched under pandas Copy-on-Write.
ravdess_code_to_emotion = {
    1: 'neutral', 2: 'calm', 3: 'happy', 4: 'sad',
    5: 'angry', 6: 'fear', 7: 'disgust', 8: 'surprise',
}
Ravdess_df['Emotions'] = Ravdess_df['Emotions'].replace(ravdess_code_to_emotion)

# 'calm' and 'surprise' are removed from this corpus; the original index
# labels of the remaining rows are kept (no reset), as before.
Ravdess_df = Ravdess_df[~Ravdess_df['Emotions'].isin(['calm', 'surprise'])]
print(np.unique(Ravdess_df['Emotions'].values))
Ravdess_df.to_csv("ravdess_paths.csv")

In [None]:
# Build the CREMA-D index. The third underscore-separated field of each file
# name is translated to an emotion label through the table below; any other
# value is recorded as 'Unknown'.
crema_directory_list = os.listdir(Crema)

crema_code_to_emotion = {
    'SAD': 'sad',
    'ANG': 'angry',
    'DIS': 'disgust',
    'FEA': 'fear',
    'HAP': 'happy',
    'NEU': 'neutral',
}

file_emotion = []
file_path = []

for file in crema_directory_list:
    file_path.append(Crema + file)
    part = file.split('_')
    file_emotion.append(crema_code_to_emotion.get(part[2], 'Unknown'))

emotion_df = pd.DataFrame(file_emotion, columns=['Emotions'])
path_df = pd.DataFrame(file_path, columns=['Path'])

Crema_df = pd.concat([emotion_df, path_df], axis=1)
# Not the last expression of the cell, so this preview is not displayed.
Crema_df.head()
Crema_df.to_csv("crema_paths.csv")

In [None]:
# Build the TESS index: one row per .wav recording with its emotion label.
tess_directory_list = os.listdir(Tess)

file_emotion = []
file_path = []

for speaker_dir in tess_directory_list:
    for file in os.listdir(Tess + speaker_dir):
        # Filter non-audio entries BEFORE recording a label. Previously the
        # label was appended for every entry but the path only for .wav
        # files, so the two lists could differ in length and labels were
        # then paired with the wrong paths by the column-wise concat below.
        if not file.endswith(".wav"):
            continue

        # The emotion is the last underscore-separated token of the stem:
        # the third token when there are three, otherwise the second.
        part = file.split('.')[0].split('_')
        label = part[2] if len(part) == 3 else part[1]

        # 'ps' is stored as 'surprise'.
        file_emotion.append('surprise' if label == 'ps' else label)
        file_path.append(Tess + speaker_dir + '/' + file)

# TODO (translated from the original note): check which emotions to drop,
# probably 'surprise'.
emotion_df = pd.DataFrame(file_emotion, columns=['Emotions'])

path_df = pd.DataFrame(file_path, columns=['Path'])
Tess_df = pd.concat([emotion_df, path_df], axis=1)

# Normalise spelling variants so labels match the other corpora, then drop
# the 'surprise' rows (original index labels of the kept rows are preserved).
Tess_df['Emotions'] = Tess_df['Emotions'].replace(['Sad', 'Fear', 'surprised'], ['sad', 'fear', 'surprise'])
Tess_df = Tess_df[Tess_df['Emotions'] != 'surprise']

Tess_df.to_csv("tess_paths.csv")

In [None]:
# Build the SAVEE index. The emotion is encoded by the leading letters of
# the second underscore-separated part of the file name; anything not listed
# in the table below is labelled 'surprise'.
savee_directory_list = os.listdir(Savee)

savee_prefix_to_emotion = {
    'a': 'angry',
    'd': 'disgust',
    'f': 'fear',
    'h': 'happy',
    'n': 'neutral',
    'sa': 'sad',
}

file_emotion = []
file_path = []

for file in savee_directory_list:
    file_path.append(Savee + file)
    part = file.split('_')[1]
    # Dropping the last six characters leaves only the leading prefix
    # (presumably two digits plus ".wav" are removed - verify against the data).
    ele = part[:-6]
    file_emotion.append(savee_prefix_to_emotion.get(ele, 'surprise'))

emotion_df = pd.DataFrame(file_emotion, columns=['Emotions'])
path_df = pd.DataFrame(file_path, columns=['Path'])

Savee_df = pd.concat([emotion_df, path_df], axis=1)
# Not the last expression of the cell, so this preview is not displayed.
Savee_df.head()
# Remove the 'surprise' rows in place, keeping the remaining index labels.
surprise_rows = Savee_df.index[Savee_df['Emotions'] == 'surprise']
Savee_df.drop(surprise_rows, inplace=True)
Savee_df.to_csv("savee_paths.csv")

In [None]:
# Stack the four per-corpus indexes row-wise into a single table, persist it
# without the index column and preview the first rows.
corpus_frames = [Ravdess_df, Crema_df, Tess_df, Savee_df]
data_path = pd.concat(corpus_frames, axis=0)
data_path.to_csv("data_path.csv", index=False)
data_path.head()

In [None]:
# Bar chart of the number of samples per emotion in `data_path`.
# NOTE(review): the title and the output file name mention SAVEE, but the
# frame plotted here is `data_path`, the concatenation of all four corpora -
# confirm which one is intended.

# The style must be set before the axes are created; setting it after
# plotting has no effect on the current figure.
sns.set_style("dark")
plt.title('Rozkład klas dla zbioru danych SAVEE', size=16)
# Pass the column explicitly as `x`: newer seaborn releases no longer treat
# the first positional argument as the x variable.
sns.countplot(x=data_path.Emotions, palette="rocket")
plt.ylabel('Liczba próbek', size=12)
plt.xlabel('Emocje', size=12)
sns.despine(top=True, right=True, left=False, bottom=False)
# savefig does not create missing directories.
os.makedirs('./plots', exist_ok=True)
plt.savefig('./plots/rozklad_probek_dla_zbioru_SAVEE_')
plt.show()

In [None]:
# Emotion label -> Polish display name used in plot titles.
map_emotions = {
    "happy": "radość",
    "disgust": "obrzydzenie",
    "fear": "strach",
    "angry": "gniew",
    "sad": "smutek",
    "neutral": "neutralne"  # fixed typo (was "neurtalne")
}


def create_waveplot(data, sr, e):
    """Draw and show the waveform of one audio signal.

    Parameters
    ----------
    data : audio samples passed straight to librosa's waveform plotter.
    sr : sampling rate forwarded to the plotter.
    e : emotion label; must be a key of `map_emotions` (KeyError otherwise).
        Its mapped display name is inserted into the figure title.
    """
    plt.figure(figsize=(12, 3))
    e = map_emotions[e]
    plt.title('Wykres falowy dla przykładowego pliku audio z całego zbioru danych - charakter nagrania "{}"'.format(e), size=16)
    # librosa 0.10 removed `waveplot`; `waveshow` is its replacement. Fall
    # back to `waveplot` only on older installs that lack `waveshow`.
    draw_wave = getattr(librosa.display, 'waveshow', None) or librosa.display.waveplot
    draw_wave(data, sr=sr)
    plt.show()

def create_spectrogram(data, sr, e):
    """Draw a dB-scaled STFT spectrogram of one audio signal.

    `data` is transformed with librosa's STFT, its magnitude is converted to
    decibels and rendered on a linear-frequency ('hz') axis with a colour
    bar. `e` must be a key of `map_emotions`; the mapped display name goes
    into the title. The figure is left open (no explicit `plt.show()`).
    """
    spectrum = librosa.stft(data)
    spectrum_db = librosa.amplitude_to_db(abs(spectrum))
    plt.figure(figsize=(12, 3))
    display_name = map_emotions[e]
    title_text = 'Spectrogram dla przykładowego pliku audio z całego zbioru danych - charakter nagrania "{}"'.format(display_name)
    plt.title(title_text, size=16)
    # A logarithmic frequency axis is available through y_axis='log'.
    librosa.display.specshow(spectrum_db, sr=sr, x_axis='time', y_axis='hz')
    plt.colorbar()

In [None]:
def noise(data, rng=None):
    """Return `data` with additive Gaussian noise.

    The noise amplitude is 0.03 * u * max(data), where u is one uniform
    draw from [0, 1); one normal sample is then drawn per element along the
    first axis.

    Parameters
    ----------
    data : numpy array of audio samples.
    rng : optional random source exposing `uniform()` and `normal(size=...)`
        (e.g. `np.random.default_rng(seed)` or `np.random.RandomState(seed)`).
        Defaults to the global `np.random` state, which is the previous
        behaviour; pass a seeded generator for reproducible augmentation.

    Returns
    -------
    A new array; the input is not modified.
    """
    rng = np.random if rng is None else rng
    # Draw order (uniform first, then normal) is kept so that results with a
    # seeded global state are unchanged.
    noise_amp = 0.03 * rng.uniform() * np.amax(data)
    return data + noise_amp * rng.normal(size=data.shape[0])

def stretch(data, rate=0.7):
    """Time-stretch `data` by `rate` using librosa.effects.time_stretch.

    `rate` is passed by keyword: librosa >= 0.10 made it keyword-only, so the
    former positional call raises TypeError there. The keyword form is also
    accepted by older releases.
    """
    return librosa.effects.time_stretch(data, rate=rate)

def pitch(data, sampling_rate, pitch_factor=0.6):
    """Pitch-shift `data` using librosa.effects.pitch_shift.

    `sampling_rate` is forwarded as `sr` and `pitch_factor` as `n_steps`.
    Both are passed by keyword: librosa >= 0.10 made them keyword-only, so
    the former positional call raises TypeError there. The keyword form is
    also accepted by older releases.
    """
    return librosa.effects.pitch_shift(data, sr=sampling_rate, n_steps=pitch_factor)

# Load the second recording listed in `data_path` as a working sample.
# NOTE: `sample_rate` assigned here is a module-level name and therefore
# visible to functions defined in later cells.
all_paths = np.array(data_path.Path)
path = all_paths[1]
data, sample_rate = librosa.load(path)

In [None]:
def extract_features(data, sr=None):
    """Return one flat feature vector for an audio signal.

    Seven librosa feature matrices are computed and each is averaged with
    `np.mean(matrix.T, axis=0)`; the averages are concatenated in this
    order: zero-crossing rate, chroma (from the STFT magnitude), MFCC, RMS,
    mel spectrogram, spectral contrast, spectral centroid.

    Parameters
    ----------
    data : numpy array of audio samples.
    sr : sampling rate used by the rate-dependent features. When omitted the
        notebook-level `sample_rate` is used, which is what this function
        always did before the parameter existed. Prefer passing it
        explicitly so the result does not depend on another cell's state.

    Returns
    -------
    1-D numpy array of concatenated per-feature averages.
    """
    if sr is None:
        # Backward-compatible fallback to the global defined in an earlier cell.
        sr = sample_rate

    stft = np.abs(librosa.stft(data))
    feature_matrices = (
        librosa.feature.zero_crossing_rate(y=data),        # ZCR
        librosa.feature.chroma_stft(S=stft, sr=sr),        # Chroma_stft
        librosa.feature.mfcc(y=data, sr=sr),               # MFCC
        librosa.feature.rms(y=data),                       # Root mean square value
        librosa.feature.melspectrogram(y=data, sr=sr),     # Mel spectrogram
        librosa.feature.spectral_contrast(y=data, sr=sr),  # Spectral contrast
        librosa.feature.spectral_centroid(y=data, sr=sr),  # Spectral centroid
    )

    # Start from an empty default-dtype array, exactly as the incremental
    # hstack did, so the dtype of the result is unchanged.
    averaged = [np.array([])]
    averaged.extend(np.mean(matrix.T, axis=0) for matrix in feature_matrices)
    return np.hstack(averaged)


def get_features(path):
    """Load one recording and return features for it and two augmentations.

    The file is loaded with librosa starting at 0.6 s for at most 2.5 s.
    Features are extracted from three signals, in this row order:
    the loaded audio, the audio with added noise, and the audio that was
    time-stretched and then pitch-shifted.

    Returns
    -------
    2-D numpy array with one row of features per signal (three rows).
    """
    data, sample_rate = librosa.load(path, duration=2.5, offset=0.6)

    variants = (
        data,
        noise(data),
        pitch(stretch(data), sample_rate),
    )
    # Pass the rate this file was actually loaded with instead of relying on
    # a global left behind by another cell.
    return np.vstack([extract_features(signal, sr=sample_rate) for signal in variants])

In [None]:
# Extract features for every indexed recording. `get_features` returns
# several rows per file, and each row receives that file's emotion label.
X, Y = [], []
# zip() has no length, so give tqdm the total explicitly; otherwise the bar
# cannot show percentage or remaining time.
for path, emotion in tqdm(zip(data_path.Path, data_path.Emotions), total=len(data_path)):
    # Skip rows without a usable path (presumably NaN left by the
    # column-wise concat of unequal-length frames - verify).
    if not isinstance(path, str):
        continue
    feature = get_features(path)
    X.extend(feature)
    Y.extend([emotion] * len(feature))

In [None]:
# Sanity check: number of feature rows, number of labels, and the shape of
# the path column they were derived from.
(len(X), len(Y), data_path["Path"].shape)

In [None]:
# Put the feature vectors in a table, attach the labels as the last column,
# persist the table without its index and display it.
Features = pd.DataFrame(X).assign(labels=Y)
Features.to_csv('all_features.csv', index=False)
Features

In [None]:
# Separate the feature matrix (every column but the last) from the labels.
X = Features.iloc[:, :-1].values
label_column = np.array(Features['labels'].values).reshape(-1, 1)

# One-hot encode the labels into a dense array.
encoder = OneHotEncoder()
Y = encoder.fit_transform(label_column).toarray()

# NOTE(review): rows are split individually, so the augmented copies of one
# recording can land in different subsets - confirm this is acceptable.

# First split: 70 % train, 30 % held out.
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.3, random_state=0, shuffle=True
)
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)

# Second split: the held-out part is halved into test and validation sets.
x_test, x_val, y_test, y_val = train_test_split(
    x_test, y_test, test_size=0.5, random_state=0, shuffle=True
)
print(x_test.shape, y_test.shape, x_val.shape, y_val.shape)

In [None]:
# Standardise the features: statistics are fitted on the training set only
# and then applied unchanged to the test and validation sets.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)
x_val = scaler.transform(x_val)

# Append a trailing axis of length 1 to each feature matrix.
x_train = np.expand_dims(x_train, 2)
x_test = np.expand_dims(x_test, 2)
x_val = np.expand_dims(x_val, 2)

(x_train.shape, y_train.shape, x_test.shape, y_test.shape, x_val.shape, y_val.shape)

In [None]:
# np.save("./saved_datasets/all_x_train.npy", x_train)
# np.save("./saved_datasets/all_y_train.npy", y_train)
# np.save("./saved_datasets/all_x_test.npy", x_test)
# np.save("./saved_datasets/all_y_test.npy", y_test)
# np.save("./saved_datasets/all_x_val.npy", x_val)
# np.save("./saved_datasets/all_y_val.npy", y_val)