# <center>Data Exploration</center>

In [2]:
import torchaudio
from tqdm.notebook import tqdm
import parselmouth
from feature_extraction_utils import *
import librosa
import librosa.display
import scipy
import random
import os
import numpy as np
import pandas as pd
random.seed(123)

In [3]:
# Short tag used as the prefix of the feature CSV files written later on.
dataset_name = 'tess'
# Folder holding the raw TESS recordings, relative to the working directory.
tess_data_path = "TESS Toronto emotional speech set data/"
# Absolute form of the dataset folder; every file path below is built from it.
TESS_path = os.path.abspath(tess_data_path)

# Raw (unsorted) listing of the dataset root.
# NOTE(review): not referenced again in the visible part of the notebook.
dir_list_TESS = os.listdir(TESS_path)

## TESS Dataset

In [4]:
# One sub-folder per (speaker, emotion) pair; sorted so the listing is stable.
dir_list = sorted(os.listdir(TESS_path))
dir_list

['OAF_Fear',
 'OAF_Pleasant_surprise',
 'OAF_Sad',
 'OAF_angry',
 'OAF_disgust',
 'OAF_happy',
 'OAF_neutral',
 'YAF_angry',
 'YAF_disgust',
 'YAF_fear',
 'YAF_happy',
 'YAF_neutral',
 'YAF_pleasant_surprised',
 'YAF_sad']

In [None]:
# Folder name -> emotion label. The folder spellings differ between the OAF_*
# and YAF_* groups (e.g. 'OAF_Fear' vs 'YAF_fear'), so every name is listed
# verbatim instead of being derived from the string.
EMOTION_BY_FOLDER = {
    'OAF_angry': 'female_angry',
    'YAF_angry': 'female_angry',
    'OAF_disgust': 'female_disgust',
    'YAF_disgust': 'female_disgust',
    'OAF_Fear': 'female_fear',
    'YAF_fear': 'female_fear',
    'OAF_happy': 'female_happy',
    'YAF_happy': 'female_happy',
    'OAF_neutral': 'female_neutral',
    'YAF_neutral': 'female_neutral',
    'OAF_Pleasant_surprise': 'female_surprise',
    'YAF_pleasant_surprised': 'female_surprise',
    'OAF_Sad': 'female_sad',
    'YAF_sad': 'female_sad',
}

path = []
emotion = []

for folder in dir_list:
    folder_path = TESS_path + "/" + folder
    # Stray files in the dataset root (e.g. .DS_Store) would make
    # os.listdir() raise, so only real sub-folders are scanned.
    if not os.path.isdir(folder_path):
        continue
    # Sorted so the row order -- and every CSV derived from it -- is
    # reproducible regardless of the order the filesystem returns entries.
    for file_name in sorted(os.listdir(folder_path)):
        # Folders that are not in the table are kept, labelled 'Unknown'.
        emotion.append(EMOTION_BY_FOLDER.get(folder, 'Unknown'))
        path.append(folder_path + "/" + file_name)

# Column order is label, source, path.
TESS_df = pd.DataFrame({'label': emotion, 'path': path})
TESS_df.insert(1, 'source', 'TESS')

# Only the last expression of a cell is displayed, so the class balance has
# to be printed explicitly to be visible.
print(TESS_df['label'].value_counts())
TESS_df.head()

In [6]:
# Persist the label/source/path index of the dataset (row index not written).
TESS_df.to_csv('TESS_dataset.csv', index=False)

## Feature Extraction

### Extract feature set 1

In [7]:
def extract_feature_set_1(sound_filepath):
    """Extract the first acoustic feature set from a single audio file.

    The file is loaded as a ``parselmouth.Sound`` and handed to a series of
    ``get_*`` helpers that are not defined in this notebook (presumably they
    come from ``feature_extraction_utils`` -- TODO confirm). Each
    ``get_*_attributes`` helper is expected to return a sequence whose first
    element is a ``{column_name: value}`` mapping.

    Parameters
    ----------
    sound_filepath : str
        Path of the audio file to analyse.

    Returns
    -------
    pandas.DataFrame
        A one-row frame (index 0). Columns are, in order: ``sound_filepath``,
        ``local_jitter``, ``local_shimmer``, then one column per attribute in
        the order intensity, pitch, harmonics-to-noise, glottal-to-noise,
        spectrum, formant.
    """
    sound = parselmouth.Sound(sound_filepath)

    # The whole row is collected in one dict and turned into a frame once;
    # growing an empty DataFrame cell by cell with `.at` is slow and leaves
    # the column dtypes to chance.
    record = {
        'sound_filepath': sound_filepath,
        'local_jitter': get_local_jitter(sound),
        'local_shimmer': get_local_shimmer(sound),
    }

    attribute_extractors = (
        get_intensity_attributes,
        get_pitch_attributes,
        get_harmonics_to_noise_ratio_attributes,
        get_glottal_to_noise_ratio_attributes,
        get_spectrum_attributes,
        get_formant_attributes,
    )
    for extractor in attribute_extractors:
        # Only the first element of each helper's return value is used.
        record.update(extractor(sound)[0])

    return pd.DataFrame([record])

In [8]:
# Series of audio file paths, in the same row order as TESS_df.
sound_filepaths = TESS_df['path']

In [None]:
# Metadata columns, taken in the same row order as the paths being processed.
source = TESS_df['source'].tolist()
label = TESS_df['label'].tolist()

# One single-row feature frame per recording.
all_df_list = [extract_feature_set_1(filepath) for filepath in tqdm(sound_filepaths)]

all_df = (
    # Every per-file frame carries index 0; ignore_index gives the combined
    # table a proper 0..n-1 index instead of n rows that are all labelled 0.
    pd.concat(all_df_list, ignore_index=True)
    .assign(**{'source': source, 'class': label})
    .rename(columns={'sound_filepath': 'path'})
)
all_df.to_csv(dataset_name+'_feature_set_1.csv', index=False)

### Extract feature set 2

In [None]:
import math

# `call` was previously only available through a star import; importing it
# explicitly makes the dependency visible.
from parselmouth.praat import call


def _round_or_zero(value, ndigits=3):
    """Return `value` rounded to `ndigits` decimals, with NaN replaced by 0."""
    return round(0 if math.isnan(value) else value, ndigits)


# librosa-based descriptors (one value per recording).
rms = []
zcr = []
spectral_centroid = []
spectral_bandwidth = []
spectral_contrast = []
spectral_rolloff = []
ptch = []
# Metadata carried alongside the features.
path = []
source = []
label = []

# Praat-based descriptors (one value per recording).
minF0_list = []
maxF0_list = []
avgF0_list = []
min_intensity_list = []
max_intensity_list = []
avg_intensity_list = []
jitter_list = []
shimmer_list = []
hnr_list = []

for i in tqdm(range(len(TESS_df))):
    sound_path = TESS_df['path'][i]

    y, sr = librosa.load(sound_path)
    S, _phase = librosa.magphase(librosa.stft(y))
    rms.append(librosa.feature.rms(S=S).mean())
    zcr.append(librosa.feature.zero_crossing_rate(y).mean())
    # The audio must be passed as `y=`: librosa >= 0.10 made these arguments
    # keyword-only, and the keyword form also works on older releases.
    spectral_centroid.append(librosa.feature.spectral_centroid(y=y, sr=sr)[0].mean())
    spectral_bandwidth.append(librosa.feature.spectral_bandwidth(y=y, sr=sr)[0].mean())
    # Only the first row of the spectral-contrast matrix is averaged.
    spectral_contrast.append(librosa.feature.spectral_contrast(y=y, sr=sr)[0].mean())
    # NOTE(review): the constant 0.01 offset added to the signal is kept from
    # the original code; confirm it is intended.
    spectral_rolloff.append(librosa.feature.spectral_rolloff(y=y+0.01, sr=sr)[0].mean())
    path.append(sound_path)
    source.append(TESS_df['source'][i])
    label.append(TESS_df['label'][i])

    input_sound = parselmouth.Sound(sound_path)
    # extracts the duration
    duration = input_sound.get_total_duration()
    # extracts the pitch metrics
    pitch = call(input_sound, "To Pitch", 0.0, 75.0, 600.0)
    minF0 = call(pitch, "Get minimum", 0.0, duration, "Hertz", "Parabolic")
    maxF0 = call(pitch, "Get maximum", 0.0, duration, "Hertz", "Parabolic")
    avgF0 = call(pitch, "Get mean", 0.0, duration, "Hertz")
    # extracts the intensity metrics
    intensity = call(input_sound, "To Intensity", 75.0, 0.0)
    min_intensity = intensity.get_minimum()
    max_intensity = intensity.get_maximum()
    avg_intensity = intensity.get_average()
    # extracts jitter
    point_process = call(input_sound, "To PointProcess (periodic, cc)", 75.0, 600.0)
    jitter = call(point_process, "Get jitter (local)", 0.0, 0.0, 0.0001, 0.02, 1.3)
    # extracts shimmer
    shimmer = call(
        [input_sound, point_process],
        "Get shimmer (local)",
        0,
        0,
        0.0001,
        0.02,
        1.3,
        1.6,
    )
    # extracts HNR
    harmonicity = call(input_sound, "To Harmonicity (cc)", 0.01, 75.0, 0.1, 1.0)
    hnr = call(harmonicity, "Get mean", 0, 0)

    # Values are kept to three decimals. The previous
    # `round(round(x, 3),)` rounded a second time to a whole number, which
    # turned every jitter and shimmer value (fractions below 1) into 0.
    minF0_list.append(_round_or_zero(minF0))
    maxF0_list.append(_round_or_zero(maxF0))
    avgF0_list.append(_round_or_zero(avgF0))
    min_intensity_list.append(_round_or_zero(min_intensity))
    max_intensity_list.append(_round_or_zero(max_intensity))
    avg_intensity_list.append(_round_or_zero(avg_intensity))
    jitter_list.append(_round_or_zero(jitter))
    shimmer_list.append(_round_or_zero(shimmer))
    hnr_list.append(_round_or_zero(hnr))

In [None]:
# Column name -> per-recording values, in the order the CSV columns appear:
# librosa descriptors, Praat descriptors, then the metadata columns.
feature_set_2_columns = {
    'rms': rms,
    'zcr': zcr,
    'spectral_centroid': spectral_centroid,
    'spectral_bandwidth': spectral_bandwidth,
    'spectral_contrast': spectral_contrast,
    'spectral_rolloff': spectral_rolloff,
    'minF0': minF0_list,
    'maxF0': maxF0_list,
    'avgF0': avgF0_list,
    'min_intensity': min_intensity_list,
    'max_intensity': max_intensity_list,
    'avg_intensity': avg_intensity_list,
    'jitter': jitter_list,
    'shimmer': shimmer_list,
    'hnr': hnr_list,
    'path': path,
    'source': source,
    'class': label,
}
data_f = pd.DataFrame(feature_set_2_columns)
data_f.to_csv(f'{dataset_name}_feature_set_2.csv', index=False)
data_f.head()

### Extract feature set 3

In [26]:
def describe_freq(freqs, path, source, label):
    """Summarise an array of values with eleven descriptive statistics.

    Parameters
    ----------
    freqs : array-like
        Values to describe (1-D or 2-D).
    path, source, label
        Metadata passed through unchanged into the returned row.

    Returns
    -------
    list
        ``[path, mean, std, max, min, median, skew, kurtosis, q1, q3, mode,
        iqr, source, label]``.

        mean, std, max, min, median, q1, q3 and iqr are computed over all
        values and are always scalars. skew, kurtosis and mode are computed
        along the first axis: they are scalars for 1-D input and arrays of
        shape ``freqs.shape[1:]`` for multi-dimensional input.
    """
    # Imported here so the function does not rely on a bare `import scipy`
    # exposing the `stats` submodule.
    from scipy import stats

    mean = np.mean(freqs)
    std = np.std(freqs)
    maxv = np.amax(freqs)
    minv = np.amin(freqs)
    median = np.median(freqs)
    skew = stats.skew(freqs)
    kurt = stats.kurtosis(freqs)
    q1 = np.quantile(freqs, 0.25)
    q3 = np.quantile(freqs, 0.75)
    # The shape of the `mode` result depends on the SciPy version: older
    # releases keep the reduced axis (length 1), newer ones drop it, so
    # `[0][0]` indexing either fails or picks a different element. Reshaping
    # to the input shape minus its first axis gives the same result on both;
    # `[()]` then turns a 0-d array into a plain scalar and leaves real
    # arrays untouched.
    mode_result = stats.mode(freqs, axis=0)
    mode = np.asarray(mode_result.mode).reshape(np.shape(freqs)[1:])[()]
    iqr = stats.iqr(freqs)
    return [path, mean, std, maxv, minv, median, skew, kurt, q1, q3, mode, iqr, source, label]

In [None]:
all_list_fft = []
all_list_mfcc = []
for i in tqdm(range(len(TESS_df))):
    sound_path = TESS_df['path'][i]
    sound_source = TESS_df['source'][i]
    sound_label = TESS_df['label'][i]
    y, sr = librosa.load(sound_path)

    # NOTE(review): np.fft.fftfreq only returns the FFT bin frequencies for a
    # signal of this length -- it never reads the samples in `y`, so these
    # "fft" statistics depend on nothing but the clip length. Left unchanged
    # to keep the existing feature definition; confirm whether statistics of
    # the actual spectrum were intended.
    fft = np.fft.fftfreq(y.size)
    all_list_fft.append(describe_freq(fft, sound_path, sound_source, sound_label))

    # The audio must be passed as `y=`: librosa >= 0.10 made it keyword-only,
    # and the keyword form also works on older releases.
    mfcc = librosa.feature.mfcc(y=y, sr=sr)
    all_list_mfcc.append(describe_freq(mfcc, sound_path, sound_source, sound_label))

# Statistic names in the order describe_freq() returns them, between the
# leading path and the trailing source/label entries.
stat_names = ['mean', 'std', 'maxv', 'minv', 'median', 'skew', 'kurt', 'q1', 'q3', 'mode', 'iqr']
fft_columns = ['path'] + [f'{stat}_fft' for stat in stat_names] + ['source', 'class']
mfcc_columns = ['path'] + [f'{stat}_mfcc' for stat in stat_names] + ['source', 'class']

data_features_fft = pd.DataFrame(all_list_fft, columns=fft_columns)
data_features_mfcc = pd.DataFrame(all_list_mfcc, columns=mfcc_columns)

data_features_fft.to_csv(dataset_name+'_feature_set_3_fft.csv', index=False)

In [None]:
# Inspect the MFCC statistics table before its array-valued columns are reduced.
data_features_mfcc

In [None]:
# The skew, kurtosis and mode cells are reduced with `.mean()` so that each
# recording ends up with a single number per column before the table is saved.
skew, kurt, mode = [], [], []
collapsed_by_column = {'skew_mfcc': skew, 'kurt_mfcc': kurt, 'mode_mfcc': mode}

for row in tqdm(range(len(data_features_mfcc))):
    for column, collapsed in collapsed_by_column.items():
        collapsed.append(data_features_mfcc[column][row].mean())

for column, collapsed in collapsed_by_column.items():
    data_features_mfcc[column] = collapsed

data_features_mfcc.to_csv(f'{dataset_name}_feature_set_3_mfcc.csv', index=False)

# **Aggregate all extracted features into one dataframe**

In [None]:
# Reload feature set 1 from disk and keep only its feature columns.
data_exfeat_1 = (
    pd.read_csv(f'{dataset_name}_feature_set_1.csv')
    .drop(columns=['path', 'source', 'class'])
)
data_exfeat_1.columns

In [None]:
# Reload feature set 2 from disk and keep only its feature columns.
data_exfeat_2 = (
    pd.read_csv(f'{dataset_name}_feature_set_2.csv')
    .drop(columns=['path', 'source', 'class'])
)
data_exfeat_2.columns

In [None]:
# Reload the FFT part of feature set 3 and keep only its feature columns.
data_exfeat_3_fft = (
    pd.read_csv(f'{dataset_name}_feature_set_3_fft.csv')
    .drop(columns=['path', 'source', 'class'])
)
data_exfeat_3_fft.columns

In [None]:
# Reload the MFCC part of feature set 3.
data_exfeat_3_mfcc = pd.read_csv(dataset_name+'_feature_set_3_mfcc.csv')
# 'path', 'source' and 'class' are deliberately NOT dropped from this table,
# so the merged feature table still carries one copy of each.
data_exfeat_3_mfcc.columns

In [None]:
feature_tables = {
    'feature_set_1': data_exfeat_1,
    'feature_set_2': data_exfeat_2,
    'feature_set_3_fft': data_exfeat_3_fft,
    'feature_set_3_mfcc': data_exfeat_3_mfcc,
}

# The tables are joined side by side purely by row position. If one CSV is
# stale and has a different number of rows, concat would silently pad the
# shorter tables with NaN, so the mismatch is reported instead.
row_counts = {name: len(table) for name, table in feature_tables.items()}
if len(set(row_counts.values())) > 1:
    raise ValueError(f'Feature tables have different row counts: {row_counts}')

all_data_exfeat = pd.concat(list(feature_tables.values()), axis=1)
all_data_exfeat.columns

In [None]:
# Write the merged feature table to disk (row index not written).
all_data_exfeat.to_csv(f'all_handcrafted_data_{dataset_name}.csv', index=False)