# Extract Audio Features


In [1]:
import sys
import pickle
import pathlib
import random

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf

import audiofile
import opensmile

from util import *


  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
SoX could not be found!

    If you do not have SoX, proceed here:
     - - - http://sox.sourceforge.net/ - - -

    If you do (or think that you should) have SoX, double-check your
    path variables.
    


In [2]:
# Report the TensorFlow build, the execution mode and whether a GPU is visible.
# (Eager execution can be enabled beforehand with
#  tf.compat.v1.enable_eager_execution() if it is needed.)
print("Version: ", tf.__version__)
print("Eager mode: ", tf.executing_eagerly())

# An empty device list is falsy, so it maps to the "NOT AVAILABLE" message.
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
gpu_status = "available" if gpu_devices else "NOT AVAILABLE"
print("GPU is", gpu_status)

Version:  1.14.0
Eager mode:  False
GPU is NOT AVAILABLE


## File paths

In [3]:
# Project root: the directory the notebook is started from.
HOME_DIR = pathlib.Path.cwd()

# The pickled features are kept on a separate drive; the in-repo alternative
# would be HOME_DIR/data/processed/IEMOCAP/extracted_feature.pk.
feature_path = 'D:/extracted_features.pk'

# Label file, list of processed utterance ids and the raw dataset root.
label_path = HOME_DIR.joinpath('data', 'processed', 'IEMOCAP', 'FC_label.txt')
processed_id_path = HOME_DIR.joinpath(
    'data', 'processed', 'IEMOCAP', 'processed_ids.txt')
dataset_path = HOME_DIR.joinpath('data', 'raw')


In [4]:
def configure_openSmile(**kwargs):
    '''
    Configure OpenSmile.

        Parameters:
            **kwargs: Optional keyword arguments forwarded to
                ``opensmile.Smile``. They override the defaults used here
                (ComParE_2016 feature set, low-level descriptors,
                loglevel 4, logfile 'smile.log', verbose=True).

        Returns:
            smile (obj): Returns configured smile object
    '''

    settings = {
        'feature_set': opensmile.FeatureSet.ComParE_2016,
        'feature_level': opensmile.FeatureLevel.LowLevelDescriptors,
        'loglevel': 4,
        'logfile': 'smile.log',
        'verbose': True,
    }
    # Caller-supplied options take precedence over the defaults; they used
    # to be accepted by the signature but silently ignored.
    settings.update(kwargs)

    smile = opensmile.Smile(**settings)
    return smile


In [5]:
def extract_feautre(folder_list):
    '''
    Run the configured openSMILE extractor over a collection of audio files.

        Parameters:
            folder_list (list): Paths of the audio files to process

        Returns:
            features: The result of ``smile.process_files`` for the given
                paths, or None when a FileNotFoundError was raised (the
                error is printed, not re-raised)
    '''

    extractor = configure_openSmile()

    try:
        extracted = extractor.process_files(folder_list)
    except FileNotFoundError as e:
        # Best effort: report the missing file and hand back None.
        print(f'Error --> {e}')
        return None

    return extracted


In [6]:
def z_score(dataframe, inplace=False):
    '''
    Apply z-score (standardization) to the columns of the dataframe.

    Every column is replaced by ``(column - mean) / std`` using the pandas
    sample standard deviation. A column whose standard deviation is zero
    (constant column) or undefined (e.g. a single row) is only centred, so
    it becomes 0 instead of NaN/inf.

        Parameters:
            dataframe (DataFrame): Data to apply z-score
            inplace (bool): Modify the dataframe inplace. Default (False)

        Returns:
            dataframe: Z-scores applied on each column. This is the very
                object passed in when ``inplace`` is True, a copy otherwise
    '''

    target = dataframe if inplace else dataframe.copy()

    # apply the z-score method column by column
    for column in target.columns:
        values = target[column]
        spread = values.std()

        # Dividing by a zero/NaN deviation would wipe the column out with
        # NaN or inf; fall back to a unit scale so the column is centred.
        if pd.isna(spread) or spread == 0:
            spread = 1.0

        target[column] = (values - values.mean()) / spread

    print('Finished standardizing (z-score) data')
    return target


In [7]:
def pad_signal_data(data, full_path, maxlen=3409):
    '''
    Pad each signal with zeros for equal data dimensions.

        Parameters:
            data (Pandas DataFrame): Signal table to be padded; each entry
                of ``full_path`` is looked up with ``data.loc[...]``
                (assumed to be the first level of a multi-index)
            full_path (list): Keys of the signals to pad, in output order
            maxlen (int): Number of rows every shorter signal is padded to.
                Signals that already have ``maxlen`` rows or more are kept
                unchanged (they are NOT truncated)

        Returns:
            padded_dataframe (DataFrame): The padded signals stacked on top
                of each other; the outer index level is the position of the
                signal in ``full_path``. An empty frame with the columns of
                ``data`` is returned when ``full_path`` is empty
    '''

    if len(full_path) == 0:
        # pd.concat refuses an empty list, so answer with an empty frame.
        print('Finished padding data')
        return pd.DataFrame(columns=data.columns)

    frames = []

    for file in full_path:
        # Drop the remaining index levels without mutating a slice of
        # ``data`` in place.
        signal = data.loc[file].reset_index(drop=True)
        rows, cols = signal.shape

        if rows < maxlen:
            # Align the signal with an all-zero frame of the target length:
            # existing values win, missing rows are filled with zeros.
            filler = pd.DataFrame(
                data=np.zeros((maxlen, cols), dtype=float),
                columns=data.columns,
            )
            signal = signal.combine_first(filler)

        frames.append(signal)

    padded_dataframe = pd.concat(frames, keys=range(len(full_path)))

    print('Finished padding data')
    return padded_dataframe


In [8]:
def shuffle_dataset(features, labels):
    '''
    Shuffle features and labels together so every pair stays aligned.

    The module-level ``random`` state is used; call ``random.seed(...)``
    beforehand for a reproducible order.

        Parameters:
            features (iterable): Feature items
            labels (iterable): Labels, paired with ``features`` by position
                (pairing stops at the shorter of the two)

        Returns:
            shuf_features (tuple): Features in shuffled order
            shuf_label (tuple): Labels in the same shuffled order
    '''

    shuffled_data = list(zip(features, labels))

    if not shuffled_data:
        # zip(*[]) yields nothing, so the unpacking below would raise.
        return (), ()

    random.shuffle(shuffled_data)
    shuf_features, shuf_label = zip(*shuffled_data)
    return shuf_features, shuf_label

## Extract and save features

In [8]:
# Gather the files below the wav folder of Session1 .. Session5.
# NOTE(review): file_search comes from util; it is assumed to add the files
# it finds under the given folder to the list passed in - confirm.
list_files = []

for session_no in range(1, 6):
    sess_title = f'Session{session_no}'
    path = f'./data/raw/IEMOCAP_full_release/{sess_title}/sentences/wav'

    file_search(str(path), list_files)
    # Keep the running list ordered after every session.
    list_files = sorted(list_files)

    print(f"{sess_title}, # Num of files: {len(list_files)}")


Session1, # Num of files: 1819
Session2, # Num of files: 3630
Session3, # Num of files: 5766
Session4, # Num of files: 7869
Session5, # Num of files: 10039


In [None]:
# Extract audio features
features = extract_feautre(list_files)

# Save features to the configured location (feature_path) rather than
# repeating the literal path. extract_feautre returns None when a file is
# missing; in that case skip the dump so the pickle is not overwritten
# with None.
if features is not None:
    with open(str(feature_path), 'wb') as feat:
        pickle.dump(features, feat)
else:
    print('Feature extraction failed; nothing was saved')


## Load and prepare features

In [10]:
label = []

# Open the pickled feature table and the label file together.
with open(str(feature_path), 'rb') as pickle_file, \
        open(str(label_path)) as f2:
    data = pickle.load(pickle_file)
    category = f2.readlines()

# One label per line, surrounding whitespace removed.
label = list(map(str.strip, category))

## Dataset info

In [11]:
# Read the processed ids, one per line, without surrounding whitespace.
full_ids = []
with open(str(processed_id_path)) as id_file:
    full_ids = [line.strip() for line in id_file.readlines()]

## Get the list of filenames for the selected categories (Angry, Happy, Sad, Neutral)

In [12]:
# Index the discovered wav files by bare file name once, instead of
# rescanning the whole list for every id. Both '\\' and '/' are treated as
# separators so the lookup also works for POSIX-style paths; setdefault
# keeps the first path seen for a name, like the original first-match scan.
path_by_name = {}
for wav_path in list_files:
    wav_name = wav_path.replace('\\', '/').rsplit('/', 1)[-1]
    path_by_name.setdefault(wav_name, wav_path)

# Ids without a matching wav file are skipped; the order of full_ids is kept.
full_path = [
    path_by_name[file_ + '.wav']
    for file_ in full_ids
    if file_ + '.wav' in path_by_name
]

In [13]:
# Preview the first five rows of the loaded feature table.
data.head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,F0final_sma,voicingFinalUnclipped_sma,jitterLocal_sma,jitterDDP_sma,shimmerLocal_sma,logHNR_sma,audspec_lengthL1norm_sma,audspecRasta_lengthL1norm_sma,pcm_RMSenergy_sma,pcm_zcr_sma,...,mfcc_sma[5],mfcc_sma[6],mfcc_sma[7],mfcc_sma[8],mfcc_sma[9],mfcc_sma[10],mfcc_sma[11],mfcc_sma[12],mfcc_sma[13],mfcc_sma[14]
file,start,end,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
./data/raw/IEMOCAP_full_release/Session1/sentences/wav\Ses01F_impro01\Ses01F_impro01_F000.wav,00:00:00,00:00:00.060000,0.0,0.617895,0.0,0.0,0.0,-100.0,0.170156,0.988341,0.003741,0.084722,...,11.23224,-12.341435,10.037708,-20.395533,-1.811347,4.475477,18.6854,3.107538,20.465704,7.123697
./data/raw/IEMOCAP_full_release/Session1/sentences/wav\Ses01F_impro01\Ses01F_impro01_F000.wav,00:00:00.010000,00:00:00.070000,0.0,0.621465,0.0,0.0,0.0,-100.0,0.170515,0.988341,0.003776,0.08125,...,9.70452,-15.659091,8.531051,-18.295626,1.736485,4.171374,14.857246,-4.711551,15.682807,6.299347
./data/raw/IEMOCAP_full_release/Session1/sentences/wav\Ses01F_impro01\Ses01F_impro01_F000.wav,00:00:00.020000,00:00:00.080000,0.0,0.625829,0.0,0.0,0.0,-100.0,0.160326,0.988341,0.003653,0.077778,...,7.616863,-18.509539,11.879658,-9.947198,8.727133,3.999138,10.783359,-9.611413,10.285623,4.734458
./data/raw/IEMOCAP_full_release/Session1/sentences/wav\Ses01F_impro01\Ses01F_impro01_F000.wav,00:00:00.030000,00:00:00.090000,0.0,0.625295,0.0,0.0,0.0,-100.0,0.149779,0.988341,0.003427,0.079514,...,10.105798,-14.981258,14.579773,-7.128545,9.769948,-2.341131,5.902691,-12.417287,3.800602,1.058139
./data/raw/IEMOCAP_full_release/Session1/sentences/wav\Ses01F_impro01\Ses01F_impro01_F000.wav,00:00:00.040000,00:00:00.100000,0.0,0.603681,0.0,0.0,0.0,-100.0,0.136115,0.964558,0.003376,0.08125,...,11.671115,-9.982911,20.184065,-9.344695,6.967035,-7.63469,4.144184,-6.996481,3.401203,0.124158


# Pre-process extracted signal sequence

In [14]:
# Zero-pad the signal of every file in full_path (default maxlen is used)
padded_data = pad_signal_data(data=data, full_path=full_path)


Finished padding data


In [15]:
# Standardize every column of the padded table. With inplace=True the
# padded_data frame itself is modified and data_norm refers to that same frame.
data_norm = z_score(dataframe=padded_data, inplace=True)

Finished standardizing (z-score) data


In [20]:
# Read the label file again: one label per line
label = []

with open(str(label_path)) as f:
    category = f.readlines()

# Strip the newline / surrounding whitespace from every entry
label = [entry.strip() for entry in category]


In [27]:
# Wrap the labels in a single-column ('label') DataFrame
data_dict = dict(label=label)
label_dataset = pd.DataFrame(data_dict)

# Persist the label DataFrame as a pickle
label_dataset.to_pickle('D:/label_dataset.pkl')


In [17]:
# Stack the padded table into a (files, frames, features) array. The file
# and feature counts are taken from the data instead of being hard-coded
# (previously 5531 and 65); 3409 is the default maxlen of pad_signal_data.
arr = data_norm.values.reshape(len(full_path), 3409, data_norm.shape[1])


In [19]:
# Write the pre-processed array to a compressed .npz archive; 'arr_0' is the
# key NumPy assigns to the first positional array, spelled out here.
np.savez_compressed('D:/processed_features.npz', arr_0=arr)
