# Feature Engineering

In [1]:
# Import dependencies
import os
from pathlib import Path
import numpy as np
import pandas as pd

from pyAudioAnalysis import audioBasicIO, ShortTermFeatures

import matplotlib.pyplot as plt
import librosa
import librosa.display
from tqdm import tqdm
import tensorflow as tf
from keras.preprocessing.image import img_to_array
from PIL import Image

In [2]:
# Retrieve the audio files
audio_path = "../resources/audio_files/"
audio_files = os.listdir(audio_path)

# Ignore duplicates
duplicates = ['voice005.wav', 'voice006.wav', 'voice054.wav', 'voice055.wav']

## MFCCs

In [39]:
# Initialise a list to hold feature dictionaries
st_features_list = []

# Loop through each audio file
for file in audio_files[:3]:
    print(audio_path + file)

    # Only read .wav files
    if (file.endswith(".wav")) and (file not in duplicates):

        # Create a dictionary to hold features
        st_features = dict()

        # Get the ID
        voice_id = file.split(".")[0]

        # Load the file
        fs, signal_data = audioBasicIO.read_audio_file(audio_path + file)

        # Extract the features
        features, feature_names = ShortTermFeatures.feature_extraction(
            signal_data,
            fs,
            0.050 * fs,
            0.025 * fs
        )

        # Populate the dictionary
        st_features['id'] = voice_id
        # st_features['features'] = features
        # st_features['feature_names'] = feature_names

        for feature, name in zip(features, feature_names):
            # print(name, feature)

            # Convert spaces to underscore in name
            clean_name = name.replace(" ", "_")

            st_features[clean_name] = feature
        
        # Append the dictionary to the list
        st_features_list.append(st_features)

# Convert to a dataframe
st_features_df = pd.DataFrame(st_features_list)
st_features_df.head()

../resources/audio_files/voice094.wav
../resources/audio_files/voice080.wav
../resources/audio_files/voice057.wav


Unnamed: 0,id,zcr,energy,energy_entropy,spectral_centroid,spectral_spread,spectral_entropy,spectral_flux,spectral_rolloff,mfcc_1,...,delta_chroma_4,delta_chroma_5,delta_chroma_6,delta_chroma_7,delta_chroma_8,delta_chroma_9,delta_chroma_10,delta_chroma_11,delta_chroma_12,delta_chroma_std
0,voice094,"[0.0, 0.0, 0.0, 0.11027568922305764, 0.2932330...","[6.036956937561178e-07, 6.036956937561178e-07,...","[3.3219280948856316, 3.3219280948856316, 3.321...","[0.005000000000000018, 0.005000000000000018, 0...","[3.2557795197439587e-09, 3.2557795197439587e-0...","[1.3265869665663234e-10, 1.3265869665663234e-1...","[0.0, 0.0, 0.0, 0.9823966482484221, 0.00275064...","[0.0, 0.0, 0.0, 0.83, 0.82, 0.8, 0.8, 0.795, 0...","[-99.0018044569324, -99.0018044569324, -99.001...",...,"[0.0, 0.0, 0.0, 0.0027544917481623883, 0.00171...","[0.0, 0.0, 0.0, 0.00532080246284421, -0.002526...","[0.0, 0.0, 0.0, 0.0039407474502982836, -0.0001...","[0.0, 0.0, 0.0, 0.013393959774498507, -0.00565...","[0.0, 0.0, 0.0, 0.003022295285690943, 0.000637...","[0.0, 0.0, 0.0, 0.0029404764262557343, 0.00105...","[0.0, 0.0, 0.0, 0.012791614628449672, 0.041045...","[0.0, 0.0, 0.0, 0.014760672033384922, 0.001083...","[0.0, 0.0, 0.0, 0.005314421849018831, -0.00443...","[0.0, 0.0, 0.0, -0.04958891442584305, 0.008326..."
1,voice080,"[0.0, 0.0, 0.0, 0.06015037593984962, 0.1604010...","[6.725083325735506e-07, 6.725083325735506e-07,...","[3.321928094885808, 3.321928094885808, 3.32192...","[0.004999999999999999, 0.004999999999999999, 0...","[8.88178419700125e-19, 8.88178419700125e-19, 8...","[1.1908465736187648e-10, 1.1908465736187648e-1...","[0.0, 0.0, 0.0, 0.9994814676223645, 0.00566532...","[0.0, 0.0, 0.0, 0.355, 0.34, 0.345, 0.345, 0.3...","[-99.00180475419432, -99.00180475419432, -99.0...",...,"[0.0, 0.0, 0.0, 0.009700124142185344, -0.00230...","[0.0, 0.0, 0.0, 0.005302947095685554, -0.00335...","[0.0, 0.0, 0.0, 0.004176133080768391, -0.00149...","[0.0, 0.0, 0.0, 0.01649277734326786, -0.013773...","[0.0, 0.0, 0.0, 0.01660802640039016, -0.011421...","[0.0, 0.0, 0.0, 0.029250508744550208, 0.059908...","[0.0, 0.0, 0.0, 0.021764092159619556, -1.95623...","[0.0, 0.0, 0.0, 0.015624512862529279, 0.001762...","[0.0, 0.0, 0.0, 0.0043743300147374205, -0.0036...","[0.0, 0.0, 0.0, -0.047609369636007995, 0.01621..."
2,voice057,"[0.0, 0.0, 0.0, 0.03007518796992481, 0.1102756...","[3.999052524692604e-07, 3.999052524692604e-07,...","[3.321928094884751, 3.321928094884751, 3.32192...","[0.005000000000000011, 0.005000000000000011, 0...","[2.5518505225999377e-09, 2.5518505225999377e-0...","[2.002612468441392e-10, 2.002612468441392e-10,...","[0.0, 0.0, 0.0, 1.0034464485520185, 0.00921859...","[0.0, 0.0, 0.0, 0.19, 0.195, 0.19, 0.18, 0.18,...","[-99.00180460556335, -99.00180460556335, -99.0...",...,"[0.0, 0.0, 0.0, 0.015508486456250989, -0.01190...","[0.0, 0.0, 0.0, 0.018390024663314568, 0.028770...","[0.0, 0.0, 0.0, 0.019171803974693716, -0.00841...","[0.0, 0.0, 0.0, 0.026982139863539388, -0.00534...","[0.0, 0.0, 0.0, 0.021438500347018444, 0.009747...","[0.0, 0.0, 0.0, 0.025872793439917253, -0.02343...","[0.0, 0.0, 0.0, 0.013803645582255043, -0.01232...","[0.0, 0.0, 0.0, 0.011713748592890092, 0.005227...","[0.0, 0.0, 0.0, 0.007547145369114183, 0.015877...","[0.0, 0.0, 0.0, -0.04901123008818458, 0.007792..."


In [40]:
st_features_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 69 columns):
 #   Column                   Non-Null Count  Dtype 
---  ------                   --------------  ----- 
 0   id                       3 non-null      object
 1   zcr                      3 non-null      object
 2   energy                   3 non-null      object
 3   energy_entropy           3 non-null      object
 4   spectral_centroid        3 non-null      object
 5   spectral_spread          3 non-null      object
 6   spectral_entropy         3 non-null      object
 7   spectral_flux            3 non-null      object
 8   spectral_rolloff         3 non-null      object
 9   mfcc_1                   3 non-null      object
 10  mfcc_2                   3 non-null      object
 11  mfcc_3                   3 non-null      object
 12  mfcc_4                   3 non-null      object
 13  mfcc_5                   3 non-null      object
 14  mfcc_6                   3 non-null      objec

In [42]:
len(st_features_df['delta_chroma_std'][0])

189

In [34]:
# Export to CSV
st_features_df.to_csv(
    f'../resources/clean_data/stfeatures.csv',
    encoding = 'utf8',
    index = False
)

## Spectrograms

In [3]:
# Create the spectrograms
for file in tqdm(audio_files, desc="Creating spectrograms"):
    
    # Only read .wav files
    if (file.endswith(".wav")) and (file not in duplicates):
        
        # Load the file
        y, sr = librosa.load(
            audio_path + file, # full file path
            sr = None # preserve sampling rate
        )

        # Plot the spectrogram
        D = librosa.amplitude_to_db(
            np.abs(librosa.stft(y)),
            ref = np.max
        )

        # Plot the spectrogram
        librosa.display.specshow(
            D,
            sr = sr,
            x_axis = 'time',
            y_axis = 'linear' # can also choose: linear
        )
        
        # Define the filename
        filename = file.split(".")[0]

        # Remove labels and border
        plt.tight_layout()
        plt.axis('off')
        
        # Export image
        plt.savefig(
            f'../resources/spectrograms/linear/{filename}.png',
            bbox_inches = 'tight',
            pad_inches = 0
        )
        
        # Close the figure to avoid runtime warning
        plt.close()

Creating spectrograms: 100%|██████████████████| 209/209 [00:15<00:00, 13.43it/s]


In [4]:
# Define the image path and files
image_path = "../resources/spectrograms/linear/"
image_files = os.listdir(image_path)

In [5]:
# Resize each image
new_width, new_height = 305, 225

for image_name in tqdm(image_files, desc="Resizing spectrograms"):
    
    # Only read .png files
    if image_name.endswith(".png"):
        
        # Open the image file
        img = Image.open(image_path + image_name)
        
        # Resize
        resized = img.resize((new_width, new_height))
        
        # Create a new figure
        plt.figure(figsize=(new_width / 100, new_height / 100))
        
        # Plot the resized image
        plt.imshow(resized)

        # Define the filename
        filename = image_name.split(".")[0]
        
        # Remove labels and border
        plt.tight_layout()
        plt.axis('off')
        
        # Export image
        plt.savefig(
            f'../resources/spectrograms/resized/{filename}.png',
            bbox_inches = 'tight',
            pad_inches = 0
        )
        
        # Close the figure to avoid runtime warning
        plt.close()

Resizing spectrograms: 100%|██████████████████| 205/205 [00:09<00:00, 20.98it/s]


In [6]:
# Define the resized image path and files
resized_path = "../resources/spectrograms/resized/"
resized_files = os.listdir(resized_path)

In [7]:
# Initialise a list to hold the dictionaries
spectro_list = []
id_list = []
r_list = []
g_list = []
b_list = []
a_list = []

# Loop through each image
for resized_image in resized_files:
    
    # Only read .png files
    if resized_image.endswith(".png"):
        
        # Initialise a dictionary to hold the pixels
        spectro_dict = dict()

        # Open the image file
        img = Image.open(resized_path + resized_image)

        # Convert image to array format
        img_array = img_to_array(img)
        
        # Add image attributes and array to dictionary
        spectro_dict['id'] = resized_image.split(".")[0]
        spectro_dict['format'] = img.format
        spectro_dict['mode'] = img.mode
        spectro_dict['width_px'] = img.width
        spectro_dict['height_px'] = img.height
        
        id_list.append(resized_image.split(".")[0])
        r_list.append(img_array[:, :, 0].flatten().astype(int))
        g_list.append(img_array[:, :, 1].flatten().astype(int))
        b_list.append(img_array[:, :, 2].flatten().astype(int))
        a_list.append(img_array[:, :, 3].flatten().astype(int))
        
        spectro_list.append(spectro_dict)

# Create a list of RGBA lists
rgba_list = [r_list, g_list, b_list, a_list]

# Convert the list to a DataFrame
spectro_df = pd.DataFrame(spectro_list)
spectro_df.head()

Unnamed: 0,id,format,mode,width_px,height_px
0,voice156,PNG,RGBA,225,166
1,voice142,PNG,RGBA,225,166
2,voice195,PNG,RGBA,225,166
3,voice181,PNG,RGBA,225,166
4,voice022,PNG,RGBA,225,166


In [8]:
# Colour reference list
colours = ['r', 'g', 'b', 'a']

# Loop through each file
for idx, colour_list in tqdm(enumerate(rgba_list), desc="Exporting as CSV"):
    
    # Create a dataframe of each colour
    df = pd.DataFrame(colour_list)
    
    # Use id as the index to the dataframe
    df.index = id_list
    
    # Export to CSV
    df.transpose().to_csv(
        f'../resources/clean_data/{colours[idx]}val.csv',
        encoding = 'utf8',
        index = False
    )

Exporting as CSV: 4it [00:36,  9.15s/it]


In [9]:
# Prevent truncation
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)

In [10]:
# Confirm the first 3 arrays
for resized_image in resized_files[:3]:
    
    # Only read .png files
    if resized_image.endswith(".png"):
        
        # Initialise a dictionary to hold the pixels
        spectro_dict = dict()

        # Open the image file
        img = Image.open(resized_path + resized_image)

        # Convert image to array format
        img_array = img_to_array(img)
        print(img_array)

[[[ 47.  17.  99. 255.]
  [ 47.  17.  99. 255.]
  [ 43.  16.  93. 255.]
  ...
  [ 35.  11.  70. 255.]
  [ 47.  16.  90. 255.]
  [ 49.  17.  93. 255.]]

 [[ 45.  17.  97. 255.]
  [ 45.  17.  98. 255.]
  [ 41.  16.  90. 255.]
  ...
  [ 49.  15.  93. 255.]
  [ 59.  16. 108. 255.]
  [ 60.  15. 111. 255.]]

 [[ 46.  17.  98. 255.]
  [ 46.  17.  99. 255.]
  [ 43.  16.  92. 255.]
  ...
  [ 29.  13.  68. 255.]
  [ 51.  16.  99. 255.]
  [ 54.  16. 104. 255.]]

 ...

 [[215.  69. 107. 255.]
  [215.  69. 107. 255.]
  [211.  67. 109. 255.]
  ...
  [ 76.  18. 120. 255.]
  [184.  56. 115. 255.]
  [201.  62. 114. 255.]]

 [[235.  90.  96. 255.]
  [235.  90.  96. 255.]
  [231.  87.  98. 255.]
  ...
  [ 91.  20. 125. 255.]
  [206.  70. 105. 255.]
  [225.  77. 101. 255.]]

 [[250. 158. 116. 255.]
  [250. 158. 116. 255.]
  [249. 157. 116. 255.]
  ...
  [162. 103. 138. 255.]
  [236. 150. 123. 255.]
  [248. 157. 120. 255.]]]
[[[ 47.  17.  99. 255.]
  [ 47.  17.  99. 255.]
  [ 43.  16.  93. 255.]
  ...
  [ 