### **Intro**
We're going to start off by doing some data preprocessing for an emotion classifier model that takes a piece of audio data and labels it.

In [None]:
# Libraries to install
%pip install librosa
%pip install matplotlib==3.3.4
%pip install datasets[audio]

In [14]:
# Import statements
import os
import librosa
import matplotlib.pyplot as plt
import librosa.display as display
import pandas as pd

import Lib.labels as labels
import numpy as np
import soundfile as sf

In [15]:
# Let's analyze some of our audio files
audio_folder_name = "Data\SAVEE"
audio_files = os.listdir(audio_folder_name)

for file_name in audio_files[:5]:
    file_path = os.path.join(audio_folder_name, file_name)
    y, sr = librosa.load(file_path, sr=None)

    # Info about sampling rate
    print('Length of the CREMA audio files: {:.2f} seconds'.format(len(y) / sr))
    print('Number of samples: {}'.format(len(y)))
    print('Sampling rate: {} Hz'.format(sr))
    print('Checking SR again: {}'.format(librosa.get_samplerate(file_path)))

    # Extracting features
    mfccs = librosa.feature.mfcc(y=y, sr=sr)
    spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
    zero_crossing_rate = librosa.feature.zero_crossing_rate(y)
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
    rms_energy = librosa.feature.rms(y=y)

    # Displaying some of the extracted features
    print("MFCCs:", mfccs)
    print("Spectral Centroid:", spectral_centroid)
    print("Chroma:", chroma)
    print("Spectral Contrast:", spectral_contrast)
    print("Spectral Rolloff:", spectral_rolloff)
    print("Zero-Crossing Rate:", zero_crossing_rate)
    print("Tempo:", tempo)
    print("RMS Energy:", rms_energy)
    print()

    # About mfccs
    # mfccs = librosa.feature.mfcc(y=array, sr=sampling_rate)
    # print(mfccs)
    # df = pd.DataFrame(mfccs)
    # print(df.head())

Length of the CREMA audio files: 3.65 seconds
Number of samples: 160868
Sampling rate: 44100 Hz
Checking SR again: 44100
MFCCs: [[-3.4192041e+02 -3.9435593e+02 -4.7589597e+02 ... -4.7595450e+02
  -4.2141251e+02 -3.5766345e+02]
 [ 1.2249877e+02  1.0463106e+02  3.2213448e+01 ...  2.8828905e+01
   8.3709961e+01  1.1964099e+02]
 [ 2.0645851e+01  3.5489052e+01  2.9343201e+01 ...  2.2902699e+01
   3.8500298e+01  2.2846634e+01]
 ...
 [ 3.8999951e+00  4.3970547e+00  5.6224837e+00 ...  3.7707844e+00
   6.2196670e+00  3.4479222e+00]
 [ 3.6335943e+00  4.6504693e+00  4.2221808e+00 ...  1.3876611e+00
   5.2211118e+00  4.6832504e+00]
 [ 3.4357426e+00  4.5474901e+00  2.9926744e+00 ... -4.2038485e-01
   2.0277987e+00  2.8178248e+00]]
Spectral Centroid: [[2552.52558988 1549.01141388  115.28987875  117.71039893  118.50355452
   116.62128293  119.5608443   120.71075055  115.55386326  117.63583719
   117.09692269  120.62337287  122.96089072  122.7982933   120.26669244
   121.29184284  115.98495713  117.70

### About Datasets
**TESS**: Sampling rate of 24 414 Hz, audio files are between 1-2 seconds long

**SAVEE**: Sampling rate of 48 000 Hz, audio files are between 3-4 seconds long

**RAVDESS**: Sampling rate of 48 000 Hz, audio files are also between 3-4 seconds long

**CREMA**: Sampling rate of 16 000 Hz, audio files 1-2 secs long

#### Testing the new labeling functions

In [16]:
dictionary = labels.SAVEE()

df = pd.DataFrame(dictionary)
print(df)

                 audio path dataset label  label
0     Data\SAVEE\DC_a01.wav             a      5
1     Data\SAVEE\DC_a02.wav             a      5
2     Data\SAVEE\DC_a03.wav             a      5
3     Data\SAVEE\DC_a04.wav             a      5
4     Data\SAVEE\DC_a05.wav             a      5
..                      ...           ...    ...
475  Data\SAVEE\KL_su11.wav            su      8
476  Data\SAVEE\KL_su12.wav            su      8
477  Data\SAVEE\KL_su13.wav            su      8
478  Data\SAVEE\KL_su14.wav            su      8
479  Data\SAVEE\KL_su15.wav            su      8

[480 rows x 3 columns]


#### Resampling data

In [27]:
def resample_data(file_path, target_sr):
    audio, sr = librosa.load(file_path)
    audio_resampled = librosa.resample(audio, orig_sr=sr, target_sr=target_sr)
    return audio_resampled

In [28]:
CREMA_dict = labels.CREMA()
RAVDESS_dict = labels.RAVDESS()
SAVEE_dict = labels.SAVEE()
TESS_dict = labels.TESS()

# Looking at one example dict
print(CREMA_dict)
df = pd.DataFrame(RAVDESS_dict)
print(df)

{'audio path': ['Data\\CREMA\\1001_DFA_ANG_XX.wav', 'Data\\CREMA\\1001_DFA_DIS_XX.wav', 'Data\\CREMA\\1001_DFA_FEA_XX.wav', 'Data\\CREMA\\1001_DFA_HAP_XX.wav', 'Data\\CREMA\\1001_DFA_NEU_XX.wav', 'Data\\CREMA\\1001_DFA_SAD_XX.wav', 'Data\\CREMA\\1001_IEO_ANG_HI.wav', 'Data\\CREMA\\1001_IEO_ANG_LO.wav', 'Data\\CREMA\\1001_IEO_ANG_MD.wav', 'Data\\CREMA\\1001_IEO_DIS_HI.wav', 'Data\\CREMA\\1001_IEO_DIS_LO.wav', 'Data\\CREMA\\1001_IEO_DIS_MD.wav', 'Data\\CREMA\\1001_IEO_FEA_HI.wav', 'Data\\CREMA\\1001_IEO_FEA_LO.wav', 'Data\\CREMA\\1001_IEO_FEA_MD.wav', 'Data\\CREMA\\1001_IEO_HAP_HI.wav', 'Data\\CREMA\\1001_IEO_HAP_LO.wav', 'Data\\CREMA\\1001_IEO_HAP_MD.wav', 'Data\\CREMA\\1001_IEO_NEU_XX.wav', 'Data\\CREMA\\1001_IEO_SAD_HI.wav', 'Data\\CREMA\\1001_IEO_SAD_LO.wav', 'Data\\CREMA\\1001_IEO_SAD_MD.wav', 'Data\\CREMA\\1001_IOM_ANG_XX.wav', 'Data\\CREMA\\1001_IOM_DIS_XX.wav', 'Data\\CREMA\\1001_IOM_FEA_XX.wav', 'Data\\CREMA\\1001_IOM_HAP_XX.wav', 'Data\\CREMA\\1001_IOM_NEU_XX.wav', 'Data\\CREMA

In [21]:
# Resample datasets
target_sampling_rate = 24000

dataset_names = ['CREMA', 'RAVDESS', 'TESS', 'SAVEE']
datasets = {'CREMA': CREMA_dict, 'RAVDESS': RAVDESS_dict, 'TESS': TESS_dict, 'SAVEE': SAVEE_dict}

for name in dataset_names:
    for index, audio_path in enumerate(datasets[name]['audio path']):
        audio_resampled = resample_data(audio_path, target_sampling_rate)

        # Making directory to store audio files
        dir_name = "Data/resampled"
        os.makedirs(f"{dir_name}/{name}", exist_ok=True)
        path_name = f"{dir_name}/{name}/{name}_resampled_{index}_emotion_{CREMA_dict['label'][index]}.wav"

        # Save audio output as wav file
        sf.write(path_name, audio_resampled, target_sampling_rate)
        librosa.get_samplerate(path_name)
        datasets[name]['audio resampled path'] = path_name

        print(datasets[name])


{'audio path': ['Data\\CREMA\\1001_DFA_ANG_XX.wav', 'Data\\CREMA\\1001_DFA_DIS_XX.wav', 'Data\\CREMA\\1001_DFA_FEA_XX.wav', 'Data\\CREMA\\1001_DFA_HAP_XX.wav', 'Data\\CREMA\\1001_DFA_NEU_XX.wav', 'Data\\CREMA\\1001_DFA_SAD_XX.wav', 'Data\\CREMA\\1001_IEO_ANG_HI.wav', 'Data\\CREMA\\1001_IEO_ANG_LO.wav', 'Data\\CREMA\\1001_IEO_ANG_MD.wav', 'Data\\CREMA\\1001_IEO_DIS_HI.wav', 'Data\\CREMA\\1001_IEO_DIS_LO.wav', 'Data\\CREMA\\1001_IEO_DIS_MD.wav', 'Data\\CREMA\\1001_IEO_FEA_HI.wav', 'Data\\CREMA\\1001_IEO_FEA_LO.wav', 'Data\\CREMA\\1001_IEO_FEA_MD.wav', 'Data\\CREMA\\1001_IEO_HAP_HI.wav', 'Data\\CREMA\\1001_IEO_HAP_LO.wav', 'Data\\CREMA\\1001_IEO_HAP_MD.wav', 'Data\\CREMA\\1001_IEO_NEU_XX.wav', 'Data\\CREMA\\1001_IEO_SAD_HI.wav', 'Data\\CREMA\\1001_IEO_SAD_LO.wav', 'Data\\CREMA\\1001_IEO_SAD_MD.wav', 'Data\\CREMA\\1001_IOM_ANG_XX.wav', 'Data\\CREMA\\1001_IOM_DIS_XX.wav', 'Data\\CREMA\\1001_IOM_FEA_XX.wav', 'Data\\CREMA\\1001_IOM_HAP_XX.wav', 'Data\\CREMA\\1001_IOM_NEU_XX.wav', 'Data\\CREMA