In [None]:
# General libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Specific to this notebook
# 1.1
import os
# 1.2
import librosa as li
import wave

# 1. Preprocessing

## 1.1 Create df and export it as CSV file

In [4]:
# Créer le dataframe initial listant l'ensemble des fichiers audios de la BDD

base_dir = 'raw_data'
all_files = []

for dirpath, dirnames, files in os.walk(base_dir):
    for file in files:
        if file.endswith('.wav'):
            all_files.append(file)

df = pd.DataFrame(all_files, columns=['Filename'])
print(df.head(2))

                   Filename
0  03-01-05-02-01-01-04.wav
1  03-01-02-01-01-02-04.wav


In [5]:
# Compléter le df avec les features inscrites dans le nom des fichiers

# 1. Création d'un df_features contenant les features en colonnes
df_features = df['Filename'].str
df_features = df_features.replace('.wav', '')
df_features = df_features.str.split('-', expand=True)

# 2. Ajout des titres de colonnes
df_features.columns = list(['Modality','Vocal_channel','Emotion','Intensity','Statement','Repetition','Actor'])

In [6]:
# 3. Conversion des valeurs (str) en nb (int)
df_features = df_features[df_features.columns[1:]].astype(int)

In [7]:
# 4. Ajout d'une colonne 'Gender' (1=Woman) (+ une colonne 'Output')
df_features['Gender'] = df_features['Actor'].apply(lambda x: 1 if x%2==0 else 0)
#df_features['Output'] = df_features.apply(lambda row: [row['Gender'], row['Emotion']], axis=1)

In [8]:
# 5. Concaténation de df et df_features
df = pd.concat([df, df_features], axis=1)

In [9]:
df.head(3)

Unnamed: 0,Filename,Vocal_channel,Emotion,Intensity,Statement,Repetition,Actor,Gender
0,03-01-05-02-01-01-04.wav,1,5,2,1,1,4,1
1,03-01-02-01-01-02-04.wav,1,2,1,1,2,4,1
2,03-01-08-01-01-01-04.wav,1,8,1,1,1,4,1


In [10]:
# Export as csv
#df.to_csv('all_files.csv', index=False)

## 1.2 Convert .wav in image via spectrogram

In [None]:
# Understandig the frequency of 1 audio file (before using rs with librosa)
one_path_file = 'raw_data/Actor_01/03-01-01-01-01-01-01.wav'
with wave.open(one_path_file, 'rb') as wav_file:
    print("Sampling frequency:", wav_file.getframerate())

Sampling frequency: 48000


In [58]:
# Vizualize all directories in raw_data
print(sorted(os.listdir(base_dir))[:6])
print()

# Collect all paths of .wav files in raw_data
all_paths = []
for dirpath, dirnames, files in os.walk(base_dir):
    for file in files:
        if file.endswith('.wav'):
            all_paths.append(f"{dirpath}/{file}")
print(sorted(all_paths[:2]))

['Actor_01', 'Actor_02', 'Actor_03', 'Actor_04', 'Actor_05', 'Actor_06']

['raw_data/Actor_04/03-01-02-01-01-02-04.wav', 'raw_data/Actor_04/03-01-05-02-01-01-04.wav']


In [65]:
# Create the spectograms (without silences)

# 1. Load the audio into librosa
""" y is a NumPy array witht he waveform data
sr is the sample rate (16khz)"""
y, sr = li.load(one_path_file, sr=16000)

# 2.1 Scale the waveform so that the maximum value is 1
y = y / np.abs(y).max()

 # 2.2 Remove silence
y, _ = li.effects.trim(y)

# 3. Compute the mel spectogram
# it's a 2d array (rows = frequency ; columns = time ; values = intensity (brightness))
S = li.feature.melspectrogram(y=y, sr=sr)

# 4. Convert the spectogram into decibel scale (log scale)
S_dB = li.power_to_db(S, ref=np.max)

# 4. Create image path
image_path = one_path_file.replace('.wav', '.jpg')
image_path

'raw_data/Actor_01/03-01-01-01-01-01-01.jpg'

In [67]:
# 5. Afficher et sauvegarder l’image
plt.figure(figsize=(4, 4))
li.display.specshow(S_dB, sr=sr, cmap='magma', x_axis=None, y_axis=None)
plt.axis('off')  # Pas d'axes
plt.tight_layout(pad=0)
plt.savefig(image_path, format='jpg', bbox_inches='tight', pad_inches=0, dpi=100)
plt.close()

print(f"Image sauvegardée à : {image_path}")

Image sauvegardée à : raw_data/Actor_01/03-01-01-01-01-01-01.jpg


In [63]:
S.shape

(128, 69)

In [69]:
S[1]

array([3.5242591e-04, 8.3575887e-04, 1.4302258e-03, 5.2133640e-03,
       8.6860508e-03, 7.0904200e-03, 1.2727024e-02, 2.8718293e-02,
       4.1899458e-02, 1.9749267e-02, 2.1006376e-02, 5.6493830e-02,
       1.1896464e-01, 9.9538639e-02, 4.6788711e-02, 4.5376599e-02,
       5.6517005e-02, 1.6659324e-01, 1.3107836e-01, 2.6830114e-02,
       2.2507388e-02, 1.8080717e-02, 2.0819619e-02, 4.2292789e-02,
       4.1725218e-02, 1.2604309e-02, 3.6640116e-03, 1.5036652e-02,
       2.2750063e-02, 1.5074500e-02, 8.3217816e-03, 4.1934648e-03,
       2.3894832e-03, 1.8414096e-03, 4.4133444e-03, 1.8781128e-03,
       1.1645924e-02, 2.1854168e-02, 8.7348670e-02, 5.9406370e-02,
       5.5251839e-03, 2.8724896e-03, 6.2890588e-03, 1.2399041e-02,
       1.3086980e-02, 9.5005557e-03, 8.0325138e-03, 1.3402860e-02,
       7.2397827e-03, 1.0103931e-02, 7.1204021e-03, 6.1435755e-03,
       2.5794853e-03, 1.8183287e-03, 2.3092683e-03, 1.1226098e-03,
       6.9355592e-04, 8.3777675e-04, 9.1369922e-04, 1.6964790e