Perform sentiment analysis (SA) on spectrogram images generated from the audio files

Workflow to convert audio files into spectrogram images:
1. Load an audio file.

2. Compute its mel-spectrogram.

3. Convert the spectrogram to decibel scale for better visualization.

4. Normalize and scale it into an image-like format.

5. Save the spectrogram as an image file (the active cells below write .png files).

In [None]:
import numpy as np
import os
from skimage.io import imread
import librosa
import librosa.display
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os
# import cv2

In [2]:
def mono_to_color(X, mean=None, std=None, norm_max=None, norm_min=None, eps=1e-6):
    """
    Standardize a single-channel matrix and rescale it to an 8-bit grayscale image.

    Despite its name, this function does not add colour channels: the returned
    array has the same shape as X.

    Arguments:
        X: the input image as a numpy array (matrix) -> spectrogram
        mean: (optional) value subtracted from X. If None, calculate from X
        std: (optional) value the centered X is divided by. If None, calculate from X
        norm_max: (optional) upper clipping bound applied to the standardized values.
            If None, their maximum is used
        norm_min: (optional) lower clipping bound applied to the standardized values.
            If None, their minimum is used
        eps: a small number to prevent dividing by zero
    Returns:
        A uint8 numpy array with the same shape as X, scaled to the range [0, 255].
        An all-zero array is returned when either the standardized data or the
        clipping range spans no more than eps.
    """
    X = np.asarray(X)

    # Standardize.
    # Compare against None explicitly: 0 is a legitimate mean / bound, and
    # `value or default` would silently replace it with the statistic of X.
    if mean is None:
        mean = X.mean()
    centered = X - mean
    if std is None:
        std = centered.std()
    Xstd = centered / (std + eps)

    # Min-Max normalization
    _min, _max = Xstd.min(), Xstd.max()
    if norm_max is None:
        norm_max = _max
    if norm_min is None:
        norm_min = _min

    # Normalize to [0, 255]. The second condition protects the division below
    # when the caller supplies a degenerate clipping range.
    if (_max - _min) > eps and (norm_max - norm_min) > eps:
        V = np.clip(Xstd, norm_min, norm_max)
        V = 255 * (V - norm_min) / (norm_max - norm_min)
        # astype truncates towards zero (no rounding), as before.
        return V.astype(np.uint8)

    # Flat input (or empty range): nothing to scale, return a black image.
    return np.zeros_like(Xstd, dtype=np.uint8)

In [26]:
# Load one sample clip and play it inline as a quick sanity check.
audio_path = "../Dataset/all/1_4666.wav"
# sr=None asks librosa to keep the file's own sampling rate (see librosa.load docs).
y, sr = librosa.load(audio_path, sr=None)
import IPython.display as ipd
# Last expression of the cell: the notebook renders it as an audio player.
ipd.Audio(y, rate=sr)

In [18]:
# train_img_dir_blu = r'C:\Users\alice\Desktop\UNIBO\2_Anno\2_semestre\PW-Deep\SoundSentimentClassification\src\train_images\blu'

# y, sr = librosa.load(audio_path)
# # Convert the waveform to a mel-spectrogram (the mel scale approximates human auditory perception)
# M = librosa.feature.melspectrogram(y=y, sr=sr)
# # Converts the mel-spectrogram's power values to db scale, making it more visually interpretable.
# M = librosa.power_to_db(M)
# # Transform the mel-spectrogram into an image
# M = mono_to_color(M)
# # Save the image
# # os.mkdir(train_img_dir_blu)
# cv2.imwrite(f"{train_img_dir_blu}\\{audio_path}.jpg", M, [int(cv2.IMWRITE_JPEG_QUALITY), 85]) # Sets the JPEG quality to 85 for compression while retaining good quality
# # Display the image
# plt.imshow(M)

In [None]:
def saveFeatureToImage(path, saveDir, avgFeat=0):
    """
    Convert every audio file in a folder into a fixed-width mel-spectrogram PNG.

    Arguments:
        path: folder whose files are loaded with librosa (sub-directories are skipped)
        saveDir: folder the PNG files are written to; created if it does not exist
        avgFeat: width (number of spectrogram frames) of every output image. Longer
            spectrograms are cut to this width, shorter ones are padded with zeros.
            NOTE(review): the default of 0 yields a zero-width array, so callers
            are expected to pass a positive width.
    Returns:
        None. One '<file name without extension>.png' is written per input file.
    """
    files = sorted(os.listdir(path))
    print("Scanning", path)

    # Make sure the destination exists before the first image is written.
    if saveDir:
        os.makedirs(saveDir, exist_ok=True)

    for i, fp in enumerate(files):
        src = os.path.join(path, fp)
        # Only regular files can be decoded; a nested folder would make librosa fail.
        if not os.path.isfile(src):
            continue

        print(i, fp)
        X, sr = librosa.load(src)
        # Pass the sampling rate explicitly so the mel filter bank always matches
        # the rate the signal was loaded at.
        f = librosa.feature.melspectrogram(y=X, sr=sr)
        # NOTE(review): melspectrogram returns a power spectrogram by default, for
        # which librosa documents power_to_db; amplitude_to_db is kept so that new
        # images stay comparable with the ones already generated - confirm intent.
        f = librosa.amplitude_to_db(f, ref=np.max)

        # Fixed-width canvas: copy as many frames as fit, leave the rest at 0.
        img = np.zeros((f.shape[0], avgFeat))
        xWidth = min(f.shape[1], avgFeat)
        img[:, :xWidth] = f[:, :xWidth]

        # splitext drops only the final extension, so 'a.b.wav' -> 'a.b.png'.
        fname = os.path.join(saveDir, os.path.splitext(fp)[0] + '.png')
        print(fname)
        saveImg(img, fname)

In [None]:
# f_dim = 128
# audio_file_path = "../Dataset/all/val"
# images_path = '../Dataset/images/val'

# saveFeatureToImage(audio_file_path, images_path, f_dim)

Scanning ../Dataset/all/val
0 1_0.wav
../Dataset/images/val\1_0.png
1 1_1.wav
../Dataset/images/val\1_1.png
2 1_2.wav
../Dataset/images/val\1_2.png
3 1_3.wav
../Dataset/images/val\1_3.png


<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

In [36]:
def saveImg(f, fp):
    """
    Write a 2-D array to disk as a PNG image, flipped upside down.

    Arguments:
        f: the array to save (e.g. a spectrogram matrix)
        fp: destination file path
    Returns:
        None
    """
    # Reverse the row order so that row 0 of f ends up at the bottom of the image.
    f = np.flip(f, axis=0)
    # plt.imsave writes the array straight to the file and needs no figure.
    # Creating a figure per call (as before) left one open figure behind for
    # every saved image, which adds up when converting a whole dataset.
    plt.imsave(fp, f, format='png')

def showImg(f):
    """
    Display a 2-D array as an image, flipped upside down, without axes.

    Arguments:
        f: the array to display (e.g. a spectrogram matrix)
    Returns:
        None
    """
    # Reverse the row order so that row 0 of f ends up at the bottom of the image.
    f = np.flip(f, axis=0)
    plt.figure()
    plt.imshow(f)
    # Hide the axes only after the image has been drawn: calling axis('off')
    # first and then clearing the figure (as before) discarded the setting.
    plt.axis('off')

In [37]:
def from_audio_to_image(audio_path, save_dir, f_dim=128):
    """
    Convert one audio file into a fixed-width mel-spectrogram PNG.

    The image is named after the audio file ('<name without extension>.png') and
    written to save_dir. Nothing is computed if that image already exists.

    Arguments:
        audio_path: path of the audio file to convert
        save_dir: folder the PNG is written to; created if it does not exist
        f_dim: width (number of spectrogram frames) of the output image. Longer
            spectrograms are cut to this width, shorter ones are padded with zeros
    Returns:
        None
    """
    # Derive the name from the last path component instead of a fixed position,
    # so the function works for any directory depth.
    stem = os.path.splitext(os.path.basename(audio_path))[0]
    fname = os.path.join(save_dir, stem + '.png')
    print(fname)

    # Check if the image already exists
    if os.path.exists(fname):
        print(f"File {fname} already exists. \tSkipping...")
        return

    # sr=None asks librosa to keep the file's own sampling rate.
    y, sr = librosa.load(audio_path, sr=None)
    M = librosa.feature.melspectrogram(y=y, sr=sr)
    # NOTE(review): melspectrogram returns a power spectrogram by default, for
    # which librosa documents power_to_db; amplitude_to_db is kept so that new
    # images stay comparable with the ones already generated - confirm intent.
    f = librosa.amplitude_to_db(M, ref=np.max)

    # Fixed-width canvas: copy as many frames as fit, leave the rest at 0.
    img = np.zeros((f.shape[0], f_dim))
    xWidth = min(f.shape[1], f_dim)
    img[:, :xWidth] = f[:, :xWidth]

    # Make sure the destination exists before writing.
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
    print("saving image:\t", fname, "\n")
    saveImg(img, fname)

In [38]:
def from_audio_to_image_df(df, save_dir):
    """
    Generate a mel-spectrogram image for every entry of df['audio_path'].

    Arguments:
        df: table-like object whose 'audio_path' entry iterates over audio file paths
        save_dir: folder handed to from_audio_to_image for each file
    Returns:
        None
    """
    wav_paths = df['audio_path']
    for wav_path in wav_paths:
        from_audio_to_image(wav_path, save_dir)

In [39]:
# def from_audio_to_image_df(df, save_dir, f_dim):
#     # Convert the wav audio file into the mel spectrogram image
#     for audio_path in df['audio_path']:
#         file_name = audio_path.split('/')[3].split('.')[0] + '.png'
#         fname = os.path.join(save_dir, file_name)

#         # Check if the image already exists
#         if os.path.exists(fname):
#             print(f"File {fname} already exists. \tSkipping...")
#             continue

#         y, sr = librosa.load(audio_path, sr=None)
#         M = librosa.feature.melspectrogram(y=y, sr=sr)
#         f = librosa.amplitude_to_db(M, ref=np.max)
#         img = np.zeros((f.shape[0], f_dim))
#         xWidth = min(f.shape[1], f_dim)
#         img[:, :xWidth] = f[:,:xWidth]
#         print("saving image:\t", fname, "\n")
#         saveImg(img, fname)

In [None]:
# Validation split: the spreadsheet must provide an 'audio_path' column,
# which from_audio_to_image_df iterates over.
val_dataset = pd.read_excel('../Dataset/val.xlsx')
save_dir = "../Dataset/images/val"

print("VALIDATION")
# Create one spectrogram image per listed audio file; images that already
# exist in save_dir are skipped by from_audio_to_image.
from_audio_to_image_df(val_dataset, save_dir)
# Amount of images in the save_dir folder
# (counts every entry in the folder, not only the ones created by this run)
print("\n-->DONE: ", len(os.listdir(save_dir)), " images")

VALIDATION
../Dataset/images/val\1_1856.png
File ../Dataset/images/val\1_1856.png already exists. 	Skipping...
../Dataset/images/val\1_1926.png
File ../Dataset/images/val\1_1926.png already exists. 	Skipping...
../Dataset/images/val\1_1997.png
File ../Dataset/images/val\1_1997.png already exists. 	Skipping...
../Dataset/images/val\1_14480.png
File ../Dataset/images/val\1_14480.png already exists. 	Skipping...
../Dataset/images/val\1_4414.png
File ../Dataset/images/val\1_4414.png already exists. 	Skipping...
../Dataset/images/val\2_879.png
File ../Dataset/images/val\2_879.png already exists. 	Skipping...
../Dataset/images/val\1_10942.png
File ../Dataset/images/val\1_10942.png already exists. 	Skipping...
../Dataset/images/val\1_449.png
File ../Dataset/images/val\1_449.png already exists. 	Skipping...
../Dataset/images/val\1_9399.png
File ../Dataset/images/val\1_9399.png already exists. 	Skipping...
../Dataset/images/val\1_11298.png
File ../Dataset/images/val\1_11298.png already exists. 

In [46]:
# Test split: the spreadsheet must provide an 'audio_path' column,
# which from_audio_to_image_df iterates over.
test_dataset = pd.read_excel('../Dataset/test.xlsx')
save_dir = "../Dataset/images/test"

print("TEST")
# Create one spectrogram image per listed audio file; images that already
# exist in save_dir are skipped by from_audio_to_image.
from_audio_to_image_df(test_dataset, save_dir)
# Amount of images in the save_dir folder
# (counts every entry in the folder, not only the ones created by this run)
print("\n-->DONE: ", len(os.listdir(save_dir)), " images")

TEST
../Dataset/images/test\1_9764.png
File ../Dataset/images/test\1_9764.png already exists. 	Skipping...
../Dataset/images/test\1_13881.png
File ../Dataset/images/test\1_13881.png already exists. 	Skipping...
../Dataset/images/test\1_11118.png
File ../Dataset/images/test\1_11118.png already exists. 	Skipping...
../Dataset/images/test\1_6890.png
File ../Dataset/images/test\1_6890.png already exists. 	Skipping...
../Dataset/images/test\1_798.png
File ../Dataset/images/test\1_798.png already exists. 	Skipping...
../Dataset/images/test\1_4362.png
File ../Dataset/images/test\1_4362.png already exists. 	Skipping...
../Dataset/images/test\1_13420.png
File ../Dataset/images/test\1_13420.png already exists. 	Skipping...
../Dataset/images/test\1_5680.png
File ../Dataset/images/test\1_5680.png already exists. 	Skipping...
../Dataset/images/test\1_1370.png
File ../Dataset/images/test\1_1370.png already exists. 	Skipping...
../Dataset/images/test\1_14352.png
File ../Dataset/images/test\1_14352.pn

In [53]:
# Training split (balanced version): the spreadsheet must provide an
# 'audio_path' column, which from_audio_to_image_df iterates over.
train_dataset = pd.read_excel('../Dataset/train_balanced.xlsx')
save_dir = "../Dataset/images/train"

# Report how many distinct audio files the table references.
print(len(train_dataset['audio_path'].unique()), " unique audio files in the dataset\n")


print("TRAIN (balanced)")
# Create one spectrogram image per listed audio file; a path that appears
# again finds its image already on disk and is skipped by from_audio_to_image.
from_audio_to_image_df(train_dataset, save_dir)
# Amount of images in the save_dir folder
# (counts every entry in the folder, not only the ones created by this run)
print("\n-->DONE: ", len(os.listdir(save_dir)), " images")

4908  unique audio files in the dataset

TRAIN (balanced)
../Dataset/images/train\1_3956.png
File ../Dataset/images/train\1_3956.png already exists. 	Skipping...
../Dataset/images/train\1_4127.png
File ../Dataset/images/train\1_4127.png already exists. 	Skipping...
../Dataset/images/train\1_1880.png
File ../Dataset/images/train\1_1880.png already exists. 	Skipping...
../Dataset/images/train\2_550.png
File ../Dataset/images/train\2_550.png already exists. 	Skipping...
../Dataset/images/train\2_1700.png
File ../Dataset/images/train\2_1700.png already exists. 	Skipping...
../Dataset/images/train\1_6609.png
File ../Dataset/images/train\1_6609.png already exists. 	Skipping...
../Dataset/images/train\1_223.png
File ../Dataset/images/train\1_223.png already exists. 	Skipping...
../Dataset/images/train\2_1696.png
File ../Dataset/images/train\2_1696.png already exists. 	Skipping...
../Dataset/images/train\1_13511.png
File ../Dataset/images/train\1_13511.png already exists. 	Skipping...
../Datas