In [1]:
import numpy as np
import scipy
from scipy.io.wavfile import read as read_wav
from matplotlib import pyplot as plt
import tqdm.notebook as tqdm
import os

import audb
import audiofile
from scipy import signal
from scipy.fft import fftshift
import PIL
from PIL import Image



In [2]:
def get_max_shape(path, list_with_files):
    """Return the largest sample-array shape among the given WAV files.

    Every file is read with ``scipy.io.wavfile.read`` and the shape of its
    data array is collected. Shapes are compared as plain tuples, i.e.
    lexicographically: the shape with the greatest first dimension wins and
    ties are broken by the remaining dimensions.

    Parameters
    ----------
    path : str
        Directory that contains the audio files. A trailing path separator
        is optional.
    list_with_files : iterable of str
        File names relative to ``path``.

    Returns
    -------
    tuple of int
        The maximum shape found among the files.

    Raises
    ------
    ValueError
        If ``list_with_files`` is empty.
    """
    all_shapes = []
    for f in list_with_files:
        # os.path.join works whether or not ``path`` ends with a separator,
        # unlike plain string concatenation.
        _, data = read_wav(os.path.join(path, f))
        all_shapes.append(data.shape)

    if not all_shapes:
        raise ValueError(f"no audio files given for directory {path!r}")

    return max(all_shapes)

def get_spectrogram(name_of_file, max_value, path="full_data/audios/"):
    """Compute a 3-plane dB spectrogram array for a stereo WAV file.

    The audio is zero-padded at the end along the sample axis up to
    ``max_value[0]`` samples. A power-spectral-density spectrogram (boxcar
    window, 4096-sample segments) is then computed for each of the first two
    channels and converted to decibels. A third plane filled with zeros is
    appended so the result has three planes along the last axis.

    Parameters
    ----------
    name_of_file : str
        File name of the WAV file, relative to ``path``.
    max_value : sequence of int
        Target shape; only ``max_value[0]`` (number of samples) is used.
    path : str, optional
        Directory containing the audio files. Defaults to the location the
        notebook originally hard-coded.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_frequencies, n_segments, 3)``: plane 0 is channel
        1 in dB, plane 1 is channel 2 in dB, plane 2 is all zeros.

    Raises
    ------
    ValueError
        If the file does not hold at least two channels, or if it has more
        samples than ``max_value[0]`` (``np.pad`` rejects negative widths).
    """
    sampling_rate, data = read_wav(os.path.join(path, name_of_file))

    # Mono files come back as 1-D arrays; fail early with a readable message
    # instead of an obscure padding/indexing error further down.
    if data.ndim != 2 or data.shape[1] < 2:
        raise ValueError(
            f"{name_of_file!r}: expected audio with at least 2 channels, "
            f"got data of shape {data.shape}"
        )

    # Zero padding: extend only the sample axis, leave the channel axis alone.
    pad_width = ((0, max_value[0] - data.shape[0]), (0, 0))
    padded_matrix = np.pad(data, pad_width, mode='constant', constant_values=0)

    planes = []
    for channel in (0, 1):
        _, _, power = signal.spectrogram(
            padded_matrix[:, channel],
            fs=sampling_rate,
            window='boxcar',
            nperseg=2**12,
            mode='psd',
        )
        # The small offset keeps log10 finite where the power is exactly zero
        # (e.g. in the zero-padded tail).
        planes.append(10 * np.log10(power + 1e-7))

    # Third plane is a zero placeholder so the result has three planes.
    planes.append(np.zeros_like(planes[-1]))

    return np.stack(planes, axis=2)

def spectrogram_to_image(name_of_file, spectrogram_data, path="data/spectrograms/"):
    """Save a spectrogram array as a PNG named after the audio file.

    The array is linearly rescaled to ``[0, 255]`` using its global minimum
    and maximum (taken over every element, all planes included), cast to
    ``uint8`` and written with PIL as ``<path>/<stem>.png``, where ``<stem>``
    is ``name_of_file`` without its extension.

    Parameters
    ----------
    name_of_file : str
        Name of the source audio file; its extension is replaced by ``.png``.
    spectrogram_data : numpy.ndarray
        Array to render, e.g. the output of ``get_spectrogram``.
    path : str, optional
        Output directory. It is created if it does not exist. Defaults to the
        location the notebook originally hard-coded.

    Returns
    -------
    None
    """
    stem = os.path.splitext(name_of_file)[0]

    # Normalize the array values to be within the range [0, 255].
    lowest = spectrogram_data.min()
    value_range = spectrogram_data.max() - lowest
    if value_range == 0:
        # A constant array would divide by zero and yield NaNs; render it as
        # an all-black image instead.
        normalized_data = np.zeros(spectrogram_data.shape, dtype=np.uint8)
    else:
        normalized_data = ((spectrogram_data - lowest) / value_range * 255).astype(np.uint8)

    # Make sure the destination exists so that save() does not fail on a
    # fresh checkout. An empty path means "current directory".
    if path:
        os.makedirs(path, exist_ok=True)

    # Convert the normalized array to an image and save it.
    image = Image.fromarray(normalized_data)
    image.save(os.path.join(path, stem + '.png'))


In [3]:
# Directory holding the input audio files and the WAV files found in it.
path_dir = 'full_data/audios/'
# os.listdir returns every entry (hidden files, sub-directories, ...) in
# arbitrary order; keep only WAV files and sort them so runs are reproducible.
files_dir = sorted(
    entry for entry in os.listdir(path_dir) if entry.lower().endswith('.wav')
)

In [4]:
# Largest audio shape in the dataset; it is the target size used when
# zero-padding the audios.
max_shape = get_max_shape(path=path_dir, list_with_files=files_dir)


In [5]:
# Convert each audio file into a spectrogram array, then save it as an image.
for name in files_dir:
    spec = get_spectrogram(name_of_file=name, max_value=max_shape)
    spectrogram_to_image(name_of_file=name, spectrogram_data=spec)

