In [1]:
import concurrent.futures
import os
import pathlib
import time

from tensorflow.python.keras.layers.core import Dropout
from tensorflow.keras.models import Model, load_model
from tensorflow.keras import layers, losses
from sklearn.metrics import accuracy_score, precision_score, recall_score
import tensorflow.keras as keras
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cv2
import librosa
import librosa.display

In [None]:
# 1. Extract spectrograms from wav files
#
# Configures the input/output locations and figure size, runs the extractor
# once, and prints the elapsed wall-clock time.
#
# NOTE(review): SOURCE and TARGET are absolute, machine-specific paths;
# consider deriving them from a configurable data directory.
SOURCE = "C:/workspace/autoencoder/autoencoder/data/in"
TARGET = "C:/workspace/autoencoder/autoencoder/data/out"
FIG_SIZE = (20, 20)  # handed to the extractor; also used as a matplotlib figsize
# NOTE(review): `args` is not used in this cell; kept in case another cell
# consumes it - confirm and remove if it is dead.
args = [SOURCE, TARGET, FIG_SIZE]

# perf_counter is a monotonic clock, suited to measuring elapsed time.
start = time.perf_counter()

# NOTE(review): SpectrogramExtractor is neither defined nor imported in the
# visible cells; this cell fails on a fresh kernel unless it is provided
# elsewhere - confirm.
extractor = SpectrogramExtractor()
extractor.extract(SOURCE, TARGET, FIG_SIZE)

finish = time.perf_counter()
print(f'Finished in {round(finish-start, 2)} second(s)')

In [None]:
# Render one log-amplitude STFT spectrogram image (.jpg) into TARGET for every
# .wav file found directly inside SOURCE.
#
# NOTE(review): the working directory is still switched to SOURCE, as this cell
# always did, in case later cells rely on it. The loop itself does not depend
# on it: every file is loaded through its full path.
os.chdir(SOURCE)

# STFT parameters are identical for every file, so they are defined once.
hop_length = 512  # in num. of samples
n_fft = 2048  # window in num. of samples

# savefig does not create missing directories, so make sure the output
# directory exists before the first image is written.
data_path = pathlib.Path(TARGET)
data_path.mkdir(parents=True, exist_ok=True)

for file in os.listdir(SOURCE):
    # check file extension
    if not file.endswith(".wav"):
        continue

    # load audio file with Librosa, requesting a 22050 Hz sample rate
    signal, sample_rate = librosa.load(os.path.join(SOURCE, file), sr=22050)

    # STFT -> spectrogram
    stft = librosa.stft(signal, n_fft=n_fft, hop_length=hop_length)

    # calculate abs values on complex numbers to get magnitude
    spectrogram = np.abs(stft)

    # apply logarithm to cast amplitude to Decibels
    log_spectrogram = librosa.amplitude_to_db(spectrogram)

    # "<name>.wav" -> "<name>.jpg" inside the output directory
    file_name = f'{file[0:-4]}.jpg'
    full_name = str(data_path / file_name)

    # Matplotlib plots: removing axis, legends and white spaces
    fig = plt.figure(figsize=FIG_SIZE)
    try:
        plt.axis('off')
        librosa.display.specshow(
            log_spectrogram, sr=sample_rate, hop_length=hop_length)
        plt.savefig(full_name, bbox_inches='tight', pad_inches=0)
    finally:
        # pyplot keeps every figure alive until it is closed; without this,
        # memory grows with each processed file.
        plt.close(fig)