In [22]:
### Import the necessary libraries

import pandas as pd
import numpy as np
import tensorflow as tf
import IPython.display as ipd
import matplotlib.pyplot as plt
import librosa
import librosa.display
import os
from tqdm import tqdm

%matplotlib inline

In [23]:
### Preprocess audio files -> render the Mel spectrogram and CQT of each recording as images -> this is not the actual processing yet; more of that happens in the training phase

def _save_spectrogram_png(spec_db, output_path, sr, hop_length, y_axis):
    """Render one dB-scaled spectrogram with a colorbar and save it as a PNG."""
    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        img = librosa.display.specshow(spec_db, sr=sr, hop_length=hop_length,
                                       x_axis='time', y_axis=y_axis, ax=ax)
        fig.colorbar(img, ax=ax, format='%+2.0f dB')
        fig.savefig(output_path, bbox_inches='tight')
    finally:
        # Always release the figure, even when plotting or saving fails;
        # otherwise a long batch accumulates open figures in memory.
        plt.close(fig)


def preprocess_audio_path(audio_file_path,
                          melspect_output_path,
                          cqt_output_path, sr=22050,
                          n_fft=2048, hop_length=512,
                          n_mels=128, batch_size=10):
    """Render Mel-spectrogram and CQT images for every .wav file under a folder tree.

    Every directory found below ``audio_file_path`` is scanned for ``.wav``
    files (extension matched case-insensitively). For each file two PNGs are
    written: ``<stem>_mel.png`` into ``melspect_output_path/<dir name>`` and
    ``<stem>_cqt.png`` into ``cqt_output_path/<dir name>``, where ``<dir name>``
    is the name of the directory that contains the recording. Files located
    directly in ``audio_file_path`` itself are not processed.

    Parameters
    ----------
    audio_file_path : str
        Root folder whose sub-directories contain the recordings.
    melspect_output_path : str
        Root folder for the Mel-spectrogram images.
    cqt_output_path : str
        Root folder for the CQT images.
    sr : int
        Sampling rate passed to ``librosa.load`` and to the feature/plot calls.
    n_fft : int
        FFT window size for the Mel spectrogram.
    hop_length : int
        Hop length used for both the Mel spectrogram and the CQT.
    n_mels : int
        Number of Mel bands.
    batch_size : int
        Currently unused; kept so existing callers that pass it keep working.

    Returns
    -------
    None
        The function is used for its side effect of writing PNG files.
    """
    # The output roots may live inside the input tree (e.g. '../data/raw/Mel Feature'),
    # so remember them and keep the walk from descending into generated images.
    output_roots = {os.path.abspath(melspect_output_path),
                    os.path.abspath(cqt_output_path)}

    for root, dirs, _ in os.walk(audio_file_path):
        # In-place pruning is how os.walk (top-down) is told to skip directories.
        dirs[:] = [d for d in dirs
                   if os.path.abspath(os.path.join(root, d)) not in output_roots]

        for dir_name in dirs:
            input_dir = os.path.join(root, dir_name)

            # TODO: convert other audio formats to .wav instead of skipping them.
            # Only regular files count: a directory that happens to be named
            # 'something.wav' must not be handed to librosa.load.
            wav_files = [name for name in sorted(os.listdir(input_dir))
                         if os.path.splitext(name)[1].lower() == '.wav'
                         and os.path.isfile(os.path.join(input_dir, name))]
            if not wav_files:
                continue

            mel_output_dir = os.path.join(melspect_output_path, dir_name)
            cqt_output_dir = os.path.join(cqt_output_path, dir_name)
            os.makedirs(mel_output_dir, exist_ok=True)
            os.makedirs(cqt_output_dir, exist_ok=True)

            for file_name in wav_files:
                print("Processing File:", file_name)
                # splitext strips only the real extension; str.replace would also
                # rewrite any earlier '.wav' occurring inside the file name.
                stem = os.path.splitext(file_name)[0]

                # Load the audio file. mono=True (librosa's default, made explicit)
                # already down-mixes multi-channel audio, so no to_mono step is needed.
                file_path = os.path.join(input_dir, file_name)
                audio, _ = librosa.load(file_path, sr=sr, mono=True)

                # No peak normalisation here: both dB conversions below are taken
                # relative to each spectrogram's own maximum (ref=np.max).

                # Compute the Mel spectrogram in dB.
                mel_spec = librosa.feature.melspectrogram(y=audio, sr=sr, n_fft=n_fft,
                                                          hop_length=hop_length, n_mels=n_mels)
                mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)

                # Compute the CQT magnitude spectrogram in dB.
                cqt_mag_spec = np.abs(librosa.cqt(y=audio, sr=sr, hop_length=hop_length))
                cqt_mag_spec_db = librosa.amplitude_to_db(cqt_mag_spec, ref=np.max)

                # Save both spectrograms as PNG images in their output folders.
                _save_spectrogram_png(mel_spec_db,
                                      os.path.join(mel_output_dir, stem + '_mel.png'),
                                      sr, hop_length, y_axis='mel')
                _save_spectrogram_png(cqt_mag_spec_db,
                                      os.path.join(cqt_output_dir, stem + '_cqt.png'),
                                      sr, hop_length, y_axis='cqt_note')

In [None]:
# Where the raw recordings live and where the generated spectrogram images go.
audio_folder_path = '../data/raw/'
melspect_output_path = '../data/raw/Mel Feature'
cqt_output_path = '../data/raw/CQT Spect'

# Render Mel and CQT spectrogram PNGs for every folder of recordings.
preprocess_audio_path(
    audio_file_path=audio_folder_path,
    melspect_output_path=melspect_output_path,
    cqt_output_path=cqt_output_path,
)

In [21]:
# List every directory found under the raw-data folder (sanity check of the layout).
# The loop variable is deliberately not called `dir`: at cell level that would
# rebind the builtin dir() for the rest of the kernel session.
for root, dirs, files in os.walk('../data/raw/'):
    for dir_name in dirs:
        input_dir = os.path.join(root, dir_name)
        print(input_dir)

../data/raw/burping
../data/raw/hungry
../data/raw/belly_pain
../data/raw/discomfort
../data/raw/tired
