In [None]:
'''
This notebook explains:
A. How to extract audio features as a mel spectrogram and save them as an image.
B. How to iterate over the Urban/Flickr/SpEAR audio datasets and convert them into mel-spectrogram images.
C. How to mix the Urban/Flickr audio datasets with a noise signal and convert the result into mel-spectrogram images.

'''

In [44]:
from IPython.display import Audio
import matplotlib.pyplot as plt
from matplotlib.pyplot import specgram
import librosa
import numpy as np
import os
import glob
import shutil
import librosa.display
from pathlib import Path 
import random
import math

In [71]:
def get_noise_from_sound(signal,noise,SNR):
    """Rescale ``noise`` so that mixing it with ``signal`` yields the requested SNR.

    The returned noise satisfies 20*log10(RMS(signal) / RMS(noise)) == SNR,
    i.e. its RMS level is RMS(signal) / 10**(SNR/20).

    Args:
        signal: Array of clean audio samples (must support ``**`` and
            ``np.mean``, e.g. a NumPy array). Only its RMS level is used.
        noise: Array of noise samples to rescale. A new array is returned;
            for NumPy input the original is left untouched.
        SNR: Desired signal-to-noise ratio in decibels.

    Returns:
        The rescaled noise, or ``None`` when no scale factor can be computed
        (for example all-zero noise or non-numeric input). Callers must check
        for ``None`` before adding the result to the signal.
    """
    try:
        rms_signal = math.sqrt(np.mean(signal**2))

        # SNR in dB is defined on power: SNR = 10*log10(P_s / P_n). In terms of
        # RMS amplitudes that is 20*log10(RMS_s / RMS_n), so the amplitude
        # ratio is 10**(SNR/20).
        rms_noise_target = rms_signal / pow(10, SNR/20)

        rms_noise_current = math.sqrt(np.mean(noise**2))
        if rms_noise_current == 0:
            # Silent noise cannot be scaled to a non-zero level.
            print("Error encountered while scaling noise: noise signal has zero RMS")
            return None

        return noise * (rms_noise_target / rms_noise_current)

    except Exception as e:
        # Best-effort contract: report the cause and signal failure with None.
        print("Error encountered while scaling noise: ", e)
        return None

In [None]:
# Convert one audio file into a mel-spectrogram image.

audio_file = '101415-3-0-2-gun-shot.wav'

# Decode the clip at 22,050 Hz using the 'kaiser_fast' resampler.
samples, sample_rate = librosa.load(audio_file, res_type='kaiser_fast', sr=22050)

# Small square canvas with both axes and the frame hidden, so the saved
# image contains nothing but the spectrogram itself.
fig = plt.figure(figsize=(0.72, 0.72))
ax = fig.add_subplot(1, 1, 1)
ax.get_xaxis().set_visible(False)
ax.get_yaxis().set_visible(False)
ax.set_frame_on(False)

# The image is named after the audio file, with '.wav' swapped for '.png'.
filename = audio_file.replace('.wav', '.png')

# Mel power spectrogram, drawn in dB relative to its maximum value.
S = librosa.feature.melspectrogram(sr=sample_rate, y=samples)
librosa.display.specshow(librosa.power_to_db(S, ref=np.max))
plt.savefig(filename, bbox_inches='tight', pad_inches=0, dpi=400)
plt.close('all')

In [None]:
# Convert the SpEAR audio data into mel-spectrogram images (one PNG per WAV file).
audio_parent_dir = Path('/SpEAR-speech-database-master/data')
spectorgram_parent_dir = Path('/SpEAR-speech-database-master/spectorgram')

# Sub-folders of audio_parent_dir to process. This cell used to assign
# ['Lombard', 'Monaural', 'Noisy_Recordings', 'TIMIT'] first and then
# immediately overwrite it; only the value that actually took effect is kept.
sub_dirs= ['confusion']
file_ext = "*.wav"

file_names = []  # not read by this cell; retained in case another cell relies on it
for sub_dir in sub_dirs:
    print('Processing folder: ', sub_dir)
    # makedirs(exist_ok=True) lets the cell be re-run and also creates a
    # missing parent folder; plain os.mkdir raised in both situations.
    os.makedirs(spectorgram_parent_dir/sub_dir, exist_ok=True)
    for fn in glob.glob(os.path.join(audio_parent_dir, sub_dir, file_ext)):
        samples, sample_rate = librosa.load(fn)
        S = librosa.feature.melspectrogram(y=samples, sr=sample_rate)
        # with_suffix swaps only the extension, whatever its letter case.
        filename  = spectorgram_parent_dir/sub_dir/Path(fn).with_suffix('.png').name

        # Small borderless canvas: axes and frame hidden so only the
        # spectrogram ends up in the image.
        fig = plt.figure(figsize=[0.72,0.72])
        try:
            ax = fig.add_subplot(111)
            ax.axes.get_xaxis().set_visible(False)
            ax.axes.get_yaxis().set_visible(False)
            ax.set_frame_on(False)
            librosa.display.specshow(librosa.power_to_db(S, ref=np.max))
            plt.savefig(filename, dpi=400, bbox_inches='tight',pad_inches=0)
        finally:
            # Always release the figure, even if drawing or saving fails.
            plt.close('all')
        

In [69]:
# This section converts the Urban audio data into mel-spectrogram images
def create_urban_mel_spectrograms(fold):
    """Render every WAV file of one fold as a mel-spectrogram PNG.

    Reads ``/audio_data/noise/<fold>/*.wav`` and writes one image per file to
    ``/spectrogram/noise/<fold>/``, keeping the base name and changing the
    extension to ``.png``.

    Args:
        fold: Name of the fold sub-folder. It is converted with ``str``, so an
            integer fold number works as well as a string.

    Returns:
        None. The images are written to disk.
    """
    spectrogram_path = Path('/spectrogram/noise/')
    audio_path = Path('/audio_data/noise/')
    fold = str(fold)  # Path / int is a TypeError; accept numeric folds too
    print(f'Processing fold {fold}')

    files= list((audio_path/fold).glob('*.wav'))
    # exist_ok makes the function safe to re-run; missing parents are created.
    os.makedirs(spectrogram_path/fold, exist_ok=True)
    for audio_file in files:
        samples, sample_rate = librosa.load(audio_file)
        S = librosa.feature.melspectrogram(y=samples, sr=sample_rate)
        # with_suffix swaps only the extension, whatever its letter case.
        filename  = spectrogram_path/fold/Path(audio_file).with_suffix('.png').name

        # Small borderless canvas: axes and frame hidden so only the
        # spectrogram ends up in the image.
        fig = plt.figure(figsize=[0.72,0.72])
        try:
            ax = fig.add_subplot(111)
            ax.axes.get_xaxis().set_visible(False)
            ax.axes.get_yaxis().set_visible(False)
            ax.set_frame_on(False)
            librosa.display.specshow(librosa.power_to_db(S, ref=np.max))
            plt.savefig(filename, dpi=400, bbox_inches='tight',pad_inches=0)
        finally:
            # Always release the figure, even if drawing or saving fails.
            plt.close('all')
        
# Generate spectrograms for the folds named "1" through "10"
# (range's upper bound is exclusive); each number is passed as a string.
for i in range(1, 11):
    create_urban_mel_spectrograms(str(i))

In [82]:
# This section mixes the Urban audio data with noise and then converts it into mel-spectrogram images

def create_urban_spectrograms_with_noise(fold, noisePath, noisetype):
    """Mix noise into randomly chosen clips of one folder and save the results.

    Up to ten files are sampled from ``/audio_data/good/<fold>``. For each one
    a noise recording is picked at random from the requested category, scaled
    by ``get_noise_from_sound`` (called with ``SNR=10``) and added to the clip.
    The mix is passed through ``librosa.effects.trim``, cut to at most four
    seconds, written as a WAV file into the current working directory and
    rendered as a mel-spectrogram PNG in
    ``/spectrogram/noise/audio_mixed_noise/``.

    Args:
        fold: Name of the sub-folder of ``/audio_data/good/`` to sample from.
        noisePath: Not used by this function; kept so existing calls stay valid.
        noisetype: One of 'crackle_noise_files', 'electric_noise_files',
            'other_noise_files' or 'white_pink_brown_noise_files'. Any other
            value selects from the combined ``all_noise_files`` list.

    Returns:
        None. A failure on an individual file is reported and that file is
        skipped; the remaining files are still processed.
    """
    # Local import: scipy's WAV writer stands in for librosa.output.write_wav,
    # which no longer exists in librosa 0.8 and later.
    from scipy.io import wavfile

    spectrogram_path = Path('/spectrogram/noise/audio_mixed_noise/')
    audio_path = Path('/audio_data/good/')
    noise_file_path = '/audio_data/noise/only_noise/'

    crackle_noise_files = ['Crackle_02.wav', 'Crackling_Fireplace.wav', 'Tape_Noise_02.wav']
    electric_noise_files= ['Air_FX_01.wav', 'electriccurrent.wav', 'EMOTOR.wav', 'ESPARK1.wav', 'fire.wav', 'Gate_Filtered_01(130BPM).wav', 'ha.wav', 'hi-tensionpower.wav', 'Juno_60_Raw_b.wav']
    other_noise_files = ['Noise_09.wav', 'Noise_Hit_01.wav', 'Perc_Hit_06.wav']
    white_pink_brown_noise_files = ['brown.wav', 'pink.wav', 'white.wav', 'noise.wav']
    # NOTE(review): this list is defined but no noisetype value selects it;
    # that matches the original behaviour -- confirm whether it is intended.
    spear_noise_files= ['f16noiseR2_16.wav', 'factoryR1_16.wav', 'pinkR5_16.wav', 'volvoR1_16.wav']
    all_noise_files = ['f16noiseR2_16.wav', 'factoryR1_16.wav', 'pinkR5_16.wav', 'brown.wav', 'pink.wav', 'white.wav', 'noise.wav', 'Noise_09.wav','Crackling_Fireplace.wav','Air_FX_01.wav', 'electriccurrent.wav', 'EMOTOR.wav', 'ESPARK1.wav', 'fire.wav', 'Gate_Filtered_01(130BPM).wav', 'hi-tensionpower.wav', 'Juno_60_Raw_b.wav']

    # The category does not change between files, so resolve it once.
    noise_categories = {
        'crackle_noise_files': crackle_noise_files,
        'electric_noise_files': electric_noise_files,
        'other_noise_files': other_noise_files,
        'white_pink_brown_noise_files': white_pink_brown_noise_files,
    }
    candidate_noise_files = noise_categories.get(noisetype, all_noise_files)

    print(f'Processing fold {fold} and NoiseType {noisetype}')

    # Without the output folder every savefig call would fail.
    os.makedirs(spectrogram_path, exist_ok=True)

    fold_dir = audio_path/f'{fold}'
    available_files = os.listdir(fold_dir)
    # random.sample raises ValueError when asked for more items than exist.
    filenames = random.sample(available_files, min(10, len(available_files)))
    for audio_file in filenames:
        try:
            samples, sample_rate = librosa.load(fold_dir/audio_file)

            noise_file = random.choice(candidate_noise_files)
            noise_sample, _ = librosa.load(noise_file_path + noise_file, res_type='kaiser_fast')

            # Truncate both signals to the shorter one so they can be added.
            common_length = min(len(samples), len(noise_sample))
            samples = samples[:common_length]
            noise_sample = noise_sample[:common_length]

            scaled_noise = get_noise_from_sound(samples, noise_sample, SNR=10)
            if scaled_noise is None:
                # The helper signals failure with None; skip this file explicitly
                # instead of failing on `samples + None`.
                print("Error encountered while parsing file: ", audio_file, "(noise could not be scaled)")
                continue
            mixed_noise_signal = samples + scaled_noise

            trimmed_signal, _ = librosa.effects.trim(mixed_noise_signal)
            # Keep at most four seconds of audio.
            split_signal = trimmed_signal[:4*sample_rate]

            # Write the mixed clip as a WAV file (into the current working directory).
            fileName = Path(audio_file).name
            wavfile.write(fileName, sample_rate, split_signal)

            # Render the mixed clip as a mel-spectrogram image.
            S = librosa.feature.melspectrogram(y=split_signal, sr=sample_rate)
            filename  = spectrogram_path/Path(audio_file).with_suffix('.png').name
            fig = plt.figure(figsize=[0.72,0.72])
            try:
                ax = fig.add_subplot(111)
                ax.axes.get_xaxis().set_visible(False)
                ax.axes.get_yaxis().set_visible(False)
                ax.set_frame_on(False)
                librosa.display.specshow(librosa.power_to_db(S, ref=np.max))
                plt.savefig(filename, dpi=400, bbox_inches='tight',pad_inches=0)
            finally:
                # Always release the figure, even if drawing or saving fails.
                plt.close('all')

        except Exception as e:
            # Best effort: report the file and the cause, then continue.
            print("Error encountered while parsing file: ", audio_file, e)

# Mix noise into clips sampled from three source folders: 'flicker',
# 'spear_good' and 'urban'. The second argument (noisePath) is never read by
# the function body. 'all_noise_files' is not one of the four category names
# the function checks, so noise is drawn from its combined all_noise_files list.
create_urban_spectrograms_with_noise('flicker', 'Electric_noise', 'all_noise_files' )
create_urban_spectrograms_with_noise('spear_good', 'other', 'all_noise_files' )
create_urban_spectrograms_with_noise('urban', 'spear_noise', 'all_noise_files' )

In [50]:
# This section converts the Flickr audio data into mel-spectrogram images

def create_flicker_spectrograms():
    """Render each Flickr WAV recording as a mel-spectrogram PNG.

    Every ``*.wav`` file found directly in
    ``/flickr_audio.tar/flickr_audio/wavs/`` is loaded with librosa and drawn
    on a small canvas whose axes and frame are hidden. The image is saved to
    ``/flickr_audio.tar/spectrogram/`` under the same file name with ``.wav``
    replaced by ``.png``. Takes no arguments and returns nothing.
    """
    wav_dir = Path('/flickr_audio.tar/flickr_audio/wavs/')
    png_dir = Path('/flickr_audio.tar/spectrogram/')

    for wav_path in list(wav_dir.glob('*.wav')):
        signal, rate = librosa.load(wav_path)

        # Borderless canvas: hide both axes and the surrounding frame.
        canvas = plt.figure(figsize=(0.72, 0.72))
        axes = canvas.add_subplot(1, 1, 1)
        axes.get_xaxis().set_visible(False)
        axes.get_yaxis().set_visible(False)
        axes.set_frame_on(False)

        png_path = png_dir/wav_path.name.replace('.wav','.png')

        # Mel power spectrogram, drawn in dB relative to its maximum value.
        mel_power = librosa.feature.melspectrogram(y=signal, sr=rate)
        mel_db = librosa.power_to_db(mel_power, ref=np.max)
        librosa.display.specshow(mel_db)
        plt.savefig(png_path, bbox_inches='tight', pad_inches=0, dpi=400)
        plt.close('all')
        
# Convert every WAV file in the Flickr audio folder into a spectrogram image.
create_flicker_spectrograms()

In [51]:
# This section mixes the Flickr audio data with noise and converts the result into mel-spectrogram images

def create_flciker_spectrograms_with_noise(num_files=4001):
    """Mix noise into randomly chosen Flickr clips and save mel-spectrogram PNGs.

    Files are sampled at random from the Flickr ``wavs`` folder. For each one
    a noise recording is picked at random from ``all_noise_files``, scaled by
    ``get_noise_from_sound`` (called with ``SNR=10``) and added to the clip.
    The mix is passed through ``librosa.effects.trim``, cut to at most four
    seconds and rendered as a PNG in the ``spectrogram_noise`` folder.

    Args:
        num_files: Maximum number of clips to process. Defaults to 4001, the
            value that used to be hard-coded. When the folder holds fewer
            files, all of them are used.

    Returns:
        None. A failure on an individual file is reported and that file is
        skipped; the remaining files are still processed.
    """
    spectrogram_path = Path('D:/Abhishek/Machine Learning Models/Audio Data Analysis/flickr_audio.tar/spectrogram_noise/')
    audio_path = Path('D:/Abhishek/Machine Learning Models/Audio Data Analysis/flickr_audio.tar/flickr_audio/wavs/')
    all_noise_files = ['f16noiseR2_16.wav', 'factoryR1_16.wav', 'pinkR5_16.wav', 'brown.wav', 'pink.wav', 'white.wav', 'noise.wav', 'Noise_09.wav','Crackling_Fireplace.wav','Air_FX_01.wav', 'electriccurrent.wav', 'EMOTOR.wav', 'ESPARK1.wav', 'fire.wav', 'Gate_Filtered_01(130BPM).wav', 'ha.wav', 'hi-tensionpower.wav', 'Juno_60_Raw_b.wav']
    noise_file_path = 'D:/Abhishek/Machine Learning Models/Audio Data Analysis/Final_Audio_Signals/noise/'

    # Without the output folder every savefig call would fail.
    os.makedirs(spectrogram_path, exist_ok=True)

    available_files = os.listdir(audio_path)
    # random.sample raises ValueError when asked for more items than exist.
    filenames = random.sample(available_files, min(num_files, len(available_files)))
    print(len(filenames))
    for audio_file in filenames:
        try:
            samples, sample_rate = librosa.load(audio_path/audio_file)

            noise_file = random.choice(all_noise_files)
            noise_sample, _ = librosa.load(noise_file_path + noise_file, res_type='kaiser_fast')

            # Truncate both signals to the shorter one so they can be added.
            common_length = min(len(samples), len(noise_sample))
            samples = samples[:common_length]
            noise_sample = noise_sample[:common_length]

            scaled_noise = get_noise_from_sound(samples, noise_sample, SNR=10)
            if scaled_noise is None:
                # The helper signals failure with None; skip this file explicitly
                # instead of failing on `samples + None`.
                print("Error encountered while parsing file: ", audio_file, "(noise could not be scaled)")
                continue
            mixed_noise_signal = samples + scaled_noise

            trimmed_signal, _ = librosa.effects.trim(mixed_noise_signal)
            # Keep at most four seconds of audio.
            split_signal = trimmed_signal[:4*sample_rate]

            S = librosa.feature.melspectrogram(y=split_signal, sr=sample_rate)
            filename  = spectrogram_path/Path(audio_file).with_suffix('.png').name
            fig = plt.figure(figsize=[0.72,0.72])
            try:
                ax = fig.add_subplot(111)
                ax.axes.get_xaxis().set_visible(False)
                ax.axes.get_yaxis().set_visible(False)
                ax.set_frame_on(False)
                librosa.display.specshow(librosa.power_to_db(S, ref=np.max))
                plt.savefig(filename, dpi=400, bbox_inches='tight',pad_inches=0)
            finally:
                # Always release the figure, even if drawing or saving fails.
                plt.close('all')
        except Exception as e:
            # Best effort: report the file and the cause, then continue.
            print("Error encountered while parsing file: ", audio_file, e)

# Run the Flickr noise-mixing pipeline; it prints how many files were sampled.
create_flciker_spectrograms_with_noise()

4001
