# MEL SPECTROGRAM EXTRACTION

In [1]:
import os
import scipy as sp
import numpy as np
import librosa as lb
import noisereduce as nr
import librosa.display
import matplotlib.pyplot as plt
from scipy.signal import butter, lfilter
import time

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import matplotlib
matplotlib.use('Agg')

In [3]:
### Para di magpakita warning message sa mel_spec ###
import warnings
warnings.filterwarnings('ignore')

<hr>

## File Paths, Initializations, and Creating Folders

In [16]:
# spec_path

In [4]:
# path = "D:\\Aj\\BirdClassification\\mel spec 20 classes 5 seconds"
path = "D:\\Aj\\BirdClassification\\NEW DATASET"

# wavfiles_path = "D:\\Aj\\BirdClassification\\audio dataset\\original"
# wavfiles_path = "D:\\Aj\\BirdClassification\\mel spec 20 classes 7 seconds\\noise reduced wavfiles"
# wavfiles_path = "D:\\Aj\\BirdClassification\\mel spec 20 classes 7 seconds\\noise injected wavfiles"
wavfiles_path = "D:\\Aj\\BirdClassification\\NEW DATASET\\audio dataset\\original"

# spec_path = os.path.join(path, 'Mel Spectrogram no filter/')
# spec_path = os.path.join(path, 'Mel Spectrogram 20 classes median clipping original/')
# spec_path = os.path.join(path, 'Mel Spectrogram 20 classes bandpass filter only/')
# spec_path = os.path.join(path, 'Mel Spectrogram 20 classes with noise reduce only/')
# spec_path = os.path.join(path, 'Mel Spectrogram with noise reduced and horizontal shift augmentation') # ETO YUNG GINAMIT SA THESIS
spec_path = os.path.join(path, 'FINAL MEL SPEC')
spec_path_orig = os.path.join(spec_path, 'original')
spec_path_timestretch = os.path.join(spec_path, 'timestretch')
spec_path_pitchshift = os.path.join(spec_path, 'pitchshift')
spec_path_noiseinjected = os.path.join(spec_path, 'noiseinjectedFINAL')
spec_path_noisereduced = os.path.join(spec_path, 'noisereduced')
spec_path_horizontalshift = os.path.join(spec_path, 'horizontalshift')

In [5]:
FRAME_SIZE = 2048
HOP_SIZE = 512
SR = 22050
N_MELS = 128
LOWCUT = 1000
HIGHCUT = 11000
DURATION = 5.0

species = os.listdir(wavfiles_path)
species = sorted(species)
print(species, len(species))
# species = ["ZebraDove"]

['AsianGlossyStarling', 'Black-crownedNightHeron', 'Black-napedOriole', 'Blue-tailedBee-eater', 'BrownShrike', 'ChestnutMunia', 'CollaredKingfisher', 'EurasianTreeSparrow', 'Grey-backedTailorbird', 'GreyWagtail', 'MangroveBlueFlycatcher', 'Olive-backedSunbird', 'PhilippineMagpie-Robin', 'PhilippinePiedFantail', 'PiedBushChat', 'Red-keeledFlowerpecker', 'White-breastedWaterhen', 'Yellow-ventedBulbul', 'ZebraDove'] 19


In [19]:
# speciesfreq_path = "D:\\Aj\\BirdClassification\\mel spec 20 classes 7 seconds\\species frequencies.txt"

# with open(speciesfreq_path, 'r') as f:
#     species_freq = f.readlines()
#     species_freq = species_freq[0]
#     species_freq = species_freq.strip("[]")
#     species_freq = species_freq.split(",")
    
# for idx, line in enumerate(species_freq):
#     species_freq[idx] = float(line)

# print(species_freq, len(species_freq))

<hr>

## Functions

In [12]:
def butter_bandpass(lowcut, highcut, fs, order=6):
    return butter(order, [lowcut, highcut], fs=fs, btype='band')

def butter_bandpass_filter(data, lowcut, highcut, fs, order=6):
    b, a = butter_bandpass(lowcut, highcut, fs, order=order)
    y = lfilter(b, a, data)
    return y

In [13]:
def split_audio(file):
    x = lb.get_duration(filename=file)
    x = np.floor(x)
    splits = int(x // DURATION)
    return splits

In [14]:
def find_frequency(audio):
    w = np.fft.fft(audio)
    freqs = np.fft.fftfreq(len(w))
    idx = np.argmax(np.abs(w))
    freq = freqs[idx]
    freq_in_hertz = abs(freq * SR)
    return freq_in_hertz

In [15]:
def extract_spectrogram_original(file, splits):
    offset = 0
    duration = DURATION * SR
    duration_in_seconds = duration / SR   # This is equal to 5 seconds
    
    if (splits != 0):
    ## Loading the audio into individual 5-second segments and preprocess from there
        for i in range(1, splits+1):
            save_path = spec_path_orig + '/' + foldername + '/' + filename + '-' + str(i) + '.png' # path for audio files that are not split
            audio, _ = librosa.load(file, offset=offset, duration=DURATION)
            offset += DURATION
            audio = butter_bandpass_filter(audio, LOWCUT, HIGHCUT, SR)
            # audio = nr.reduce_noise(y=audio, sr=SR)
            # audio = butter_bandpass_filter(audio, birdfreq-bound, birdfreq+bound, SR)
            if np.all(np.isnan(audio) == True) or np.all(audio == 0):
                continue
            
            ## original
            mel = lb.feature.melspectrogram(audio, sr=SR, n_fft=FRAME_SIZE, hop_length=HOP_SIZE, n_mels=N_MELS)
            log_mel = lb.power_to_db(mel)
            fig = librosa.display.specshow(log_mel, cmap='jet', sr=SR)
            fig.figure.savefig(save_path, bbox_inches='tight', pad_inches=0.0)
            fig.remove()
            plt.close()
            
            ## pitch_shift
            save_path = spec_path_pitchscale + '/' + foldername + '/' + filename + '_pitchscaled' + '-' + str(i) + '.png' # path for audio files that are not split
            # num_semitones = np.random.normal(1.5, .25, size=None)
            num_semitones = np.random.normal(1.75, .25, size=None)
            audio_ps = librosa.effects.pitch_shift(audio, sr=SR, n_steps=num_semitones)
            mel = lb.feature.melspectrogram(audio_ps, sr=SR, n_fft=FRAME_SIZE, hop_length=HOP_SIZE, n_mels=N_MELS)
            log_mel = lb.power_to_db(mel)
            fig = librosa.display.specshow(log_mel, cmap='jet', sr=SR)
            fig.figure.savefig(save_path, bbox_inches='tight', pad_inches=0.0)
            fig.remove()
            plt.close()
            
            ## Time-stretched
            save_path = spec_path_timestretch + '/' + foldername + '/' + filename + '_timestretched' + '-' + str(i) + '.png' # path for audio files that are not split
            # time_stretch_rate = np.random.normal(1, 0.05, size=None)
            time_stretch_rate = np.random.normal(1.5, 0.05, size=None)
            audio_ts = librosa.effects.time_stretch(audio, rate=time_stretch_rate)
            mel = lb.feature.melspectrogram(audio_ts, sr=SR, n_fft=FRAME_SIZE, hop_length=HOP_SIZE, n_mels=N_MELS)
            log_mel = lb.power_to_db(mel)
            fig = librosa.display.specshow(log_mel, cmap='jet', sr=SR)
            fig.figure.savefig(save_path, bbox_inches='tight', pad_inches=0.0)
            fig.remove()
            plt.close()
            
    else:
        save_path = spec_path_orig + '/' + foldername + '/' + filename  + '.png' # path for audio files that are not split
        audio, _ = librosa.load(file)
        audio = butter_bandpass_filter(audio, LOWCUT, HIGHCUT, SR)
        # audio = nr.reduce_noise(y=audio, sr=SR)
        # audio = butter_bandpass_filter(audio, birdfreq-bound, birdfreq+bound, SR)
        if np.all(np.isnan(audio) == True) or np.all(audio == 0):
            next
        audio = librosa.util.pad_center(audio, size=duration)
        
        ## original
        mel = lb.feature.melspectrogram(audio, sr=SR, n_fft=FRAME_SIZE, hop_length=HOP_SIZE, n_mels=N_MELS)
        log_mel = lb.power_to_db(mel)
        fig = librosa.display.specshow(log_mel, cmap='jet', sr=SR)
        fig.figure.savefig(save_path, bbox_inches='tight', pad_inches=0.0)
        fig.remove()
        plt.close()
        
        ## Pitch shift
        save_path = spec_path_pitchscale + '/' + foldername + '/' + filename + '_pitchscaled' + '.png' # path for audio files that are not split
        num_semitones = np.random.normal(1.5, .25, size=None)
        audio_ps = librosa.effects.pitch_shift(audio, sr=SR, n_steps=num_semitones)
        mel = lb.feature.melspectrogram(audio, sr=SR, n_fft=FRAME_SIZE, hop_length=HOP_SIZE, n_mels=N_MELS)
        log_mel = lb.power_to_db(mel)
        fig = librosa.display.specshow(log_mel, cmap='jet', sr=SR)
        fig.figure.savefig(save_path, bbox_inches='tight', pad_inches=0.0)
        fig.remove()
        plt.close()
        
        ## Time-stretched
        save_path = spec_path_timestretch + '/' + foldername + '/' + filename + '_timestretched' + '.png' # path for audio files that are not split
        time_stretch_rate = np.random.normal(1.5, 0.05, size=None)
        audio_ts = librosa.effects.time_stretch(audio, rate=time_stretch_rate)
        mel = lb.feature.melspectrogram(audio, sr=SR, n_fft=FRAME_SIZE, hop_length=HOP_SIZE, n_mels=N_MELS)
        log_mel = lb.power_to_db(mel)
        fig = librosa.display.specshow(log_mel, cmap='jet', sr=SR)
        fig.figure.savefig(save_path, bbox_inches='tight', pad_inches=0.0)
        fig.remove()
        plt.close()

In [17]:
# create folder for mel spectrograms
if not os.path.exists(spec_path):
    os.mkdir(spec_path)
    os.mkdir(spec_path_orig)
    os.mkdir(spec_path_pitchscale)
    os.mkdir(spec_path_timestretch)
    os.mkdir(spec_path_noiseinjected)
    os.mkdir(spec_path_noisereduced)
    os.mkdir(spec_path_horizontalshift)
    
os.mkdir(spec_path_noiseinjected)
for i in species:
    if not os.path.exists(spec_path_orig + '/' + i):
        os.mkdir(spec_path_orig + '/' + i)
    if not os.path.exists(spec_path_timestretch + '/' + i):
        os.mkdir(spec_path_timestretch + '/' + i)
    if not os.path.exists(spec_path_pitchscale + '/' + i):
        os.mkdir(spec_path_pitchscale + '/' + i)
    if not os.path.exists(spec_path_noiseinjected + '/' + i):
        os.mkdir(spec_path_noiseinjected + '/' + i)
    # if not os.path.exists(spec_path_noisereduced + '/' + i):
        # os.mkdir(spec_path_noisereduced + '/' + i)
    if not os.path.exists(spec_path_horizontalshift + '/' + i):
        os.mkdir(spec_path_horizontalshift + '/' + i)

In [29]:
import time

## Original + Pitch Shift + Time Stretch

In [41]:
folder_idx = 0
time1 = time.time()

for folder_idx, foldername in enumerate(species):
    start_time = time.time()
    os.chdir(os.path.join(wavfiles_path, foldername))
    files = os.listdir(os.path.join(wavfiles_path, foldername))
    print(foldername)
    
    for file in files:
        filename, _ = os.path.splitext(file)
        splits = split_audio(file)
        extract_spectrogram_original(file, splits)
        
    folder_idx += 1
    end_time = time.time()
    print(f'Total time to preprocess {foldername}: {np.round(end_time-start_time, 2)}')

time2 = time.time()
print(f"Total Time: {np.round((time2 - time1),2) / 60} minutes")

AsianGlossyStarling
Total time to preprocess AsianGlossyStarling: 145.98
Black-crownedNightHeron
Total time to preprocess Black-crownedNightHeron: 116.98
Black-napedOriole
Total time to preprocess Black-napedOriole: 176.89
Blue-headedFantail
Total time to preprocess Blue-headedFantail: 34.57
Blue-tailedBee-eater
Total time to preprocess Blue-tailedBee-eater: 123.8
BrownShrike
Total time to preprocess BrownShrike: 185.05
ChestnutMunia
Total time to preprocess ChestnutMunia: 63.54
CollaredKingfisher
Total time to preprocess CollaredKingfisher: 323.35
EurasianTreeSparrow
Total time to preprocess EurasianTreeSparrow: 962.56
Grey-backedTailorbird
Total time to preprocess Grey-backedTailorbird: 136.01
GreyWagtail
Total time to preprocess GreyWagtail: 168.6
MangroveBlueFlycatcher
Total time to preprocess MangroveBlueFlycatcher: 252.13
Olive-backedSunbird
Total time to preprocess Olive-backedSunbird: 367.37
PhilippineMagpie-Robin
Total time to preprocess PhilippineMagpie-Robin: 84.58
Philippin

#### Noise Injected Audio Files

In [19]:
def extract_spectrogram_noise_injected(file, splits):
    offset = 0
    duration = DURATION * SR
    duration_in_seconds = duration / SR   # This is equal to 5 seconds
    
    if (splits != 0):
        ## Loading the audio into individual 5-second segments and preprocess from there
        for i in range(1, splits+1):
            save_path = spec_path_noiseinjected + '/' + foldername + '/' + filename + '_noiseinjected' + '-' + str(i) + '.png' # path for audio files that are not split
            offset += DURATION
            audio, _ = librosa.load(file, offset=offset, duration=DURATION)
            if len(audio) != duration:
                continue
            audio = butter_bandpass_filter(audio, LOWCUT, HIGHCUT, SR)
            mel = lb.feature.melspectrogram(audio, sr=SR, n_fft=FRAME_SIZE, hop_length=HOP_SIZE, n_mels=N_MELS)
            log_mel = lb.power_to_db(mel)
            fig = librosa.display.specshow(log_mel, cmap='jet', sr=SR)
            fig.figure.savefig(save_path, bbox_inches='tight', pad_inches=0.0)
            fig.remove()
            plt.close()
    else:
        save_path = spec_path_noiseinjected + '/' + foldername + '/' + filename + '_noiseinjected' + '.png' # path for audio files that are not split
        audio, _ = librosa.load(file)
        audio = butter_bandpass_filter(audio, LOWCUT, HIGHCUT, SR)
        audio = librosa.util.pad_center(audio, size=duration)
        mel = lb.feature.melspectrogram(audio, sr=SR, n_fft=FRAME_SIZE, hop_length=HOP_SIZE, n_mels=N_MELS)
        log_mel = lb.power_to_db(mel)
        fig = librosa.display.specshow(log_mel, cmap='jet', sr=SR)
        fig.figure.savefig(save_path, bbox_inches='tight', pad_inches=0.0)
        fig.remove()
        plt.close()

In [7]:
# wavfiles_path = "D:\\Aj\\BirdClassification\\NEW DATASET\\audio dataset\\noise injected wavfiles"
wavfiles_path = "D:\\Aj\\BirdClassification\\NEW DATASET\\NEW AUDIOS"

In [8]:
import time

In [20]:
folder_idx = 0

for folder_idx, foldername in enumerate(species):
    start_time = time.time()
    os.chdir(os.path.join(wavfiles_path, foldername))
    files = os.listdir(os.path.join(wavfiles_path, foldername))
    print(foldername)
    
    for file in files:
        filename, _ = os.path.splitext(file)
        splits = split_audio(file)
        # birdfreq = species_freq[folder_idx]
        # bound = birdfreq * 0.25
        extract_spectrogram_noise_injected(file, splits)
        
    folder_idx += 1
    end_time = time.time()
    print(f'Total time to preprocess {foldername}: {np.round(end_time-start_time, 2)}')

AsianGlossyStarling
Total time to preprocess AsianGlossyStarling: 21.14
Black-crownedNightHeron
Total time to preprocess Black-crownedNightHeron: 12.85
Black-napedOriole
Total time to preprocess Black-napedOriole: 22.57
Blue-tailedBee-eater
Total time to preprocess Blue-tailedBee-eater: 15.61
BrownShrike
Total time to preprocess BrownShrike: 23.91
ChestnutMunia
Total time to preprocess ChestnutMunia: 7.45
CollaredKingfisher
Total time to preprocess CollaredKingfisher: 39.21
EurasianTreeSparrow
Total time to preprocess EurasianTreeSparrow: 98.48
Grey-backedTailorbird
Total time to preprocess Grey-backedTailorbird: 13.56
GreyWagtail
Total time to preprocess GreyWagtail: 16.65
MangroveBlueFlycatcher
Total time to preprocess MangroveBlueFlycatcher: 25.18
Olive-backedSunbird
Total time to preprocess Olive-backedSunbird: 40.43
PhilippineMagpie-Robin
Total time to preprocess PhilippineMagpie-Robin: 9.06
PhilippinePiedFantail
Total time to preprocess PhilippinePiedFantail: 15.22
PiedBushChat
T

## Noise Reduced

In [46]:
def extract_spectrogram_noise_reduced(file, splits):
    offset = 0
    duration = DURATION * SR
    duration_in_seconds = duration / SR   # This is equal to 5 seconds
    
    if (splits != 0):
        ## Loading the audio into individual 5-second segments and preprocess from there
        for i in range(1, splits+1):
            save_path = spec_path_noisereduced + '/' + foldername + '/' + filename + '_noisereduced' + '-' + str(i) + '.png' # path for audio files that are not split
            offset += DURATION
            audio, _ = librosa.load(file, offset=offset, duration=DURATION)
            if len(audio) != duration:
                continue
            mel = lb.feature.melspectrogram(audio, sr=SR, n_fft=FRAME_SIZE, hop_length=HOP_SIZE, n_mels=N_MELS)
            log_mel = lb.power_to_db(mel)
            fig = librosa.display.specshow(log_mel, cmap='jet', sr=SR)
            fig.figure.savefig(save_path, bbox_inches='tight', pad_inches=0.0)
            fig.remove()
            plt.close()
    else:
        save_path = spec_path_noisereduced + '/' + foldername + '/' + filename + '_noisereduced' + '.png' # path for audio files that are not split
        audio, _ = librosa.load(file)
        audio = librosa.util.pad_center(audio, size=duration)
        mel = lb.feature.melspectrogram(audio, sr=SR, n_fft=FRAME_SIZE, hop_length=HOP_SIZE, n_mels=N_MELS)
        log_mel = lb.power_to_db(mel)
        fig = librosa.display.specshow(log_mel, cmap='jet', sr=SR)
        fig.figure.savefig(save_path, bbox_inches='tight', pad_inches=0.0)
        fig.remove()
        plt.close()

In [49]:
wavfiles_path = "D:\\Aj\\BirdClassification\\NEW DATASET\\audio dataset\\noise reduced wavfiles"

In [51]:
folder_idx = 0

for folder_idx, foldername in enumerate(species):
    start_time = time.time()
    os.chdir(os.path.join(wavfiles_path, foldername))
    files = os.listdir(os.path.join(wavfiles_path, foldername))
    print(foldername)
    
    for file in files:
        filename, _ = os.path.splitext(file)
        splits = split_audio(file)
        # birdfreq = species_freq[folder_idx]
        # bound = birdfreq * 0.25
        extract_spectrogram_noise_reduced(file, splits)
        
    folder_idx += 1
    end_time = time.time()
    print(f'Total time to preprocess {foldername}: {np.round(end_time-start_time, 2)}')

AsianGlossyStarling
Total time to preprocess AsianGlossyStarling: 17.28
Black-crownedNightHeron
Total time to preprocess Black-crownedNightHeron: 13.54
Black-napedOriole
Total time to preprocess Black-napedOriole: 18.82
Blue-headedFantail
Total time to preprocess Blue-headedFantail: 3.57
Blue-tailedBee-eater
Total time to preprocess Blue-tailedBee-eater: 13.22
BrownShrike
Total time to preprocess BrownShrike: 23.96
ChestnutMunia
Total time to preprocess ChestnutMunia: 7.66
CollaredKingfisher
Total time to preprocess CollaredKingfisher: 42.19
EurasianTreeSparrow
Total time to preprocess EurasianTreeSparrow: 89.7
Grey-backedTailorbird
Total time to preprocess Grey-backedTailorbird: 12.9
GreyWagtail
Total time to preprocess GreyWagtail: 16.57
MangroveBlueFlycatcher
Total time to preprocess MangroveBlueFlycatcher: 23.89
Olive-backedSunbird
Total time to preprocess Olive-backedSunbird: 32.91
PhilippineMagpie-Robin
Total time to preprocess PhilippineMagpie-Robin: 7.95
PhilippinePiedFantail
T

## Horizontal Shift

In [11]:
import random

In [25]:
def extract_horizontal_shift(file, splits):
    offset = 0
    duration = DURATION * SR
    duration_in_seconds = duration / SR   # This is equal to 5 seconds
    
    if (splits != 0):
        ## Loading the audio into individual 5-second segments and preprocess from there
        for i in range(1, splits+1):
            save_path = spec_path_horizontalshift + '/' + foldername + '/' + filename + '_shifted' + '-' + str(i) + '.png' # path for audio files that are not split
            offset += DURATION
            horizontal_shift = np.round(random.uniform(1.0, 4.0), 2)  # Randomize a number from 1 to 4 seconds with two decimal places. This will be the offset for shifting the audio.
            ## Try-Except statement is for shifting until the end of the audio recording. Once it reaches the end, it will result to an error since the length is now 0 and it continue function will be used. 
            try:
                audio, _ = librosa.load(file, offset=offset+horizontal_shift, duration=DURATION)
                if len(audio) < duration:
                    padding = np.zeros(int(duration)-len(audio))
                    audio = np.concatenate((audio, padding), axis=None)
            except:
                continue
                
            audio = butter_bandpass_filter(audio, 1000, 11000, SR)
            mel = lb.feature.melspectrogram(audio, sr=SR, n_fft=FRAME_SIZE, hop_length=HOP_SIZE, n_mels=N_MELS)
            log_mel = lb.power_to_db(mel)
            fig = librosa.display.specshow(log_mel, cmap='jet', sr=SR)
            fig.figure.savefig(save_path, bbox_inches='tight', pad_inches=0.0)
            fig.remove()
            plt.close()
    else:
        save_path = spec_path_horizontalshift + '/' + foldername + '/' + filename + '_shifted' + '.png' # path for audio files that are not split
        audio, _ = librosa.load(file)
        audio = librosa.util.pad_center(audio, size=duration)
        audio = butter_bandpass_filter(audio, 1000, 11000, SR)
        mel = lb.feature.melspectrogram(audio, sr=SR, n_fft=FRAME_SIZE, hop_length=HOP_SIZE, n_mels=N_MELS)
        log_mel = lb.power_to_db(mel)
        fig = librosa.display.specshow(log_mel, cmap='jet', sr=SR)
        fig.figure.savefig(save_path, bbox_inches='tight', pad_inches=0.0)
        fig.remove()
        plt.close()

In [27]:
folder_idx = 0

for folder_idx, foldername in enumerate(species):
    start_time = time.time()
    os.chdir(os.path.join(wavfiles_path, foldername))
    files = os.listdir(os.path.join(wavfiles_path, foldername))
    print(foldername)
    
    for file in files:
        filename, _ = os.path.splitext(file)
        splits = split_audio(file)
        extract_horizontal_shift(file, splits)
        
    folder_idx += 1
    end_time = time.time()
    print(f'Total time to preprocess {foldername}: {np.round(end_time-start_time, 2)}')

AsianGlossyStarling
Total time to preprocess AsianGlossyStarling: 58.69
Black-crownedNightHeron
Total time to preprocess Black-crownedNightHeron: 44.55
Black-napedOriole
Total time to preprocess Black-napedOriole: 89.21
Blue-headedFantail
Total time to preprocess Blue-headedFantail: 15.78
Blue-tailedBee-eater
Total time to preprocess Blue-tailedBee-eater: 56.35
BrownShrike
Total time to preprocess BrownShrike: 79.3
ChestnutMunia
Total time to preprocess ChestnutMunia: 29.54
CollaredKingfisher
Total time to preprocess CollaredKingfisher: 144.22
EurasianTreeSparrow
Total time to preprocess EurasianTreeSparrow: 395.82
Grey-backedTailorbird
Total time to preprocess Grey-backedTailorbird: 65.42
GreyWagtail
Total time to preprocess GreyWagtail: 65.71
MangroveBlueFlycatcher
Total time to preprocess MangroveBlueFlycatcher: 101.5
Olive-backedSunbird
Total time to preprocess Olive-backedSunbird: 158.98
PhilippineMagpie-Robin
Total time to preprocess PhilippineMagpie-Robin: 39.61
PhilippinePiedFa

# MOVE TO ALL FOLDER

In [1]:
import os
from shutil import copy
import numpy as np

In [2]:
# path = "D:\\Aj\\BirdClassification\\mel spec 20 classes 5 seconds"
path = "D:\\Aj\\BirdClassification\\NEW DATASET"

# spec_path = os.path.join(path, 'Mel Spectrogram 20 classes/')
# spec_path = os.path.join(path, 'Mel Spectrogram 20 classes median clipping/')
# spec_path = os.path.join(path, 'Mel Spectrogram 20 classes bandpass filter only/')
spec_path = os.path.join(path, 'FINAL MEL SPEC')

spec_path_orig = os.path.join(spec_path, 'original')
spec_path_timestretch = os.path.join(spec_path, 'timestretch')
spec_path_pitchscale = os.path.join(spec_path, 'pitchscale')
spec_path_noiseinjected = os.path.join(spec_path, 'noiseinjected')
spec_path_noisereduced = os.path.join(spec_path, 'noisereduced')
spec_path_horizontalshift = os.path.join(spec_path, 'horizontalshift')

target_path = os.path.join(spec_path, 'all no noisereduced')

In [3]:
species = os.listdir(spec_path_orig)
print(species)
if not os.path.exists(target_path):
    os.mkdir(target_path)
    for x in species:
        if not os.path.exists(os.path.join(target_path, x)):
            os.mkdir(os.path.join(target_path, x))

['AsianGlossyStarling', 'Black-crownedNightHeron', 'Black-napedOriole', 'Blue-tailedBee-eater', 'BrownShrike', 'ChestnutMunia', 'CollaredKingfisher', 'EurasianTreeSparrow', 'Grey-backedTailorbird', 'GreyWagtail', 'MangroveBlueFlycatcher', 'Olive-backedSunbird', 'PhilippineMagpie-Robin', 'PhilippinePiedFantail', 'PiedBushChat', 'Red-keeledFlowerpecker', 'White-breastedWaterhen', 'Yellow-ventedBulbul', 'ZebraDove']


In [4]:
# path_list = [spec_path_orig, spec_path_timestretch, spec_path_pitchscale, spec_path_noiseinjected, spec_path_noisereduced, spec_path_horizontalshift]
path_list = [spec_path_orig, spec_path_timestretch, spec_path_pitchscale, spec_path_noiseinjected, spec_path_horizontalshift]
# path_list = [spec_path_orig, spec_path_timestretch, spec_path_pitchscale]

In [5]:
for i in path_list:
    folders = os.listdir(i)
    for folder in folders:
        os.chdir(os.path.join(i, folder))
        files = os.listdir()
        target_folder = os.path.join(target_path, folder)
        for file in files:
            copy(file, target_folder)
print("DONE")

DONE


In [6]:
os.chdir(target_path)

for i in os.listdir(target_path):
    print(i, len(os.listdir(os.path.join(target_path, i))))

AsianGlossyStarling 972
Black-crownedNightHeron 703
Black-napedOriole 883
Blue-tailedBee-eater 825
BrownShrike 1165
ChestnutMunia 432
CollaredKingfisher 2168
EurasianTreeSparrow 5322
Grey-backedTailorbird 793
GreyWagtail 972
MangroveBlueFlycatcher 1420
Olive-backedSunbird 2199
PhilippineMagpie-Robin 521
PhilippinePiedFantail 871
PiedBushChat 870
Red-keeledFlowerpecker 783
White-breastedWaterhen 992
Yellow-ventedBulbul 1023
ZebraDove 2681


# DATASET ALLOCATION

In [7]:
import os
import random
import numpy as np
from shutil import copy

In [8]:
# cnn_path = "D:\Aj\\BirdClassification\\3rd method\\CNN4 with noise injection"
# cnn_path = "D:\\Aj\\BirdClassification\\Addition sa dataset\\CNN 20 classes"
# cnn_path = "D:\\Aj\\BirdClassification\\mel spec 20 classes 7 seconds\\CNN"
# cnn_path = "D:\\Aj\\BirdClassification\\mel spec 20 classes 5 seconds\\CNN bandpass only"
# cnn_path = "D:\\Aj\\BirdClassification\\mel spec 20 classes 5 seconds\\CNN noise reduce only"
cnn_path = "D:\\Aj\\BirdClassification\\NEW DATASET\\FINAL DATASET"
# cnn_path = "D:\\Aj\\BirdClassification\\mel spec 20 classes 7 seconds\\CNN median clipping"
# cnn_path = "D:\\Aj\\BirdClassification\\mel spec 20 classes 5 seconds\\CNN median clipping"

# spectrogram_path = "D:\\Aj\\BirdClassification\\3rd method\\Mel Spectrogram"
# spectrogram_path = "D:\\Aj\\BirdClassification\\3rd method\\Mel Spectrogram5 sept1\\all"
# spectrogram_path = "D:\\Aj\\BirdClassification\\Addition sa dataset\\Mel Specs 20 classes"
# spectrogram_path = "D:\\Aj\\BirdClassification\\mel spec 20 classes 7 seconds\\Mel Spectrogram 20 classes\\all"
# spectrogram_path = "D:\\Aj\\BirdClassification\\mel spec 20 classes 7 seconds\\Mel Spectrogram 20 classes median clipping\\all"
spectrogram_path = "D:\\Aj\\BirdClassification\\NEW DATASET\\FINAL MEL SPEC\\all no noisereduced"
# spectrogram_path = "D:\\Aj\\BirdClassification\\mel spec 20 classes 5 seconds\\Mel Spectrogram 20 classes with noise reduce only\\all"

In [9]:
## Creating folder for CNN
if not os.path.exists(cnn_path):
    os.mkdir(cnn_path)
    
if not os.path.exists(os.path.join(cnn_path, 'train')):
    os.mkdir(os.path.join(cnn_path, 'train'))
    
if not os.path.exists(os.path.join(cnn_path, 'test')):
    os.mkdir(os.path.join(cnn_path, 'test'))    
    
# create folder in CNN if not exist yet
species_name = os.listdir(os.path.join(spectrogram_path))
for i in species_name:
    if not os.path.exists(os.path.join(cnn_path, 'train', i)):
        os.mkdir(os.path.join(cnn_path, 'train', i))
    if not os.path.exists(os.path.join(cnn_path, 'test', i)):
        os.mkdir(os.path.join(cnn_path, 'test', i))

In [10]:
print(species_name, len(species_name))

['AsianGlossyStarling', 'Black-crownedNightHeron', 'Black-napedOriole', 'Blue-tailedBee-eater', 'BrownShrike', 'ChestnutMunia', 'CollaredKingfisher', 'EurasianTreeSparrow', 'Grey-backedTailorbird', 'GreyWagtail', 'MangroveBlueFlycatcher', 'Olive-backedSunbird', 'PhilippineMagpie-Robin', 'PhilippinePiedFantail', 'PiedBushChat', 'Red-keeledFlowerpecker', 'White-breastedWaterhen', 'Yellow-ventedBulbul', 'ZebraDove'] 19


In [11]:
# Using Stratified Random Sampling with 80% train & 20% validation
def allocate_imgs(img):
    count = []
    for _, _, files in os.walk(spectrogram_path):
        x = len(files)
        count.append(x)
    count.remove(0)
    min_files = min(count)
    train_number = round(min_files * 0.8)
    valid_number = round(min_files * 0.2)
    return train_number, valid_number

In [12]:
train_number, valid_number = allocate_imgs(os.listdir(spectrogram_path))

for root, _, files in os.walk(spectrogram_path):
    if root != spectrogram_path:
        os.chdir(root)
        img = os.listdir()
        species_name = os.path.split(root)
        species_name = species_name[-1]

        """
        get the number of images used for training. the rest are for validation
        allocate images to training and validation folders. 70% training 30% validation
        Cut the random samples and paste it to the train/validation folder 
        """

        if species_name != "":
            ### For the training sample
            train_sample = random.sample(img, train_number)
            for i in train_sample:
                copy(i, os.path.join(cnn_path, 'train', species_name))
                # img.remove(i)   

                """ 
                Images sent to the training folder are not removed. By removing each image sent to the training folder, it avoids the possibility of
                having a duplicate sent to the validation folder. Drawback of this is once the code is run, the number of images inside the source folder
                is less than the original number. This means the mel specs of the audios must be extracted again to obtain the original source.
                """

            ### For the Validation Sample
            valid_sample = random.sample(img, valid_number)
            for i in valid_sample:
                copy(i, os.path.join(cnn_path, 'test', species_name))
            
        print(f"For {species_name} folder, {train_number} images are used for training and {valid_number} images are used for validation.")

For AsianGlossyStarling folder, 346 images are used for training and 86 images are used for validation.
For Black-crownedNightHeron folder, 346 images are used for training and 86 images are used for validation.
For Black-napedOriole folder, 346 images are used for training and 86 images are used for validation.
For Blue-tailedBee-eater folder, 346 images are used for training and 86 images are used for validation.
For BrownShrike folder, 346 images are used for training and 86 images are used for validation.
For ChestnutMunia folder, 346 images are used for training and 86 images are used for validation.
For CollaredKingfisher folder, 346 images are used for training and 86 images are used for validation.
For EurasianTreeSparrow folder, 346 images are used for training and 86 images are used for validation.
For Grey-backedTailorbird folder, 346 images are used for training and 86 images are used for validation.
For GreyWagtail folder, 346 images are used for training and 86 images are

In [17]:
os.chdir(spectrogram_path)

for i in os.listdir(spectrogram_path):
    print(i, len(os.listdir(os.path.join(spectrogram_path, i))))

AsianGlossyStarling 827
Black-crownedNightHeron 618
Black-napedOriole 1256
Blue-headedFantail 1463
Blue-tailedBee-eater 680
BrownShrike 987
ChestnutMunia 436
CollaredKingfisher 1796
EurasianTreeSparrow 4343
Grey-backedTailorbird 644
GreyWagtail 815
MangroveBlueFlycatcher 1162
Olive-backedSunbird 1809
PhilippinePiedFantail 715
PiedBushChat 727
Red-keeledFlowerpecker 647
White-breastedWaterhen 818
Yellow-ventedBulbul 851
ZebraDove 2280


# MP3 TO WAV CONVERTER

In [1]:
import os
from pydub import AudioSegment

In [2]:
# path = "D:\\Aj\\2nd dataset\\dataset"
# path = "D:\\Aj"
path = "D:\\Aj\\BirdClassification\\NEW DATASET\\NEW AUDIOS"

## MP3 TO WAV

In [3]:
file_list = []

for root, dirs, files in os.walk(path):
    print(root)
    os.chdir(root)
    audio_files = os.listdir()
    for name in audio_files:
        first, ext = os.path.splitext(name)
        if ext == ".mp3":
            mp3_sound = AudioSegment.from_mp3(name)
            
            # rename them using the old name + ".wav"
            mp3_sound.export("{0}.wav".format(first), format="wav")
            
            # Deleting the .mp3 files para lahat .wav lang
            os.remove(name)

D:\Aj\BirdClassification\NEW DATASET\NEW AUDIOS
D:\Aj\BirdClassification\NEW DATASET\NEW AUDIOS\CollaredKingfisher
D:\Aj\BirdClassification\NEW DATASET\NEW AUDIOS\EurasianTreeSparrow
D:\Aj\BirdClassification\NEW DATASET\NEW AUDIOS\Red-keeledFlowerpecker


# NOISE INJECTION

In [35]:
import os
import scipy
import numpy as np
import soundfile as sf
import librosa as lb
import noisereduce as nr
from scipy.signal import butter, lfilter

### Filepaths and Creating Folders

In [41]:
nr_audios_path = "D:\\Aj\\BirdClassification\\NEW DATASET\\NEW AUDIOS"
wavfiles_path = "D:\\Aj\\BirdClassification\\NEW DATASET\\audio dataset\\original"

## List the species names
species = os.listdir(wavfiles_path)
# species = ["ZebraDove"]

In [42]:
if not os.path.exists(nr_audios_path):
    os.mkdir(nr_audios_path)
    
for i in species:
    folders = os.path.join(nr_audios_path, i)
    if not os.path.exists(folders):
        os.mkdir(folders)

### Functions

In [40]:
def butter_bandpass(lowcut, highcut, fs, order=6):
    return butter(order, [lowcut, highcut], fs=fs, btype='band')

def butter_bandpass_filter(data, lowcut, highcut, fs, order=5):
    b, a = butter_bandpass(lowcut, highcut, fs, order=order)
    y = lfilter(b, a, data)
    return y

def birdfrequency(index):
    birdfreq = species_freq[index]
    bound = birdfreq * 0.25
    return birdfreq, bound

### Process the Audio Files

In [8]:
# speciesfreq_path = "D:\\Aj\\project\\species frequencies.txt"

# with open(speciesfreq_path, 'r') as f:
#     species_freq = f.readlines()
#     species_freq = species_freq[0]
#     species_freq = species_freq.strip("[]")
#     species_freq = species_freq.split(",")
    
# for idx, line in enumerate(species_freq):
#     species_freq[idx] = float(line)
    
# print(species_freq)

species_freq = [3604.90, 4128.35, 7577.61]

In [13]:
path = "D:\\Aj\\BirdClassification\\FROM RPI RECORDINGS"
file = "D:\\Aj\\BirdClassification\\FROM RPI RECORDINGS\\2022-10-21_BeforeDawn_Recording5.wav"

sr = 22050

In [15]:
audio, _ = lb.load(file, sr=sr)
audio = butter_bandpass_filter(audio, 1000, 11000, sr)

first = "2022-10-21_BeforeDawn_Recording5"
filename = f"{first}_nr.wav"
os.chdir(path)
sf.write(filename, audio, sr, format="wav")

In [9]:
species_index = 0
sr = 22050
SR = 22050


for root,_,files in os.walk(wavfiles_path):
    if root != wavfiles_path:
        for file in files:
            first,ext = os.path.splitext(file)
            if ext == ".wav":
                print(file)
                os.chdir(root)
                audio, _ = lb.load(file, sr=sr)
                # audio = nr.reduce_noise(y=audio, sr=sr)
                # birdfreq, bound = birdfrequency(species_index)
                # birdfreq = 1095.719181838233
                # bound = birdfreq * 0.25
                # audio = butter_bandpass_filter(audio, 1000, 11000, sr)

                filename = f"{first}_nr.wav"
                os.chdir(os.path.join(nr_audios_path, species[species_index]))
                sf.write(filename, audio, sr, format="wav")
            
        species_index += 1

121823.wav
121824.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  2.84it/s]


151130.wav
151131.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  2.98it/s]


151135.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  2.37it/s]


151136.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:02<00:00,  2.20it/s]


151642.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  2.18it/s]


152005.wav
152203.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:01<00:00,  1.51it/s]


164689.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:01<00:00,  1.69it/s]


164693.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:01<00:00,  2.75it/s]


166851.wav
177320.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:01<00:00,  2.68it/s]


179372.wav
179374.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  2.94it/s]


181238.wav
186559.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  2.92it/s]


192025.wav
192026.wav
197619.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00,  3.09it/s]


269435.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  2.76it/s]


269988.wav
290553.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  2.96it/s]


290554.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  3.12it/s]


290555.wav
293219.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:01<00:00,  2.94it/s]


295158.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  2.59it/s]


327431.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  2.73it/s]


338730.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  2.70it/s]


356241.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00,  3.05it/s]


36965.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  2.97it/s]


376315.wav
376316.wav
39156.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  2.87it/s]


402013.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:01<00:00,  3.08it/s]


481276.wav
501968.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00,  3.08it/s]


575558.wav
575559.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:01<00:00,  2.97it/s]


619809.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  2.89it/s]


631904.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:01<00:00,  2.99it/s]


631908.wav
647048.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  3.16it/s]


86946.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  3.05it/s]


86947.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00,  3.01it/s]


89927.wav
640741.wav
647020.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  3.02it/s]


651246.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:01<00:00,  3.00it/s]


652220.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:01<00:00,  2.95it/s]


654313.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:02<00:00,  2.50it/s]


656074.wav
656075.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  2.53it/s]


656076.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  2.41it/s]


656077.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:01<00:00,  2.77it/s]


656079.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:01<00:00,  2.49it/s]


656081.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  2.53it/s]


656083.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:01<00:00,  1.96it/s]


656912.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  2.22it/s]


657799.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  2.18it/s]


658099.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:02<00:00,  2.26it/s]


658100.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  3.07it/s]


660618.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  3.05it/s]


664001.wav
665130.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:01<00:00,  3.06it/s]


666859.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:01<00:00,  3.14it/s]


668118.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  3.14it/s]


668119.wav
668120.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  3.16it/s]


669475.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  3.00it/s]


670877.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  3.09it/s]


679234.wav
679322.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  2.73it/s]


679355.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  3.08it/s]


679436.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  3.11it/s]


679503.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:01<00:00,  3.00it/s]


679530.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:01<00:00,  2.88it/s]


679885.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:01<00:00,  2.91it/s]


685096.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  3.01it/s]


687471.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00,  3.11it/s]


690645.wav
696161.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:01<00:00,  2.88it/s]


696880.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00,  3.03it/s]


697507.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:01<00:00,  2.94it/s]


697536.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:01<00:00,  2.79it/s]


697571.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  2.76it/s]


699014.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:01<00:00,  2.91it/s]


699131.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  3.13it/s]


699133.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:01<00:00,  2.96it/s]


699886.wav


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:03<00:00,  2.91it/s]


700315.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:01<00:00,  2.91it/s]


700317.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  3.08it/s]


700325.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:01<00:00,  2.95it/s]


701920.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  3.04it/s]


702427.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:01<00:00,  2.88it/s]


704315.wav
704316.wav
709534.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:01<00:00,  2.82it/s]


709542.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:01<00:00,  2.96it/s]


709543.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:01<00:00,  2.88it/s]


710251.wav
710648.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  3.05it/s]


710681.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00,  3.05it/s]


710683.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00,  3.01it/s]


710684.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  3.09it/s]


710686.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  2.86it/s]


710693.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:02<00:00,  2.89it/s]


710702.wav


100%|██████████████████████████████████████████████████████████████████████████████████| 13/13 [00:04<00:00,  2.95it/s]


712426.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:01<00:00,  2.83it/s]


713874.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:01<00:00,  2.98it/s]


713909.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  3.18it/s]


713917.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  2.82it/s]


715544.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  2.76it/s]


715872.wav
716156.wav


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:03<00:00,  2.86it/s]


717520.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:01<00:00,  2.85it/s]


720858.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  3.02it/s]


720869.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00,  3.01it/s]


720872.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:01<00:00,  2.95it/s]


721995.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:01<00:00,  2.91it/s]


721996.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:01<00:00,  2.78it/s]


AV10896_Dicaeum+australe_PH_1_0-65_U.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:01<00:00,  2.69it/s]


AV10910_Dicaeum+australe+australe_PH_1_0-13_S.wav
AV10911_Dicaeum+australe+australe_PH_1_0-43_S.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  2.86it/s]


AV15924_Dicaeum+australe_PH_1_1-14_SC.wav
AV15970_Dicaeum+australe_PH_1_7-223_SC.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:03<00:00,  2.66it/s]


AV16089_Dicaeum+australe_PH_1_41-51_C.wav
AV16163_Dicaeum+australe_PH_1_838-837_SC.wav
AV2008_Dicaeum+australe_PH_1_249-320_C.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:01<00:00,  2.96it/s]


AV2009_Dicaeum+australe_PH_1_20-50_C.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  2.94it/s]


AV2010_Dicaeum+australe_PH_1_2-6_C.wav
AV8082_Dicaeum+australe_PH_1_2-36_SC.wav


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  2.94it/s]


In [46]:
# Noise Reduced wavfiles
nr_path = "D:\\Aj\\BirdClassification\\NEW DATASET\\audio dataset\\original"

# Noise injected path
noise_path = "D:\\Aj\\BirdClassification\\NEW DATASET\\NEW AUDIOS"

In [47]:
if not os.path.exists(noise_path):
    os.mkdir(noise_path)
    
for folder in species:
    species_folder = os.path.join(noise_path, folder)
    if not os.path.exists(species_folder):
        os.mkdir(species_folder)

In [48]:
def add_white_noise(file, noise_factor):
    signal, _ = lb.load(file)
    noise = np.random.normal(0, signal.std(), signal.size)
    augmented_signal = signal + noise * noise_factor 
    return augmented_signal

In [49]:
for folder in species:
    print(folder)
    folderpath = os.path.join(nr_path, folder)
    os.chdir(folderpath)
    filenames = os.listdir()
    for file in filenames:
        print(file)
        filename, _ = os.path.splitext(file)
        audio = add_white_noise(file, 0.1)
        new_filename = filename + '_ni.wav'
        os.chdir(os.path.join(noise_path, folder))
        sf.write(new_filename, audio, SR)
        os.chdir(folderpath)

AsianGlossyStarling
135410.wav
150616.wav
150784.wav
151015.wav
151125.wav
203630.wav
297713.wav
307393.wav
359561.wav
369053.wav
369054.wav
398686.wav
401716.wav
424589.wav
442860.wav
442861.wav
442862.wav
442863.wav
446174.wav
461216.wav
497072.wav
525873.wav
528889.wav
529066.wav
578355.wav
593991.wav
622285.wav
656218.wav
67615.wav
79320.wav
98514.wav
98515.wav
Black-crownedNightHeron
110872.wav
257328.wav
257329.wav
257330.wav
263289.wav
280512.wav
304259.wav
319076.wav
365511.wav
376571.wav
441587.wav
449100.wav
464577.wav
464824.wav
466177.wav
509469.wav
528094.wav
548760.wav
549290.wav
551281.wav
557328.wav
558100.wav
579688.wav
580668.wav
588333.wav
588337.wav
588338.wav
597256.wav
656877.wav
656878.wav
658124.wav
661217.wav
670576.wav
672539.wav
673963.wav
675411.wav
675694.wav
676277.wav
676688.wav
680952.wav
684781.wav
705780.wav
709199.wav
709867.wav
711341.wav
713290.wav
714053.wav
715318.wav
720203.wav
720749.wav
Black-napedOriole
AV10952_Oriolus+chinensis+chinensis_PH_1

# FIND BIRD FREQUENCY

In [1]:
import os
import librosa as lb
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import scipy as sp
# import IPython.display as ipd
import pandas as pd
from scipy.signal import butter, lfilter
import noisereduce as nr
from itertools import combinations

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def butter_bandpass(lowcut, highcut, fs, order=6):
    return butter(order, [lowcut, highcut], fs=fs, btype='band')

def butter_bandpass_filter(data, lowcut, highcut, fs, order=5):
    b, a = butter_bandpass(lowcut, highcut, fs, order=order)
    y = lfilter(b, a, data)
    return y

def find_frequency(audio):
    w = np.fft.fft(audio)
    freqs = np.fft.fftfreq(len(w))
    idx = np.argmax(np.abs(w))
    freq = freqs[idx]
    freq_in_hertz = abs(freq * 22050)
    return freq_in_hertz

def perc_diff(x,y):
    return np.abs(x - y) / (np.mean([x,y]))

### This function is to prevent outlier frequencies becoming the reference for preprocessing
def bird_freq_correction(audio_infos):
    freqs = []
    acc_freq = []
    percent_differences = []
    bird_freq = 0
    
    ### Reading the audio files of the test dataset
    for i in audio_infos:
        birdfreq = find_frequency(i)
        freqs.append(birdfreq)
    
    ### Listing the number of combinations of each frequency
    c = list(combinations(freqs, 2))
    for i in c:
        z = perc_diff(i[0], i[1])
        percent_differences.append(z)
    
    ### Getting the 5 least percent differences of the combinations. These values are likely the most precise and accurate. 
    min5 = sorted(percent_differences)[:5]
    for val in min5:
        for idx, value in enumerate(percent_differences):
            if val == value:
                acc_freq.append(c[idx])
    freq_mean = np.mean(acc_freq)
    
    ### Replacing outlier frequencies with the average of the 5 combinations with the least percent differences. 
    for idx, i in enumerate(freqs):
        diff = perc_diff(freq_mean, i)
        ### If percent difference is more than 50%, replace outlier
        if diff > 0.5:
            freqs[idx] = freq_mean
    bird_freq = np.mean(freqs)
    return bird_freq

def fourier(sample):
    ft = sp.fft.fft(sample)
    mag = np.absolute(ft)
    freq = np.linspace(0, sr, len(mag))
    fig = plt.figure(figsize=(20,8))
    plt.plot(freq, mag)
    plt.xlabel("Frequency")
    plt.ylabel("Amplitude")
    plt.close(fig)
    return freq, mag

def threshold(freqs, i):
    thresh = len(freqs[i]) // 2
    return thresh

In [3]:
datasetpath = "D:\\Aj\\BirdClassification\\NEW DATASET\\after NEW AUDIOS\\ZebraDove"

frame_size = 2048
hop_size = 512
sr = 22050
lowcut = 200
highcut = 11000

In [4]:
## INITIALIZING THE LIST OF VALUES
species_freqs = []
species_magnitudes = []

In [6]:
for root, _, files in os.walk(datasetpath):
    if root == datasetpath:
        os.chdir(root)
        audio_files = os.listdir()
        bird_freq = 0
        # print(audio_files)
        audio_values = []

        for file in audio_files:
            _, ext = os.path.splitext(audio_files[0])
            if ext == '.wav':
                audio, _ = lb.load(file)
                audio = butter_bandpass_filter(audio, 200, 11000, sr)
                audio = nr.reduce_noise(y=audio, sr=sr)
                bird_freq = find_frequency(audio)
                # audio = butter_bandpass_filter(audio, bird_freq-200, bird_freq+200, sr)
                audio_values.append(audio)
                # freq, mag = fourier(audio)

        ### Use the audio_infos list to get the estimated bird frequency. 
        bird_freq = bird_freq_correction(audio_values)
        print(os.path.split(root)[-1])
        print(bird_freq)
        species_freqs.append(bird_freq)

100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  2.36it/s]
100%|████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:01<00:00,  2.52it/s]
100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:01<00:00,  2.50it/s]
100%|████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:02<00:00,  2.77it/s]
100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  2.88it/s]
100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:01<00:00,  2.77it/s]
100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  2.80it/s]
100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  2.87it/s]
100%|███████████████████████████████████

ZebraDove
1095.719181838233
