In [None]:
 !pip install pydub xenopy librosa numpy noisereduce

In [None]:
!pip install tensorflow==2.11.0 tensorflow-io==0.31.0 matplotlib


In [None]:
import os
import numpy as np
import librosa
import os

from pydub.silence import split_on_silence
from scipy.io import wavfile
import noisereduce as nr

import pydub
from pydub.exceptions import CouldntDecodeError

from xenopy import Query

# Species to process, by English common name.
birds = [
    "Common Buzzard", "Mallard", "Mute Swan", "Great Tit", "Hooded Crow",
    "Grey Heron", "Common Chaffinch", "Black-headed Gull", "Great Cormorant",
    "White Wagtail", "Eurasian Coot", "White Stork", "House Sparrow",
    "Great Egret", "Eurasian Jay", "Eurasian Magpie", "Rook",
    "Western Marsh Harrier", "Fieldfare", "Eurasian Tree Sparrow",
]

# Species name -> folder name: the name trimmed, with every space removed.
dir_for_birds = {bird: str(bird).strip().replace(" ", '') for bird in birds}

def to_wav(bird, path):
    """Convert the recordings of ``bird`` to .wav files, in place.

    Works inside ``<path>/../sounds/<bird>/<dir_for_birds[bird]>``:

    1. If .wav files already exist there, asks once (via ``input``) whether
       they should all be deleted.
    2. Converts every .mp3 file to .wav with pydub. Files that pydub cannot
       decode as MP3 are retried as MP4 containers.
    3. Deletes every source file that was converted successfully.

    The working directory is switched to the recordings folder while the
    function runs and is always set back to ``path`` afterwards, even when an
    error is raised.

    Args:
        bird: Species name; must be a key of ``dir_for_birds``.
        path: Directory the sounds folder is resolved from, and the directory
            that is made current again on exit.

    Raises:
        KeyError: If ``bird`` is not in ``dir_for_birds``.
        OSError: If the recordings folder does not exist.
        ValueError: If the answer to the delete prompt is not an integer.
    """
    # Resolve once so that the final chdir still works after changing directory.
    path = os.path.abspath(path)
    bird_dir = os.path.join(path, '..', 'sounds', bird)
    bird_dir_sounds = os.path.join(bird_dir, dir_for_birds[bird])
    converted_files = []
    os.chdir(bird_dir_sounds)
    try:
        print("recycling old stuff")

        # Ask a single time, then act on every stale file.
        old_wavs = [name for name in os.listdir() if name.lower().endswith('.wav')]
        if old_wavs:
            print(f"Do you want to delete all the .wav files inside {bird_dir_sounds} ?")
            if bool(int(input("1/0"))):
                for name in old_wavs:
                    os.remove(name)

        print("writing .wav files")
        failed_files = []

        for file in os.listdir():
            print(file)
            if not file.lower().endswith('.mp3'):
                continue
            try:
                sound = pydub.AudioSegment.from_mp3(file)
                sound.export(f"{file.split('.')[0]}.wav", bitrate=16000, format="wav")
                converted_files.append(file)
            except CouldntDecodeError:
                failed_files.append(file)

        print('failed for: ', failed_files)

        # Second chance: try decoding the failed files as MP4 containers.
        for file in failed_files:
            try:
                sound = pydub.AudioSegment.from_file(file, format='mp4')
                sound.export(f"{file.split('.')[0]}.wav", format="wav")
                converted_files.append(file)
            except Exception as e:
                # Best effort: keep going, but report what was skipped and why.
                print(f"could not convert {file}: {e}")

        print('deleting old mp3 files')

        for file in converted_files:
            os.remove(file)
    finally:
        os.chdir(path)


def reduce_noise_(directory, sound_name, save_to):
    """Denoise one .wav file and write the result next to the other outputs.

    Args:
        directory: Folder that contains the input file.
        sound_name: File name of the input .wav inside ``directory``.
        save_to: Folder the denoised file is written to.

    Returns:
        Path of the written file: ``<save_to>/<stem>_reduced.wav``, where
        ``stem`` is ``sound_name`` up to its first dot.
    """
    sound_path = os.path.join(directory, sound_name)
    rate, data = wavfile.read(sound_path)

    if data.ndim == 2:
        # Multi-channel audio is read as (frames, channels); the noise
        # reducer is given (channels, frames).
        data = np.transpose(data)
    else:
        # Mono: duplicate the single channel so the output is two-channel.
        data = np.stack((data, data))

    reduced_noise = nr.reduce_noise(y=data, sr=rate, stationary=True)

    # os.path.join picks the right separator on every platform.
    reduced_file_name = os.path.join(save_to, f"{sound_name.split('.')[0]}_reduced.wav")
    # Transpose back to (frames, channels) before writing.
    wavfile.write(reduced_file_name, rate, reduced_noise.T)
    return reduced_file_name


def split_bird(bird, path):
    """Split the denoised recordings of ``bird`` into clips under ``splits``.

    Reads every file in ``<path>/../sounds/<bird>/<dir_for_birds[bird]>_reduced``,
    cuts it with ``split`` and saves the chunks with ``save_slips`` into
    ``<path>/../sounds/<bird>/splits``. No further file is started once more
    than 300 clips have been written.

    The working directory is switched to the species folder while the function
    runs and is always set back to ``path`` afterwards, even when an error is
    raised.

    Args:
        bird: Species name; must be a key of ``dir_for_birds``.
        path: Directory the sounds folder is resolved from, and the directory
            that is made current again on exit.
    """
    # Resolve once so that the final chdir still works after changing directory.
    path = os.path.abspath(path)
    bird_dir = os.path.join(path, '..', 'sounds', bird)
    bird_dir_sounds = os.path.join(bird_dir, f"{dir_for_birds[bird]}_reduced")
    os.chdir(bird_dir)
    try:
        os.makedirs('splits', exist_ok=True)
        # `split` concatenates prefix + file name, so the prefix must end with
        # a separator; joining with '' appends the platform's own separator.
        sounds_prefix = os.path.join(bird_dir_sounds, '')
        file_count = 0
        for file in os.listdir(bird_dir_sounds):
            if file_count > 300:
                break

            splits = split(file, sounds_prefix)
            print(len(splits), f" from {file}")
            save_slips(bird, splits, file_count)
            file_count += len(splits)
    finally:
        os.chdir(path)


def split(file, path):
    """Load the recording ``path + file`` and cut it at its silent stretches.

    Args:
        file: File name of the recording; it is also printed.
        path: Prefix prepended to ``file`` as-is, so it must already end with
            a path separator.

    Returns:
        Whatever pydub's ``split_on_silence`` returns for the recording.
    """
    print(file)
    recording = pydub.AudioSegment.from_file(path + file)
    # The silence threshold sits 16 dB below the recording's own dBFS level.
    quiet_level = recording.dBFS - 16
    return split_on_silence(
        recording,
        min_silence_len=500,
        silence_thresh=quiet_level,
        keep_silence=250,
    )


def save_slips(bird_name, splits, file_count):
    """Export every chunk in ``splits`` as a numbered .wav file in ./splits.

    Files are named ``<bird_name up to its first dot>_<n>.wav``, with ``n``
    counting up from ``file_count`` in the order of ``splits``.

    Args:
        bird_name: Species name used as the file name prefix.
        splits: Iterable of objects exposing pydub's ``export`` method.
        file_count: Number given to the first exported chunk.
    """
    export_path = os.path.join('.', 'splits')
    for number, chunk in enumerate(splits, start=file_count):
        chunk.export(f"{export_path}/{bird_name.split('.')[0]}_{number}.wav", format="wav", bitrate='16k')


def reduce_bird(bird, path):
    """Denoise every .wav recording of ``bird`` into a ``*_reduced`` folder.

    Reads ``<path>/../sounds/<bird>/<dir_for_birds[bird]>`` and writes the
    denoised files, via ``reduce_noise_``, to the sibling folder
    ``<dir_for_birds[bird]>_reduced`` (created if missing). Files that do not
    end in ``.wav`` are skipped.

    The working directory is switched to the species folder while the function
    runs and is always set back to ``path`` afterwards, even when an error is
    raised. A ``bird`` missing from ``dir_for_birds`` is reported and skipped.

    Args:
        bird: Species name, expected to be a key of ``dir_for_birds``.
        path: Directory the sounds folder is resolved from, and the directory
            that is made current again on exit.
    """
    # Resolve once so that the final chdir still works after changing directory.
    path = os.path.abspath(path)
    bird_dir = os.path.join(path, '..', 'sounds', bird)
    os.chdir(bird_dir)
    try:
        try:
            bird_dir_sounds = dir_for_birds[bird]
        except KeyError:
            print(f"no folder name known for {bird}, skipping")
            return
        reduced_dir_name = f"{bird_dir_sounds}_reduced"

        os.makedirs(reduced_dir_name, exist_ok=True)
        reduced_dir = os.path.join(bird_dir, reduced_dir_name)

        for file in os.listdir(bird_dir_sounds):
            # Only .wav files can be read by the noise-reduction step.
            if not file.lower().endswith('.wav'):
                continue
            reduce_noise_(directory=bird_dir_sounds, sound_name=file, save_to=reduced_dir)
    finally:
        os.chdir(path)


def download_sounds(birds):
    """Download the recordings for every species name in ``birds``.

    One xenopy ``Query`` is built per name (``q_gt="B"``,
    ``since="2023-06-01"``) and its recordings are retrieved into
    ``../sounds/<name>``. A failing download is printed and the loop moves on
    to the next species.

    Args:
        birds: Iterable of species names.
    """
    for bird_name in birds:
        query = Query(name=bird_name, q_gt="B", since="2023-06-01")
        target_dir = f"../sounds/{bird_name}"
        try:
            query.retrieve_recordings(multiprocess=False, nproc=5, attempts=5, outdir=target_dir)
        except Exception as err:
            print(err)

if __name__ == "__main__":
    # Fetch every species first, then run convert -> denoise -> split for each.
    download_sounds(birds)
    for bird in birds:
        for stage in (to_wav, reduce_bird, split_bird):
            stage(bird, os.getcwd())

query: Common Buzzard q_gt:B since:2023-06-01
... retrieving metadata ...


process 16592: 100%|██████████| 43/43 [00:42<00:00,  1.02it/s]


query: Mallard q_gt:B since:2023-06-01
... retrieving metadata ...


process 16592: 100%|██████████| 32/32 [00:37<00:00,  1.17s/it]


query: Mute Swan q_gt:B since:2023-06-01
... retrieving metadata ...


process 16592: 100%|██████████| 43/43 [00:33<00:00,  1.29it/s]


query: Great Tit q_gt:B since:2023-06-01
... retrieving metadata ...


process 16592: 100%|█████████▉| 267/268 [05:39<00:00,  1.10it/s]

Bad url: https://xeno-canto.org/806787/download


process 16592: 100%|██████████| 268/268 [05:40<00:00,  1.27s/it]


query: Hooded Crow q_gt:B since:2023-06-01
... retrieving metadata ...


process 16592: 100%|██████████| 165/165 [03:49<00:00,  1.39s/it]


query: Grey Heron q_gt:B since:2023-06-01
... retrieving metadata ...


process 16592: 100%|██████████| 62/62 [00:51<00:00,  1.21it/s]


query: Common Chaffinch q_gt:B since:2023-06-01
... retrieving metadata ...


process 16592:   8%|▊         | 21/262 [00:29<06:54,  1.72s/it]

Bad url: https://xeno-canto.org/905325/download


process 16592:   8%|▊         | 22/262 [00:30<05:57,  1.49s/it]

Bad url: https://xeno-canto.org/905323/download


process 16592:   9%|▉         | 23/262 [00:31<05:22,  1.35s/it]

Bad url: https://xeno-canto.org/905322/download


process 16592: 100%|██████████| 262/262 [05:48<00:00,  1.33s/it]


query: Black-headed Gull q_gt:B since:2023-06-01
... retrieving metadata ...


process 16592: 100%|██████████| 58/58 [00:58<00:00,  1.01s/it]


query: Great Cormorant q_gt:B since:2023-06-01
... retrieving metadata ...


process 16592: 100%|██████████| 11/11 [00:08<00:00,  1.24it/s]


query: White Wagtail q_gt:B since:2023-06-01
... retrieving metadata ...


process 16592: 100%|██████████| 65/65 [00:58<00:00,  1.11it/s]


query: Eurasian Coot q_gt:B since:2023-06-01
... retrieving metadata ...


process 16592: 100%|██████████| 171/171 [02:04<00:00,  1.37it/s]


query: White Stork q_gt:B since:2023-06-01
... retrieving metadata ...


process 16592: 100%|██████████| 19/19 [00:16<00:00,  1.18it/s]


query: House Sparrow q_gt:B since:2023-06-01
... retrieving metadata ...


process 16592: 100%|██████████| 118/118 [02:08<00:00,  1.09s/it]


query: Great Egret q_gt:B since:2023-06-01
... retrieving metadata ...


process 16592: 100%|██████████| 11/11 [00:07<00:00,  1.39it/s]


query: Eurasian Jay q_gt:B since:2023-06-01
... retrieving metadata ...


process 16592: 100%|██████████| 92/92 [01:15<00:00,  1.22it/s]


query: Eurasian Magpie q_gt:B since:2023-06-01
... retrieving metadata ...


process 16592: 100%|██████████| 70/70 [01:09<00:00,  1.01it/s]


query: Rook q_gt:B since:2023-06-01
... retrieving metadata ...


process 16592: 100%|██████████| 31/31 [00:41<00:00,  1.34s/it]


query: Western Marsh Harrier q_gt:B since:2023-06-01
... retrieving metadata ...


process 16592: 100%|██████████| 11/11 [00:15<00:00,  1.39s/it]


query: Fieldfare q_gt:B since:2023-06-01
... retrieving metadata ...


process 16592: 100%|██████████| 23/23 [00:23<00:00,  1.03s/it]


query: Eurasian Tree Sparrow q_gt:B since:2023-06-01
... retrieving metadata ...


process 16592: 100%|██████████| 53/53 [00:47<00:00,  1.12it/s]


recycling old stuff
writing .wav files
809061.mp3
810548.mp3
812485.mp3
814444.mp3
815852.mp3
815861.mp3
818538.mp3
818696.mp3
818698.mp3
818828.mp3
822397.mp3
822398.mp3
822399.mp3
823860.mp3
826457.mp3
826462.mp3
827008.mp3
835184.mp3
837632.mp3
838796.mp3
839246.mp3
839247.mp3
841724.mp3
845010.mp3
855270.mp3
860503.mp3
862506.mp3
862509.mp3
862718.mp3
862720.mp3
863949.mp3
863950.mp3
864277.mp3
870655.mp3
871119.mp3
871602.mp3
872338.mp3
878780.mp3
884699.mp3
891011.mp3
895029.mp3
895030.mp3
902906.mp3
failed for:  []
deleting old mp3 files




809061_reduced.wav
1  from 809061_reduced.wav
810548_reduced.wav
1  from 810548_reduced.wav
812485_reduced.wav
42  from 812485_reduced.wav
814444_reduced.wav
1  from 814444_reduced.wav
815852_reduced.wav
1  from 815852_reduced.wav
815861_reduced.wav
1  from 815861_reduced.wav
818538_reduced.wav
6  from 818538_reduced.wav
818696_reduced.wav
6  from 818696_reduced.wav
818698_reduced.wav
27  from 818698_reduced.wav
818828_reduced.wav
28  from 818828_reduced.wav
822397_reduced.wav
2  from 822397_reduced.wav
822398_reduced.wav
9  from 822398_reduced.wav
822399_reduced.wav
1  from 822399_reduced.wav
823860_reduced.wav
1  from 823860_reduced.wav
826457_reduced.wav
17  from 826457_reduced.wav
826462_reduced.wav
45  from 826462_reduced.wav
827008_reduced.wav
2  from 827008_reduced.wav
835184_reduced.wav
3  from 835184_reduced.wav
837632_reduced.wav
14  from 837632_reduced.wav
838796_reduced.wav
1  from 838796_reduced.wav
839246_reduced.wav
1  from 839246_reduced.wav
839247_reduced.wav
1  from 8

  rate, data = wavfile.read(sound_path)


813078_reduced.wav
1  from 813078_reduced.wav
827691_reduced.wav
21  from 827691_reduced.wav
831436_reduced.wav
1  from 831436_reduced.wav
838988_reduced.wav
1  from 838988_reduced.wav
840246_reduced.wav
3  from 840246_reduced.wav
844667_reduced.wav
16  from 844667_reduced.wav
844759_reduced.wav
1  from 844759_reduced.wav
844760_reduced.wav
1  from 844760_reduced.wav
863472_reduced.wav
1  from 863472_reduced.wav
870822_reduced.wav
6  from 870822_reduced.wav
871057_reduced.wav
1  from 871057_reduced.wav
871469_reduced.wav
1  from 871469_reduced.wav
871637_reduced.wav
1  from 871637_reduced.wav
871707_reduced.wav
1  from 871707_reduced.wav
871708_reduced.wav
1  from 871708_reduced.wav
872431_reduced.wav
2  from 872431_reduced.wav
876460_reduced.wav
4  from 876460_reduced.wav
877850_reduced.wav
1  from 877850_reduced.wav
878872_reduced.wav
1  from 878872_reduced.wav
881155_reduced.wav
13  from 881155_reduced.wav
883873_reduced.wav
1  from 883873_reduced.wav
884361_reduced.wav
3  from 8843

  return filtered_chunk.astype(self._dtype)


806433_reduced.wav
14  from 806433_reduced.wav
806892_reduced.wav
16  from 806892_reduced.wav
807108_reduced.wav
40  from 807108_reduced.wav
807722_reduced.wav
4  from 807722_reduced.wav
807863_reduced.wav
29  from 807863_reduced.wav
807985_reduced.wav
32  from 807985_reduced.wav
807986_reduced.wav
34  from 807986_reduced.wav
807987_reduced.wav
5  from 807987_reduced.wav
808285_reduced.wav
5  from 808285_reduced.wav
808717_reduced.wav
15  from 808717_reduced.wav
809220_reduced.wav
4  from 809220_reduced.wav
809399_reduced.wav
1  from 809399_reduced.wav
810058_reduced.wav
67  from 810058_reduced.wav
810060_reduced.wav
16  from 810060_reduced.wav
810061_reduced.wav
7  from 810061_reduced.wav
810072_reduced.wav
5  from 810072_reduced.wav
811279_reduced.wav
1  from 811279_reduced.wav
811393_reduced.wav
1  from 811393_reduced.wav
811564_reduced.wav
14  from 811564_reduced.wav
recycling old stuff
writing .wav files
805909.mp3
805912.mp3
806871.mp3
806872.mp3
806876.mp3
806878.mp3
806978.mp3


  rate, data = wavfile.read(sound_path)
  filtered_chunk[:, pos: pos + end0 - start0] = filtered_chunk0[:, start0:end0]


806398_reduced.wav
1  from 806398_reduced.wav
806401_reduced.wav
1  from 806401_reduced.wav
806498_reduced.wav
5  from 806498_reduced.wav
806564_reduced.wav
15  from 806564_reduced.wav
806671_reduced.wav
21  from 806671_reduced.wav
807085_reduced.wav
10  from 807085_reduced.wav
807677_reduced.wav
10  from 807677_reduced.wav
807914_reduced.wav
7  from 807914_reduced.wav
807916_reduced.wav
6  from 807916_reduced.wav
808065_reduced.wav
1  from 808065_reduced.wav
808186_reduced.wav
30  from 808186_reduced.wav
808461_reduced.wav
1  from 808461_reduced.wav
808462_reduced.wav
1  from 808462_reduced.wav
808463_reduced.wav
1  from 808463_reduced.wav
808464_reduced.wav
1  from 808464_reduced.wav
808507_reduced.wav
6  from 808507_reduced.wav
808921_reduced.wav
46  from 808921_reduced.wav
809275_reduced.wav
8  from 809275_reduced.wav
809349_reduced.wav
8  from 809349_reduced.wav
809401_reduced.wav
1  from 809401_reduced.wav
809403_reduced.wav
1  from 809403_reduced.wav
809761_reduced.wav
49  from 



829673_reduced.wav
4  from 829673_reduced.wav
843754_reduced.wav
1  from 843754_reduced.wav
856599_reduced.wav
6  from 856599_reduced.wav
862521_reduced.wav
3  from 862521_reduced.wav
863045_reduced.wav
2  from 863045_reduced.wav
865915_reduced.wav
2  from 865915_reduced.wav
896643_reduced.wav
1  from 896643_reduced.wav
897905_reduced.wav
1  from 897905_reduced.wav
898000_reduced.wav
6  from 898000_reduced.wav
898080_reduced.wav
1  from 898080_reduced.wav
898805_reduced.wav
1  from 898805_reduced.wav
recycling old stuff
writing .wav files
806270.mp3
806271.mp3
806276.mp3
806928.mp3
807703.mp3
811588.mp3
816534.mp3
817707.mp3
819388.mp3
819392.mp3
819475.mp3
822090.mp3
822483.mp3
822523.mp3
823685.mp3
826797.mp3
828279.mp3
830604.mp3
830610.mp3
831097.mp3
831259.mp3
831928.mp3
831929.mp3
831930.mp3
831931.mp3
832280.mp3
832427.mp3
832749.mp3
833311.mp3
833319.mp3
833646.mp3
834350.mp3
835123.mp3
835140.mp3
835308.mp3
836034.mp3
838272.mp3
839040.mp3
839042.mp3
840682.mp3
841069.mp3
8412

In [None]:
import os
from matplotlib import pyplot as plt
import tensorflow as tf
import tensorflow_io as tfio

In [None]:
def load_wav_16k_mono(filename):
    """Read a .wav file and return it as a 1-D waveform resampled to 16 kHz.

    The file is decoded to one channel and to 48000 samples
    (``desired_samples``), the channel axis is dropped, and the result is
    resampled from the file's own sample rate to 16000 Hz.

    NOTE(review): the 48000-sample limit is applied at the file's original
    sample rate, i.e. before resampling — confirm that is intended.
    """
    raw_bytes = tf.io.read_file(filename)
    audio, source_rate = tf.audio.decode_wav(raw_bytes, desired_channels=1, desired_samples=48000)
    # Drop the trailing channel axis.
    audio = tf.squeeze(audio, axis=-1)
    source_rate = tf.cast(source_rate, dtype=tf.int64)
    return tfio.audio.resample(audio, rate_in=source_rate, rate_out=16000)

In [None]:
# Folder holding the split clips of each species.
dirs = dict()

# Class id of each species: its position in `birds`.
bird_labels = {bird: i for i, bird in enumerate(birds)}

for bird in birds:
    dirs[bird] = os.path.join('..', 'sounds', bird, 'splits')

# Dataset of clip paths per species. The glob is built with os.path.join so
# the separator is correct on every platform.
files = dict()
for bird in birds:
    files[bird] = tf.data.Dataset.list_files(os.path.join(dirs[bird], '*.wav'))

# Dataset of (clip path, label) pairs per species.
bird_data = dict()

for bird in birds:
    bird_label = bird_labels[bird]
    dataset_length = len(list(files[bird].as_numpy_iterator()))
    # One label per clip; tf.ones makes these float32 values.
    labels_dataset = tf.data.Dataset.from_tensor_slices(tf.ones(dataset_length) * bird_label)
    bird_data[bird] = tf.data.Dataset.zip((files[bird], labels_dataset))

# All species chained into a single dataset, in the order of `birds`.
data = None
for bird in birds:
    if data is None:
        data = bird_data[bird]
    else:
        data = data.concatenate(bird_data[bird])

In [None]:
# Measure the resampled length of every House Sparrow clip. Files that fail to
# load are counted in `skipped` and their names are printed.
house_sparrows_dir = os.path.join('..', 'sounds', 'House Sparrow', 'splits')
lengths = []
skipped = 0

for file in os.listdir(house_sparrows_dir):
    try:
        tensor_wave = load_wav_16k_mono(os.path.join(house_sparrows_dir, file))
        lengths.append(len(tensor_wave))
    except Exception:
        skipped += 1
        print(file)
print("done")

In [None]:
# tf.math.reduce_mean(lengths)
# tf.math.reduce_min(lengths)

In [None]:
def preprocess(file_path, label):
    """Turn one clip into a magnitude spectrogram, passing ``label`` through.

    The waveform from ``load_wav_16k_mono`` is cut to at most 48000 samples,
    zero-padded at the front up to 48000 samples, and transformed with an STFT
    (frame length 320, frame step 32). The magnitudes get a trailing axis of
    size 1.

    Returns:
        ``(spectrogram, label)``.
    """
    target_len = 48000
    waveform = load_wav_16k_mono(file_path)[:target_len]
    # The padding goes in front of the signal, not after it.
    leading_zeros = tf.zeros([target_len] - tf.shape(waveform), dtype=tf.float32)
    padded = tf.concat([leading_zeros, waveform], 0)
    magnitudes = tf.abs(tf.signal.stft(padded, frame_length=320, frame_step=32))
    return tf.expand_dims(magnitudes, axis=2), label

In [None]:
# (path, label) -> (spectrogram, label); cached so each clip is decoded once.
data = data.map(preprocess)
data = data.cache()
# Keep the shuffled order fixed after the first pass. The dataset is later
# divided with take()/skip(); reshuffling on every iteration would let the
# same clips show up in both the training and the validation part.
data = data.shuffle(buffer_size=1000, reshuffle_each_iteration=False)
data = data.batch(16)
data = data.prefetch(8)
data

In [None]:
# First 36 batches for training, the following 15 batches for validation.
train, test = data.take(36), data.skip(36).take(15)

In [None]:
# Pull a single training batch to inspect.
samples, labels = next(train.as_numpy_iterator())


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Dense, Flatten

In [None]:
# Two 3x3 convolutions over the (1491, 257, 1) spectrogram, flattened into a
# 128-unit dense layer and one sigmoid output unit.
# NOTE(review): one sigmoid unit gives a single binary score, while the labels
# built for the dataset run from 0 to len(birds) - 1 — confirm this is intended.
model = Sequential([
    Conv2D(16, (3,3), activation="relu", input_shape=(1491, 257,1)),
    Conv2D(16, (3,3), activation="relu"),
    Flatten(),
    Dense(128, activation="relu"),
    Dense(1, activation="sigmoid"),
])

In [None]:
# Adam optimizer, binary cross-entropy loss; recall and precision are tracked.
model.compile(
    optimizer='Adam',
    loss='BinaryCrossentropy',
    metrics=[tf.keras.metrics.Recall(), tf.keras.metrics.Precision()],
)


In [None]:
# Train for one epoch, validating on the held-out batches.
hist = model.fit(train, validation_data=test, epochs=1)


In [None]:
# Training (red) and validation (blue) loss per epoch.
plt.title('Loss')
for series, colour in (('loss', 'r'), ('val_loss', 'b')):
    plt.plot(hist.history[series], colour)
plt.show()

In [None]:
# Training (red) and validation (blue) precision per epoch.
plt.title('Precision')
for series, colour in (('precision', 'r'), ('val_precision', 'b')):
    plt.plot(hist.history[series], colour)
plt.show()

In [None]:
# Training (red) and validation (blue) recall per epoch.
plt.title('Recall')
for series, colour in (('recall', 'r'), ('val_recall', 'b')):
    plt.plot(hist.history[series], colour)
plt.show()

In [None]:
# Score one batch of the held-out data with the trained model.
X_test, y_test = next(test.as_numpy_iterator())
yhat = model.predict(X_test)


In [None]:
# Hard 0/1 decisions: 1 where the model output exceeds 0.5, otherwise 0.
yhat = [int(bool(score > 0.5)) for score in yhat]
yhat


In [None]:
def load_mp3_16k_mono(filename):
    """Load an audio file and return it as a waveform resampled to 16 kHz.

    The file is opened with ``tfio.audio.AudioIOTensor``, its channels are
    collapsed by summing over axis 1 and halving, and the result is resampled
    from the file's own rate to 16000 Hz.

    Args:
        filename: Path of the audio file.

    Returns:
        The resampled 16 kHz waveform tensor.
    """
    res = tfio.audio.AudioIOTensor(filename)
    tensor = res.to_tensor()
    # Sum over the channel axis and halve.
    # NOTE(review): this is the channel average only for two-channel input —
    # confirm the files are always stereo.
    tensor = tf.math.reduce_sum(tensor, axis=1)/2

    sample_rate = tf.cast(res.rate, dtype=tf.int64)
    # 16 kHz target, as the function name says.
    wav = tfio.audio.resample(tensor, rate_in=sample_rate, rate_out=16000)
    return wav