## Install dependencies

In [2]:
!pip install pydub

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.1.2[0m[39;49m -> [0m[32;49m22.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [3]:
!pip install ffmpeg

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.1.2[0m[39;49m -> [0m[32;49m22.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [4]:
!pip install ffprobe

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.1.2[0m[39;49m -> [0m[32;49m22.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


### Import dependencies

In [5]:
import os
import io
import math, random
import boto3
from pydub import AudioSegment
from IPython.display import Audio

### Helper functions 
* mono_to_stereo converts mono audio files to stereo by replicating the single channel to both the left and right channels
* slice_audio creates 3-second audio clips using a sliding window that advances in 500 ms steps
* process_audio is a wrapper function for the above 2 functions

In [6]:
def mono_to_stereo(file_path):
    """Write a two-channel copy of a mono WAV file next to the original.

    The file is decoded once and the same segment is supplied as both the
    left and the right channel.

    Args:
        file_path: Path to a mono WAV file.

    Returns:
        Path of the exported file, i.e. ``file_path`` with its extension
        replaced by ``_stereo.wav``.
    """
    mono = AudioSegment.from_wav(file_path)
    stereo_sound = AudioSegment.from_mono_audiosegments(mono, mono)

    stereo_path = os.path.splitext(file_path)[0] + '_stereo.wav'
    stereo_sound.export(stereo_path, format="wav")
    return stereo_path

def slice_audio(filename, category, clip_ms=3000, step_ms=500):
    """Cut a recording into fixed-length, overlapping clips.

    Clips are exported as WAV to ``../data/clips/<category>/`` and named
    ``<stem>_<start>_<end><ext>``, where start/end are millisecond offsets
    into the source recording.

    Every window that fits entirely inside the recording is exported,
    including the last one; a recording exactly ``clip_ms`` long yields one
    clip, and a shorter recording yields none.

    Args:
        filename: Path of the audio file to slice.
        category: Name of the sub-directory (under ``../data/clips/``) that
            receives the clips.
        clip_ms: Length of each clip in milliseconds.
        step_ms: Offset in milliseconds between the starts of consecutive
            clips.
    """
    stem, extension = os.path.splitext(os.path.basename(filename))

    # Decode the source once; slicing an AudioSegment does not touch the disk.
    audio = AudioSegment.from_file(filename)

    # makedirs also creates ../data/clips itself when it is missing.
    out_dir = '../data/clips/' + category
    os.makedirs(out_dir, exist_ok=True)

    # len() of an AudioSegment is its duration in milliseconds; the +1 keeps
    # the final window whose end coincides with the end of the recording.
    total_ms = len(audio)
    for start in range(0, total_ms - clip_ms + 1, step_ms):
        end = start + clip_ms
        clip_name = stem + '_' + str(start) + '_' + str(end) + extension
        audio[start:end].export(out_dir + '/' + clip_name, format="wav")
        
def process_audio(file_path, category):
    """Slice one recording into clips, converting it to stereo first if it is mono.

    Args:
        file_path: Path of the recording to process.
        category: Category name forwarded to ``slice_audio``.
    """
    is_mono = AudioSegment.from_file(file_path).channels == 1
    # A mono recording is replaced by the stereo copy that mono_to_stereo writes.
    source_path = mono_to_stereo(file_path) if is_mono else file_path
    slice_audio(source_path, category)


### Download audio data from S3

In [19]:
# Download every object in the bucket into ../data/, mirroring the key layout.
s3r = boto3.resource('s3')

# Bucket that holds the audio data
bucket = s3r.Bucket('audio-classifier-data')

for s3_object in bucket.objects.all():
    key_dir, key_name = os.path.split(s3_object.key)
    local_dir = "../data/"+key_dir
    os.makedirs(local_dir, exist_ok=True)
    # A key ending in "/" has an empty file-name part: only the directory is created.
    if not key_name:
        continue
    bucket.download_file(s3_object.key, local_dir+"/"+key_name)

### Create 3 second audio clips from processed audio

In [14]:
# Run process_audio on every file found under each species' processed directory.
bird_species = ['eastern-whipbird', 'kookaburra', 'willie-wagtail']

for species in bird_species:
    species_dir = '../data/processed/'+species
    for folder, _subfolders, file_names in os.walk(species_dir):
        for file_name in file_names:
            process_audio(os.path.join(folder, file_name), species)

### Upload 3 second clips to S3

In [25]:
# Upload every file under ../data/clips to the bucket. The object key is the
# file's path relative to ../data (e.g. clips/<category>/<name>.wav).
data_root = '../data'
clips_root = os.path.join(data_root, 'clips')
for subdir, dirs, files in os.walk(clips_root):
    for file in files:
        full_path = os.path.join(subdir, file)
        # Derive the key from the data root rather than from a fixed split index,
        # and always use "/" as the separator in S3 keys.
        key = os.path.relpath(full_path, data_root).replace(os.sep, '/')
        with open(full_path, 'rb') as data:
            bucket.put_object(Key=key, Body=data)

### Manual Processing
Listen to each 3-second clip and apply the following rules:

* If it contains at least one full, distinctive example of this bird's call, keep it.
* If it contains only a partial example of this bird's call, discard it.
* If it contains another bird's call, discard it, even if it also contains the bird you are listening for.

Keep approved files in the existing directory

In [159]:
# Render an inline audio player for a single clip so it can be reviewed by ear.
# NOTE(review): this path is relative to 'clips/', whereas the slicing cells
# write to '../data/clips/' — confirm the working directory before running.
from IPython.display import Audio
Audio('clips/willie-wagtail/72fcdd4c-29f0-41f9-a32a-6f30f4a4bbcc_stereo_17500_20500.wav')

### Doing the whole process on the /test/ subdirectory 


In [31]:
def mono_to_stereo(file_path):
    """Write a two-channel copy of a mono WAV file next to the original.

    The file is decoded once and the same segment is supplied as both the
    left and the right channel.

    Args:
        file_path: Path to a mono WAV file.

    Returns:
        Path of the exported file, i.e. ``file_path`` with its extension
        replaced by ``_stereo.wav``.
    """
    mono = AudioSegment.from_wav(file_path)
    stereo_sound = AudioSegment.from_mono_audiosegments(mono, mono)

    stereo_path = os.path.splitext(file_path)[0] + '_stereo.wav'
    stereo_sound.export(stereo_path, format="wav")
    return stereo_path

def slice_test_audio(filename, category, clip_ms=3000, step_ms=500):
    """Cut a test recording into fixed-length, overlapping clips.

    Clips are exported as WAV to ``../data/test/clips/<category>/`` and named
    ``<stem>_<start>_<end><ext>``, where start/end are millisecond offsets
    into the source recording.

    Every window that fits entirely inside the recording is exported,
    including the last one; a recording exactly ``clip_ms`` long yields one
    clip, and a shorter recording yields none.

    Args:
        filename: Path of the audio file to slice.
        category: Name of the sub-directory (under ``../data/test/clips/``)
            that receives the clips.
        clip_ms: Length of each clip in milliseconds.
        step_ms: Offset in milliseconds between the starts of consecutive
            clips.
    """
    stem, extension = os.path.splitext(os.path.basename(filename))

    # Decode the source once; slicing an AudioSegment does not touch the disk.
    audio = AudioSegment.from_file(filename)

    # One makedirs call creates ../data/test/clips and the category directory.
    out_dir = '../data/test/clips/' + category
    os.makedirs(out_dir, exist_ok=True)

    # len() of an AudioSegment is its duration in milliseconds; the +1 keeps
    # the final window whose end coincides with the end of the recording.
    total_ms = len(audio)
    for start in range(0, total_ms - clip_ms + 1, step_ms):
        end = start + clip_ms
        clip_name = stem + '_' + str(start) + '_' + str(end) + extension
        audio[start:end].export(out_dir + '/' + clip_name, format="wav")
        
def process_test_audio(file_path, category):
    """Slice one test recording into clips, converting it to stereo first if it is mono.

    Args:
        file_path: Path of the test recording to process.
        category: Category name forwarded to ``slice_test_audio``.
    """
    needs_stereo = AudioSegment.from_file(file_path).channels == 1
    if needs_stereo:
        # Continue with the stereo copy that mono_to_stereo writes.
        file_path = mono_to_stereo(file_path)
    slice_test_audio(file_path, category)


In [30]:
# Run process_test_audio on every file found under each species' test directory.
bird_species = ['eastern-whipbird', 'kookaburra', 'willie-wagtail']

for species in bird_species:
    species_dir = '../data/test/'+species
    for folder, _subfolders, file_names in os.walk(species_dir):
        for file_name in file_names:
            process_test_audio(os.path.join(folder, file_name), species)