In [25]:
from keras.models import Sequential
from keras.layers import LSTM, Dense, TimeDistributed, Bidirectional, Conv2D, MaxPooling2D, Flatten
from keras.utils import to_categorical
import numpy as np
import os
import pickle
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
import pandas as pd
import librosa
import glob
from sklearn.utils import shuffle
from io import StringIO
import warnings
import gc
import tensorflow as tf
from sklearn.metrics import precision_recall_fscore_support as prfs

In [2]:
warnings.filterwarnings("ignore")

if os.path.exists("../input/birdsong-recognition/test_audio/"):
    n_epochs=200
    fc=None
else:
    n_epochs=50
    fc=100

prob_cutoff = 0.5

prepared_test_str = """row_id,site,audio_id,seconds
bulori/XC128942,bulori/XC128942,bulori/XC128942,5
bulori/XC170988,bulori/XC170988,bulori/XC170988,5
normoc/XC54018,normoc/XC54018,normoc/XC54018,5
normoc/XC62791,normoc/XC62791,normoc/XC62791,5
herthr/XC53784,herthr/XC53784,herthr/XC53784,5
herthr/XC119596,herthr/XC119596,herthr/XC119596,5
brnthr/XC31308,brnthr/XC31308,brnthr/XC31308,5
brnthr/XC53695,brnthr/XC53695,brnthr/XC53695,5
vesspa/XC17095,vesspa/XC17095,vesspa/XC17095,5
vesspa/XC17096,vesspa/XC17096,vesspa/XC17096,5
solsan/XC17025,solsan/XC17025,solsan/XC17025,5
norfli/XC11578,norfli/XC11578,norfli/XC11578,5
lesnig/XC27724,lesnig/XC27724,lesnig/XC27724,5
grycat/XC31058,grycat/XC31058,grycat/XC31058,5
eastow/XC53188,eastow/XC53188,eastow/XC53188,5
aldfly/XC2628,aldfly/XC2628,aldfly/XC2628,5
ameavo/XC99571,ameavo/XC99571,ameavo/XC99571,5
amebit/XC127371,amebit/XC127371,amebit/XC127371,5
amecro/XC51410,amecro/XC51410,amecro/XC51410,5
amegfi/XC17120,amegfi/XC17120,amegfi/XC17120,5
"""

In [3]:
def load_test_audio(path):
    signal, blah = librosa.load(path, sr=22050)
    
    if len(signal.shape)==1:
        y = signal
    else:
        y = np.average(signal, axis=1)
    
    return signal
    

def getBirdMfcc(bird, n_mfcc=50, filecount=None):
    birddirectory = "data/mfcc_50/" + bird + ".pickle"
    mfcc = pickle.load(open(birddirectory,'rb')).T
    return mfcc

In [4]:
n_birds = 1
neg_folds = 10
mfcc_directory = 'data/mfcc_50/'
birds = [f.split(".")[0] for f in os.listdir(mfcc_directory)][180:180+neg_folds*n_birds]

ohe = OneHotEncoder(sparse=False)
le = LabelEncoder()

features = np.array(birds[:n_birds])
fint = le.fit_transform(features).reshape(len(features),1)

ohe.fit(fint)

OneHotEncoder(categorical_features='all', dtype=<class 'float'>,
       handle_unknown='error', n_values='auto', sparse=False)

In [40]:
dataMap = {}
testMap = {}

for bird in birds:
    bird_mfcc = getBirdMfcc(bird, filecount=fc)
    train_samples = int(0.9*bird_mfcc.shape[0])
    test_samples = -1*int(0.1*bird_mfcc.shape[0])
    dataMap[bird] = bird_mfcc[:train_samples,]
    testMap[bird] = bird_mfcc[test_samples:,]

In [7]:
tester = dataMap["aldfly"][:10000,:]
print(tester.shape)
mags = (np.diagonal(np.matmul(tester,tester.T)))
print(mags)
print(max(mags),min(mags))
len(mags)

KeyError: 'aldfly'

In [41]:
birds

['pinsis',
 'pinwar',
 'plsvir',
 'prawar',
 'purfin',
 'pygnut',
 'rebmer',
 'rebnut',
 'rebsap',
 'rebwoo']

In [42]:
model = Sequential()

# model.add(Bidirectional(LSTM(10, return_sequences=True), input_shape=(None, 50)))
# model.add(Bidirectional(LSTM(10, return_sequences=True)))
# model.add(Dense(25, activation = 'sigmoid'))

model.add(Conv2D(4, kernel_size=(5, 1), activation='relu', input_shape=(215, 50, 1)))
model.add(MaxPooling2D((3, 1), padding='same'))
model.add(Conv2D(4, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D((8, 1), strides=(8,1), padding='same'))
model.add(Flatten())
model.add(Dense(n_birds, activation='sigmoid'))

for layer in model.layers:
    print(layer.output_shape)
    if "conv" in layer.name:
        filters, biases = layer.get_weights()
        print(layer.name, filters.shape)

model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

gc.collect()

(None, 211, 50, 4)
conv2d_11 (5, 1, 1, 4)
(None, 71, 50, 4)
(None, 69, 48, 4)
conv2d_12 (3, 3, 4, 4)
(None, 9, 48, 4)
(None, 1728)
(None, 1)


16037

In [47]:
def train_generator():
    while True:
        sequence_length = 215
        
        xarr = []
        yarr = []
        for k,v in dataMap.items():
            nsamples = v.shape[0]
            size=9
            if k not in birds[:n_birds]:
                size = 1
            samples = np.random.randint(0,nsamples-sequence_length+1,size=size)
            try:
                birdohe = ohe.transform(le.transform([k]).reshape(1,1))
            except:
                birdohe = np.array([[0]*n_birds])
            for sample in samples:
#                 xarr.append(v[sample:sample+sequence_length,:].reshape(1,sequence_length,50))
#                 yarr.append(np.tile(birdohe,(sequence_length,1)).reshape(1,sequence_length,n_birds))
                
                xf = v[sample:sample+sequence_length,:]
                lb, ub = xf.min(), xf.max()
                xp, yp = [xf < lb, xf > ub , np.logical_not(np.logical_and(xf < lb , xf > ub))], [0, 1, lambda x: (x-lb)/ub]
                xt = np.piecewise(xf, xp, yp)
                
                xarr.append(xt.reshape(1,sequence_length,50,1))
                yarr.append(birdohe.reshape(1,n_birds))
        x_train = np.concatenate(xarr)
        y_train = np.concatenate(yarr)
        x_train, y_train = shuffle(x_train, y_train)
        yield x_train, y_train

def test_generator():
    sequence_length = 215

    xarr = []
    yarr = []
    for k,v in testMap.items():
        nsamples = v.shape[0]
        size=1800
        if k not in birds[:n_birds]:
            size = 200
        samples = np.random.randint(0,nsamples-sequence_length+1,size=size)
        try:
            birdohe = ohe.transform(le.transform([k]).reshape(1,1))
        except:
            birdohe = np.array([[0]*n_birds])
        for sample in samples:
#             xarr.append(v[sample:sample+sequence_length,:].reshape(1,sequence_length,50))
#             yarr.append(np.tile(birdohe,(sequence_length,1)).reshape(1,sequence_length,n_birds))

            xf = v[sample:sample+sequence_length,:]
            lb, ub = xf.min(), xf.max()
            xp, yp = [xf < lb, xf > ub , np.logical_not(np.logical_and(xf < lb , xf > ub))], [0, 1, lambda x: (x-lb)/ub]
            xt = np.piecewise(xf, xp, yp)

            xarr.append(xt.reshape(1,sequence_length,50,1))
            yarr.append(birdohe.reshape(1,n_birds))
    x_train = np.concatenate(xarr)
    y_train = np.concatenate(yarr)
#     x_train, y_train = shuffle(x_train, y_train)
    return x_train, y_train

x_val,y_val = test_generator()

# x,y = train_generator()
history = model.fit_generator(train_generator(), steps_per_epoch=30, epochs=50, verbose=1, validation_data=(x_val, y_val))

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [48]:
plt.subplot(2,1,1)
plt.plot(history.history["loss"])
plt.plot(history.history["val_loss"])

plt.subplot(2,1,2)
plt.plot(history.history["acc"])
plt.plot(history.history["val_acc"])
plt.show()

In [49]:
preds = model.predict(x_val)
preds[preds > 0.5] = 1
preds[preds <= 0.5] = 0

prfs(y_val,preds,labels=[1,0])

(array([0.72109375, 0.62198276]),
 array([0.51277778, 0.80166667]),
 array([0.59935065, 0.70048544]),
 array([1800, 1800], dtype=int64))

In [None]:
if os.path.exists("../input/birdsong-recognition/test_audio/"):
    test_dir = "../input/birdsong-recognition/test_audio/"
    testtable = pd.read_csv("../input/birdsong-recognition/test.csv")
else:
    test_dir = "../input/birdsong-recognition/train_audio/"
    testtable = pd.read_csv(StringIO(prepared_test_str))


def load_test_clip(signal, start_time, duration=5):
    sr=22050
    maxl = len(signal)
    
    if duration!=None:
        y = signal[max(0,int(start_time)*22050):min(int(start_time+duration)*22050,maxl)]
    else:
        y = signal[max(0,start_time*22050):]
    
    mfcc_feat = librosa.feature.mfcc(n_mfcc=50, y=y, sr=22050)
    return mfcc_feat.T

def make_prediction(sound_clip):
    predidx = [i for i,each in enumerate(make_probabilities(sound_clip)) if each>prob_cutoff]
    if len(predidx)==0:
        predbirds = "nocall"
    else:
        predbirds = " ".join(list(le.inverse_transform(predidx)))
    
    return predbirds

def make_probabilities(sound_clip):
    mfccdim = sound_clip.shape
    sound_clip = sound_clip.reshape(1,mfccdim[0],mfccdim[1])
    
    pred = model.predict(sound_clip).reshape(mfccdim[0],5)
    
    return np.mean(pred,axis=0)

In [None]:
test_info = testtable.sort_values("audio_id")

preds = []
sound_read = ""
for index, row in test_info.iterrows():
    # Get test row information
    site = row['site']
    try:
        start_time = row['seconds'] - 5
    except:
        start_time = 0
    row_id = row['row_id']
    audio_id = row['audio_id']

    try:
        if sound_read != audio_id:
            sound_audio = load_test_audio(test_dir + audio_id + '.mp3')
            sound_read = audio_id
    except:
        print("exception")
        pred = "nocall"
        preds.append(pred)
        continue
        
    if site == 'site_3':
        sound_clip = load_test_clip(sound_audio, 0, duration=None)
    else:
        sound_clip = load_test_clip(sound_audio, start_time)
            
    
    pred = make_prediction(sound_clip)
    preds.append(pred)
    
testtable = test_info.sort_index()

test_submission = testtable.drop(['site', 'audio_id', 'seconds'], axis = 1) 
test_submission['birds'] = preds

test_submission.to_csv('submission.csv', index = None)

In [None]:
print(test_submission.head(20))