# Model

In [12]:
import os
from tqdm import tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import wavfile #find sampling rate
from python_speech_features.python_speech_features import mfcc, logfbank  #had to install
import librosa #had to install
from keras.layers import Conv2D, MaxPool2D, Flatten, LSTM
from keras.layers import Dropout, Dense, TimeDistributed
from keras.models import Sequential, load_model
from keras.utils import to_categorical
from sklearn.utils.class_weight import compute_class_weight
import pickle
from keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [3]:
# Load the clip metadata table. Later cells rely on its 'wav' column
# (file name, used as the index) and 'bird_names' column (species label).
df = pd.read_csv('FILE_PATH/wav_species.csv')

In [5]:
# Index the metadata by wav file name and record every clip's duration.
# The guard keeps the cell re-runnable: once 'wav' has become the index it is
# no longer a column, and a second set_index('wav') would raise KeyError.
if df.index.name != 'wav':
    df.set_index('wav', inplace=True)
for f in df.index:
    rate, signal = wavfile.read('clean/'+f)
    # duration in seconds = number of samples / sampling rate
    df.at[f, 'length'] = signal.shape[0]/rate

# Sorted unique species names; a label's position in this list is its class id.
classes = list(np.unique(df.bird_names))
# Mean clip duration per species.
class_dist = df.groupby(['bird_names'])['length'].mean()

In [6]:
# Number of random windows to draw: two for every 0.1 s of audio in the table.
n_samples = int(df['length'].sum() / 0.1) * 2
# Normalise the per-species mean durations so they sum to 1 and can be used
# as class-sampling probabilities.
prob_dist = class_dist.div(class_dist.sum())
# Draw a single species name from that distribution.
choices = np.random.choice(class_dist.index, p=prob_dist)
prob_dist

bird_names
Barred Owl                      0.045680
Black-bellied Whistling Duck    0.033367
Blue Jay                        0.048250
Carolina Wren                   0.058461
Eastern Screech Owl             0.061565
Great Horned Owl                0.077781
Great-tailed Grackle            0.068037
House Finch                     0.063435
Lesser Goldfinch                0.058172
Monk Parakeet                   0.043106
Mourning Dove                   0.033859
Northern Cardinal               0.076298
Northern Mockingbird            0.112049
Painted Bunting                 0.057148
Red-shouldered Hawk             0.041951
Red-tailed Hawk                 0.052725
White-winged Dove               0.068116
Name: length, dtype: float64

In [7]:
# (rows, columns) of the metadata table before short clips are removed.
df.shape

(2897, 2)

In [8]:
# Drop clips shorter than half a second.
# 'length' already stores each clip's duration in seconds (samples / rate),
# so `length < 0.5` is the same test as `samples - rate/2 < 0`, without
# re-reading every wav file and without dropping rows one at a time.
too_short = df.index[df['length'] < 0.5]
df.drop(index=too_short, inplace=True)

In [9]:
# (rows, columns) of the metadata table after clips under half a second were dropped.
df.shape

(2887, 2)

## Set up parameters

In [14]:
def check_data():
    """Return the cached, pickled object stored at ``config.p_path``, if any.

    Reads the notebook-level ``config`` object (a global; normally this would
    be passed in rather than looked up globally).

    Returns:
        The unpickled object when the file exists, otherwise ``None``.
    """
    cache_path = config.p_path
    if not os.path.isfile(cache_path):
        return None
    print('loading existing data')
    with open(cache_path, 'rb') as cache_file:
        return pickle.load(cache_file)
    
    
class Config:
    """Feature-extraction settings plus the file locations derived from them.

    Attributes set here:
        mode:       name of the model variant; also used to name the files.
        nfilt, nfeat, nfft: values forwarded to the MFCC extractor.
        rate:       sampling rate the window length is derived from.
        step:       window length in samples (half of ``rate``, truncated).
        model_path: ``models/<mode>.model``
        p_path:     ``pickles/<mode>.pkl``
    """

    def __init__(self, mode='conv',
                 nfilt=26, nfeat=13, nfft=512, rate=16000):
        # Plain settings, stored as given.
        self.mode, self.nfilt, self.nfeat = mode, nfilt, nfeat
        self.rate, self.nfft = rate, nfft
        # Half a second worth of samples at `rate`.
        self.step = int(rate / 2)
        # Saved-model and feature-cache locations, keyed by the mode name.
        model_file = mode + '.model'
        pickle_file = mode + '.pkl'
        self.model_path = os.path.join('models', model_file)
        self.p_path = os.path.join('pickles', pickle_file)


def build_rand_feat():
    """Build (or load from cache) the randomly sampled MFCC training set.

    Uses the notebook-level ``config``, ``df``, ``classes``, ``class_dist``,
    ``prob_dist`` and ``n_samples``.

    If ``check_data()`` finds a cached pickle, its ``data`` pair is returned
    unchanged. Otherwise ``n_samples`` windows of ``config.step`` samples are
    drawn: a species is picked according to ``prob_dist``, a file of that
    species is picked uniformly, and a random window of that file is converted
    to MFCC features. Features are min-max scaled with the global minimum and
    maximum, which are stored on ``config`` (``config.min`` / ``config.max``)
    together with the data (``config.data``), and ``config`` is pickled to
    ``config.p_path``.

    Returns:
        (X, y): the scaled feature array (with a trailing channel axis in
        'conv' mode) and the one-hot encoded class ids.

    Raises:
        ValueError: if a selected file is shorter than ``config.step`` samples
            (raised by ``np.random.randint``).
    """
    tmp = check_data()
    if tmp is not None:
        return tmp.data[0], tmp.data[1]

    # Loop-invariant lookups, hoisted out of the (very long) sampling loop:
    # class name -> integer id, and class name -> file names of that class.
    class_ids = {name: idx for idx, name in enumerate(classes)}
    files_by_class = {name: df.index[df.bird_names == name]
                      for name in class_dist.index}

    X = []
    y = []
    _min, _max = float('inf'), -float('inf')
    for _ in tqdm(range(n_samples)):
        rand_class = np.random.choice(class_dist.index, p=prob_dist)
        file = np.random.choice(files_by_class[rand_class])
        rate, wav = wavfile.read('clean/' + file)
        # randint's upper bound is exclusive, so +1 makes the last valid start
        # offset reachable and lets a clip of exactly `step` samples be used
        # instead of raising ValueError.
        rand_index = np.random.randint(0, wav.shape[0] - config.step + 1)
        sample = wav[rand_index:rand_index+config.step]
        X_sample = mfcc(sample, rate,
                       numcep=config.nfeat,
                       nfilt=config.nfilt,
                       nfft=config.nfft)
        # Track the global feature range for min-max scaling below.
        _min = min(np.amin(X_sample), _min)
        _max = max(np.amax(X_sample), _max)
        X.append(X_sample)
        # The file was drawn from `rand_class`, so that is its label;
        # encode it as an integer id in 0 .. len(classes)-1.
        y.append(class_ids[rand_class])
    config.min = _min
    config.max = _max
    X, y = np.array(X), np.array(y)
    X = (X - _min) / (_max - _min)
    if config.mode == 'conv':
        # Add a trailing channel axis for Conv2D.
        X = X.reshape(X.shape[0], X.shape[1], X.shape[2], 1)
    elif config.mode == 'time':
        X = X.reshape(X.shape[0], X.shape[1], X.shape[2])
    # One column per known class rather than a hard-coded count.
    y = to_categorical(y, num_classes=len(classes))
    config.data = (X, y)

    # Make sure the cache directory exists before writing into it.
    cache_dir = os.path.dirname(config.p_path)
    if cache_dir and not os.path.isdir(cache_dir):
        os.makedirs(cache_dir)
    with open(config.p_path, 'wb') as handle:
        pickle.dump(config, handle, protocol=2) #2 to crossversion (Highest Protocol otherwise)

    return X, y
        
        
def get_conv_model():
    """Assemble and compile the convolutional classifier.

    Architecture: four 3x3 'same'-padded ReLU convolutions (16, 32, 64, 128
    filters), 2x2 max-pooling, 50% dropout, flatten, two ReLU dense layers
    (128, 64) and a 17-way softmax output. The input shape is taken from the
    notebook-level ``input_shape``. The summary is printed before compiling
    with categorical cross-entropy, Adam and the 'acc' metric.

    Returns:
        The compiled Sequential model.
    """
    conv_options = dict(activation='relu', strides=(1, 1), padding='same')

    net = Sequential()
    # Only the first layer declares the input shape.
    net.add(Conv2D(16, (3, 3), input_shape=input_shape, **conv_options))
    for n_filters in (32, 64, 128):
        net.add(Conv2D(n_filters, (3, 3), **conv_options))
    net.add(MaxPool2D((2, 2)))
    net.add(Dropout(0.5))
    net.add(Flatten())
    for n_units in (128, 64):
        net.add(Dense(n_units, activation='relu'))
    net.add(Dense(17, activation='softmax'))
    net.summary()
    net.compile(loss='categorical_crossentropy',
                optimizer='adam', metrics=['acc'])
    return net

## Train Model

In [None]:
config = Config(mode='conv')

if config.mode == 'conv':
    X, y = build_rand_feat()
    # Integer class ids recovered from the one-hot labels.
    y_flat = np.argmax(y, axis=1)
    input_shape = (X.shape[1], X.shape[2], 1)
    model = get_conv_model()
else:
    # Without this, an unsupported mode would surface later as a NameError.
    raise ValueError("unsupported mode %r: only 'conv' is implemented here"
                     % (config.mode,))

# Balanced class weights. Keyword arguments are used because current
# scikit-learn rejects the positional form, and the result is turned into the
# {class_id: weight} dict that Keras documents for `class_weight`.
present_classes = np.unique(y_flat)
balanced_weights = compute_class_weight(class_weight='balanced',
                                        classes=present_classes, y=y_flat)
class_weight = dict(zip(present_classes.tolist(), balanced_weights.tolist()))

# Make sure the directory for the saved model exists.
model_dir = os.path.dirname(config.model_path)
if model_dir and not os.path.isdir(model_dir):
    os.makedirs(model_dir)

# Keep only the epoch with the best validation accuracy (checked every epoch,
# which is the default, so the deprecated `period` argument is not needed).
checkpoint = ModelCheckpoint(config.model_path, monitor='val_acc',
                             verbose=1, mode='max', save_best_only=True,
                             save_weights_only=False)

# Stop when validation loss has not improved for 5 consecutive epochs.
early_stop = EarlyStopping(
    monitor ='val_loss',
    min_delta=0,
    patience=5)

model.fit(X, y, epochs=10, batch_size=32,
         shuffle=True,
         class_weight=class_weight, validation_split=0.1,
         callbacks=[checkpoint, early_stop])

# The checkpoint callback has already written the best model to
# config.model_path. Saving unconditionally here would overwrite it with the
# last epoch's weights, so only save when no checkpoint was produced.
if not os.path.exists(config.model_path):
    model.save(config.model_path)

 66%|██████▌   | 2035250/3087940 [2:06:12<1:02:07, 282.40it/s]

## Build Predictions

In [None]:
def build_predictions(audio_dir):
    """Run the model over every labelled wav file in ``audio_dir``.

    Uses the notebook-level ``config``, ``model``, ``classes`` and
    ``fn2class``. Each file is cut into consecutive, non-overlapping windows
    of ``config.step`` samples; every window is converted to MFCC features,
    scaled with ``config.min`` / ``config.max`` and classified.

    Args:
        audio_dir: directory containing the wav files to score.

    Returns:
        (y_true, y_pred, fn_prob):
            y_true  - true class id, one entry per window;
            y_pred  - predicted class id, one entry per window;
            fn_prob - file name -> class-probability vector averaged over the
                      file's windows (all NaN when the file is shorter than
                      one window).
    """
    y_true = []
    y_pred = []
    fn_prob = {}
    print('Extracting features from audio')
    for filename in tqdm(os.listdir(audio_dir)):
        # Entries without a known label (sub-directories, stray files) cannot
        # be scored; skip them instead of failing with KeyError.
        if filename not in fn2class:
            continue
        rate, wav = wavfile.read(os.path.join(audio_dir, filename))
        c = classes.index(fn2class[filename])

        windows = []
        # +1 so a clip whose length is an exact multiple of the step still
        # yields its last full window (and a clip of exactly one step yields
        # one window instead of none).
        for i in range(0, wav.shape[0] - config.step + 1, config.step):
            sample = wav[i:i+config.step]
            x = mfcc(sample, rate,
                       numcep=config.nfeat,
                       nfilt=config.nfilt,
                       nfft=config.nfft)
            windows.append((x - config.min) / (config.max - config.min))

        if not windows:
            # Shorter than one window: nothing to classify. Store NaNs so the
            # file still has an entry of the usual length.
            print('skipping %s: shorter than one window' % filename)
            fn_prob[filename] = np.full(model.output_shape[-1], np.nan)
            continue

        # One predict call per file: stack the windows into a batch of shape
        # (n_windows, frames, coefficients, 1).
        batch = np.stack(windows)[..., np.newaxis]
        probs = model.predict(batch)
        y_pred.extend(np.argmax(probs, axis=1))
        y_true.extend([c] * len(windows))
        fn_prob[filename] = probs.mean(axis=0)
    return y_true, y_pred, fn_prob

config = Config(mode='conv')

# --- Collect the recordings to score ---------------------------------------
# Every regular file in `folder` is listed; files ending in '.m4a' are renamed
# to '.wav'. The label of a file is its name without the extension.
# NOTE(review): renaming only changes the extension, it does not transcode the
# audio. scipy's wavfile.read expects real WAV data, so genuinely AAC-encoded
# .m4a files would still need converting - confirm what these files contain.
fname = []
bird = []
folder = 'new_audio/'
for filename in os.listdir(folder):
    infilename = os.path.join(folder, filename)
    if not os.path.isfile(infilename):
        continue
    stem, ext = os.path.splitext(filename)
    if ext == '.m4a':
        newname = os.path.join(folder, stem + '.wav')
        os.rename(infilename, newname)
    else:
        newname = infilename
    print(stem)
    fname.append(os.path.basename(newname))
    bird.append(stem)

dct = {'fname': fname, 'label': bird}
df = pd.DataFrame(dct)

# Class ids produced by the model follow the sorted species names of the
# TRAINING table, so the class list must come from there. Rebuilding it from
# the new files' names would shift the indices whenever the two sets differ.
train_meta = pd.read_csv('FILE_PATH/wav_species.csv')
classes = list(np.unique(train_meta.bird_names))
unknown_labels = sorted(set(df.label) - set(classes))
if unknown_labels:
    raise ValueError('labels not present in the training classes: %s'
                     % ', '.join(unknown_labels))
fn2class = dict(zip(df.fname, df.label))

# Reload the config pickled during feature building; it carries the min/max
# used for scaling. NOTE: pickle.load can execute arbitrary code, so only load
# pickles written by this notebook.
with open(config.p_path, 'rb') as handle:
    config = pickle.load(handle)

model = load_model(config.model_path)

y_true, y_pred, fn_prob = build_predictions(folder)

# Window-level accuracy.
acc_score = accuracy_score(y_true=y_true, y_pred=y_pred)
acc_scrore = acc_score  # original (misspelled) name kept in case later cells use it

# --- Per-file results --------------------------------------------------------
# One probability column per class, plus the most likely class for each file.
y_probs = []

for i, row in df.iterrows():
    y_prob = fn_prob[row.fname]
    y_probs.append(y_prob)
    for c, p in zip(classes, y_prob):
        df.at[i, c] = p


y_pred = [classes[np.argmax(y)] for y in y_probs]

df['y_pred'] = y_pred

df.to_csv('predictions.csv',index=False)