In [1]:
import pandas as pd
import numpy as np
import librosa
from librosa import display
import matplotlib.pyplot as plt

from sklearn import metrics
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.optimizers import Adam
from keras.utils import to_categorical

In [7]:
# Here we will convert all the mp3 files to wav files
import subprocess
import os 

# Get a list of file paths
base_path = 'train_audio/'
#bird_paths = os.walk(base_path)
#list(os.walk(base_path))
bird_dirs = os.listdir(base_path)
bird_dirs = [base_path + bird_dir + '/' for bird_dir in bird_dirs]
bird_dirs = list(filter(lambda x: '.' not in x, bird_dirs))
bird_paths = [bird_dir + bird_file for bird_dir in bird_dirs for bird_file in os.listdir(bird_dir)]
print(len(bird_paths))

wav_paths = [bird_dir + bird_file for bird_dir in bird_dirs for bird_file in os.listdir(bird_dir)]
wav_paths = list(filter(lambda x: '.wav' in x, wav_paths))
print(len(wav_paths))
for wav_path in wav_paths:
    subprocess.run(['rm', wav_path])

def convert_to_wav(path):
    wav_path = path.strip('.mp3')
    wav_path = wav_path + '.wav'
    subprocess.run(['ffmpeg', '-i', path, wav_path])
    return wav_path

def delete_file(path):
    subprocess.run(['rm', path])
    return

#convert_to_wav(bird_paths)

21375
0


In [8]:
meta_csv_path = 'train.csv'
meta_data = pd.read_csv(meta_csv_path)
# We drop all audio files with a rating below 4 points, this still leaves some 15 thousand files.
meta_data = meta_data[meta_data.rating >= 4]
meta_data.shape
meta_data.columns

Index(['rating', 'playback_used', 'ebird_code', 'channels', 'date', 'pitch',
       'duration', 'filename', 'speed', 'species', 'number_of_notes', 'title',
       'secondary_labels', 'bird_seen', 'sci_name', 'location', 'latitude',
       'sampling_rate', 'type', 'elevation', 'description', 'bitrate_of_mp3',
       'file_type', 'volume', 'background', 'xc_id', 'url', 'country',
       'author', 'primary_label', 'longitude', 'length', 'time', 'recordist',
       'license'],
      dtype='object')

In [9]:
#test_dataset = meta_data.sample(n=10)
#test_dataset.shape

def get_fp_from_df(df):
    base_path = "train_audio/"
    codes = df['ebird_code']
    filenames = df['filename']
    path_suffix = list(zip(codes,filenames))
    path_suffix = list(map(lambda x: x[0]+'/'+x[1], path_suffix))
    paths = [base_path + path for path in path_suffix]
    return paths

def get_mpccs(path):
    data, sample_rate = librosa.load(path, res_type='kaiser_fast')
    mfcc = librosa.feature.mfcc(y=data, sr=sample_rate, n_mfcc=40)
    mfcc_processed = np.mean(mfcc.T, axis=0)
    return (mfcc_processed,sample_rate)
    
def extract_features(df):
    mp3_paths = get_fp_from_df(df)
    mfcc_list = []
    librosa_srs = []
    for path in mp3_paths:
        wav_path = convert_to_wav(path)
        extracted = get_mpccs(wav_path)
        mfcc_list.append(extracted[0])
        librosa_srs.append(extracted[1])
        delete_file(wav_path)
    df['mfccs'] = mfcc_list
    df['librosa_sample_rate'] = librosa_srs
    return df

feature_df = extract_features(meta_data)
feature_df.to_csv('feature.csv', index=False)

#test_df = pd.read_csv('feature.csv')

In [61]:
# Convert features and output labels to numpy arrays
X = np.array(feature_df.mfccs.tolist())
y = np.array(feature_df.ebird_code.tolist())
print(type(X[1057][0]))
# Label Encoder
le = LabelEncoder()
yy = to_categorical(le.fit_transform(y))

x_train, x_test, y_train, y_test = train_test_split(X, yy, test_size=0.2, random_state=127)

<class 'numpy.float32'>


In [70]:
# The actuall feedforward neural network architecture
num_labels = yy.shape[1]
filter_size = 2
def build_model_graph(input_shape=(40,)):
    model = Sequential()
    model.add(Dense(256))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(256))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_labels))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')
    return model

model = build_model_graph()
model.layers
#model.summary()
score = model.evaluate(x_test, y_test, verbose=0)
accuracy = 100*score[1]

print("Pre-training accuracy: {}".format(accuracy))

Pre-training accuracy: 0.2585649723187089


In [72]:
from keras.callbacks import ModelCheckpoint
#from datetime import datetime
import time

num_epochs = 100
num_batch_size = 32

start_time = time.time
model.fit(x_train, y_train, batch_size=num_batch_size, epochs=num_epochs, validation_data=(x_test,y_test),verbose=1)
stop_time = time.time
print("Training time: {}".format(stop_time-start_time))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100


Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


TypeError: unsupported operand type(s) for -: 'builtin_function_or_method' and 'builtin_function_or_method'

In [76]:
score = model.evaluate(x_train, y_train, verbose=0)
print("Training Accuracy: {0:.2%}".format(score[1]))

score = model.evaluate(x_test, y_test, verbose=0)
print("Testing Accuracy: {0:.2%}".format(score[1]))

Training Accuracy: 0.74%
Testing Accuracy: 0.36%
