<a href="https://colab.research.google.com/github/dalton-rutledge/Tweeter/blob/master/MFCCExtraction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
#install pudub before anything else for some sound processing functionality
! pip install pydub

Collecting pydub
  Downloading https://files.pythonhosted.org/packages/79/db/eaf620b73a1eec3c8c6f8f5b0b236a50f9da88ad57802154b7ba7664d0b8/pydub-0.23.1-py2.py3-none-any.whl
Installing collected packages: pydub
Successfully installed pydub-0.23.1


In [0]:
'''
Dalton Rutledge
Tweeter Model Development 2019-2020 Capstone

This file contains methods and code that let you extract MFCCs from every .mp3 file in a list of directories (in Google Drive) and save that data as a CSV.
Each recording is also tagged with a numeric class label, 0 through 9, according to the directory it came from.

CLASS to number code LIST:
    0 - American Crow
    1 - Black-capped Chickadee
    2 - Cactus Wren
    3 - House Finch
    4 - Mourning Dove 
    5 - Northern Cardinal
    6 - Wood Thrush (Call)
    7 - Wood Thrush (Song)
    8 - Tufted Titmouse
    9 - White Breasted Nuthatch
'''

###IMPORTANT: MOUNT THE DRIVE BEFORE RUNNING!

import librosa
import os
import numpy as np
import pandas
import pydub
from pydub import AudioSegment

# Drive folder that holds one sub-folder of recordings per class.
TRAINING_DATA_ROOT = "/content/drive/My Drive/Tweeter/trainingData"

# Sub-folder names in class-label order: the position of a folder in this list is the
# numeric class assigned to its recordings (see the "CLASS to number code LIST" above).
CLASS_FOLDERS = [
    "AmericanCrow",           # 0
    "BlackcappedChickadee",   # 1
    "CactusWren",             # 2
    "HouseFinch",             # 3
    "MourningDove",           # 4
    "NorthernCardinal",       # 5
    "WoodThrushCall",         # 6
    "WoodThrushSong",         # 7
    "tuftedTitMouse",         # 8
    "whiteBreastedNuthatch",  # 9
]

# Full paths, built once from the root so the dataset location only has to change in one place.
data_directories = [TRAINING_DATA_ROOT + "/" + folder for folder in CLASS_FOLDERS]


def extractFeatures(file_path, n_mfcc=20):
    """Return the time-averaged MFCC vector for a single audio file.

    Args:
        file_path: Path of the audio file to load with librosa.
        n_mfcc: Number of MFCC coefficients to compute. Defaults to 20, the
            value this notebook has always used.

    Returns:
        A 1-D numpy array with one entry per coefficient: each coefficient
        averaged over every frame of the recording.
    """
    audio, sample_rate = librosa.load(file_path)
    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)
    # mfccs has one row per coefficient and one column per frame; transposing and
    # averaging over axis 0 collapses the frame axis, leaving one mean per coefficient.
    mfccs_processed = np.mean(mfccs.T, axis=0)
    return mfccs_processed

def getData(list_of_directories,
            output_csv='/content/drive/My Drive/Tweeter/trainingData/FinalTESTMFCC.csv'):
    """Extract MFCC features from every .mp3 file in a list of class directories.

    The position of a directory in ``list_of_directories`` is used as the numeric
    class label for all recordings found inside it.

    Args:
        list_of_directories: Directories to scan, one per class, in label order.
        output_csv: Where to write the resulting table as CSV. Defaults to the
            path this notebook has always written to; pass None to skip saving.

    Returns:
        A pandas DataFrame with one row per .mp3 file: column 0 holds the list of
        averaged MFCC values, column 1 holds the integer class label.
    """
    features = []
    # Iterate over the argument (not the module-level list) so callers control the input.
    for class_label, directory in enumerate(list_of_directories):
        directory = os.path.abspath(directory)
        # os.listdir order is arbitrary; sorting keeps the row order reproducible.
        for filename in sorted(os.listdir(directory)):
            if filename.endswith(".mp3"):
                file_path = os.path.join(directory, filename)
                data = extractFeatures(file_path)
                features.append([list(data), class_label])
    featuresDF = pandas.DataFrame(features)
    if output_csv is not None:
        featuresDF.to_csv(output_csv)
    return featuresDF

# Uncomment the next line to generate a new features CSV. The later training cells
# operate on `featuresdf`, which this call produces.
#featuresdf = getData(data_directories)
print("i worked good :)")

i worked good :)


In [0]:
# Demo cell: look at the raw MFCC matrix for one recording and at how it is
# collapsed into the single averaged vector used as the model input.
audio, sample_rate = librosa.load("/content/drive/My Drive/Tweeter/trainingData/AmericanCrow/1.mp3")
mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=20)
print(mfccs)                  # full matrix: one row per coefficient
print(mfccs.shape[1])         # number of values in a row (frames)
print(mfccs[0].mean())        # mean of the first coefficient over all frames
mfccs_processed = mfccs.T.mean(axis=0)   # the same mean, for every coefficient at once
print(mfccs_processed)
print(len(mfccs_processed))   # one averaged value per coefficient

[[-546.17537466 -546.17537466 -402.77646135 ... -185.11302292
  -256.14063676 -478.73061532]
 [   0.            0.           52.33208305 ...   40.4833786
    42.39417025   36.97722662]
 [   0.            0.          -10.76096625 ...   -5.25841996
     1.56990882    9.99687778]
 ...
 [   0.            0.           -3.70000658 ...   -7.9374226
   -10.14073705   -8.60791055]
 [   0.            0.           -3.97941192 ...   -1.94960676
    -6.20989014   -2.23488975]
 [   0.            0.           -4.06994471 ...    2.31235319
     3.2934662     3.45470103]]
236
-107.78727091222292
[-107.78727091   27.8873093   -52.25086789  -21.28091671  -18.66253111
   19.92436765  -18.64387288  -14.76690476  -27.22685445    8.33763415
  -13.91587007    3.39686486  -11.55909716    2.32430421  -18.52766984
   -1.21917856   -7.1912224    -6.23804845   -1.98997334    0.55126912]
20


In [0]:
import numpy as np
import pandas
import librosa
from scipy.io import wavfile as wav
from sklearn import metrics 
from sklearn.model_selection import train_test_split 
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
import tensorflow.keras

In [0]:
# `featuresdf` only exists if getData() has already been run in this session. Build it
# here when it is missing so the notebook also works after Restart & Run All.
try:
    featuresdf
except NameError:
    featuresdf = getData(data_directories)
# Give the two positional columns produced by getData() descriptive names. Rebinding
# (instead of inplace=True) keeps this cell safe to re-run.
featuresdf = featuresdf.rename(columns={0: 'features', 1: 'label'})

In [0]:
# Stack the per-recording MFCC lists into one array and pull out the class labels
X = np.array(featuresdf['features'].tolist())
y = np.array(featuresdf['label'].tolist())
# Map the labels to consecutive integers, then expand them into one-hot rows
le = LabelEncoder()
encoded_labels = le.fit_transform(y)
yy = to_categorical(encoded_labels)

In [0]:
# Hold out 20% of the samples for testing; the fixed random_state makes the split repeatable
x_train, x_test, y_train, y_test = train_test_split(
    X,
    yy,
    test_size=0.2,
    random_state=128,
)
# Shape of a single training sample (the model's input shape)
print(x_train[0].shape)

(20,)


In [0]:
# Number of output classes: the width of the one-hot label matrix
num_labels = yy.shape[1]
# NOTE(review): filter_size is not referenced anywhere else in the visible notebook — confirm before removing
filter_size = 2
# Placeholder; rebound to the compiled network once build_model_graph() is called below
model = None
def build_model_graph(input_shape=(20,), num_classes=None):
    """Build and compile the dense classifier used on the averaged MFCC vectors.

    Architecture: Dense(512, relu) -> Dropout(0.5) -> Dense(512) -> relu ->
    Dropout(0.5) -> Dense(num_classes) -> softmax.

    Args:
        input_shape: Shape of one input sample. Defaults to (20,), matching the
            20 MFCC coefficients extracted per recording.
        num_classes: Size of the softmax output layer. When None, the
            module-level ``num_labels`` is used.

    Returns:
        A compiled keras Sequential model (categorical cross-entropy loss,
        Adam optimizer, accuracy metric).
    """
    if num_classes is None:
        num_classes = num_labels
    model = Sequential()
    # Honour the input_shape argument instead of a hard-coded input_dim=20
    model.add(Dense(512, activation='relu', input_shape=input_shape))
    model.add(Dropout(0.5))
    model.add(Dense(512))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))
    # Compile the model
    model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')
    return model

# Size the input layer from the training data rather than assuming 20 features
model = build_model_graph(input_shape=x_train[0].shape)

In [0]:
from keras.callbacks import ModelCheckpoint 
from datetime import datetime 
# Training hyper-parameters
num_epochs = 100
num_batch_size = 32
# Fit on the training split; the held-out split is passed as validation data so its
# loss/accuracy is reported after every epoch.
model.fit(
    x_train,
    y_train,
    batch_size=num_batch_size,
    epochs=num_epochs,
    validation_data=(x_test, y_test),
    verbose=1,
)

Train on 296 samples, validate on 74 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Ep

<keras.callbacks.callbacks.History at 0x7f0fdcd1ef28>

In [0]:
# Report accuracy on both splits; evaluate() returns the loss first, then the accuracy metric
score = model.evaluate(x_train, y_train, verbose=0)
train_accuracy = score[1]
print("Training Accuracy: {0:.2%}".format(train_accuracy))

score = model.evaluate(x_test, y_test, verbose=0)
test_accuracy = score[1]
print("Testing Accuracy: {0:.2%}".format(test_accuracy))

Training Accuracy: 98.99%
Testing Accuracy: 91.89%


In [0]:
import keras
# Persist the trained network to Drive as a single HDF5 file
ANN_file = "/content/drive/My Drive/Tweeter/FinalKerasTEST.h5"
model.save(ANN_file)

In [0]:
# Print a per-layer table of output shapes and parameter counts for the trained network
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 512)               10752     
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 512)               262656    
_________________________________________________________________
activation_1 (Activation)    (None, 512)               0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 10)                5130      
_________________________________________________________________
activation_2 (Activation)    (None, 10)               