Load modules

In [1]:
from utils import *
import numpy as np
import pandas as pd
import IPython

from keras import layers
from keras.layers import Input, Dense, Activation, BatchNormalization, Flatten, Conv1D, MaxPooling1D, Dropout
from keras.models import Model

loaded train_cat with 942.0454375 sec of audio
loaded train_dog with 317.2408125 sec of audio
loaded test_cat with 381.8525 sec of audio
loaded test_dog with 281.1975 sec of audio


Using TensorFlow backend.


Read the CSV split table and set the data directory

In [2]:
## DOWNLOAD DATA (ZIP) HERE
# URL = 'https://www.kaggle.com/mmoreaux/audio-cats-and-dogs/downloads/audio-cats-and-dogs.zip/5'

# Both paths are relative, so they resolve against the notebook's working directory.
# ROOT_DIR: folder containing the WAV files (presumably the unzipped archive above -- TODO confirm).
ROOT_DIR = 'input/cats_dogs/'
# CSV_PATH: table with one column of WAV file names per train/test split.
CSV_PATH = 'input/train_test_split.csv'

# Read the split table into a DataFrame used by the cells below.
df = pd.read_csv(CSV_PATH)

In [3]:
# Preview the first rows of the split table; the last expression is rendered as the cell output.
df.head()

Unnamed: 0.1,Unnamed: 0,test_cat,test_dog,train_cat,train_dog
0,0,cat_22.wav,dog_barking_97.wav,cat_99.wav,dog_barking_33.wav
1,1,cat_116.wav,dog_barking_0.wav,cat_54.wav,dog_barking_86.wav
2,2,cat_155.wav,dog_barking_93.wav,cat_34.wav,dog_barking_45.wav
3,3,cat_58.wav,dog_barking_10.wav,cat_132.wav,dog_barking_76.wav
4,4,cat_77.wav,dog_barking_26.wav,cat_124.wav,dog_barking_4.wav


Count WAVs 

In [4]:
# Number of non-null entries (i.e. listed WAV file names) in each column of the split table.
# Series.count() counts non-NA values in a vectorised way; int() keeps the displayed
# result a plain Python integer regardless of the installed numpy version.
[(name, int(df[name].count())) for name in df.columns]

[('Unnamed: 0', 115),
 ('test_cat', 49),
 ('test_dog', 49),
 ('train_cat', 115),
 ('train_dog', 64)]

Play WAV sample

In [5]:
# Play the test-cat clip stored at index label 5 of the split table.
# ROOT_DIR (configuration cell) is used instead of repeating the folder path here.
IPython.display.Audio(ROOT_DIR + df['test_cat'][5])

In [6]:
# Play the test-dog clip stored at index label 5 of the split table.
# ROOT_DIR (configuration cell) is used instead of repeating the folder path here.
IPython.display.Audio(ROOT_DIR + df['test_dog'][5])

Load WAVs using sci_wav

In [7]:
# Build the per-split audio arrays from the split table.
# `load_dataset` is provided by the star-import of `utils`; it prints the loaded duration per split.
dataset = load_dataset(df)

# Show the array shape stored under each split name.
[(split, samples.shape) for split, samples in dataset.items()]

loaded train_cat with 942.0454375 sec of audio
loaded train_dog with 317.2408125 sec of audio
loaded test_cat with 381.8525 sec of audio
loaded test_dog with 281.1975 sec of audio


[('train_cat', (15072727,)),
 ('train_dog', (5075853,)),
 ('test_cat', (6109640,)),
 ('test_dog', (4499160,))]

Set parameters for the batch shape (16000 samples = 1 s of audio) and calculate the number of batches (steps) per epoch for the train and test sets

In [8]:
# Batch layout: (examples per batch, samples per example).
batch_shape = (20, 32000)
s_per_batch, s_len = batch_shape

# Number of whole batches that fit into the cat recordings of each split.
# These values are passed to training as steps_per_epoch / validation_steps.
samples_per_batch = s_len * s_per_batch
epoch_train = dataset['train_cat'].shape[0] // samples_per_batch
epoch_test = dataset['test_cat'].shape[0] // samples_per_batch

print('train: ', epoch_train)
print('test: ', epoch_test)

train:  23
test:  9


Build CNN Model

In [9]:
# Binary classifier on raw waveforms: input is s_len samples with one channel,
# output is a single sigmoid unit.
x_input = Input((s_len, 1))

# Layers in the order they are applied to the input tensor.
layer_stack = [
    Conv1D(filters=30, kernel_size=50, strides=5, padding='valid'),
    Conv1D(filters=60, kernel_size=10, strides=2, padding='valid'),
    MaxPooling1D(pool_size=2, strides=1, padding='valid'),
    Activation('relu'),
    BatchNormalization(),
    Flatten(),
    Dropout(0.25),
    Dense(1, activation='sigmoid'),
]

# Thread the tensor through every layer; X ends up as the model output tensor.
X = x_input
for layer in layer_stack:
    X = layer(X)

model = Model(inputs=x_input, outputs=X)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [10]:
# Print each layer's output shape and parameter count for the compiled model.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 32000, 1)          0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 6391, 30)          1530      
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 3191, 60)          18060     
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 3190, 60)          0         
_________________________________________________________________
activation_1 (Activation)    (None, 3190, 60)          0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 3190, 60)          240       
_________________________________________________________________
flatten_1 (Flatten)          (None, 191400)            0         
__________

In [11]:
# Batch generators from `utils`: one for training (with sample_augmentation = 1) and one for validation.
train_batches = dataset_gen(is_train=True, batch_shape=batch_shape, sample_augmentation=1)
valid_batches = dataset_gen(is_train=False, batch_shape=batch_shape)

# Train for 20 epochs; the step counts come from the batch-parameter cell.
# The returned History object is the cell's displayed value.
model.fit_generator(
    generator=train_batches,
    steps_per_epoch=epoch_train,
    validation_data=valid_batches,
    validation_steps=epoch_test,
    epochs=20,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x118ca7b8>

In [12]:
# Disabled manual accuracy check on the validation generator (nothing in this cell runs).
# NOTE(review): before re-enabling, confirm that `valid_y` has the same shape as `pred_label`:
# `pred_prob` is flattened, while the labels are collected as one entry per batch.

# valid_gen = dataset_gen(is_train = False, batch_shape = batch_shape)
# valid_data = [next(valid_gen) for i in range(epoch_test)]

# pred_prob = [model.predict(x[0]) for x in valid_data]
# pred_prob = np.array(pred_prob).flatten()
# valid_y = [y[1] for y in valid_data]

# pred_label = 1 * (pred_prob > 0.5)

# acc = np.mean(pred_label == valid_y)
# acc