In [1]:
# Leave as True to replicate the full result; set to False for a quick run
# that keeps only the first 2000 rows of the train and test frames.
COMPLETE_RUN = True

In [2]:
import numpy as np
np.random.seed(1001)

import os
import shutil

import IPython
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from keras.callbacks import ReduceLROnPlateau
from sklearn.cross_validation import StratifiedKFold

%matplotlib inline
matplotlib.style.use('ggplot')

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [3]:
# Training metadata; later cells read its `fname` and `label` columns.
train = pd.read_csv("../data/train.csv")
# The sample submission file is used as the test-set frame (indexed by `fname` later on).
test = pd.read_csv("../data/sample_submission.csv")

In [4]:
import librosa
import numpy as np
import scipy
from keras import losses, models, optimizers
from keras.activations import relu, softmax
from keras.callbacks import (EarlyStopping, LearningRateScheduler,
                             ModelCheckpoint, TensorBoard, ReduceLROnPlateau)
from keras.layers import (Convolution1D, Dense, Dropout, GlobalAveragePooling1D, 
                          GlobalMaxPool1D, Input, MaxPool1D, concatenate)
from keras.utils import Sequence, to_categorical

In [5]:
# import tensorflow as tf
# from keras.backend.tensorflow_backend import set_session
# config = tf.ConfigProto()
# config.gpu_options.per_process_gpu_memory_fraction = 0.3
# set_session(tf.Session(config=config))

In [6]:
class Config(object):
    """Hyper-parameters and derived input shape shared by the generator and models.

    Parameters
    ----------
    sampling_rate : int
        Sample rate (Hz) the audio is resampled to when loaded.
    audio_duration : int or float
        Clip length in seconds fed to the network.
    n_classes : int
        Number of target classes.
    use_mfcc : bool
        If True the input shape describes an MFCC matrix, otherwise a raw waveform.
    n_folds : int
        Number of cross-validation folds.
    learning_rate : float
        Optimizer learning rate.
    max_epochs : int
        Upper bound on training epochs (stored only; callers decide whether to use it).
    n_mfcc : int
        Number of MFCC coefficients when ``use_mfcc`` is True.

    Attributes
    ----------
    audio_length : int
        Number of samples per clip (``sampling_rate * audio_duration``).
    dim : tuple of int
        Per-example input shape: ``(audio_length, 1)`` for raw audio or
        ``(n_mfcc, n_frames, 1)`` for MFCC input.
    """
    def __init__(self,
                 sampling_rate=16000, audio_duration=2, n_classes=41,
                 use_mfcc=False, n_folds=10, learning_rate=0.0001, 
                 max_epochs=50, n_mfcc=20):
        self.sampling_rate = sampling_rate
        self.audio_duration = audio_duration
        self.n_classes = n_classes
        self.use_mfcc = use_mfcc
        self.n_mfcc = n_mfcc
        self.n_folds = n_folds
        self.learning_rate = learning_rate
        self.max_epochs = max_epochs

        # Force an integer sample count: a fractional duration (e.g. 1.5 s) would
        # otherwise give a float here, and floats are not valid array dimensions
        # or slice bounds downstream.
        self.audio_length = int(round(self.sampling_rate * self.audio_duration))
        if self.use_mfcc:
            # NOTE(review): 512 is presumably librosa's default hop length, making
            # this the number of MFCC frames for a clip of audio_length samples.
            self.dim = (self.n_mfcc, 1 + self.audio_length // 512, 1)
        else:
            self.dim = (self.audio_length, 1)

In [7]:
class DataGenerator(Sequence):
    """Keras ``Sequence`` that loads audio clips from disk one batch at a time.

    Each clip is resampled to ``config.sampling_rate``, randomly cropped or
    zero-padded to ``config.audio_length`` samples, and then either converted to
    MFCCs (``config.use_mfcc``) or passed through ``preprocessing_fn``.

    Parameters
    ----------
    config : Config
        Supplies ``sampling_rate``, ``audio_length``, ``dim``, ``use_mfcc``,
        ``n_mfcc`` and ``n_classes``.
    data_dir : str
        Prefix that is concatenated directly with each ID to form the file path,
        so it must end with a path separator.
    list_IDs : sequence of str
        File names, indexable by integer position.
    labels : mapping from ID to int class index, optional
        When given, batches are ``(X, one_hot_y)``; otherwise only ``X``.
    batch_size : int
        Maximum number of clips per batch (the last batch may be smaller).
    preprocessing_fn : callable
        Applied to the raw waveform in the non-MFCC branch only.
    """
    def __init__(self, config, data_dir, list_IDs, labels=None, 
                 batch_size=64, preprocessing_fn=lambda x: x):
        self.config = config
        self.data_dir = data_dir
        self.list_IDs = list_IDs
        self.labels = labels
        self.batch_size = batch_size
        self.preprocessing_fn = preprocessing_fn
        self.on_epoch_end()
        self.dim = self.config.dim

    def __len__(self):
        """Number of batches per epoch; ceil so a trailing partial batch is kept."""
        return int(np.ceil(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        """Return batch number ``index`` (``X`` or ``(X, y)``)."""
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        list_IDs_temp = [self.list_IDs[k] for k in indexes]
        return self.__data_generation(list_IDs_temp)

    def on_epoch_end(self):
        """Reset the sample order to 0..N-1 (this generator does not shuffle)."""
        self.indexes = np.arange(len(self.list_IDs))

    def _load_clip(self, ID):
        """Load one file and return a waveform of exactly ``audio_length`` samples."""
        input_length = self.config.audio_length

        # Read and resample the audio
        data, _ = librosa.core.load(self.data_dir + ID, sr=self.config.sampling_rate,
                                    res_type='kaiser_fast')

        # Number of samples missing (> 0) or in excess (< 0) of the target length.
        n_missing = input_length - len(data)
        if n_missing < 0:
            # Too long: take a window starting at a random offset.
            offset = np.random.randint(-n_missing)
            data = data[offset:(input_length+offset)]
        else:
            # Too short (or exact): zero-pad, placing the clip at a random offset.
            # randint(0) is invalid, hence the explicit zero for the exact-fit case.
            offset = np.random.randint(n_missing) if n_missing > 0 else 0
            data = np.pad(data, (offset, n_missing - offset), "constant")
        return data

    def __data_generation(self, list_IDs_temp):
        """Build the arrays for one batch from a list of file IDs."""
        cur_batch_size = len(list_IDs_temp)
        X = np.empty((cur_batch_size, *self.dim))

        for i, ID in enumerate(list_IDs_temp):
            data = self._load_clip(ID)

            # Normalization + other preprocessing
            if self.config.use_mfcc:
                # The signal is passed by keyword: newer librosa releases no longer
                # accept it positionally. preprocessing_fn is not applied here.
                data = librosa.feature.mfcc(y=data, sr=self.config.sampling_rate,
                                            n_mfcc=self.config.n_mfcc)
                data = np.expand_dims(data, axis=-1)
            else:
                data = self.preprocessing_fn(data)[:, np.newaxis]
            X[i,] = data

        if self.labels is None:
            return X

        y = np.array([self.labels[ID] for ID in list_IDs_temp], dtype=int)
        return X, to_categorical(y, num_classes=self.config.n_classes)

In [8]:
def audio_norm(data):
    """Min-max scale ``data`` and shift it so values lie in roughly [-0.5, 0.5].

    A small epsilon (1e-6) is added to the range so a constant signal does not
    divide by zero; such a signal maps to -0.5 everywhere.
    """
    lo = np.min(data)
    hi = np.max(data)
    span = hi - lo + 1e-6
    return (data - lo) / span - 0.5

In [9]:
def get_1d_dummy_model(config):
    """Build and compile a minimal baseline: global max-pool followed by softmax.

    The input is a raw waveform of shape ``(config.audio_length, 1)``; the output
    has ``config.n_classes`` softmax units. Compiled with Adam at
    ``config.learning_rate``, categorical cross-entropy and the 'acc' metric.
    """
    n_samples = config.audio_length
    n_out = config.n_classes

    waveform = Input(shape=(n_samples, 1))
    pooled = GlobalMaxPool1D()(waveform)
    probs = Dense(n_out, activation=softmax)(pooled)

    model = models.Model(inputs=waveform, outputs=probs)
    model.compile(optimizer=optimizers.Adam(config.learning_rate),
                  loss=losses.categorical_crossentropy,
                  metrics=['acc'])
    return model

def get_1d_conv_model(config):
    """Build and compile the 1-D convolutional classifier for raw waveforms.

    Architecture: three stages of (conv, conv, max-pool, dropout 0.1), then a
    256-filter conv pair with global max-pooling and dropout 0.2, then two dense
    layers and a softmax over ``config.n_classes``. Compiled with Adam at
    ``config.learning_rate``, categorical cross-entropy and the 'acc' metric.
    """
    n_samples = config.audio_length
    n_out = config.n_classes

    # (filters, kernel size, pool size) for each pooled convolution stage.
    pooled_stages = [(16, 9, 16), (32, 3, 4), (32, 3, 4)]

    inp = Input(shape=(n_samples, 1))
    x = inp
    for n_filters, kernel, pool in pooled_stages:
        x = Convolution1D(n_filters, kernel, activation=relu, padding="valid")(x)
        x = Convolution1D(n_filters, kernel, activation=relu, padding="valid")(x)
        x = MaxPool1D(pool)(x)
        x = Dropout(rate=0.1)(x)

    # Final convolution pair, collapsed over time by global max-pooling.
    for _ in range(2):
        x = Convolution1D(256, 3, activation=relu, padding="valid")(x)
    x = GlobalMaxPool1D()(x)
    x = Dropout(rate=0.2)(x)

    # Classifier head.
    # NOTE(review): 1028 units is possibly a typo for 1024; kept as-is so that
    # previously saved weights still match the architecture.
    x = Dense(64, activation=relu)(x)
    x = Dense(1028, activation=relu)(x)
    out = Dense(n_out, activation=softmax)(x)

    model = models.Model(inputs=inp, outputs=out)
    model.compile(optimizer=optimizers.Adam(config.learning_rate),
                  loss=losses.categorical_crossentropy,
                  metrics=['acc'])
    return model

In [10]:
# Class names in order of first appearance, and the name -> integer id lookup.
LABELS = list(train.label.unique())
label_idx = {label: i for i, label in enumerate(LABELS)}

# Index both frames by file name. The guard keeps this cell re-runnable: once
# `fname` is the index it is no longer a column, and a second set_index call
# would raise KeyError.
if train.index.name != "fname":
    train = train.set_index("fname")
if test.index.name != "fname":
    test = test.set_index("fname")

train["label_idx"] = train.label.map(label_idx)

if not COMPLETE_RUN:
    # Quick run: keep only the first 2000 rows of each frame. Copy so later
    # column assignments do not act on a slice of the full frame.
    train = train[:2000].copy()
    test = test[:2000].copy()

In [11]:
# Run configuration: 2-second clips at 16 kHz, 10 CV folds, and a learning rate
# of 0.001 (overriding the Config default of 0.0001).
config = Config(sampling_rate=16000, audio_duration=2, n_folds=10, learning_rate=0.001)

In [12]:
PREDICTION_FOLDER = "./predictions_1d_conv/"
MODEL_FOLDER = "../model/"
LOG_FOLDER = os.path.join("logs", PREDICTION_FOLDER)

# Create the output directories up front. The checkpoint directory in particular
# was never created, so the first checkpoint save could fail on a missing path.
os.makedirs(PREDICTION_FOLDER, exist_ok=True)
os.makedirs(MODEL_FOLDER, exist_ok=True)
# Start from clean TensorBoard logs so curves from earlier runs are not mixed in.
if os.path.exists(LOG_FOLDER):
    shutil.rmtree(LOG_FOLDER)

# NOTE(review): this is the legacy sklearn.cross_validation API (labels passed to
# the constructor, the object itself is iterated); it is not available in
# current scikit-learn releases.
skf = StratifiedKFold(train.label_idx, n_folds=config.n_folds,
                      shuffle=True, random_state=13)

for i, (train_split, val_split) in enumerate(skf):
    train_set = train.iloc[train_split]
    val_set = train.iloc[val_split]

    # Per-fold callbacks: keep the best model by val_loss, stop after 8 epochs
    # without improvement, log to TensorBoard, and multiply the learning rate by
    # 0.7 after 5 epochs without improvement.
    checkpoint = ModelCheckpoint(os.path.join(MODEL_FOLDER, 'best_%d.h5' % i),
                                 monitor='val_loss', verbose=1, save_best_only=True,
                                 save_weights_only=False)
    early = EarlyStopping(monitor="val_loss", mode="min", patience=8)
    tb = TensorBoard(log_dir=os.path.join(LOG_FOLDER, 'fold_%d' % i), write_graph=True)
    rLR = ReduceLROnPlateau(monitor='val_loss', factor=0.7, patience=5, verbose=1, mode='min')

    callbacks_list = [checkpoint, early, tb, rLR]
    print("Fold: ", i)
    print("#"*50)

    # A fresh model per fold so no weights carry over between folds.
    model = get_1d_conv_model(config)

    train_generator = DataGenerator(config, '../data/audio_train/', train_set.index, 
                                    train_set.label_idx, batch_size=64,
                                    preprocessing_fn=audio_norm)
    val_generator = DataGenerator(config, '../data/audio_train/', val_set.index, 
                                  val_set.label_idx, batch_size=64,
                                  preprocessing_fn=audio_norm)

    # NOTE(review): the epoch cap is hard-coded to 500 and config.max_epochs is
    # not used; in practice EarlyStopping ends each fold much earlier.
    history = model.fit_generator(train_generator, callbacks=callbacks_list, validation_data=val_generator,
                                  epochs=500, use_multiprocessing=True, workers=6, max_queue_size=40)

# NOTE(review): the disabled per-fold prediction code below is stale. It loads
# 'best_%d.h5' from the working directory and reads from './dataset/', whereas
# the live code above saves to '../model/' and reads from '../data/'.
#     model.load_weights('best_%d.h5'%i)

#     # Save train predictions
#     train_generator = DataGenerator(config, './dataset/audio_train/', train.index, batch_size=128,
#                                     preprocessing_fn=audio_norm)
#     predictions = model.predict_generator(train_generator, use_multiprocessing=True, 
#                                           workers=6, max_queue_size=40, verbose=1)
#     np.save(PREDICTION_FOLDER + "/train_predictions_%d.npy"%i, predictions)

#     # Save test predictions
#     test_generator = DataGenerator(config, './dataset/audio_test/', test.index, batch_size=128,
#                                     preprocessing_fn=audio_norm)
#     predictions = model.predict_generator(test_generator, use_multiprocessing=True, 
#                                           workers=6, max_queue_size=40, verbose=1)
#     np.save(PREDICTION_FOLDER + "/test_predictions_%d.npy"%i, predictions)

#     # Make a submission file
#     top_3 = np.array(LABELS)[np.argsort(-predictions, axis=1)[:, :3]]
#     predicted_labels = [' '.join(list(x)) for x in top_3]
#     test['label'] = predicted_labels
#     test[['label']].to_csv(PREDICTION_FOLDER + "/predictions_%d.csv"%i)

Instructions for updating:
Use the retry module or similar alternatives.
Fold:  0
##################################################
Instructions for updating:
`NHWC` for data_format is deprecated, use `NWC` instead
Epoch 1/500

Epoch 00001: val_loss improved from inf to 3.08498, saving model to best_0.h5
Epoch 2/500

Epoch 00002: val_loss improved from 3.08498 to 2.89189, saving model to best_0.h5
Epoch 3/500

Epoch 00003: val_loss improved from 2.89189 to 2.55508, saving model to best_0.h5
Epoch 4/500

Epoch 00004: val_loss improved from 2.55508 to 2.40100, saving model to best_0.h5
Epoch 5/500

Epoch 00005: val_loss improved from 2.40100 to 2.26315, saving model to best_0.h5
Epoch 6/500

Epoch 00006: val_loss did not improve from 2.26315
Epoch 7/500

Epoch 00007: val_loss improved from 2.26315 to 2.08872, saving model to best_0.h5
Epoch 8/500

Epoch 00008: val_loss did not improve from 2.08872
Epoch 9/500

Epoch 00009: val_loss improved from 2.08872 to 2.04632, saving model to best_

Epoch 31/500

Epoch 00031: val_loss did not improve from 1.55889
Epoch 32/500

Epoch 00032: val_loss did not improve from 1.55889
Epoch 33/500

Epoch 00033: val_loss did not improve from 1.55889
Epoch 34/500

Epoch 00034: val_loss improved from 1.55889 to 1.53586, saving model to best_1.h5
Epoch 35/500

Epoch 00035: val_loss improved from 1.53586 to 1.48413, saving model to best_1.h5
Epoch 36/500

Epoch 00036: val_loss did not improve from 1.48413
Epoch 37/500

Epoch 00037: val_loss did not improve from 1.48413
Epoch 38/500

Epoch 00038: val_loss did not improve from 1.48413
Epoch 39/500

Epoch 00039: val_loss did not improve from 1.48413
Epoch 40/500

Epoch 00040: val_loss did not improve from 1.48413

Epoch 00040: ReduceLROnPlateau reducing learning rate to 0.0007000000332482159.
Epoch 41/500

Epoch 00041: val_loss improved from 1.48413 to 1.47177, saving model to best_1.h5
Epoch 42/500

Epoch 00042: val_loss improved from 1.47177 to 1.41831, saving model to best_1.h5
Epoch 43/500

E


Epoch 00060: val_loss did not improve from 1.29111
Epoch 61/500

Epoch 00061: val_loss did not improve from 1.29111

Epoch 00061: ReduceLROnPlateau reducing learning rate to 0.0004900000232737511.
Epoch 62/500

Epoch 00062: val_loss did not improve from 1.29111
Epoch 63/500

Epoch 00063: val_loss improved from 1.29111 to 1.28613, saving model to best_2.h5
Epoch 64/500

Epoch 00064: val_loss improved from 1.28613 to 1.28496, saving model to best_2.h5
Epoch 65/500

Epoch 00065: val_loss did not improve from 1.28496
Epoch 66/500

Epoch 00066: val_loss did not improve from 1.28496
Epoch 67/500

Epoch 00067: val_loss did not improve from 1.28496
Epoch 68/500

Epoch 00068: val_loss did not improve from 1.28496
Epoch 69/500

Epoch 00069: val_loss did not improve from 1.28496

Epoch 00069: ReduceLROnPlateau reducing learning rate to 0.00034300000406801696.
Epoch 70/500

Epoch 00070: val_loss did not improve from 1.28496
Epoch 71/500

Epoch 00071: val_loss did not improve from 1.28496
Epoch 72

Epoch 28/500

Epoch 00028: val_loss improved from 1.56580 to 1.55372, saving model to best_3.h5
Epoch 29/500

Epoch 00029: val_loss did not improve from 1.55372
Epoch 30/500

Epoch 00030: val_loss did not improve from 1.55372
Epoch 31/500

Epoch 00031: val_loss did not improve from 1.55372
Epoch 32/500

Epoch 00032: val_loss did not improve from 1.55372
Epoch 33/500

Epoch 00033: val_loss did not improve from 1.55372

Epoch 00033: ReduceLROnPlateau reducing learning rate to 0.0007000000332482159.
Epoch 34/500

Epoch 00034: val_loss did not improve from 1.55372
Epoch 35/500

Epoch 00035: val_loss improved from 1.55372 to 1.55050, saving model to best_3.h5
Epoch 36/500

Epoch 00036: val_loss improved from 1.55050 to 1.51046, saving model to best_3.h5
Epoch 37/500

Epoch 00037: val_loss did not improve from 1.51046
Epoch 38/500

Epoch 00038: val_loss improved from 1.51046 to 1.43432, saving model to best_3.h5
Epoch 39/500

Epoch 00039: val_loss did not improve from 1.43432
Epoch 40/500

E


Epoch 00061: val_loss did not improve from 1.26470
Epoch 62/500

Epoch 00062: val_loss did not improve from 1.26470
Epoch 63/500

Epoch 00063: val_loss improved from 1.26470 to 1.25985, saving model to best_4.h5
Epoch 64/500

Epoch 00064: val_loss did not improve from 1.25985
Epoch 65/500

Epoch 00065: val_loss improved from 1.25985 to 1.24476, saving model to best_4.h5
Epoch 66/500

Epoch 00066: val_loss improved from 1.24476 to 1.23838, saving model to best_4.h5
Epoch 67/500

Epoch 00067: val_loss did not improve from 1.23838
Epoch 68/500

Epoch 00068: val_loss did not improve from 1.23838
Epoch 69/500

Epoch 00069: val_loss did not improve from 1.23838
Epoch 70/500

Epoch 00070: val_loss did not improve from 1.23838
Epoch 71/500

Epoch 00071: val_loss did not improve from 1.23838

Epoch 00071: ReduceLROnPlateau reducing learning rate to 0.00024009999469853935.
Epoch 72/500

Epoch 00072: val_loss improved from 1.23838 to 1.20484, saving model to best_4.h5
Epoch 73/500

Epoch 00073: 


Epoch 00020: val_loss did not improve from 1.59036
Epoch 21/500

Epoch 00021: val_loss did not improve from 1.59036
Epoch 22/500

Epoch 00022: val_loss improved from 1.59036 to 1.56660, saving model to best_5.h5
Epoch 23/500

Epoch 00023: val_loss did not improve from 1.56660
Epoch 24/500

Epoch 00024: val_loss improved from 1.56660 to 1.53727, saving model to best_5.h5
Epoch 25/500

Epoch 00025: val_loss did not improve from 1.53727
Epoch 26/500

Epoch 00026: val_loss improved from 1.53727 to 1.52900, saving model to best_5.h5
Epoch 27/500

Epoch 00027: val_loss did not improve from 1.52900
Epoch 28/500

Epoch 00028: val_loss improved from 1.52900 to 1.49528, saving model to best_5.h5
Epoch 29/500

Epoch 00029: val_loss did not improve from 1.49528
Epoch 30/500

Epoch 00030: val_loss did not improve from 1.49528
Epoch 31/500

Epoch 00031: val_loss did not improve from 1.49528
Epoch 32/500

Epoch 00032: val_loss did not improve from 1.49528
Epoch 33/500

Epoch 00033: val_loss improved

Epoch 35/500

Epoch 00035: val_loss did not improve from 1.51277
Epoch 36/500

Epoch 00036: val_loss did not improve from 1.51277
Epoch 37/500

Epoch 00037: val_loss improved from 1.51277 to 1.47018, saving model to best_6.h5
Epoch 38/500

Epoch 00038: val_loss did not improve from 1.47018
Epoch 39/500

Epoch 00039: val_loss improved from 1.47018 to 1.44561, saving model to best_6.h5
Epoch 40/500

Epoch 00040: val_loss did not improve from 1.44561
Epoch 41/500

Epoch 00041: val_loss did not improve from 1.44561
Epoch 42/500

Epoch 00042: val_loss did not improve from 1.44561
Epoch 43/500

Epoch 00043: val_loss did not improve from 1.44561
Epoch 44/500

Epoch 00044: val_loss improved from 1.44561 to 1.42312, saving model to best_6.h5
Epoch 45/500

Epoch 00045: val_loss did not improve from 1.42312
Epoch 46/500

Epoch 00046: val_loss did not improve from 1.42312
Epoch 47/500

Epoch 00047: val_loss did not improve from 1.42312
Epoch 48/500

Epoch 00048: val_loss did not improve from 1.423


Epoch 00057: val_loss did not improve from 1.38961
Epoch 58/500

Epoch 00058: val_loss did not improve from 1.38961
Epoch 59/500

Epoch 00059: val_loss did not improve from 1.38961
Fold:  8
##################################################
Epoch 1/500

Epoch 00001: val_loss improved from inf to 3.02515, saving model to best_8.h5
Epoch 2/500

Epoch 00002: val_loss improved from 3.02515 to 2.82117, saving model to best_8.h5
Epoch 3/500

Epoch 00003: val_loss improved from 2.82117 to 2.65691, saving model to best_8.h5
Epoch 4/500

Epoch 00004: val_loss improved from 2.65691 to 2.44997, saving model to best_8.h5
Epoch 5/500

Epoch 00005: val_loss improved from 2.44997 to 2.33022, saving model to best_8.h5
Epoch 6/500

Epoch 00006: val_loss improved from 2.33022 to 2.24243, saving model to best_8.h5
Epoch 7/500

Epoch 00007: val_loss improved from 2.24243 to 2.15715, saving model to best_8.h5
Epoch 8/500

Epoch 00008: val_loss improved from 2.15715 to 2.15662, saving model to best_8.h5
Ep


Epoch 00038: val_loss did not improve from 1.51385
Epoch 39/500

Epoch 00039: val_loss did not improve from 1.51385
Epoch 40/500

Epoch 00040: val_loss did not improve from 1.51385
Fold:  9
##################################################
Epoch 1/500

Epoch 00001: val_loss improved from inf to 3.03890, saving model to best_9.h5
Epoch 2/500

Epoch 00002: val_loss improved from 3.03890 to 2.77918, saving model to best_9.h5
Epoch 3/500

Epoch 00003: val_loss improved from 2.77918 to 2.55611, saving model to best_9.h5
Epoch 4/500

Epoch 00004: val_loss improved from 2.55611 to 2.43184, saving model to best_9.h5
Epoch 5/500

Epoch 00005: val_loss improved from 2.43184 to 2.28985, saving model to best_9.h5
Epoch 6/500

Epoch 00006: val_loss improved from 2.28985 to 2.27856, saving model to best_9.h5
Epoch 7/500

Epoch 00007: val_loss improved from 2.27856 to 2.12649, saving model to best_9.h5
Epoch 8/500

Epoch 00008: val_loss improved from 2.12649 to 2.00345, saving model to best_9.h5
Ep


Epoch 00037: val_loss did not improve from 1.40763
Epoch 38/500

Epoch 00038: val_loss improved from 1.40763 to 1.38170, saving model to best_9.h5
Epoch 39/500

Epoch 00039: val_loss improved from 1.38170 to 1.32508, saving model to best_9.h5
Epoch 40/500

Epoch 00040: val_loss did not improve from 1.32508
Epoch 41/500

Epoch 00041: val_loss did not improve from 1.32508
Epoch 42/500

Epoch 00042: val_loss did not improve from 1.32508
Epoch 43/500

Epoch 00043: val_loss did not improve from 1.32508
Epoch 44/500

Epoch 00044: val_loss did not improve from 1.32508

Epoch 00044: ReduceLROnPlateau reducing learning rate to 0.0007000000332482159.
Epoch 45/500

Epoch 00045: val_loss did not improve from 1.32508
Epoch 46/500

Epoch 00046: val_loss did not improve from 1.32508
Epoch 47/500

Epoch 00047: val_loss improved from 1.32508 to 1.31916, saving model to best_9.h5
Epoch 48/500

Epoch 00048: val_loss did not improve from 1.31916
Epoch 49/500

Epoch 00049: val_loss did not improve from 1.

In [13]:
# pred_list = []
# for i in range(10):
#     pred_list.append(np.load(PREDICTION_FOLDER +"/test_predictions_%d.npy"%i))
# prediction = np.ones_like(pred_list[0])
# for pred in pred_list:
#     prediction = prediction*pred
# prediction = prediction**(1./len(pred_list))
# # Make a submission file
# top_3 = np.array(LABELS)[np.argsort(-prediction, axis=1)[:, :3]]
# predicted_labels = [' '.join(list(x)) for x in top_3]
# test = pd.read_csv('./dataset/sample_submission.csv')
# test['label'] = predicted_labels
# test[['fname', 'label']].to_csv("1d_conv_ensembled_submission.csv", index=False)