In [1]:
import os
import sys
import json
import datetime
import shutil
import time
import math
from collections import Counter
import hashlib
from  IPython.display import clear_output

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm import trange
from tqdm import tqdm
from tqdm import tqdm_notebook
%matplotlib inline


import tensorflow as tf
import keras
import keras.backend as K
from keras.metrics import top_k_categorical_accuracy
from keras.models import Sequential
from keras.layers import (Dense,
                          Dropout,
                          Activation,
                          BatchNormalization)
from keras.models import model_from_json
from keras import (losses,
                   models,
                   optimizers)
from keras.activations import relu, softmax
from keras.callbacks import (EarlyStopping,
                             LearningRateScheduler,
                             ModelCheckpoint,
                             TensorBoard,
                             ReduceLROnPlateau)
from keras.layers import (Convolution1D,
                          Dense,
                          Dropout,
                          GlobalAveragePooling1D, 
                          GlobalMaxPool1D, 
                          Input,
                          MaxPool1D,
                          concatenate)
from keras.utils import (Sequence,
                         to_categorical)

from keras.layers import (Convolution2D,
                          GlobalAveragePooling2D,
                          BatchNormalization,
                          Flatten,
                          GlobalMaxPool2D,
                          MaxPool2D,
                          concatenate,
                          Activation)

import sklearn
from sklearn import preprocessing
from sklearn import model_selection
from sklearn.model_selection import (StratifiedKFold,
                                     KFold)
from sklearn.utils import murmurhash3_32

import wave
import scipy
from scipy.io import wavfile
import librosa


############################################
import constants
# from constants import *
import utils
from utils import (raw_labels_to_indices,
                   PrepareData,
                   get_class_weights,
                   train_and_store_results,
                   top_1_accuracy, top_2_accuracy, top_3_accuracy)
import archs
import archs_2
from archs_2 import (get_1d_conv_model,
                    get_general_2d_conv_model)

# Not referenced anywhere in the visible part of this notebook.
SIZE = None

# The 41 sound-event class names, one per line.
# NOTE(review): the training cell uses len(constants.LABELS) rather than this
# list -- confirm that the two stay in sync.
LABELS = [
    'Hi-hat',
    'Saxophone',
    'Trumpet',
    'Glockenspiel',
    'Cello',
    'Knock',
    'Gunshot_or_gunfire',
    'Clarinet',
    'Computer_keyboard',
    'Keys_jangling',
    'Snare_drum',
    'Writing',
    'Laughter',
    'Tearing',
    'Fart',
    'Oboe',
    'Flute',
    'Cough',
    'Telephone',
    'Bark',
    'Chime',
    'Bass_drum',
    'Bus',
    'Squeak',
    'Scissors',
    'Harmonica',
    'Gong',
    'Microwave_oven',
    'Burping_or_eructation',
    'Double_bass',
    'Shatter',
    'Fireworks',
    'Tambourine',
    'Cowbell',
    'Electric_piano',
    'Meow',
    'Drawer_open_or_close',
    'Applause',
    'Acoustic_guitar',
    'Violin_or_fiddle',
    'Finger_snapping',
]

# True  -> use the full train/test tables.
# False -> keep only the first SIZE_WHEN_NOT_COMPLETE_RUN rows of each.
COMPLETE_RUN = True
SIZE_WHEN_NOT_COMPLETE_RUN = 50

SAMPLE_RATE = 44100
CONFIG_PATH = constants.CONFIG_PATH

# Fraction of the training table held out as an offline evaluation split
# when COMPLETE_RUN is True.
TRAIN_OFFLINE = 0.01

Using TensorFlow backend.


In [2]:
# from tensorflow.python.client import device_lib
# print(device_lib.list_local_devices())

# Config

In [3]:
class Config(object):
    """Hyper-parameters and derived input geometry for one training run.

    Besides storing every constructor argument as an attribute, the
    constructor derives:

    * ``class_weights`` -- ``get_class_weights(train_labels, smooth_factor=...)``
    * ``audio_length``  -- ``sampling_rate * audio_duration`` as an integer
      number of samples
    * ``dim``           -- model input shape:
      ``(n_mfcc, 1 + audio_length // 512, 1)`` when ``use_mfcc`` is true,
      otherwise ``(audio_length, 1)``

    Parameters
    ----------
    complete_run : bool or None
        ``None`` falls back to the module-level ``COMPLETE_RUN``; an explicit
        ``False`` is honoured.
    size_when_not_complete_run : int or None
        ``None`` falls back to the module-level ``SIZE_WHEN_NOT_COMPLETE_RUN``.
    train_labels :
        Training labels used to compute class weights. Required.

    All remaining arguments are stored unchanged under the same attribute name.

    Raises
    ------
    ValueError
        If ``train_labels`` is ``None``.
    """

    def __init__(self,
                 complete_run=None,
                 size_when_not_complete_run=None,
                 train_labels=None,
                 sampling_rate=44100,
                 audio_duration=2,
                 n_classes=41,
                 use_mfcc=False,
                 n_mfcc=40,
                 smooth_factor=0.1,

                 n_folds=5,
                 learning_rate=0.001,
                 max_epochs=150,
                 batch_size=64,

                 kernel_size=(10, 4),
                 depth_conv=4,
                 depth_dense=1,
                 dense_sizes=None,

                 do_batch_normalization=False,
                 dropout_rate=0.15,
                 filters=32,
                 filter_sizes=None,

                 kernel_sizes_1d=None,
                 maxpool_sizes_1d=None):

        if train_labels is None:
            raise ValueError('You must specify train_labels for calculating class_weights')

        # Compare against None explicitly: `value or DEFAULT` would replace an
        # explicit False / 0 with the module default.
        if complete_run is None:
            complete_run = COMPLETE_RUN
        if size_when_not_complete_run is None:
            size_when_not_complete_run = SIZE_WHEN_NOT_COMPLETE_RUN
        self.complete_run = complete_run
        self.size_when_not_complete_run = size_when_not_complete_run

        self.smooth_factor = smooth_factor
        self.class_weights = get_class_weights(train_labels, smooth_factor=self.smooth_factor)

        self.sampling_rate = sampling_rate
        self.audio_duration = audio_duration
        self.n_classes = n_classes
        self.use_mfcc = use_mfcc
        self.n_mfcc = n_mfcc

        self.n_folds = n_folds
        self.learning_rate = learning_rate
        self.max_epochs = max_epochs
        self.batch_size = batch_size

        self.kernel_size = kernel_size
        self.depth_conv = depth_conv
        self.depth_dense = depth_dense
        self.dense_sizes = dense_sizes  # TODO: no default chosen yet

        self.dropout_rate = dropout_rate
        self.filters = filters
        self.do_batch_normalization = do_batch_normalization
        self.filter_sizes = filter_sizes  # TODO: no default chosen yet

        self.kernel_sizes_1d = kernel_sizes_1d  # TODO: no default chosen yet
        self.maxpool_sizes_1d = maxpool_sizes_1d  # TODO: no default chosen yet

        # Sample counts must be integers; a fractional audio_duration would
        # otherwise leave a float inside `dim`.
        self.audio_length = int(round(self.sampling_rate * self.audio_duration))
        if self.use_mfcc:
            # NOTE(review): 512 is presumably the hop length used when the MFCC
            # frames are extracted -- confirm against PrepareData.
            n_frames = 1 + self.audio_length // 512
            self.dim = (self.n_mfcc, n_frames, 1)
        else:
            self.dim = (self.audio_length, 1)

# PrepareData

____

# Archs

___

# StoreStatistics

____

# Train_and_store_results

____

# -------------Begin Execution--------------------

In [4]:
# Load the train / submission tables, attach per-clip frame counts and label
# indices, then carve an offline evaluation split off the training table.

# Fixed seed so the train / train_offline split is identical on every
# Restart & Run All.
SPLIT_SEED = 42


def _count_wav_frames(path):
    """Return the number of audio frames in the WAV file at `path`."""
    # The context manager closes the file; a bare wave.open(...).getnframes()
    # leaves one file handle open per clip.
    with wave.open(path, 'rb') as wav_file:
        return wav_file.getnframes()


train_original = pd.read_csv(os.path.join(constants.DATADIR, "train.csv"))
test_original = pd.read_csv(os.path.join(constants.DATADIR, "sample_submission.csv"))
train_original['nframes'] = train_original['fname'].apply(
    lambda f: _count_wav_frames(os.path.join(constants.DATADIR, 'audio_train/', f)))
test_original['nframes'] = test_original['fname'].apply(
    lambda f: _count_wav_frames(os.path.join(constants.DATADIR, 'audio_test/', f)))
train_original_1 = train_original.set_index("fname", inplace=False)
test_original_1 = test_original.set_index("fname", inplace=False)
train_original_1['label_idx'] = raw_labels_to_indices(train_original_1[constants.LABEL])
if not COMPLETE_RUN:
    # Quick-iteration mode: keep only the first few rows of each table.
    train_original_1 = train_original_1[:SIZE_WHEN_NOT_COMPLETE_RUN]
    test_original_1 = test_original_1[:SIZE_WHEN_NOT_COMPLETE_RUN]

# add offline for evaluating
if COMPLETE_RUN:
    train, train_offline = sklearn.model_selection.train_test_split(
        train_original_1,
        test_size=TRAIN_OFFLINE,
        stratify=train_original_1['label_idx'],
        random_state=SPLIT_SEED)
else:
    # No stratification on the truncated table.
    train, train_offline = sklearn.model_selection.train_test_split(
        train_original_1,
        test_size=0.1,
        random_state=SPLIT_SEED)
test = test_original_1

In [5]:
# Create the two PrepareData helpers only if they do not already exist in the
# kernel namespace, so re-running this cell keeps the existing objects.
# NOTE(review): presumably PrepareData caches extracted features -- confirm in utils.
if 'prepare_data_first' not in globals():
    prepare_data_first = PrepareData(preprocessing_fn=utils.audio_norm)
    prepare_data_second = PrepareData(preprocessing_fn=utils.audio_norm)

In [6]:
# --- Input features ---
USE_MFCC = True
N_MFCC = 60
AUDIO_DURATION = 2

# --- Class weighting ---
# Surprisingly, 10 won when experimenting against 0.1, 1 and 100.
SMOOTH_FOR_CLASS_WEIGHTS = 10

# --- Training loop ---
BATCH_SIZE = 64  # not too big, not too small
N_FOLDS = 10  # 2 or 5 or 10?

# --- 2D convolutional architecture ---
KERNEL_SIZE = (6, 15)  # (6, 15) was good; (4, 10) is certainly not bad either
DEPTH_CONV = 7  # 7 trains longer but gives a better result; use 4 for experimenting
DEPTH_DENSE = 1  # 1 is the best
FILTERS = 64  # 32 or 64?
DROPOUT_RATE = 0.2  # 0.2 is better, and 0.3 seems good too
DO_BATCH_NORMALIZATION = False

# --- Left unset in this notebook ---
DENSE_SIZES = None
FILTER_SIZES = None
KERNEL_SIZES_1D = None
MAXPOOL_SIZES_1D = None

# Below: settings for the 1D model (used in another notebook)
# DENSE_SIZES = [128]  # [128] won against [256] and [128, 128]
# FILTER_SIZES = [16, 32, 32, 256]
# DO_BATCH_NORMALIZATION = False  # False won
# KERNEL_SIZES_1D = [9, 3, 3, 3]
# MAXPOOL_SIZES_1D = [16, 4, 4, 4]

In [7]:
# Model factory used by the training cell: its __name__ seeds the experiment
# name and the callable itself is passed to train_and_store_results.
func_to_create_model = get_general_2d_conv_model

In [None]:
%%time

do_kfold_now = True
make_predictions = True
slide_tick_for_first_epoch = None
slide_tick = None
verbose = 0

for _ in range(1):
    name_experiment = func_to_create_model.__name__
    
    ################# ATTEN #########################
    name_experiment += '_2d_final'       

    config_now = Config(
        sampling_rate=44100,
        use_mfcc=True,
        audio_duration=AUDIO_DURATION,
        n_mfcc=N_MFCC,
        batch_size=BATCH_SIZE,
        smooth_factor=SMOOTH_FOR_CLASS_WEIGHTS,
        dropout_rate=DROPOUT_RATE,
        kernel_size=KERNEL_SIZE,
        depth_conv=DEPTH_CONV,
        depth_dense=DEPTH_DENSE,
        train_labels=train.label_idx,
        filters=FILTERS,
        n_folds=N_FOLDS)
    
    config_now = Config(
        complete_run=COMPLETE_RUN,
        size_when_not_complete_run=SIZE_WHEN_NOT_COMPLETE_RUN,
        sampling_rate=44100,
        use_mfcc=USE_MFCC,
        audio_duration=AUDIO_DURATION,
        n_mfcc=N_MFCC,
        train_labels=train.label_idx,
        smooth_factor=SMOOTH_FOR_CLASS_WEIGHTS,
        
        n_folds=N_FOLDS,
        batch_size=BATCH_SIZE,
        
        filters=FILTERS,
        depth_conv=DEPTH_CONV,
        depth_dense=DEPTH_DENSE,
        kernel_size=KERNEL_SIZE,
        
        do_batch_normalization=DO_BATCH_NORMALIZATION,
        dropout_rate=DROPOUT_RATE,
        dense_sizes=DENSE_SIZES,
        filter_sizes=FILTER_SIZES,
        
        kernel_sizes_1d=KERNEL_SIZES_1D,
        maxpool_sizes_1d=MAXPOOL_SIZES_1D)

    X_train_now = prepare_data_first(train, config_now, os.path.join(constants.DATA, 'audio_train'))
    X_test_now = prepare_data_second(test, config_now, os.path.join(constants.DATA, 'audio_test'))
    y_train_now = to_categorical(
        train.label_idx, num_classes=len(constants.LABELS))

    # standartization
    if config_now.use_mfcc:
        mean = np.mean(X_train_now, axis=0)
        std = np.std(X_train_now, axis=0)
        X_train_now = (X_train_now - mean) / std
        X_test_now = (X_test_now - mean) / std               


    name_experiment_final = name_experiment
    print(name_experiment_final)
    store_statistics_object = train_and_store_results(
        name_experiment=name_experiment_final,
        func_to_create_model=func_to_create_model,
        config=config_now,
        X_train=X_train_now,
        y_train=y_train_now,
        y_train_label_idx=train.label_idx,
        X_test=X_test_now,
        slide_tick_for_first_epoch=slide_tick_for_first_epoch,
        verbose_keras=verbose,
        make_predictions=make_predictions,
        do_kfold=do_kfold_now)