In [1]:
# Environment variables are set before TensorFlow/Keras are imported below.
# presumably they must be in place before those libraries initialise -- confirm.
import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE"

# NOTE(review): TensorFlow is imported twice (as `tf` and as `tensorflow`).
import tensorflow as tf
import keras
import numpy as np
import tensorflow
from keras.optimizers import SGD

# Make the parent directory importable so the project-local modules resolve.
import sys
sys.path.append("..")
# NOTE(review): the wildcard imports below hide which module provides the names
# used later in this notebook (balance_class_data, load_data_2020,
# LR_WarmRestart, Generator_balanceclass_timefreqmask_nocropping_splitted);
# explicit imports would make the dependencies visible.
from utils import *
from funcs import *

from mobnet import model_mobnet
from training_functions import *

# TF1-compatibility session API, used below to configure GPU memory growth.
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

# Create the TF1-style interactive session with GPU memory growth enabled
# instead of the default allocation behaviour.
config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)

# Dataset locations (relative to the notebook's working directory).
data_path = 'data_2020/'
train_csv = data_path + 'evaluation_setup/fold1_train.csv'
val_csv = data_path + 'evaluation_setup/fold1_evaluate.csv'
feat_path = 'features/logmel128_scaled_d_dd/'

# Output directory for checkpoints and for whatever balance_class_data writes.
experiments = 'exp_mobnet'

# exist_ok=True makes the cell idempotent without a separate existence check
# (and without the check-then-create race of os.path.exists + os.makedirs).
os.makedirs(experiments, exist_ok=True)



  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.


# MODEL 0

In [6]:
# Randomly resample the training list so that all three classes have a similar
# number of training samples.
total_csv = balance_class_data(train_csv, experiments)

# ---- Hyper-parameters for run 0 -------------------------------------------
num_audio_channels = 2
num_freq_bin = 128
num_time_bin = 461
num_classes = 3
max_lr = 0.1
batch_size = 32
num_epochs = 50
mixup_alpha = 0.4
no = 0  # run index; only used in the checkpoint file name

# Sample count = number of lines in the training CSV minus one
# (presumably a header row -- confirm). The file is streamed inside a context
# manager so the handle is closed and the contents are not held in memory.
with open(train_csv, 'r') as train_list:
    sample_num = sum(1 for _ in train_list) - 1

# Validation data is loaded fully into memory; labels are one-hot encoded.
data_val, y_val = load_data_2020(feat_path, val_csv, num_freq_bin, 'logmel')
y_val = keras.utils.to_categorical(y_val, num_classes)

# Input has 3 * num_audio_channels feature maps
# (presumably log-mel, delta and delta-delta per audio channel -- confirm).
model = model_mobnet(
    num_classes,
    input_shape=[num_freq_bin, num_time_bin, 3 * num_audio_channels],
    num_filters=24,
    wd=1e-3,
)

model.compile(
    loss='categorical_crossentropy',
    optimizer=SGD(lr=max_lr, decay=0, momentum=0.9, nesterov=False),
    metrics=['accuracy'],
)

model.summary()

# Learning-rate schedule with restarts at the listed epochs; the LR ranges
# between max_lr and max_lr * 1e-4. nbatch is the number of batches per epoch.
lr_scheduler = LR_WarmRestart(
    nbatch=np.ceil(sample_num / batch_size),
    Tmult=2,
    initial_lr=max_lr,
    min_lr=max_lr * 1e-4,
    epochs_restart=[3.0, 7.0, 15.0, 31.0, 63.0, 127.0, 255.0],
)

# NOTE(review): with save_best_only=False the checkpoint is written after every
# epoch to the same path, so `monitor` and `mode` have no effect on what is kept.
save_path = experiments + "/model-mixup_alpha:{}-max_lr:{}-total_epochs:{}-no:{}.hdf5".format(mixup_alpha, max_lr, num_epochs, no)
checkpoint = keras.callbacks.ModelCheckpoint(
    save_path, monitor='val_acc', verbose=1, save_best_only=False, mode='max'
)
callbacks = [lr_scheduler, checkpoint]

# Calling the generator object yields the batch iterator passed to Keras.
# alpha is the mixup parameter; splitted_num is forwarded as-is
# (its meaning is defined by the generator class -- see its definition).
train_data_generator = Generator_balanceclass_timefreqmask_nocropping_splitted(
    feat_path, train_csv, total_csv, experiments, num_freq_bin,
    batch_size=batch_size,
    alpha=mixup_alpha,
    splitted_num=4,
)()


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, 128, 461, 6)  0                                            
__________________________________________________________________________________________________
lambda_3 (Lambda)               (None, 64, 461, 6)   0           input_2[0][0]                    
__________________________________________________________________________________________________
lambda_4 (Lambda)               (None, 64, 461, 6)   0           input_2[0][0]                    
__________________________________________________________________________________________________
conv2d_43 (Conv2D)              (None, 32, 231, 32)  1760        lambda_3[0][0]                   
__________________________________________________________________________________________________
conv2d_63 

In [5]:
# Train run 0 from the augmenting generator, validating on the in-memory
# validation fold. Keras documents steps_per_epoch as an integer, so the float
# returned by np.ceil is converted explicitly (the step count is unchanged).
history = model.fit_generator(
    train_data_generator,
    validation_data=(data_val, y_val),
    epochs=num_epochs,
    verbose=1,
    workers=4,
    max_queue_size=100,
    callbacks=callbacks,
    steps_per_epoch=int(np.ceil(sample_num / batch_size)),
)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Epoch 1/50

LR:0.050278

Epoch 00001: saving model to exp_mobnet/model-mixup_alpha:0.4-max_lr:0.1-total_epochs:50-no:0.hdf5
Epoch 2/50

LR:0.000011

Epoch 00002: saving model to exp_mobnet/model-mixup_alpha:0.4-max_lr:0.1-total_epochs:50-no:0.hdf5
Epoch 3/50

LR:0.085453

Epoch 00003: saving model to exp_mobnet/model-mixup_alpha:0.4-max_lr:0.1-total_epochs:50-no:0.hdf5
Epoch 4/50

LR:0.050141

Epoch 00004: saving model to exp_mobnet/model-mixup_alpha:0.4-max_lr:0.1-total_epochs:50-no:0.hdf5
Epoch 5/50

LR:0.014750

Epoch 00005: saving model to exp_mobnet/model-mixup_alpha:0.4-max_lr:0.1-total_epochs:50-no:0.hdf5
Epoch 6/50

LR:0.000010

Epoch 00006: saving model to exp_mobnet/model-mixup_alpha:0.4-max_lr:0.1-total_epochs:50-no:0.hdf5
Epoch 7/50

LR:0.096220

Epoch 00007: saving model to exp_mobnet/model-mixup_alpha:0.4-max_lr:0.1-total_epochs:50-no:0.hdf5
Epoch 8/50

LR:0.085405

Epoch 00008: 


LR:0.099043

Epoch 00032: saving model to exp_mobnet/model-mixup_alpha:0.4-max_lr:0.1-total_epochs:50-no:0.hdf5
Epoch 33/50

LR:0.097852

Epoch 00033: saving model to exp_mobnet/model-mixup_alpha:0.4-max_lr:0.1-total_epochs:50-no:0.hdf5
Epoch 34/50

LR:0.096201

Epoch 00034: saving model to exp_mobnet/model-mixup_alpha:0.4-max_lr:0.1-total_epochs:50-no:0.hdf5
Epoch 35/50

LR:0.094105

Epoch 00035: saving model to exp_mobnet/model-mixup_alpha:0.4-max_lr:0.1-total_epochs:50-no:0.hdf5
Epoch 36/50

LR:0.091584

Epoch 00036: saving model to exp_mobnet/model-mixup_alpha:0.4-max_lr:0.1-total_epochs:50-no:0.hdf5
Epoch 37/50

LR:0.088662

Epoch 00037: saving model to exp_mobnet/model-mixup_alpha:0.4-max_lr:0.1-total_epochs:50-no:0.hdf5
Epoch 38/50

LR:0.085369

Epoch 00038: saving model to exp_mobnet/model-mixup_alpha:0.4-max_lr:0.1-total_epochs:50-no:0.hdf5
Epoch 39/50

LR:0.081735

Epoch 00039: saving model to exp_mobnet/model-mixup_alpha:0.4-max_lr:0.1-total_epochs:50-no:0.hdf5
Epoch 40/50


# MODEL 1

In [2]:
# Randomly resample the training list so that all three classes have a similar
# number of training samples.
total_csv = balance_class_data(train_csv, experiments)

# ---- Hyper-parameters for run 1 (30 epochs) --------------------------------
num_audio_channels = 2
num_freq_bin = 128
num_time_bin = 461
num_classes = 3
max_lr = 0.1
batch_size = 32
num_epochs = 30
mixup_alpha = 0.4
no = 1  # run index; only used in the checkpoint file name

# Sample count = number of lines in the training CSV minus one
# (presumably a header row -- confirm). The file is streamed inside a context
# manager so the handle is closed and the contents are not held in memory.
with open(train_csv, 'r') as train_list:
    sample_num = sum(1 for _ in train_list) - 1

# Validation data is loaded fully into memory; labels are one-hot encoded.
data_val, y_val = load_data_2020(feat_path, val_csv, num_freq_bin, 'logmel')
y_val = keras.utils.to_categorical(y_val, num_classes)

# Input has 3 * num_audio_channels feature maps
# (presumably log-mel, delta and delta-delta per audio channel -- confirm).
model = model_mobnet(
    num_classes,
    input_shape=[num_freq_bin, num_time_bin, 3 * num_audio_channels],
    num_filters=24,
    wd=1e-3,
)

model.compile(
    loss='categorical_crossentropy',
    optimizer=SGD(lr=max_lr, decay=0, momentum=0.9, nesterov=False),
    metrics=['accuracy'],
)

model.summary()

# Learning-rate schedule with restarts at the listed epochs; the LR ranges
# between max_lr and max_lr * 1e-4. nbatch is the number of batches per epoch.
lr_scheduler = LR_WarmRestart(
    nbatch=np.ceil(sample_num / batch_size),
    Tmult=2,
    initial_lr=max_lr,
    min_lr=max_lr * 1e-4,
    epochs_restart=[3.0, 7.0, 15.0, 31.0, 63.0, 127.0, 255.0],
)

# NOTE(review): with save_best_only=False the checkpoint is written after every
# epoch to the same path, so `monitor` and `mode` have no effect on what is kept.
save_path = experiments + "/model-mixup_alpha:{}-max_lr:{}-total_epochs:{}-no:{}.hdf5".format(mixup_alpha, max_lr, num_epochs, no)
checkpoint = keras.callbacks.ModelCheckpoint(
    save_path, monitor='val_acc', verbose=1, save_best_only=False, mode='max'
)
callbacks = [lr_scheduler, checkpoint]

# Calling the generator object yields the batch iterator passed to Keras.
# alpha is the mixup parameter; splitted_num is forwarded as-is
# (its meaning is defined by the generator class -- see its definition).
train_data_generator = Generator_balanceclass_timefreqmask_nocropping_splitted(
    feat_path, train_csv, total_csv, experiments, num_freq_bin,
    batch_size=batch_size,
    alpha=mixup_alpha,
    splitted_num=4,
)()








Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 128, 461, 6)  0                                            
__________________________________________________________________________________________________
lambda_1 (Lambda)               (None, 64, 461, 6)   0           input_1[0][0]                    
__________________________________________________________________________________________________
lambda_2 (Lambda)               (None, 64, 461, 6)   0           input_1[0][0]                    
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 32, 231, 32)  1760        lambda_1[0][0]       

In [None]:
# Train run 1 from the augmenting generator, validating on the in-memory
# validation fold. Keras documents steps_per_epoch as an integer, so the float
# returned by np.ceil is converted explicitly (the step count is unchanged).
history = model.fit_generator(
    train_data_generator,
    validation_data=(data_val, y_val),
    epochs=num_epochs,
    verbose=1,
    workers=4,
    max_queue_size=100,
    callbacks=callbacks,
    steps_per_epoch=int(np.ceil(sample_num / batch_size)),
)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Epoch 1/30

LR:0.050278

Epoch 00001: saving model to exp_mobnet/model-mixup_alpha:0.4-max_lr:0.1-total_epochs:30-no:1.hdf5
Epoch 2/30

LR:0.000011

Epoch 00002: saving model to exp_mobnet/model-mixup_alpha:0.4-max_lr:0.1-total_epochs:30-no:1.hdf5
Epoch 3/30

LR:0.085453

Epoch 00003: saving model to exp_mobnet/model-mixup_alpha:0.4-max_lr:0.1-total_epochs:30-no:1.hdf5
Epoch 4/30

LR:0.050141

Epoch 00004: saving model to exp_mobnet/model-mixup_alpha:0.4-max_lr:0.1-total_epochs:30-no:1.hdf5
Epoch 5/30

LR:0.014750

Epoch 00005: saving model to exp_mobnet/model-mixup_alpha:0.4-max_lr:0.1-total_epochs:30-no:1.hdf5
Epoch 6/30

LR:0.000010

Epoch 00006: saving model to exp_mobnet/model-mixup_alpha:0.4-max_lr:0.1-total_epochs:30-no:1.hdf5
Epoch 7/30

LR:0.096220

Epoch 00007: saving model to exp_mobnet/model-mixup_alpha:0.4-max_lr:0.1-total_epochs:30-no:1.hdf5
Epoch 8/30

# MODEL 2

In [None]:
# Randomly resample the training list so that all three classes have a similar
# number of training samples.
total_csv = balance_class_data(train_csv, experiments)

# ---- Hyper-parameters for run 2 (500 epochs) -------------------------------
num_audio_channels = 2
num_freq_bin = 128
num_time_bin = 461
num_classes = 3
max_lr = 0.1
batch_size = 32
num_epochs = 500
mixup_alpha = 0.4
no = 2  # run index; only used in the checkpoint file name

# Sample count = number of lines in the training CSV minus one
# (presumably a header row -- confirm). The file is streamed inside a context
# manager so the handle is closed and the contents are not held in memory.
with open(train_csv, 'r') as train_list:
    sample_num = sum(1 for _ in train_list) - 1

# Validation data is loaded fully into memory; labels are one-hot encoded.
data_val, y_val = load_data_2020(feat_path, val_csv, num_freq_bin, 'logmel')
y_val = keras.utils.to_categorical(y_val, num_classes)

# Input has 3 * num_audio_channels feature maps
# (presumably log-mel, delta and delta-delta per audio channel -- confirm).
model = model_mobnet(
    num_classes,
    input_shape=[num_freq_bin, num_time_bin, 3 * num_audio_channels],
    num_filters=24,
    wd=1e-3,
)

model.compile(
    loss='categorical_crossentropy',
    optimizer=SGD(lr=max_lr, decay=0, momentum=0.9, nesterov=False),
    metrics=['accuracy'],
)

model.summary()

# Learning-rate schedule with restarts at the listed epochs; the LR ranges
# between max_lr and max_lr * 1e-4. nbatch is the number of batches per epoch.
# NOTE(review): the last listed restart is epoch 255 but this run trains for
# 500 epochs -- confirm LR_WarmRestart behaves as intended past that point.
lr_scheduler = LR_WarmRestart(
    nbatch=np.ceil(sample_num / batch_size),
    Tmult=2,
    initial_lr=max_lr,
    min_lr=max_lr * 1e-4,
    epochs_restart=[3.0, 7.0, 15.0, 31.0, 63.0, 127.0, 255.0],
)

# NOTE(review): with save_best_only=False the checkpoint is written after every
# epoch to the same path, so `monitor` and `mode` have no effect on what is kept.
save_path = experiments + "/model-mixup_alpha:{}-max_lr:{}-total_epochs:{}-no:{}.hdf5".format(mixup_alpha, max_lr, num_epochs, no)
checkpoint = keras.callbacks.ModelCheckpoint(
    save_path, monitor='val_acc', verbose=1, save_best_only=False, mode='max'
)
callbacks = [lr_scheduler, checkpoint]

# Calling the generator object yields the batch iterator passed to Keras.
# alpha is the mixup parameter; splitted_num is forwarded as-is
# (its meaning is defined by the generator class -- see its definition).
train_data_generator = Generator_balanceclass_timefreqmask_nocropping_splitted(
    feat_path, train_csv, total_csv, experiments, num_freq_bin,
    batch_size=batch_size,
    alpha=mixup_alpha,
    splitted_num=4,
)()


In [None]:
# Train run 2 from the augmenting generator, validating on the in-memory
# validation fold. Keras documents steps_per_epoch as an integer, so the float
# returned by np.ceil is converted explicitly (the step count is unchanged).
history = model.fit_generator(
    train_data_generator,
    validation_data=(data_val, y_val),
    epochs=num_epochs,
    verbose=1,
    workers=4,
    max_queue_size=100,
    callbacks=callbacks,
    steps_per_epoch=int(np.ceil(sample_num / batch_size)),
)

# MODEL 3

In [None]:
# Randomly resample the training list so that all three classes have a similar
# number of training samples.
total_csv = balance_class_data(train_csv, experiments)

# ---- Hyper-parameters for run 3 (mixup_alpha = 0.8) ------------------------
num_audio_channels = 2
num_freq_bin = 128
num_time_bin = 461
num_classes = 3
max_lr = 0.1
batch_size = 32
num_epochs = 50
mixup_alpha = 0.8
no = 3  # run index; only used in the checkpoint file name

# Sample count = number of lines in the training CSV minus one
# (presumably a header row -- confirm). The file is streamed inside a context
# manager so the handle is closed and the contents are not held in memory.
with open(train_csv, 'r') as train_list:
    sample_num = sum(1 for _ in train_list) - 1

# Validation data is loaded fully into memory; labels are one-hot encoded.
data_val, y_val = load_data_2020(feat_path, val_csv, num_freq_bin, 'logmel')
y_val = keras.utils.to_categorical(y_val, num_classes)

# Input has 3 * num_audio_channels feature maps
# (presumably log-mel, delta and delta-delta per audio channel -- confirm).
model = model_mobnet(
    num_classes,
    input_shape=[num_freq_bin, num_time_bin, 3 * num_audio_channels],
    num_filters=24,
    wd=1e-3,
)

model.compile(
    loss='categorical_crossentropy',
    optimizer=SGD(lr=max_lr, decay=0, momentum=0.9, nesterov=False),
    metrics=['accuracy'],
)

model.summary()

# Learning-rate schedule with restarts at the listed epochs; the LR ranges
# between max_lr and max_lr * 1e-4. nbatch is the number of batches per epoch.
lr_scheduler = LR_WarmRestart(
    nbatch=np.ceil(sample_num / batch_size),
    Tmult=2,
    initial_lr=max_lr,
    min_lr=max_lr * 1e-4,
    epochs_restart=[3.0, 7.0, 15.0, 31.0, 63.0, 127.0, 255.0],
)

# NOTE(review): with save_best_only=False the checkpoint is written after every
# epoch to the same path, so `monitor` and `mode` have no effect on what is kept.
save_path = experiments + "/model-mixup_alpha:{}-max_lr:{}-total_epochs:{}-no:{}.hdf5".format(mixup_alpha, max_lr, num_epochs, no)
checkpoint = keras.callbacks.ModelCheckpoint(
    save_path, monitor='val_acc', verbose=1, save_best_only=False, mode='max'
)
callbacks = [lr_scheduler, checkpoint]

# Calling the generator object yields the batch iterator passed to Keras.
# alpha is the mixup parameter; splitted_num is forwarded as-is
# (its meaning is defined by the generator class -- see its definition).
train_data_generator = Generator_balanceclass_timefreqmask_nocropping_splitted(
    feat_path, train_csv, total_csv, experiments, num_freq_bin,
    batch_size=batch_size,
    alpha=mixup_alpha,
    splitted_num=4,
)()


In [None]:
# Train run 3 from the augmenting generator, validating on the in-memory
# validation fold. Keras documents steps_per_epoch as an integer, so the float
# returned by np.ceil is converted explicitly (the step count is unchanged).
history = model.fit_generator(
    train_data_generator,
    validation_data=(data_val, y_val),
    epochs=num_epochs,
    verbose=1,
    workers=4,
    max_queue_size=100,
    callbacks=callbacks,
    steps_per_epoch=int(np.ceil(sample_num / batch_size)),
)

# MODEL 4

In [None]:
# Randomly resample the training list so that all three classes have a similar
# number of training samples.
total_csv = balance_class_data(train_csv, experiments)

# ---- Hyper-parameters for run 4 (max_lr = 0.25) ----------------------------
num_audio_channels = 2
num_freq_bin = 128
num_time_bin = 461
num_classes = 3
max_lr = 0.25
batch_size = 32
num_epochs = 50
mixup_alpha = 0.4
no = 4  # run index; only used in the checkpoint file name

# Sample count = number of lines in the training CSV minus one
# (presumably a header row -- confirm). The file is streamed inside a context
# manager so the handle is closed and the contents are not held in memory.
with open(train_csv, 'r') as train_list:
    sample_num = sum(1 for _ in train_list) - 1

# Validation data is loaded fully into memory; labels are one-hot encoded.
data_val, y_val = load_data_2020(feat_path, val_csv, num_freq_bin, 'logmel')
y_val = keras.utils.to_categorical(y_val, num_classes)

# Input has 3 * num_audio_channels feature maps
# (presumably log-mel, delta and delta-delta per audio channel -- confirm).
model = model_mobnet(
    num_classes,
    input_shape=[num_freq_bin, num_time_bin, 3 * num_audio_channels],
    num_filters=24,
    wd=1e-3,
)

model.compile(
    loss='categorical_crossentropy',
    optimizer=SGD(lr=max_lr, decay=0, momentum=0.9, nesterov=False),
    metrics=['accuracy'],
)

model.summary()

# Learning-rate schedule with restarts at the listed epochs; the LR ranges
# between max_lr and max_lr * 1e-4. nbatch is the number of batches per epoch.
lr_scheduler = LR_WarmRestart(
    nbatch=np.ceil(sample_num / batch_size),
    Tmult=2,
    initial_lr=max_lr,
    min_lr=max_lr * 1e-4,
    epochs_restart=[3.0, 7.0, 15.0, 31.0, 63.0, 127.0, 255.0],
)

# NOTE(review): with save_best_only=False the checkpoint is written after every
# epoch to the same path, so `monitor` and `mode` have no effect on what is kept.
save_path = experiments + "/model-mixup_alpha:{}-max_lr:{}-total_epochs:{}-no:{}.hdf5".format(mixup_alpha, max_lr, num_epochs, no)
checkpoint = keras.callbacks.ModelCheckpoint(
    save_path, monitor='val_acc', verbose=1, save_best_only=False, mode='max'
)
callbacks = [lr_scheduler, checkpoint]

# Calling the generator object yields the batch iterator passed to Keras.
# alpha is the mixup parameter; splitted_num is forwarded as-is
# (its meaning is defined by the generator class -- see its definition).
train_data_generator = Generator_balanceclass_timefreqmask_nocropping_splitted(
    feat_path, train_csv, total_csv, experiments, num_freq_bin,
    batch_size=batch_size,
    alpha=mixup_alpha,
    splitted_num=4,
)()


In [None]:
# Train run 4 from the augmenting generator, validating on the in-memory
# validation fold. Keras documents steps_per_epoch as an integer, so the float
# returned by np.ceil is converted explicitly (the step count is unchanged).
history = model.fit_generator(
    train_data_generator,
    validation_data=(data_val, y_val),
    epochs=num_epochs,
    verbose=1,
    workers=4,
    max_queue_size=100,
    callbacks=callbacks,
    steps_per_epoch=int(np.ceil(sample_num / batch_size)),
)

# MODEL 5

In [None]:
# Randomly resample the training list so that all three classes have a similar
# number of training samples.
total_csv = balance_class_data(train_csv, experiments)

# ---- Hyper-parameters for run 5 (same values as run 0) ---------------------
num_audio_channels = 2
num_freq_bin = 128
num_time_bin = 461
num_classes = 3
max_lr = 0.1
batch_size = 32
num_epochs = 50
mixup_alpha = 0.4
no = 5  # run index; only used in the checkpoint file name

# Sample count = number of lines in the training CSV minus one
# (presumably a header row -- confirm). The file is streamed inside a context
# manager so the handle is closed and the contents are not held in memory.
with open(train_csv, 'r') as train_list:
    sample_num = sum(1 for _ in train_list) - 1

# Validation data is loaded fully into memory; labels are one-hot encoded.
data_val, y_val = load_data_2020(feat_path, val_csv, num_freq_bin, 'logmel')
y_val = keras.utils.to_categorical(y_val, num_classes)

# Input has 3 * num_audio_channels feature maps
# (presumably log-mel, delta and delta-delta per audio channel -- confirm).
model = model_mobnet(
    num_classes,
    input_shape=[num_freq_bin, num_time_bin, 3 * num_audio_channels],
    num_filters=24,
    wd=1e-3,
)

model.compile(
    loss='categorical_crossentropy',
    optimizer=SGD(lr=max_lr, decay=0, momentum=0.9, nesterov=False),
    metrics=['accuracy'],
)

model.summary()

# Learning-rate schedule with restarts at the listed epochs; the LR ranges
# between max_lr and max_lr * 1e-4. nbatch is the number of batches per epoch.
lr_scheduler = LR_WarmRestart(
    nbatch=np.ceil(sample_num / batch_size),
    Tmult=2,
    initial_lr=max_lr,
    min_lr=max_lr * 1e-4,
    epochs_restart=[3.0, 7.0, 15.0, 31.0, 63.0, 127.0, 255.0],
)

# NOTE(review): with save_best_only=False the checkpoint is written after every
# epoch to the same path, so `monitor` and `mode` have no effect on what is kept.
save_path = experiments + "/model-mixup_alpha:{}-max_lr:{}-total_epochs:{}-no:{}.hdf5".format(mixup_alpha, max_lr, num_epochs, no)
checkpoint = keras.callbacks.ModelCheckpoint(
    save_path, monitor='val_acc', verbose=1, save_best_only=False, mode='max'
)
callbacks = [lr_scheduler, checkpoint]

# Calling the generator object yields the batch iterator passed to Keras.
# alpha is the mixup parameter; splitted_num is forwarded as-is
# (its meaning is defined by the generator class -- see its definition).
train_data_generator = Generator_balanceclass_timefreqmask_nocropping_splitted(
    feat_path, train_csv, total_csv, experiments, num_freq_bin,
    batch_size=batch_size,
    alpha=mixup_alpha,
    splitted_num=4,
)()


In [None]:
# Train run 5 from the augmenting generator, validating on the in-memory
# validation fold. Keras documents steps_per_epoch as an integer, so the float
# returned by np.ceil is converted explicitly (the step count is unchanged).
history = model.fit_generator(
    train_data_generator,
    validation_data=(data_val, y_val),
    epochs=num_epochs,
    verbose=1,
    workers=4,
    max_queue_size=100,
    callbacks=callbacks,
    steps_per_epoch=int(np.ceil(sample_num / batch_size)),
)

# MODEL 6

In [None]:
# Randomly resample the training list so that all three classes have a similar
# number of training samples.
total_csv = balance_class_data(train_csv, experiments)

# ---- Hyper-parameters for run 6 (mixup_alpha = 0.1) ------------------------
num_audio_channels = 2
num_freq_bin = 128
num_time_bin = 461
num_classes = 3
max_lr = 0.1
batch_size = 32
num_epochs = 50
mixup_alpha = 0.1
no = 6  # run index; only used in the checkpoint file name

# Sample count = number of lines in the training CSV minus one
# (presumably a header row -- confirm). The file is streamed inside a context
# manager so the handle is closed and the contents are not held in memory.
with open(train_csv, 'r') as train_list:
    sample_num = sum(1 for _ in train_list) - 1

# Validation data is loaded fully into memory; labels are one-hot encoded.
data_val, y_val = load_data_2020(feat_path, val_csv, num_freq_bin, 'logmel')
y_val = keras.utils.to_categorical(y_val, num_classes)

# Input has 3 * num_audio_channels feature maps
# (presumably log-mel, delta and delta-delta per audio channel -- confirm).
model = model_mobnet(
    num_classes,
    input_shape=[num_freq_bin, num_time_bin, 3 * num_audio_channels],
    num_filters=24,
    wd=1e-3,
)

model.compile(
    loss='categorical_crossentropy',
    optimizer=SGD(lr=max_lr, decay=0, momentum=0.9, nesterov=False),
    metrics=['accuracy'],
)

model.summary()

# Learning-rate schedule with restarts at the listed epochs; the LR ranges
# between max_lr and max_lr * 1e-4. nbatch is the number of batches per epoch.
lr_scheduler = LR_WarmRestart(
    nbatch=np.ceil(sample_num / batch_size),
    Tmult=2,
    initial_lr=max_lr,
    min_lr=max_lr * 1e-4,
    epochs_restart=[3.0, 7.0, 15.0, 31.0, 63.0, 127.0, 255.0],
)

# NOTE(review): with save_best_only=False the checkpoint is written after every
# epoch to the same path, so `monitor` and `mode` have no effect on what is kept.
save_path = experiments + "/model-mixup_alpha:{}-max_lr:{}-total_epochs:{}-no:{}.hdf5".format(mixup_alpha, max_lr, num_epochs, no)
checkpoint = keras.callbacks.ModelCheckpoint(
    save_path, monitor='val_acc', verbose=1, save_best_only=False, mode='max'
)
callbacks = [lr_scheduler, checkpoint]

# Calling the generator object yields the batch iterator passed to Keras.
# alpha is the mixup parameter; splitted_num is forwarded as-is
# (its meaning is defined by the generator class -- see its definition).
train_data_generator = Generator_balanceclass_timefreqmask_nocropping_splitted(
    feat_path, train_csv, total_csv, experiments, num_freq_bin,
    batch_size=batch_size,
    alpha=mixup_alpha,
    splitted_num=4,
)()


In [None]:
# Train run 6 from the augmenting generator, validating on the in-memory
# validation fold. Keras documents steps_per_epoch as an integer, so the float
# returned by np.ceil is converted explicitly (the step count is unchanged).
history = model.fit_generator(
    train_data_generator,
    validation_data=(data_val, y_val),
    epochs=num_epochs,
    verbose=1,
    workers=4,
    max_queue_size=100,
    callbacks=callbacks,
    steps_per_epoch=int(np.ceil(sample_num / batch_size)),
)

# MODEL 7

In [None]:
# Randomly resample the training list so that all three classes have a similar
# number of training samples.
total_csv = balance_class_data(train_csv, experiments)

# ---- Hyper-parameters for run 7 (max_lr = 0.05) ----------------------------
num_audio_channels = 2
num_freq_bin = 128
num_time_bin = 461
num_classes = 3
max_lr = 0.05
batch_size = 32
num_epochs = 50
mixup_alpha = 0.4
no = 7  # run index; only used in the checkpoint file name

# Sample count = number of lines in the training CSV minus one
# (presumably a header row -- confirm). The file is streamed inside a context
# manager so the handle is closed and the contents are not held in memory.
with open(train_csv, 'r') as train_list:
    sample_num = sum(1 for _ in train_list) - 1

# Validation data is loaded fully into memory; labels are one-hot encoded.
data_val, y_val = load_data_2020(feat_path, val_csv, num_freq_bin, 'logmel')
y_val = keras.utils.to_categorical(y_val, num_classes)

# Input has 3 * num_audio_channels feature maps
# (presumably log-mel, delta and delta-delta per audio channel -- confirm).
model = model_mobnet(
    num_classes,
    input_shape=[num_freq_bin, num_time_bin, 3 * num_audio_channels],
    num_filters=24,
    wd=1e-3,
)

model.compile(
    loss='categorical_crossentropy',
    optimizer=SGD(lr=max_lr, decay=0, momentum=0.9, nesterov=False),
    metrics=['accuracy'],
)

model.summary()

# Learning-rate schedule with restarts at the listed epochs; the LR ranges
# between max_lr and max_lr * 1e-4. nbatch is the number of batches per epoch.
lr_scheduler = LR_WarmRestart(
    nbatch=np.ceil(sample_num / batch_size),
    Tmult=2,
    initial_lr=max_lr,
    min_lr=max_lr * 1e-4,
    epochs_restart=[3.0, 7.0, 15.0, 31.0, 63.0, 127.0, 255.0],
)

# NOTE(review): with save_best_only=False the checkpoint is written after every
# epoch to the same path, so `monitor` and `mode` have no effect on what is kept.
save_path = experiments + "/model-mixup_alpha:{}-max_lr:{}-total_epochs:{}-no:{}.hdf5".format(mixup_alpha, max_lr, num_epochs, no)
checkpoint = keras.callbacks.ModelCheckpoint(
    save_path, monitor='val_acc', verbose=1, save_best_only=False, mode='max'
)
callbacks = [lr_scheduler, checkpoint]

# Calling the generator object yields the batch iterator passed to Keras.
# alpha is the mixup parameter; splitted_num is forwarded as-is
# (its meaning is defined by the generator class -- see its definition).
train_data_generator = Generator_balanceclass_timefreqmask_nocropping_splitted(
    feat_path, train_csv, total_csv, experiments, num_freq_bin,
    batch_size=batch_size,
    alpha=mixup_alpha,
    splitted_num=4,
)()


In [None]:
# Train run 7 from the augmenting generator, validating on the in-memory
# validation fold. Keras documents steps_per_epoch as an integer, so the float
# returned by np.ceil is converted explicitly (the step count is unchanged).
history = model.fit_generator(
    train_data_generator,
    validation_data=(data_val, y_val),
    epochs=num_epochs,
    verbose=1,
    workers=4,
    max_queue_size=100,
    callbacks=callbacks,
    steps_per_epoch=int(np.ceil(sample_num / batch_size)),
)