In [None]:
import sys
import os
import pandas
import librosa
import numpy
import pickle
import soundfile
import pandas as pd
import numpy as np
import tensorflow
import tensorflow.keras
import scipy


# Report the library versions so a run can be tied to a concrete environment.
for banner, package in (
    ("Librosa version = ", librosa),
    ("keras version = ", tensorflow.keras),
    ("tensorflow version = ", tensorflow),
):
    print(banner, package.__version__)

In [None]:
data_fold='/Data2/DCASE/DCASE2020t1_B/TAU-urban-acoustic-scenes-2020-3class-development/'
train_filename = data_fold + 'evaluation_setup/fold1_train.csv'
test_filename = data_fold + 'evaluation_setup/fold1_evaluate.csv'
meta_filename = os.path.join(data_fold, 'meta.csv')


def _load_fold_records(csv_path, root):
    """Read one tab-separated fold CSV and return ``(records, filenames)``.

    Each record is a dict built from one CSV row. Its ``filename`` is made
    absolute by joining it onto ``root``, and an ``identifier`` key is added
    that joins the 2nd and 3rd dash-separated fields of the original
    (relative) filename.
    """
    # `sep` must be passed by keyword: pandas >= 2.0 rejects it positionally.
    records = pandas.read_csv(csv_path, sep='\t').to_dict('records')
    for record in records:
        name_parts = record['filename'].split('-')
        record['filename'] = os.path.join(root, record['filename'])
        record['identifier'] = '-'.join(name_parts[1:3])
    return records, [record['filename'] for record in records]


meta_db = pandas.read_csv(meta_filename, sep='\t')
scene_labels = meta_db["scene_label"].unique().tolist()   # 3 classes: indoor, outdoor, transportation
identifiers = meta_db["identifier"].unique().tolist() # 514 unique identifiers

# Switch to a list of per-file dicts and make every filename absolute.
meta_db = meta_db.to_dict('records')
for record in meta_db:
    record['filename'] = os.path.join(data_fold, record['filename'])

train_db, train_files = _load_fold_records(train_filename, data_fold)
test_db, test_files = _load_fold_records(test_filename, data_fold)

In [None]:
db=train_db

import random
random.seed(0)

# Target share of files held out for validation, and the number of random
# candidate splits drawn per scene before the closest one is kept.
validation_fraction = 0.3
n_split_candidates = 100


def _legacy_shuffle(items):
    """Shuffle ``items`` in place like the old ``random.shuffle(items, random.random)``.

    The two-argument form of ``random.shuffle`` was removed in Python 3.11.
    This reproduces its algorithm (index drawn as ``int(random() * (i + 1))``)
    so the call works on every Python version and consumes the seeded random
    stream the same way as before.
    """
    for i in reversed(range(1, len(items))):
        j = int(random.random() * (i + 1))
        items[i], items[j] = items[j], items[i]


training_files = []
validation_files = []


for scene_id, scene_label in enumerate(scene_labels):
    scene_meta = [file for file in db if file['scene_label']==scene_label]
    if not scene_meta:
        # Nothing to split for this scene; also avoids a 0/0 ratio below.
        continue

    # Group the scene's files by identifier in one pass, then nest them as
    # data[first identifier part][second identifier part] -> list of filenames.
    # Iterating the *sorted unique* identifiers keeps the same key order as
    # before, which matters because that order feeds the shuffle.
    files_by_identifier = {}
    for file in scene_meta:
        files_by_identifier.setdefault(file["identifier"], []).append(file["filename"])

    data = {}
    for identifier in sorted(files_by_identifier):
        path = identifier.split("-")
        data.setdefault(path[0], {})[path[1]] = files_by_identifier[identifier]

    current_scene_validation_amount = []
    sets_candidates = []

    identifier_first_level = list(data.keys())

    for i in range(n_split_candidates):
        current_validation_files = []
        current_training_files = []

        current_validation_identifiers2 = 0
        for identifier1 in identifier_first_level:
            current_ids = list(data[identifier1].keys())
            _legacy_shuffle(current_ids)

            # Hold out ceil(fraction) of the second-level ids, so every
            # first-level group contributes at least one id to validation.
            validation_split_index = int(numpy.ceil(validation_fraction * len(current_ids)))
            current_validation = current_ids[0:validation_split_index]
            current_training = current_ids[validation_split_index:]

            for identifier2 in current_validation:
                current_validation_files += data[identifier1][identifier2]

            for identifier2 in current_training:
                current_training_files += data[identifier1][identifier2]

            current_validation_identifiers2 += len(current_validation)

        # Share of this scene's files that ended up in the validation part.
        current_scene_validation_amount.append(
            len(current_validation_files) / float(
                len(current_validation_files) + len(current_training_files))
        )

        sets_candidates.append({
            'validation': current_validation_files,
            'training': current_training_files,
            'validation_identifiers1': len(identifier_first_level),
            'validation_identifiers2': current_validation_identifiers2,
        })

    # Keep the candidate whose file-level ratio is closest to the target.
    best_set_id = numpy.argmin(
        numpy.abs(numpy.array(current_scene_validation_amount) - validation_fraction))

    validation_files += sets_candidates[best_set_id]['validation']
    training_files += sets_candidates[best_set_id]['training']

In [None]:
# Show how many files ended up in each partition.
for split_caption, split_files in (
    ('train set: ', training_files),
    ('val set: ', validation_files),
    ('test set: ', test_files),
):
    print(split_caption, len(split_files))

In [None]:
feature_name1 = '3class_melspecdelta_6'

# Directory that holds one .npz feature file per audio file.
feature_dir = data_fold + 'features/' + feature_name1 + '/'

# Sets give constant-time membership tests; scanning the original lists for
# every metadata row made this cell quadratic in the number of files.
test_file_set = set(test_files)
training_file_set = set(training_files)
validation_file_set = set(validation_files)

item_list_train = []
item_list_validation = []
item_list_test = []

for item in meta_db:
    # "<dir>/<name>.wav" -> "<name>"
    base_filename = os.path.splitext(os.path.basename(item["filename"]))[0]
    feature_filename1 = feature_dir + base_filename + '.npz'

    item_ = {
        'data': {
            'filename':[feature_filename1]
        },
        'meta': {
            'label': item["scene_label"]
        }
    }

    # Same precedence as before: test, then training, then validation.
    # Files that belong to none of the three partitions are dropped.
    if item["filename"] in test_file_set:
        item_list_test.append(item_)
    elif item["filename"] in training_file_set:
        item_list_train.append(item_)
    elif item["filename"] in validation_file_set:
        item_list_validation.append(item_)

In [None]:
def _load_features_and_targets(items, labels):
    """Load the 'embedding' array and a one-hot target for every item.

    # Arguments
        items: list of dicts with ``item["data"]["filename"][0]`` pointing at
            an .npz file and ``item["meta"]["label"]`` naming its class.
        labels: list of class names; a label's position is its class index.
    # Returns
        ``(X, Y)`` where ``X`` stacks the embeddings with ``numpy.array`` and
        ``Y`` has one one-hot row per item with ``len(labels)`` columns.
    """
    features = []
    targets = []
    for item in items:
        one_hot = numpy.zeros((1, len(labels)))
        one_hot[0, labels.index(item["meta"]["label"])] = 1

        # The context manager closes the archive right after the array is
        # read; otherwise one file handle per item stays open.
        with numpy.load(item["data"]["filename"][0]) as archive:
            features.append(archive['embedding'])
        targets.append(one_hot)

    return numpy.array(features), numpy.vstack(targets)


X_train, Y_train = _load_features_and_targets(item_list_train, scene_labels)
X_val, Y_val = _load_features_and_targets(item_list_validation, scene_labels)

In [None]:
# Build the test tensors: one embedding and one one-hot target row per item.
X_test = []
Y_test = []
for item in item_list_test:
    # One-hot row; the column is the label's position in scene_labels.
    binary_matrix = numpy.zeros((len(scene_labels), 1))
    pos = scene_labels.index(item["meta"]["label"])
    binary_matrix[pos,:] = 1

    # Close each .npz archive once its array is read instead of leaving one
    # open file handle per test item.
    with numpy.load(item["data"]["filename"][0]) as audio:
        embedding = audio['embedding']

    X_test.append(embedding)
    Y_test.append(binary_matrix.T)


X_test = numpy.array(X_test)
Y_test = numpy.vstack(Y_test)

In [None]:
# Sanity-check the stacked feature tensors of the three partitions.
for shape_caption, feature_tensor in (
    ('deltas-deltadelta train shape: ', X_train),
    ('deltas-deltadelta validation shape: ', X_val),
    ('deltas-deltadelta test shape: ', X_test),
):
    print(shape_caption, feature_tensor.shape)

In [None]:
# Integer class id per test item. The id is the label's position in
# scene_labels, the same encoding used for the one-hot Y_test rows, so these
# ids always line up with the argmax of the model output. The previous
# hard-coded mapping (indoor=0, transportation=1, otherwise 2) only matched
# when scene_labels happened to be in that order.
dev_test_labels = [
    scene_labels.index(entry['meta']['label']) for entry in item_list_test
]

In [None]:
# Integer class id per validation item. The id is the label's position in
# scene_labels, the same encoding used for the one-hot Y_val rows, so these
# ids always line up with the argmax of the model output. The previous
# hard-coded mapping (indoor=0, transportation=1, otherwise 2) only matched
# when scene_labels happened to be in that order.
dev_val_labels = [
    scene_labels.index(entry['meta']['label']) for entry in item_list_validation
]

In [None]:
from Mixup import MixupGenerator

In [None]:
import tensorflow.keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation

import numpy
import random

from tensorflow.keras import layers
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.models import load_model
from tensorflow.keras.layers import ZeroPadding2D,Input,Add, Permute, Cropping2D, Activation, Maximum,Dropout, Flatten, Dense, Conv2D, MaxPooling2D, MaxPool2D,BatchNormalization, Convolution2D, ReLU, GlobalAveragePooling2D
from tensorflow.keras.layers import BatchNormalization as BN
from tensorflow.keras.layers import DepthwiseConv2D, SeparableConv2D


from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
from tensorflow.keras.callbacks import LearningRateScheduler, ModelCheckpoint,ReduceLROnPlateau
from tensorflow.keras import backend as K
import tensorflow as tf
from tensorflow.keras import regularizers
from tensorflow.keras.regularizers import l2

from tensorflow.keras.layers import  concatenate

import tensorflow.keras

from tensorflow.keras.models import load_model

## (6) BIR-ResNet RF-1

In [None]:
def _make_divisible(v, divisor, min_value=None):
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    # Make sure that round down does not go down by more than 10%.
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v


def relu6(x):
    """ReLU activation whose output is capped at 6."""
    capped = K.relu(x, max_value=6.0)
    return capped

def _conv_block(inputs, filters, kernel, strides):
    """Conv2D -> BatchNormalization -> relu6.
    # Arguments
        inputs: Tensor fed to the convolution.
        filters: Integer, number of output channels.
        kernel: Integer or tuple/list of 2 integers, size of the
            convolution window.
        strides: Integer or tuple/list of 2 integers, strides of the
            convolution; a single integer applies to both spatial
            dimensions.
    # Returns
        Output tensor.
    """
    # Normalise over the channel axis, wherever the backend puts it.
    if K.image_data_format() == 'channels_first':
        bn_axis = 1
    else:
        bn_axis = -1

    features = Conv2D(filters, kernel, padding='same', strides=strides)(inputs)
    features = BatchNormalization(axis=bn_axis)(features)
    return Activation(relu6)(features)

def _bottleneck(inputs, filters, kernel, t, alpha, s, r=False):
    """Bottleneck: 1x1 expansion, depthwise convolution, linear 1x1 projection.
    # Arguments
        inputs: Tensor fed to the block.
        filters: Integer, base number of output channels.
        kernel: Integer or tuple/list of 2 integers, size of the
            depthwise convolution window.
        t: Integer, expansion factor applied to the number of
            input channels.
        alpha: Number, width multiplier; the projection has
            int(filters * alpha) channels.
        s: Integer, stride of the depthwise convolution (used for both
            spatial dimensions).
        r: Boolean, whether to add `inputs` to the result.
    # Returns
        Output tensor.
    """
    if K.image_data_format() == 'channels_first':
        bn_axis = 1
    else:
        bn_axis = -1

    expanded_channels = K.int_shape(inputs)[bn_axis] * t
    projected_channels = int(filters * alpha)

    # 1x1 expansion with BN + relu6.
    y = _conv_block(inputs, expanded_channels, (1, 1), (1, 1))

    # Depthwise spatial filtering.
    y = DepthwiseConv2D(kernel, strides=(s, s), depth_multiplier=1, padding='same')(y)
    y = BatchNormalization(axis=bn_axis)(y)
    y = Activation(relu6)(y)

    # 1x1 projection; no activation follows the batch norm.
    y = Conv2D(projected_channels, (1, 1), strides=(1, 1), padding='same')(y)
    y = BatchNormalization(axis=bn_axis)(y)

    if not r:
        return y
    return Add()([y, inputs])



def _inverted_residual_block(inputs, filters, kernel, t, alpha, strides, n):
    """Inverted Residual Block
    Projects `inputs` to `filters` channels with a 1x1 convolution, then
    applies `n - 1` residual bottlenecks one after another.
    # Arguments
        inputs: Tensor, input tensor of the block.
        filters: Integer, the dimensionality of the output space.
        kernel: An integer or tuple/list of 2 integers, specifying the
            width and height of the depthwise convolution window.
        t: Integer, expansion factor.
            t is always applied to the input size.
        alpha: Number, width multiplier. The residual additions need
            int(filters * alpha) == filters.
        strides: Unused; kept so existing callers keep working. Every
            bottleneck runs with stride 1.
        n: Integer, layer repeat times. With n <= 1 only the 1x1
            projection is applied.
    # Returns
        Output tensor.
    """
    # Start from the projection so `x` is always defined (n == 1 used to
    # raise UnboundLocalError) and so each bottleneck consumes the output of
    # the previous one. Previously every iteration restarted from the
    # projection and only the last bottleneck reached the output.
    # For n == 2 the resulting graph is identical to the old one.
    x = Conv2D(filters, (1,1))(inputs)
    for _ in range(1, n):
        x = _bottleneck(x, filters, kernel, t, alpha, 1, True)

    return x




def resnet_1_mb2(input_shape, alpha=1.0, num_classes=3):
    """Build the small ResNet-style classifier with inverted-residual blocks.
    # Arguments
        input_shape: Keras input tensor (callers pass `Input(shape)`); it is
            used directly as the model input despite its name.
        alpha: Number, width multiplier forwarded to the inverted
            residual blocks.
        num_classes: Integer, number of units of the softmax output
            layer. Defaults to 3, the previously hard-coded value.
    # Returns
        An uncompiled `Model`.
    """

    input_tensor = input_shape

    def conv1_layer(x):
        # Stem: pad, strided 3x3 convolution, BN + ReLU, then pad again
        # before the pooling that opens the next stage.
        x = ZeroPadding2D(padding=(3, 3))(x)
        x = Conv2D(64, (3, 3), strides=(2, 2))(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = ZeroPadding2D(padding=(1,1))(x)
        return x

    def conv2_layer(x):
        x = MaxPooling2D((3, 3), 2)(x)

        # Two inverted residual blocks ...
        for _ in range(2):
            x = _inverted_residual_block(x, 32, (3, 3), t=2, alpha=alpha, strides=1, n=2)

        # ... followed by two 1x1 conv / BN / ReLU layers. Layers are created
        # in the same order as before; the unused `shortcut` variable that was
        # assigned here has been dropped.
        for _ in range(2):
            x = Conv2D(64, (1, 1), strides=(1, 1), padding='same')(x)
            x = BatchNormalization()(x)
            x = Activation('relu')(x)

        return x

    x = conv1_layer(input_tensor)
    x = conv2_layer(x)
    x = GlobalAveragePooling2D()(x)
    output_tensor = Dense(num_classes, activation='softmax')(x)

    model = Model(input_tensor, output_tensor)

    return model





In [None]:
# Build one model instance just to inspect the architecture.
mb2_model = resnet_1_mb2(Input(X_train.shape[1:]))
mb2_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['categorical_accuracy'],
)

mb2_model.summary()

In [None]:
from tensorflow.keras.utils import plot_model

# Render the layer graph of the preview model to model.png.
plot_model(model=mb2_model, to_file='model.png')

In [None]:
def sigmoidal_decay(e, start=0, end=100, lr_start=1e-3, lr_end=1e-5):
    """Learning rate for epoch ``e`` on a sigmoid-shaped decay curve.

    Returns ``lr_start`` before ``start`` and ``lr_end`` after ``end``.
    In between, the rate follows a logistic curve centred halfway between
    ``start`` and ``end``.
    """
    if e < start:
        return lr_start
    if e > end:
        return lr_end

    midpoint = (start + end) / 2
    span = np.abs(end - start)
    # The factor 13 stretches the window so the logistic input runs from
    # +6.5 at `start` down to -6.5 at `end`.
    z = 13 * (-e + midpoint) / span
    gate = 1 / (1 + np.exp(-z))

    return gate * np.abs(lr_start - lr_end) + lr_end

# Per-epoch learning-rate callback: sigmoid decay finishing at epoch 100.
lr = LearningRateScheduler(schedule=lambda epoch: sigmoidal_decay(epoch, end=100))

In [None]:
# Train ten independent runs of the same architecture. Each run keeps only its
# best checkpoint, selected by validation accuracy.
batch_size = 64
n_epochs = 100

# ModelCheckpoint does not create missing directories; without this the first
# save fails on a fresh checkout.
os.makedirs("checkpoints-10times", exist_ok=True)

for i in range(10):
    model = resnet_1_mb2(Input(X_train.shape[1:]))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['categorical_accuracy'])

    lr = LearningRateScheduler(lambda e: sigmoidal_decay(e, end=n_epochs))

    model_name = 'bir_rf1'+ str(i)
    checkpoint="checkpoints-10times/"+model_name+'_'+"cp.h5"

    # The deprecated `period=1` argument is dropped: saving is evaluated every
    # epoch by default.
    mc = ModelCheckpoint(checkpoint, monitor='val_categorical_accuracy',
                          verbose=True, save_best_only=True,
                          mode ='auto')

    TrainDataGen = MixupGenerator(X_train,
                              Y_train,
                              batch_size=batch_size,
                              alpha=0.4)()

    # `fit` accepts generators directly (`fit_generator` is deprecated), and
    # `steps_per_epoch` has to be an integer, not the float from np.ceil.
    history1 = model.fit(TrainDataGen,
                         validation_data = (X_val, Y_val),
                         epochs=n_epochs,
                         callbacks=[lr, mc],
                         steps_per_epoch=int(np.ceil(len(X_train)/batch_size))
                        )

    # Scores of the weights after the final epoch, not of the best checkpoint.
    scores = model.evaluate(X_test, Y_test, verbose=1)

    print('Test loss:', scores[0])
    print('Test accuracy:', scores[1])

### Test accuracy

In [None]:
# Fresh model used as a container for the weights of each saved checkpoint.
model = resnet_1_mb2(Input(X_train.shape[1:]))
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['categorical_accuracy'],
)

In [None]:
# Ground-truth class ids come from the one-hot targets, so they use exactly
# the encoding the model was trained on. They do not change between
# checkpoints, so they are computed once.
y_real = np.argmax(Y_test,axis=1)

for i in range(10):
    model_name = 'bir_rf1'+ str(i)
    checkpoint="checkpoints-10times/"+model_name+'_'+"cp.h5"
    model.load_weights(checkpoint)

    scores_test = model.predict(X_test, verbose=1)
    y_pred_test = np.argmax(scores_test,axis=1)
    # Fraction of test items whose predicted class equals the true class.
    Overall_accuracy = np.mean(y_pred_test == y_real)
    print('Test accuracy:', Overall_accuracy)

### Validation accuracy

In [None]:
# Ground-truth class ids come from the one-hot targets, so they use exactly
# the encoding the model was trained on. They do not change between
# checkpoints, so they are computed once.
y_real = np.argmax(Y_val,axis=1)

for i in range(10):
    model_name = 'bir_rf1'+ str(i)
    checkpoint="checkpoints-10times/"+model_name+'_'+"cp.h5"
    model.load_weights(checkpoint)

    # NOTE: the `*_test` variable names are kept for compatibility with the
    # rest of the notebook; in this cell they hold validation predictions.
    scores_test = model.predict(X_val, verbose=1)
    y_pred_test = np.argmax(scores_test,axis=1)
    # Fraction of validation items whose predicted class equals the true class.
    Overall_accuracy = np.mean(y_pred_test == y_real)
    # The label used to read 'Test accuracy:' although this is validation data.
    print('Validation accuracy:', Overall_accuracy)