In [1]:
#Pacotes utilizados
import numpy as np
import pandas as pd
import pickle
from sklearn.metrics import accuracy_score, confusion_matrix
from keras import models, optimizers, layers, losses, utils
from sklearn.ensemble import RandomForestClassifier

'''Função que recebe o endereço do pickle e retorna o conteúdo no formado de numpy array'''
def read_pickle(name):
    with (open(name, 'rb')) as openfile:
        while True:
            try:
                one_instance = pickle.load(openfile)
            except EOFError:
                break
    one_instance = np.asanyarray(one_instance)
    return one_instance

'''Função que recebe os espectogramas e os labels e transforma para o formato de entrada da rede aceito pelo Keras'''
def pre_processing_datas_to_cnn_format(X, y):
    X = X.reshape(X.shape[0],X.shape[1],X.shape[2],1)
    y = utils.to_categorical(y)
    return X, y

'''Função que cria a arquitetura de rede'''
def create_model(shape_in, num_classes = 10, dropout_value = 0.5):
    model = models.Sequential()

    #Primeira camada convolucional
    model.add(layers.Conv2D(32, kernel_size=(5,5), activation = 'relu', padding='same', input_shape=(shape_in.shape[0],shape_in.shape[1],shape_in.shape[2]), name = 'conv_1'))

    #Primeira camada de pooling
    model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name = 'pool_1', padding='same'))
    
    model.add(layers.BatchNormalization())
    
    #Segunda camada convolucional
    model.add(layers.Conv2D(64, kernel_size=(5,5), activation = 'relu', padding='same', name = 'conv_2'))

    #Segunda camada de pooling
    model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name = 'pool_2', padding='same'))
    
    model.add(layers.BatchNormalization())
    
    #Terceira camada convolucional
    model.add(layers.Conv2D(64, kernel_size=(5,5), activation = 'relu', padding='same', name = 'conv_3'))

    #Terceira camada de pooling
    model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name = 'pool_3', padding='same'))
    
    model.add(layers.BatchNormalization())

    model.add(layers.Flatten(name = 'flatten'))
    
    #Dropout
    model.add(layers.Dropout(dropout_value))

    #Camada totalmente conectada
    model.add(layers.Dense(512, name = 'dense_1', activation='relu'))
    
    #Dropout
    model.add(layers.Dropout(dropout_value))

    #Camada de saída
    model.add(layers.Dense(num_classes, activation='softmax', name = 'classification'))
    
    #Para visualizar a arquitetura da rede
    model.summary()
    return model

'''Função que recebe os espectogramas e o endereço do modelo treinado e retorna os características obtidas, que são as saídas da penultima camada da rede'''
def extract_features(X_test, path_model):
    model = models.load_model(path_model)

    intermediate_layer_model = models.Model(inputs=model.input, outputs=model.get_layer(index = -2).output)
    features = intermediate_layer_model.predict(X_test)

    features = pd.DataFrame(data=features)
    
    return features

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  from numpy.core.umath_tests import inner1d


In [2]:
#Endereços dos pickles contendo os espectogramas e labels
path_train = '/data/deborah/UrbanSound8K_temp/log_specs_100_treino.pickle'
path_train_labels = '/data/deborah/UrbanSound8K_temp/labels_100_treino.pickle'
path_test = '/data/deborah/UrbanSound8K_temp/log_specs_100_teste.pickle'
path_test_labels = '/data/deborah/UrbanSound8K_temp/labels_100_teste.pickle'

#Nome do modelo que será salvo
path_model = 'trained_model_100_augmented.h5'
learning_rate = 0.0001
epochs = 1
batch_size = 128
num_classes = 10
dropout_value = 0.5
decay_=1e-3

X_train = read_pickle(path_train)
y_train = read_pickle(path_train_labels)
X_test = read_pickle(path_test)
y_test = read_pickle(path_test_labels)

#Transformando os espectogramas e labels
X_train, y_train = pre_processing_datas_to_cnn_format(X_train, y_train)
X_test, y_test = pre_processing_datas_to_cnn_format(X_test, y_test)

In [5]:
#Criando o modelo e configurando os parâmetros de treinamento
model = create_model(X_train[0], num_classes, dropout_value)
sgd = optimizers.SGD(lr=learning_rate, decay=decay_)
model.compile(loss=losses.categorical_crossentropy, optimizer=sgd, metrics=['accuracy'])

#Treinamento
model.fit(X_train, y_train, validation_data = (X_test, y_test), batch_size=batch_size, epochs=epochs, verbose=1)

#Salvando o modelo treinado
model.save(path_model) 


Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv_1 (Conv2D)              (None, 100, 100, 32)      832       
_________________________________________________________________
pool_1 (MaxPooling2D)        (None, 50, 50, 32)        0         
_________________________________________________________________
batch_normalization_4 (Batch (None, 50, 50, 32)        128       
_________________________________________________________________
conv_2 (Conv2D)              (None, 50, 50, 64)        51264     
_________________________________________________________________
pool_2 (MaxPooling2D)        (None, 25, 25, 64)        0         
_________________________________________________________________
batch_normalization_5 (Batch (None, 25, 25, 64)        256       
_________________________________________________________________
conv_3 (Conv2D)              (None, 25, 25, 64)       

In [6]:
#Lendo o modelo salvo após o treinamento
model = models.load_model(path_model)

#Fazendo a predição dos dados do conjunto de teste com a rede treinada
pred = model.predict_classes(X_test)

#Imprimindo a acuracia e a matriz de confusão
print('Acuracia da rede = ',accuracy_score(pred,np.argmax(y_test,axis=1)))
print('Matriz de confusão da rede = \n', confusion_matrix(pred,np.argmax(y_test,axis=1)))

Acuracia da rede =  0.33485342019543973
Matriz de confusão da rede = 
 [[134   0  26   7  12  67   4  41  15  30]
 [  0   0   0   7   6   0  15   0   6   0]
 [  5  10  62  25  37   9   0  15  27  37]
 [  1   1  16  42  13   2   0   5   7   7]
 [  0   0   1   0  41   0   0   7   0   1]
 [ 14   0   4   5   0  36   0  16   6   6]
 [  0   0   0   1  11   0   1   0   0   0]
 [  2   0   1   1  13   7   0  23   4   2]
 [  3  13  39  31   9  36   1  40  88  30]
 [ 40   8  51  18  15  38   3  67  24  87]]


In [7]:
#Utilizando a rede treinada para extrair as características dos espectogramas
X_train_features = extract_features(X_train,path_model)
X_test_features = extract_features(X_test,path_model)

#Treinando uma Random Forest
clf = RandomForestClassifier(n_estimators=100)
clf.fit(X_train_features,np.argmax(y_train,axis=1))

#Predição com a Random Forest treinada
predicted = clf.predict(X_test_features)

#Imprimindo a acuracia e a matriz de confusão
print('Acuracia da Random Forest = ',accuracy_score(predicted,np.argmax(y_test,axis=1)))
print('Matriz de confusão da Random Forest = \n',confusion_matrix(predicted,np.argmax(y_test,axis=1)))

Acuracia da Random Forest =  0.4905537459283388
Matriz de confusão da Random Forest = 
 [[106   1  12   5  11  30   0   5   1  14]
 [  0   0   0   0   0   0   0   0   0   1]
 [ 15  14  99  12  21  20   1  41  17  41]
 [  1   1  12 100  17   2   1   2   6   7]
 [  0   1   2   0  80   2   1  17   1   9]
 [ 46   0   9   4   0  87   0  49   9   8]
 [  0   0   0   0   0   0  21   0   0   0]
 [  0   0   0   1   7   0   0  20   0   1]
 [  2  13  16   8   2  19   0  40 136  15]
 [ 29   2  50   7  19  35   0  40   7 104]]
