In [6]:
# draw
%matplotlib inline
import matplotlib.pyplot as plt
plt.style.use('ggplot')
# basic handling
import os
import glob
import pickle
import numpy as np
# audio
import librosa
import librosa.display
import IPython.display
# normalization
import sklearn
# nn
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.utils import to_categorical
from keras.callbacks import LearningRateScheduler

from keras import Sequential
from keras.layers import Dense, Activation, Conv2D, MaxPooling2D, Flatten, Dropout
from keras.layers import Input
from keras.optimizers import SGD

## Set the audio clip length in samples: sample rate * duration (s)
def load_clip(filename, n_samples=88200):
    """Load an audio file and force it to a fixed number of samples.

    Clips shorter than ``n_samples`` are zero-padded at the end; clips that
    are at least that long are truncated to their first ``n_samples`` samples.

    Args:
        filename: Path of the audio file, passed straight to ``librosa.load``.
        n_samples: Target length in samples. The default of 88200 corresponds
            to 4 seconds at librosa's default 22050 Hz loading rate.

    Returns:
        A tuple ``(x, sr)``: the fixed-length signal and the sampling rate
        reported by ``librosa.load``.
    """
    x, sr = librosa.load(filename)
    if len(x) < n_samples:
        # Pad only at the end so the audio stays aligned to the clip start.
        x = np.pad(x, (0, n_samples - x.shape[0]), 'constant')
    else:
        x = x[:n_samples]
    return x, sr
def extract_feature(filename):
    """Return the normalised MFCC matrix of one audio file.

    The clip is read through ``load_clip``, 40 MFCCs are computed with
    librosa, and the result is passed through
    ``sklearn.preprocessing.scale`` along axis 1.
    """
    signal, rate = load_clip(filename)
    coefficients = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=40)
    return sklearn.preprocessing.scale(coefficients, axis=1)
## Load the dataset; returns the MFCC matrices of the audio data (and their labels)
def load_dataset(filenames):
    """Extract MFCC features and integer class labels for a set of audio files.

    The label is parsed from each file's base name: the name is split on
    ``-`` and the second field is converted to ``int``. Progress is printed
    after every 100 files.

    Args:
        filenames: Iterable of audio file paths.

    Returns:
        A tuple ``(features, labels)``. ``features`` is a float64 array with
        one MFCC matrix per file stacked along axis 0 (shape ``(0, 40, 173)``
        when ``filenames`` is empty); ``labels`` is a 1-D integer array.
    """
    filenames = list(filenames)
    total = len(filenames)
    if total == 0:
        # Keep the historical empty-result shapes without touching any file.
        return np.empty((0, 40, 173)), np.empty(0, dtype=int)

    # Collect into lists and stack once: np.append inside the loop copies the
    # whole accumulated array on every iteration.
    feature_list, label_list = [], []
    for done, filename in enumerate(filenames, start=1):
        feature_list.append(extract_feature(filename))
        # basename copes with whichever path separator the platform uses.
        label_list.append(int(os.path.basename(filename).split('-')[1]))
        if done % 100 == 0:
            print([str(done) + ' / ' + str(total) + ' finished'])

    features = np.stack(feature_list).astype(np.float64, copy=False)
    # ``np.int`` was only an alias of the builtin and no longer exists in
    # current NumPy releases; the builtin ``int`` gives the same dtype.
    return features, np.array(label_list, dtype=int)
def get_trainData(filenames):
    """Build the training and validation arrays for the CNN from audio files.

    Features and labels are read with ``load_dataset``, split 70/30 with a
    fixed random seed, given a trailing channel axis, and the labels are
    one-hot encoded.

    Args:
        filenames: Iterable of audio file paths, forwarded to ``load_dataset``.

    Returns:
        ``(train_x, val_x, train_y, val_y)`` where the ``*_x`` arrays have a
        trailing axis of size 1 and the ``*_y`` arrays are one-hot matrices
        of identical width.
    """
    # Import the submodule explicitly: a bare ``import sklearn`` does not
    # reliably expose ``sklearn.model_selection`` (it depends on the
    # scikit-learn version and on what other packages happened to import).
    from sklearn.model_selection import train_test_split

    data_x, data_y = load_dataset(filenames)
    # Take the class count from the full label set so both splits get the
    # same one-hot width even if one of them lacks the highest class id.
    num_classes = int(data_y.max()) + 1

    train_x, val_x, train_y, val_y = train_test_split(
        data_x, data_y, test_size=0.3, random_state=0)

    # Append the single channel axis expected by the Conv2D input layer.
    train_x = train_x.reshape(train_x.shape[0], train_x.shape[1], train_x.shape[2], 1)
    val_x = val_x.reshape(val_x.shape[0], val_x.shape[1], val_x.shape[2], 1)

    train_y = to_categorical(train_y, num_classes=num_classes)
    val_y = to_categorical(val_y, num_classes=num_classes)
    return train_x, val_x, train_y, val_y
def get_testData(filename):
    """Return the MFCC features of one audio file as a single-sample batch.

    Args:
        filename: Path of the audio file, forwarded to ``extract_feature``.

    Returns:
        An array of shape ``(1, rows, cols, 1)`` where ``(rows, cols)`` is
        the shape of the matrix returned by ``extract_feature``.
    """
    mfccs = extract_feature(filename)
    # extract_feature yields a 2-D matrix, so the previous ``mfccs.shape[2]``
    # lookup always raised IndexError. Add the batch axis in front and the
    # channel axis at the end instead.
    return mfccs.reshape(1, mfccs.shape[0], mfccs.shape[1], 1)
def show_history(history):
    """Plot accuracy and loss curves of a Keras training run side by side.

    Args:
        history: The object returned by ``model.fit``; its ``history`` dict
            must contain ``loss``/``val_loss`` and an accuracy metric with
            its ``val_`` counterpart.
    """
    metrics = history.history
    print(metrics.keys())

    # Depending on the Keras version, ``metrics=['accuracy']`` is reported
    # either as 'acc' or as 'accuracy'; accept both.
    acc_key = 'acc' if 'acc' in metrics else 'accuracy'

    plt.figure(figsize=(20, 5))

    plt.subplot(121)
    plt.plot(metrics[acc_key])
    plt.plot(metrics['val_' + acc_key])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')

    plt.subplot(122)
    plt.plot(metrics['loss'])
    plt.plot(metrics['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='lower left')

    plt.show()
## Build the convolutional network model
def initial(train_x,train_y):
    """Build, compile and summarise the CNN classifier.

    The network is a stack of five stages, each a single 3x3 'same'-padded
    ReLU convolution (32, 64, 128, 256, 512 filters) followed by 2x2 max
    pooling with stride 2, then a flatten layer and a 10-way softmax output.

    Args:
        train_x: Training inputs; only ``train_x.shape[1:]`` is read, to set
            the input shape of the first convolution.
        train_y: Accepted for call compatibility; not used here.

    Returns:
        The compiled ``Sequential`` model (Adam optimizer, categorical
        cross-entropy loss, accuracy metric).
    """
    model = Sequential()

    # One conv + pool pair per stage; the layer names keep the
    # 'block<N>_conv1' / 'block<N>_pool' scheme.
    for stage, n_filters in enumerate((32, 64, 128, 256, 512), start=1):
        conv_options = dict(
            filters=n_filters,
            kernel_size=(3, 3),
            activation='relu',
            padding='same',
            name='block%d_conv1' % stage,
        )
        if stage == 1:
            # Only the first layer needs to be told the input shape.
            conv_options['input_shape'] = train_x.shape[1:]
        model.add(Conv2D(**conv_options))
        model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2),
                               name='block%d_pool' % stage))

    model.add(Flatten())
    model.add(Dense(10, activation='softmax', name='prediction'))

    model.compile(optimizer='Adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary(line_length=80)
    return model
## Inputs: training data train_x/train_y and validation data val_x/val_y, i.e. the audio data read from the given directory by calling load_dataset
def train(model,train_x,train_y,val_x,cal_y):
    """Fit the model for 10 epochs and plot the resulting training curves.

    Args:
        model: Compiled Keras model to train.
        train_x: Training inputs.
        train_y: Training targets.
        val_x: Validation inputs.
        cal_y: Validation targets. The name looks like a typo for ``val_y``
            and is kept so existing keyword callers keep working.
    """
    # Use the argument itself: the body previously referred to ``val_y``,
    # which is not a parameter and was resolved as a global (or failed with
    # NameError when no such global existed).
    history = model.fit(train_x, train_y, epochs=10, batch_size=32,
                        validation_data=(val_x, cal_y))
    show_history(history)
def test(model,x):
    """Return ``model.predict(x)`` for the given inputs."""
    return model.predict(x)

In [4]:
## Configure GPU usage
import os
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session

os.environ["CUDA_VISIBLE_DEVICES"] = "0"
config = tf.ConfigProto()
# Cap this process at 70% of the GPU memory.
config.gpu_options.per_process_gpu_memory_fraction = 0.7
set_session(tf.Session(config=config))

## Read the audio files from the fold directories
parent_dir = './data/UrbanSound8K/audio/'
data_list = ['fold%d/' % fold for fold in range(1, 11)]
val_dir = 'val/'
test_dir = 'test/fold10/'
file_name = '*.wav'

# Gather every wav file, fold by fold, in the order of data_list.
data_files = []
for data in data_list:
    data_files.extend(glob.glob(os.path.join(parent_dir, data, file_name)))

# Random 40% subset of the files (val_files is not used further in this cell;
# get_trainData performs its own train/validation split).
rand_index = np.argsort(np.random.uniform(0, 1, len(data_files)))
length = len(data_files) * 4 // 10
val_files = np.array(data_files)[rand_index[:length]]

train_x, val_x, train_y, val_y = get_trainData(data_files)
# initial()



['100 / 8732 finished']




['200 / 8732 finished']




['300 / 8732 finished']




['400 / 8732 finished']




['500 / 8732 finished']




['600 / 8732 finished']




['700 / 8732 finished']




['800 / 8732 finished']




['900 / 8732 finished']




KeyboardInterrupt: 

In [None]:
## Configure GPU usage
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
config = tf.ConfigProto()
# Cap this process at 30% of the GPU memory.
config.gpu_options.per_process_gpu_memory_fraction = 0.3
set_session(tf.Session(config=config))


def _load_pickle(path):
    """Unpickle and return the object stored at ``path``, closing the file."""
    # NOTE(review): pickle can execute arbitrary code while loading; only use
    # this on .dat files produced by this project.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


train_x = _load_pickle('./train_x.dat')
train_y = _load_pickle('./train_y.dat')
val_x = _load_pickle('./val_x.dat')
val_y = _load_pickle('./val_y.dat')
test_x = _load_pickle('./test_x.dat')
test_y = _load_pickle('./test_y.dat')
# Append the single channel axis expected by the Conv2D input layer.
train_x = train_x.reshape(train_x.shape[0],train_x.shape[1],train_x.shape[2],1)
val_x = val_x.reshape(val_x.shape[0],val_x.shape[1],val_x.shape[2],1)
test_x = test_x.reshape(test_x.shape[0],test_x.shape[1],test_x.shape[2],1)
## The lines above (loading the cached arrays) stand in for the results of running the previous cell
train_y = to_categorical(train_y)
val_y = to_categorical(val_y)
test_y = to_categorical(test_y)
model = initial(train_x,train_y)
train(model,train_x,train_y,val_x,val_y)
test(model,test_x)

________________________________________________________________________________
Layer (type)                        Output Shape                    Param #     
block1_conv1 (Conv2D)               (None, 40, 173, 32)             320         
________________________________________________________________________________
block1_pool (MaxPooling2D)          (None, 20, 86, 32)              0           
________________________________________________________________________________
block2_conv1 (Conv2D)               (None, 20, 86, 64)              18496       
________________________________________________________________________________
block2_pool (MaxPooling2D)          (None, 10, 43, 64)              0           
________________________________________________________________________________
block3_conv1 (Conv2D)               (None, 10, 43, 128)             73856       
________________________________________________________________________________
block3_pool (MaxPooling2D)  