In [1]:
import matplotlib.pyplot as plt
%matplotlib inline
import IPython.display
import librosa.display
import numpy as np
import librosa
import tensorflow as tf
import glob

### If you have been save the data, you don't have to preprocessing and save the data

In [2]:
c_drone_path = '../../../1m/*.wav'
m_drone_path = '../../../20m/*.wav'
f_drone_path = '../../../50m/*.wav'
background_path = '../../data/background/*.wav'

c_drone_files = glob.glob(c_drone_path)
m_drone_files = glob.glob(m_drone_path)
f_drone_files = glob.glob(f_drone_path)
background_files = glob.glob(background_path)

In [3]:
CHUNK_SIZE = 8192
SR = 22050
N_MFCC = 16

In [4]:
def load(files, sr=22050):
    [raw, sr] = librosa.load(files[0], sr=sr)
    for f in files[1:]:
        [array, sr] = librosa.load(f, sr=sr)
        raw = np.hstack((raw, array))
    print(raw.shape)
    return raw

In [5]:
c_drone_raw = load(c_drone_files)
m_drone_raw = load(m_drone_files)
f_drone_raw = load(f_drone_files)
background_raw = load(background_files)

(2232320,)
(2232320,)
(2232320,)
(23317637,)


# Data Processing

In [6]:
def mfcc4(raw, label, chunk_size=8192, window_size=4096, sr=44100, n_mfcc=16, n_frame=16):
    mfcc = np.empty((0, n_mfcc, n_frame))
    y = []
    print(raw.shape)
    for i in range(0, len(raw), chunk_size//2):
        mfcc_slice = librosa.feature.mfcc(raw[i:i+chunk_size], sr=sr, n_mfcc=n_mfcc) #n_mfcc,17
        if mfcc_slice.shape[1] < 17:
            print("small end:", mfcc_slice.shape)
            continue
        mfcc_slice = mfcc_slice[:,:-1]
        mfcc_slice = mfcc_slice.reshape((1, mfcc_slice.shape[0], mfcc_slice.shape[1]))
        mfcc = np.vstack((mfcc, mfcc_slice))
        y.append(label)
    y = np.array(y)
    return mfcc, y

In [7]:
c_mfcc_drone, c_y_drone = mfcc4(c_drone_raw, 3)
m_mfcc_drone, m_y_drone = mfcc4(m_drone_raw, 2)
f_mfcc_drone, f_y_drone = mfcc4(f_drone_raw, 1)
mfcc_background, y_background = mfcc4(background_raw, 0)

print(c_mfcc_drone.shape, c_y_drone.shape)
print(m_mfcc_drone.shape, m_y_drone.shape)
print(f_mfcc_drone.shape, f_y_drone.shape)
print(mfcc_background.shape, y_background.shape)

(2232320,)
small end: (16, 9)
(2232320,)
small end: (16, 9)
(2232320,)
small end: (16, 9)
(23317637,)
small end: (16, 15)
small end: (16, 7)
(544, 16, 16) (544,)
(544, 16, 16) (544,)
(544, 16, 16) (544,)
(5691, 16, 16) (5691,)


In [8]:
X = np.concatenate((c_mfcc_drone,m_mfcc_drone,f_mfcc_drone, mfcc_background), axis=0)
#X = np.concatenate((mfcc_drone), axis=0)
#X = X.reshape(-1, 16,16,1)
y = np.hstack((c_y_drone, m_y_drone, f_y_drone, y_background))
#y = np.hstack(y_drone)
print(X.shape, y.shape)

(7323, 16, 16) (7323,)


In [9]:
n_labels = y.shape[0]
n_unique_labels = 4
y_encoded = np.zeros((n_labels, n_unique_labels))
y_encoded[np.arange(n_labels), y] = 1
print(y_encoded.shape)

(7323, 4)


In [10]:
# Split data
from sklearn import model_selection
X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y_encoded, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = model_selection.train_test_split(X_train, y_train, test_size=0.2, random_state=42)

In [11]:
print(X_train.shape, X_test.shape)
print(X_val.shape, y_val.shape)
print(y_train.shape, y_test.shape)

(4686, 16, 16) (1465, 16, 16)
(1172, 16, 16) (1172, 4)
(4686, 4) (1465, 4)


In [12]:
# Save Data
np.save('../../data/X_train_cnn', X_train)
np.save('../../data/X_test_cnn', X_test)
np.save('../../data/X_val_cnn', X_val)
np.save('../../data/y_val_cnn', y_val)
np.save('../../data/y_train_cnn', y_train)
np.save('../../data/y_test_cnn', y_test)

### Until this part

In [13]:
# Load Data
X_train = np.load('../../data/X_train_cnn.npy')
X_test = np.load('../../data/X_test_cnn.npy')
X_val = np.load('../../data/X_val_cnn.npy')
y_val = np.load('../../data/y_val_cnn.npy')
y_train = np.load('../../data/y_train_cnn.npy')
y_test = np.load('../../data/y_test_cnn.npy')

# Experiment 3 - One convolutional layer /w no dropout

##Experiment 3-2
- learning rate 0.005
- pooling stride 1x1
- #filter 1
- best result among every other settings
- cost kept fluctuated during training. (0.8 -> 1.3) -- why is that?

In [14]:
tf.reset_default_graph()

In [15]:
n_mfcc = 16
n_frame = 16
n_classes = 4
n_channels = 1

learning_rate = 0.0002  # 0.005
training_epochs = 500 # 수정해봐

# Layer

In [16]:
X = tf.placeholder(tf.float32, shape=[None,n_mfcc*n_frame*n_channels])
X = tf.reshape(X, [-1, n_mfcc, n_frame, n_channels])
Y = tf.placeholder(tf.float32, shape=[None,n_classes])

conv1 = tf.layers.conv2d(inputs=X, filters=1, kernel_size=[3, 3],
                         padding="SAME", activation=tf.nn.relu)
pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2],
                                padding="SAME", strides=1)
# dropout넣어야하나
conv2 = tf.layers.conv2d(inputs=pool1, filters=1, kernel_size=[3, 3],
                         padding="SAME", activation=tf.nn.relu)
pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2],
                                padding="SAME", strides=1)
# 여기도
flat = tf.reshape(pool2, [-1, 16*16*1])
dense2 = tf.layers.dense(inputs=flat, units=625, activation=tf.nn.relu)
logits = tf.layers.dense(inputs=dense2, units=4)

In [17]:
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

In [18]:
Y_pred = tf.contrib.layers.fully_connected(logits,n_classes,activation_fn = None)

In [19]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())

In [20]:
X_train2 = X_train.reshape(X_train.shape[0], X_train.shape[1], X_train.shape[2], 1)
X_test2 = X_test.reshape(X_test.shape[0], X_test.shape[1], X_test.shape[2], 1)
X_val2 = X_val.reshape(X_val.shape[0], X_val.shape[1], X_val.shape[2], 1)

In [21]:
# model save
model_path = '../../model/CNN/4_cnn_model'
saver = tf.train.Saver()

# Trainning

In [22]:
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
import itertools as it

In [23]:
###########################
batch_size = 32
cost_history = np.empty(shape=[1], dtype=float)
with tf.device("/gpu:0"):
    for epoch in range(training_epochs):#training epoch 500 / batch_size 128 --> acc 90%
        avg_cost = 0
        val_avg_cost =0
        total_batch = int(y_train.shape[0] / batch_size)
        for i in range(0, y_train.shape[0], batch_size):
            feed_dict={X:X_train2[i:i+batch_size,:,:,:], Y:y_train[i:i+batch_size,:]}
            c, _ = sess.run([cost, optimizer], feed_dict=feed_dict)
            cost_history = np.append(cost_history,cost)
            avg_cost += c/total_batch 

        y_pred = sess.run(logits, feed_dict={X:X_val2})
        y_pred = sess.run(tf.argmax(y_pred,1))
        y_true = y_val
        
        y_true = sess.run(tf.argmax(y_true,1))
        print(len(y_pred),end=' ')
        print('Epoch:', '%04d' % (epoch+1), 'cost = ', '{:.9f}'.format(avg_cost), 'val = ','%f' %(accuracy_score(y_true, y_pred)) )
saver.save(sess, model_path)

1172 Epoch: 0001 cost =  0.645359016 val =  0.843003
1172 Epoch: 0002 cost =  0.330484204 val =  0.866894
1172 Epoch: 0003 cost =  0.270281284 val =  0.885666
1172 Epoch: 0004 cost =  0.235654838 val =  0.899317
1172 Epoch: 0005 cost =  0.211244871 val =  0.901024
1172 Epoch: 0006 cost =  0.191498321 val =  0.909556
1172 Epoch: 0007 cost =  0.173883245 val =  0.918089
1172 Epoch: 0008 cost =  0.157816821 val =  0.920648
1172 Epoch: 0009 cost =  0.143317553 val =  0.922355
1172 Epoch: 0010 cost =  0.129876732 val =  0.924915
1172 Epoch: 0011 cost =  0.117594395 val =  0.926621
1172 Epoch: 0012 cost =  0.106180098 val =  0.925768
1172 Epoch: 0013 cost =  0.096601810 val =  0.925768
1172 Epoch: 0014 cost =  0.087357859 val =  0.925768
1172 Epoch: 0015 cost =  0.078715266 val =  0.924061
1172 Epoch: 0016 cost =  0.070786714 val =  0.925768
1172 Epoch: 0017 cost =  0.063208975 val =  0.927474
1172 Epoch: 0018 cost =  0.056369638 val =  0.927474
1172 Epoch: 0019 cost =  0.050169211 val =  0.

1172 Epoch: 0156 cost =  0.000000098 val =  0.937713
1172 Epoch: 0157 cost =  0.000000091 val =  0.937713
1172 Epoch: 0158 cost =  0.000000084 val =  0.937713
1172 Epoch: 0159 cost =  0.000000077 val =  0.937713
1172 Epoch: 0160 cost =  0.000000071 val =  0.937713
1172 Epoch: 0161 cost =  0.000000066 val =  0.938567
1172 Epoch: 0162 cost =  0.000000060 val =  0.939420
1172 Epoch: 0163 cost =  0.000000056 val =  0.937713
1172 Epoch: 0164 cost =  0.000000052 val =  0.938567
1172 Epoch: 0165 cost =  0.000000048 val =  0.939420
1172 Epoch: 0166 cost =  0.000000043 val =  0.938567
1172 Epoch: 0167 cost =  0.000000040 val =  0.939420
1172 Epoch: 0168 cost =  0.000000037 val =  0.938567
1172 Epoch: 0169 cost =  0.000000034 val =  0.938567
1172 Epoch: 0170 cost =  0.000000032 val =  0.938567
1172 Epoch: 0171 cost =  0.000000029 val =  0.938567
1172 Epoch: 0172 cost =  0.000000027 val =  0.938567
1172 Epoch: 0173 cost =  0.000000025 val =  0.937713
1172 Epoch: 0174 cost =  0.000000023 val =  0.

1172 Epoch: 0311 cost =  0.000000047 val =  0.938567
1172 Epoch: 0312 cost =  0.000000044 val =  0.938567
1172 Epoch: 0313 cost =  0.000000042 val =  0.938567
1172 Epoch: 0314 cost =  0.000000040 val =  0.938567
1172 Epoch: 0315 cost =  0.000000038 val =  0.938567
1172 Epoch: 0316 cost =  0.000000036 val =  0.937713
1172 Epoch: 0317 cost =  0.000000034 val =  0.937713
1172 Epoch: 0318 cost =  0.000000032 val =  0.937713
1172 Epoch: 0319 cost =  0.000000031 val =  0.937713
1172 Epoch: 0320 cost =  0.000000029 val =  0.937713
1172 Epoch: 0321 cost =  0.000000027 val =  0.937713
1172 Epoch: 0322 cost =  0.000000026 val =  0.937713
1172 Epoch: 0323 cost =  0.000000025 val =  0.937713
1172 Epoch: 0324 cost =  0.000000023 val =  0.937713
1172 Epoch: 0325 cost =  0.000000022 val =  0.937713
1172 Epoch: 0326 cost =  0.000000020 val =  0.937713
1172 Epoch: 0327 cost =  0.000000019 val =  0.938567
1172 Epoch: 0328 cost =  0.000000018 val =  0.939420
1172 Epoch: 0329 cost =  0.000000017 val =  0.

1172 Epoch: 0466 cost =  0.000000057 val =  0.937713
1172 Epoch: 0467 cost =  0.000000054 val =  0.937713
1172 Epoch: 0468 cost =  0.000000052 val =  0.937713
1172 Epoch: 0469 cost =  0.000000050 val =  0.937713
1172 Epoch: 0470 cost =  0.000000047 val =  0.937713
1172 Epoch: 0471 cost =  0.000000045 val =  0.937713
1172 Epoch: 0472 cost =  0.000000043 val =  0.937713
1172 Epoch: 0473 cost =  0.000000041 val =  0.937713
1172 Epoch: 0474 cost =  0.000000039 val =  0.937713
1172 Epoch: 0475 cost =  0.000000038 val =  0.937713
1172 Epoch: 0476 cost =  0.000000036 val =  0.937713
1172 Epoch: 0477 cost =  0.000000034 val =  0.937713
1172 Epoch: 0478 cost =  0.000000033 val =  0.937713
1172 Epoch: 0479 cost =  0.000000031 val =  0.937713
1172 Epoch: 0480 cost =  0.000000030 val =  0.937713
1172 Epoch: 0481 cost =  0.000000028 val =  0.937713
1172 Epoch: 0482 cost =  0.000000027 val =  0.937713
1172 Epoch: 0483 cost =  0.000000026 val =  0.937713
1172 Epoch: 0484 cost =  0.000000025 val =  0.

'../../model/CNN/4_cnn_model'

## Prediction

In [24]:
y_pred = sess.run(tf.argmax(logits,1),feed_dict={X: X_test2})
y_true = sess.run(tf.argmax(y_test,1))

In [25]:
# Print Result

from sklearn.metrics import precision_recall_fscore_support
p,r,f,s = precision_recall_fscore_support(y_true, y_pred, average='micro')
print("F-Score:", round(f,3))
from sklearn.metrics import accuracy_score
print("Accuracy: ", accuracy_score(y_true, y_pred))

from sklearn.metrics import classification_report
print(classification_report(y_true, y_pred))
from sklearn.metrics import confusion_matrix
print(confusion_matrix(y_true, y_pred))

F-Score: 0.946
Accuracy:  0.9460750853242321
              precision    recall  f1-score   support

           0       0.98      0.99      0.98      1113
           1       0.80      0.79      0.79       121
           2       0.75      0.60      0.67       103
           3       0.93      0.98      0.96       128

   micro avg       0.95      0.95      0.95      1465
   macro avg       0.86      0.84      0.85      1465
weighted avg       0.94      0.95      0.94      1465

[[1103    5    3    2]
 [   5   95   18    3]
 [  19   18   62    4]
 [   1    1    0  126]]
