In [1]:
import matplotlib.pyplot as plt
%matplotlib inline
import IPython.display
import librosa.display
import numpy as np
import librosa
import tensorflow as tf
import glob

### If you have already saved the data, you can skip the preprocessing and saving cells below

In [2]:
# Glob patterns for the recordings of each class: one folder per drone
# distance (1m ... 50m) plus one folder of background recordings.
drone_path_1 = '../../data/1m/*.wav'
drone_path_10 = '../../data/10m/*.wav'
drone_path_20 = '../../data/20m/*.wav'
drone_path_30 = '../../data/30m/*.wav'
drone_path_40 = '../../data/40m/*.wav'
drone_path_50 = '../../data/50m/*.wav'
background_path = '../../data/background/*.wav'

# glob.glob returns matches in arbitrary, filesystem-dependent order. The files
# are later concatenated into one waveform and cut into chunks, so the lists are
# sorted to make chunk boundaries (and the seeded data split) reproducible.
drone_files_1 = sorted(glob.glob(drone_path_1))
drone_files_10 = sorted(glob.glob(drone_path_10))
drone_files_20 = sorted(glob.glob(drone_path_20))
drone_files_30 = sorted(glob.glob(drone_path_30))
drone_files_40 = sorted(glob.glob(drone_path_40))
drone_files_50 = sorted(glob.glob(drone_path_50))
background_files = sorted(glob.glob(background_path))

In [3]:
# Shared audio settings; the values match the keyword defaults of the
# loading / MFCC helper functions defined in this notebook.
CHUNK_SIZE, SR, N_MFCC = (
    8192,   # chunk length
    22050,  # sampling rate
    16,     # number of MFCC coefficients
)

In [4]:
def load(files, sr=22050):
    """Load several audio files and join them into one waveform.

    Args:
        files: Non-empty sequence of paths accepted by ``librosa.load``.
        sr: Sampling rate passed to ``librosa.load``. The rate returned for one
            file is passed on to the next call, as in the original loop.

    Returns:
        The samples of all files joined with ``np.hstack``, in the order given.
        The resulting shape is printed before returning.

    Raises:
        ValueError: If ``files`` is empty (typically a glob pattern that
            matched nothing), instead of an opaque ``IndexError``.
    """
    if len(files) == 0:
        raise ValueError("load() received no files; check the glob pattern / data path")

    # Collect every file first and join once: calling np.hstack inside the loop
    # re-copies the whole accumulated waveform for each additional file.
    segments = []
    for f in files:
        array, sr = librosa.load(f, sr=sr)
        segments.append(array)
    raw = np.hstack(segments)
    print(raw.shape)
    return raw

In [5]:
# One concatenated waveform per drone-distance class, loaded in the same order
# as before (1m, 10m, 20m, 30m, 40m, 50m).
(
    drone_raw_1,
    drone_raw_10,
    drone_raw_20,
    drone_raw_30,
    drone_raw_40,
    drone_raw_50,
) = [
    load(file_list)
    for file_list in (
        drone_files_1,
        drone_files_10,
        drone_files_20,
        drone_files_30,
        drone_files_40,
        drone_files_50,
    )
]

# The background recordings are loaded last.
background_raw = load(background_files)

(2088960,)
(2088960,)
(2088960,)
(2072565,)
(2085722,)
(2723840,)
(6032517,)


# Data preprocessing

The `mfcc4` function was chosen from the candidate functions for feature extraction:
- input size 16x16; `n_mfcc=16`, using the first 16 frames of the 16x17 MFCC matrix computed from a chunk of 8192 samples
- chunk size 8192, with consecutive chunks overlapping by half

In [6]:
def mfcc4(raw, label, chunk_size=8192, window_size=4096, sr=22050, n_mfcc=16, n_frame=16):
    """Cut a waveform into half-overlapping chunks and compute MFCCs per chunk.

    Chunks start every ``chunk_size // 2`` samples. A chunk is kept only if its
    MFCC matrix has at least ``n_frame + 1`` frames (17 with the defaults, as in
    the original hard-coded check); the first ``n_frame`` frames are kept. With
    the defaults this is identical to dropping the last of 17 frames. Shorter
    trailing chunks are reported with a "small end:" message and skipped.

    Args:
        raw: 1-D waveform to slice.
        label: Class label stored once per kept chunk.
        chunk_size: Number of samples per chunk.
        window_size: Unused; kept so existing calls remain valid.
        sr: Sampling rate forwarded to ``librosa.feature.mfcc``.
        n_mfcc: Number of MFCC coefficients forwarded to ``librosa.feature.mfcc``.
        n_frame: Number of frames kept per chunk.

    Returns:
        ``(mfcc, y)``: ``mfcc`` is a float64 array of shape
        ``(n_chunks, n_mfcc, n_frame)`` and ``y`` is an array containing
        ``label`` repeated ``n_chunks`` times.
    """
    hop = chunk_size // 2
    min_frames = n_frame + 1
    slices = []
    print(raw.shape)
    for i in range(0, len(raw), hop):
        # Pass the signal by keyword: newer librosa releases reject positional audio input.
        mfcc_slice = librosa.feature.mfcc(y=raw[i:i+chunk_size], sr=sr, n_mfcc=n_mfcc)
        if mfcc_slice.shape[1] < min_frames:
            print("small end:", mfcc_slice.shape)
            continue
        slices.append(mfcc_slice[:, :n_frame])

    if slices:
        # Stack once instead of np.vstack per chunk (which re-copies everything so far).
        # Cast to float64 to match the dtype the original accumulator produced.
        mfcc = np.stack(slices).astype(np.float64)
    else:
        mfcc = np.empty((0, n_mfcc, n_frame))
    y = np.array([label] * len(slices))
    return mfcc, y

In [7]:
# Pair each drone waveform with its integer class id (0-5, by distance).
labelled_waveforms = (
    (drone_raw_1, 0),
    (drone_raw_10, 1),
    (drone_raw_20, 2),
    (drone_raw_30, 3),
    (drone_raw_40, 4),
    (drone_raw_50, 5),
)

# Extract features class by class, in the same order as before.
(
    (mfcc_drone_1, y_drone_1),
    (mfcc_drone_10, y_drone_10),
    (mfcc_drone_20, y_drone_20),
    (mfcc_drone_30, y_drone_30),
    (mfcc_drone_40, y_drone_40),
    (mfcc_drone_50, y_drone_50),
) = [mfcc4(waveform, class_id) for waveform, class_id in labelled_waveforms]

# Background recordings form class 6.
mfcc_background, y_background = mfcc4(background_raw, 6)

(2088960,)
small end: (16, 9)
(2088960,)
small end: (16, 9)
(2088960,)
small end: (16, 9)
(2072565,)
small end: (16, 16)
small end: (16, 8)
(2085722,)
small end: (16, 10)
small end: (16, 2)
(2723840,)
small end: (16, 9)
(6032517,)
small end: (16, 15)
small end: (16, 7)


In [8]:
# Show the feature / label shapes of every class, one line per class.
for features, labels in (
    (mfcc_drone_1, y_drone_1),
    (mfcc_drone_10, y_drone_10),
    (mfcc_drone_20, y_drone_20),
    (mfcc_drone_30, y_drone_30),
    (mfcc_drone_40, y_drone_40),
    (mfcc_drone_50, y_drone_50),
    (mfcc_background, y_background),
):
    print(features.shape, labels.shape)

(509, 16, 16) (509,)
(509, 16, 16) (509,)
(509, 16, 16) (509,)
(504, 16, 16) (504,)
(508, 16, 16) (508,)
(664, 16, 16) (664,)
(1471, 16, 16) (1471,)


In [9]:
# Stack all classes into a single feature array and a single label vector,
# keeping the class order 1m, 10m, 20m, 30m, 40m, 50m, background.
feature_sets = [
    mfcc_drone_1, mfcc_drone_10, mfcc_drone_20,
    mfcc_drone_30, mfcc_drone_40, mfcc_drone_50,
    mfcc_background,
]
label_sets = [
    y_drone_1, y_drone_10, y_drone_20,
    y_drone_30, y_drone_40, y_drone_50,
    y_background,
]
X = np.vstack(feature_sets)
y = np.concatenate(label_sets)
print(X.shape, y.shape)

(4674, 16, 16) (4674,)


In [10]:
# Flatten every example to a single row; the number of examples is unchanged.
X = X.reshape(X.shape[0], -1)

In [11]:
# Display the shape of the flattened feature matrix as a sanity check.
X.shape

(4674, 256)

In [12]:
# One-hot encode the integer class labels: row i holds a 1 in column y[i].
n_unique_labels = 7
n_labels = len(y)
y_encoded = np.eye(n_unique_labels)[y]
print(y_encoded.shape)

(4674, 7)


In [13]:
# Split data
from sklearn import model_selection

# Both splits hold out 20% and use the same fixed seed.
split_options = dict(test_size=0.2, random_state=42)

# First split: hold out the test set.
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y_encoded, **split_options
)
# Second split: carve the validation set out of the remaining training data.
X_train, X_val, y_train, y_val = model_selection.train_test_split(
    X_train, y_train, **split_options
)

In [14]:
print(X_train.shape,y_train.shape)
print(X_val.shape, y_val.shape)
print(X_test.shape, y_test.shape)

(2991, 256) (2991, 7)
(748, 256) (748, 7)
(935, 256) (935, 7)


In [15]:
# Save Data
# Write every split to disk (np.save appends the .npy extension).
for target_path, array in (
    ('../../model/X_train', X_train),
    ('../../model/X_test', X_test),
    ('../../model/X_val', X_val),
    ('../../model/y_val', y_val),
    ('../../model/y_train', y_train),
    ('../../model/y_test', y_test),
):
    np.save(target_path, array)

### Skip up to this point if the data has already been saved

In [16]:
# Load Data
# Read the previously saved splits back from disk, in the same order as before.
(
    X_train,
    X_test,
    X_val,
    y_val,
    y_train,
    y_test,
) = [
    np.load(source_path)
    for source_path in (
        '../../model/X_train.npy',
        '../../model/X_test.npy',
        '../../model/X_val.npy',
        '../../model/y_val.npy',
        '../../model/y_train.npy',
        '../../model/y_test.npy',
    )
]

# Experiment 3 - One convolutional layer with no dropout


## Experiment 3-2
- learning rate 0.005
- pooling stride 1x1
- number of filters: 1
- best result among all the settings tried
- cost kept fluctuating during training (0.8 -> 1.3) -- why is that?

In [17]:
# Clear the default TensorFlow graph so that re-running the cells below builds
# the model from scratch instead of adding duplicate ops to an existing graph.
tf.reset_default_graph()

In [18]:
# Input / output sizes: the input placeholder holds n_mfcc * n_frame * n_channels
# values per example and the target placeholder holds n_classes values.
n_mfcc, n_frame, n_channels = 16, 16, 1
n_classes = 7

# Presumably convolution settings from an earlier experiment; they are not
# referenced by the code visible in this notebook.
kernel_size, stride, pad = 3, 1, "SAME"

# Optimisation settings.
learning_rate = 0.0002  # an earlier value was 0.005
training_epochs = 500  # TODO: needs to be revised

# Layer

## Experiment
- learning rate 0.0002
- no CNN / pooling layers
- only dense layers with dropout

In [19]:
# Inputs: X receives flattened feature vectors of n_mfcc*n_frame*n_channels
# values per example; Y receives n_classes target values per example.
X = tf.placeholder(tf.float32, shape=[None,n_mfcc*n_frame*n_channels])
Y = tf.placeholder(tf.float32, shape=[None,n_classes])

# Dropout keep probability, supplied through feed_dict at run time.
keep_prob = tf.placeholder(tf.float32)

# Five ReLU dense layers (256, 256, 512, 512, 256 units), each followed by dropout.
dense1 = tf.layers.dense(inputs=X, units=256, activation=tf.nn.relu)
dropout1 = tf.nn.dropout(dense1, keep_prob=keep_prob)
dense2 = tf.layers.dense(inputs=dropout1, units=256, activation=tf.nn.relu)
dropout2 = tf.nn.dropout(dense2, keep_prob=keep_prob)
dense3 = tf.layers.dense(inputs=dropout2, units=512, activation=tf.nn.relu)
dropout3 = tf.nn.dropout(dense3, keep_prob=keep_prob)
dense4 = tf.layers.dense(inputs=dropout3, units=512, activation=tf.nn.relu)
dropout4 = tf.nn.dropout(dense4, keep_prob=keep_prob)
dense5 = tf.layers.dense(inputs=dropout4, units=256, activation=tf.nn.relu)
dropout5 = tf.nn.dropout(dense5, keep_prob=keep_prob)

# Output layer without activation (raw class scores).
# NOTE(review): units=7 duplicates n_classes (also 7); consider using n_classes.
logits= tf.layers.dense(inputs=dropout5, units=7)

In [20]:
# Loss: softmax cross-entropy between the logits and the targets in Y,
# averaged into a single scalar.
per_example_loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y, logits=logits)
cost = tf.reduce_mean(per_example_loss)

# Training op: one Adam update that minimises the loss.
adam = tf.train.AdamOptimizer(learning_rate=learning_rate)
optimizer = adam.minimize(cost)

In [21]:
# Session with soft placement enabled, so an op that cannot run on its
# requested device is placed on an available one instead of failing.
config = tf.ConfigProto(allow_soft_placement=True)
sess = tf.Session(config=config)

# Initialise all graph variables before training starts.
init_op = tf.global_variables_initializer()
sess.run(init_op)

In [22]:
# model save
# Checkpoint path prefix and the Saver object used to write the trained model.
model_path = '../../model/DNN/7_dense_model'
saver = tf.train.Saver()

# Training

In [23]:
import math

In [24]:
# Accuracy ops for evaluation.
with tf.name_scope("eval"):
    # in_top_k needs integer class ids as targets. They are derived from the
    # placeholder Y, so accuracy follows whatever batch is fed. The previous
    # code passed `y`, the NumPy label array of the entire dataset, which is
    # undefined when the preprocessing cells are skipped and never matches
    # the size of a fed batch.
    true_class = tf.argmax(Y, 1)
    correct = tf.nn.in_top_k(logits, true_class, 1)
    # Fraction of examples whose top-scoring class equals the target class.
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

In [25]:
from sklearn.metrics import accuracy_score

In [26]:
#original 
batch_size = 32
cost_history = np.empty(shape=[1], dtype=float)
with tf.device("/gpu:0"):
    for epoch in range(1,500):
        avg_cost = 0    
#    total_batch = int(X_train.shape[0] / batch_size)
        for i in range( int(math.ceil(len(X_train)/batch_size)) ): # 배치 사이즈로 나눈 나머지 다 돌 수 있게 
            x_ = X_train[batch_size * i: batch_size * (i + 1)]
            y_ = y_train[batch_size * i: batch_size * (i + 1)]
            feed_dict={X:x_, Y:y_, keep_prob:0.6}    
            c, _ = sess.run([cost, optimizer], feed_dict=feed_dict)
            cost_history = np.append(cost_history,cost)
            avg_cost += c
            #accuracy_val = sess.run([accuracy], feed_dict={X:X_val, Y:y_val})
        val_pred = sess.run(tf.argmax(logits,1),feed_dict={X: X_val , keep_prob:1})  
        val_true = sess.run(tf.argmax(y_val,1))
        accuracy_val = accuracy_score(val_pred, val_true)
        print('Epoch:', '%04d' % (epoch), 'cost = ', '{:.9f}'.format(avg_cost / len(X_train)), "\t검증 세트 정확도: {:.3f}%".format(accuracy_val * 100),)
saver.save(sess, model_path)

Epoch: 0001 cost =  1.090883516 	검증 세트 정확도: 25.802%
Epoch: 0002 cost =  0.292768560 	검증 세트 정확도: 37.968%
Epoch: 0003 cost =  0.131394966 	검증 세트 정확도: 39.572%
Epoch: 0004 cost =  0.085053965 	검증 세트 정확도: 51.070%
Epoch: 0005 cost =  0.066055403 	검증 세트 정확도: 44.251%
Epoch: 0006 cost =  0.057231019 	검증 세트 정확도: 55.080%
Epoch: 0007 cost =  0.052465839 	검증 세트 정확도: 53.075%
Epoch: 0008 cost =  0.048578476 	검증 세트 정확도: 49.866%
Epoch: 0009 cost =  0.046006017 	검증 세트 정확도: 49.733%
Epoch: 0010 cost =  0.043829467 	검증 세트 정확도: 57.353%
Epoch: 0011 cost =  0.042128849 	검증 세트 정확도: 55.615%
Epoch: 0012 cost =  0.040461005 	검증 세트 정확도: 55.481%
Epoch: 0013 cost =  0.039251292 	검증 세트 정확도: 58.021%
Epoch: 0014 cost =  0.038224727 	검증 세트 정확도: 57.219%
Epoch: 0015 cost =  0.036704645 	검증 세트 정확도: 60.695%
Epoch: 0016 cost =  0.036136555 	검증 세트 정확도: 58.690%
Epoch: 0017 cost =  0.035350478 	검증 세트 정확도: 62.166%
Epoch: 0018 cost =  0.034621868 	검증 세트 정확도: 62.834%
Epoch: 0019 cost =  0.033596163 	검증 세트 정확도: 64.973%
Epoch: 0020 

Epoch: 0159 cost =  0.011857903 	검증 세트 정확도: 84.626%
Epoch: 0160 cost =  0.011795096 	검증 세트 정확도: 83.155%
Epoch: 0161 cost =  0.012230368 	검증 세트 정확도: 83.289%
Epoch: 0162 cost =  0.011599896 	검증 세트 정확도: 83.690%
Epoch: 0163 cost =  0.010789585 	검증 세트 정확도: 83.422%
Epoch: 0164 cost =  0.011517830 	검증 세트 정확도: 84.225%
Epoch: 0165 cost =  0.010541883 	검증 세트 정확도: 85.294%
Epoch: 0166 cost =  0.011526632 	검증 세트 정확도: 81.684%
Epoch: 0167 cost =  0.011913046 	검증 세트 정확도: 82.487%
Epoch: 0168 cost =  0.010915761 	검증 세트 정확도: 82.487%
Epoch: 0169 cost =  0.010717176 	검증 세트 정확도: 84.358%
Epoch: 0170 cost =  0.011216213 	검증 세트 정확도: 82.754%
Epoch: 0171 cost =  0.010205921 	검증 세트 정확도: 82.620%
Epoch: 0172 cost =  0.011207235 	검증 세트 정확도: 85.027%
Epoch: 0173 cost =  0.010580567 	검증 세트 정확도: 83.556%
Epoch: 0174 cost =  0.011498328 	검증 세트 정확도: 84.492%
Epoch: 0175 cost =  0.010626323 	검증 세트 정확도: 84.893%
Epoch: 0176 cost =  0.010954015 	검증 세트 정확도: 83.556%
Epoch: 0177 cost =  0.010066100 	검증 세트 정확도: 83.289%
Epoch: 0178 

Epoch: 0317 cost =  0.005419731 	검증 세트 정확도: 94.251%
Epoch: 0318 cost =  0.006121619 	검증 세트 정확도: 93.048%
Epoch: 0319 cost =  0.006080696 	검증 세트 정확도: 92.914%
Epoch: 0320 cost =  0.005583123 	검증 세트 정확도: 93.583%
Epoch: 0321 cost =  0.005950192 	검증 세트 정확도: 92.914%
Epoch: 0322 cost =  0.005620964 	검증 세트 정확도: 93.717%
Epoch: 0323 cost =  0.006267237 	검증 세트 정확도: 93.182%
Epoch: 0324 cost =  0.005384384 	검증 세트 정확도: 93.182%
Epoch: 0325 cost =  0.005340311 	검증 세트 정확도: 92.380%
Epoch: 0326 cost =  0.005838645 	검증 세트 정확도: 94.920%
Epoch: 0327 cost =  0.006403150 	검증 세트 정확도: 93.583%
Epoch: 0328 cost =  0.006107671 	검증 세트 정확도: 94.118%
Epoch: 0329 cost =  0.004987740 	검증 세트 정확도: 93.717%
Epoch: 0330 cost =  0.006008157 	검증 세트 정확도: 95.187%
Epoch: 0331 cost =  0.005460932 	검증 세트 정확도: 93.850%
Epoch: 0332 cost =  0.005044229 	검증 세트 정확도: 95.588%
Epoch: 0333 cost =  0.005596256 	검증 세트 정확도: 93.316%
Epoch: 0334 cost =  0.005810279 	검증 세트 정확도: 94.251%
Epoch: 0335 cost =  0.004829198 	검증 세트 정확도: 93.182%
Epoch: 0336 

Epoch: 0475 cost =  0.005126151 	검증 세트 정확도: 93.717%
Epoch: 0476 cost =  0.004945075 	검증 세트 정확도: 94.786%
Epoch: 0477 cost =  0.006374626 	검증 세트 정확도: 92.112%
Epoch: 0478 cost =  0.005338669 	검증 세트 정확도: 93.048%
Epoch: 0479 cost =  0.005045999 	검증 세트 정확도: 93.717%
Epoch: 0480 cost =  0.004979462 	검증 세트 정확도: 93.850%
Epoch: 0481 cost =  0.005965601 	검증 세트 정확도: 94.385%
Epoch: 0482 cost =  0.004702282 	검증 세트 정확도: 93.984%
Epoch: 0483 cost =  0.004472178 	검증 세트 정확도: 93.182%
Epoch: 0484 cost =  0.004450052 	검증 세트 정확도: 92.380%
Epoch: 0485 cost =  0.005781753 	검증 세트 정확도: 92.647%
Epoch: 0486 cost =  0.005148717 	검증 세트 정확도: 91.176%
Epoch: 0487 cost =  0.004296308 	검증 세트 정확도: 93.449%
Epoch: 0488 cost =  0.004577401 	검증 세트 정확도: 92.246%
Epoch: 0489 cost =  0.004460933 	검증 세트 정확도: 92.647%
Epoch: 0490 cost =  0.003842208 	검증 세트 정확도: 93.850%
Epoch: 0491 cost =  0.003726343 	검증 세트 정확도: 93.316%
Epoch: 0492 cost =  0.004625736 	검증 세트 정확도: 94.251%
Epoch: 0493 cost =  0.004880330 	검증 세트 정확도: 93.316%
Epoch: 0494 

'../../model/DNN/7_dense_model'

## Prediction

In [27]:
# Predicted class per test example: arg-max over the logits, with dropout
# disabled (keep_prob=1).
predicted_class_op = tf.argmax(logits, 1)
y_pred = sess.run(predicted_class_op, feed_dict={X: X_test, keep_prob: 1})

# True class per test example: arg-max over the one-hot test labels.
true_class_op = tf.argmax(y_test, 1)
y_true = sess.run(true_class_op)

In [28]:
# Print results: micro-averaged F-score, accuracy, per-class report and
# confusion matrix on the test set.
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    precision_recall_fscore_support,
)

p, r, f, s = precision_recall_fscore_support(y_true, y_pred, average='micro')
print("F-Score:", round(f, 3))
print("Accuracy: ", accuracy_score(y_true, y_pred))

print(classification_report(y_true, y_pred))
print(confusion_matrix(y_true, y_pred))

F-Score: 0.944
Accuracy:  0.9443850267379679
              precision    recall  f1-score   support

           0       0.99      0.97      0.98       117
           1       0.95      0.88      0.92        94
           2       0.75      0.95      0.84       113
           3       0.99      0.99      0.99       106
           4       0.99      1.00      0.99        88
           5       0.93      0.77      0.84       128
           6       0.99      1.00      0.99       289

   micro avg       0.94      0.94      0.94       935
   macro avg       0.94      0.94      0.94       935
weighted avg       0.95      0.94      0.94       935

[[113   0   1   0   0   3   0]
 [  0  83   8   0   0   3   0]
 [  0   2 107   0   0   2   2]
 [  0   0   0 105   1   0   0]
 [  0   0   0   0  88   0   0]
 [  1   2  26   0   0  99   0]
 [  0   0   0   1   0   0 288]]
