In [1]:
import os
import glob
import numpy as np
from tqdm import tqdm
import librosa
import pandas as pd
from sklearn.utils import shuffle
from keras.utils import np_utils
import time

from keras.models import Model
from keras.layers import Conv1D, MaxPooling1D, Input, Conv2D, MaxPooling2D
from keras.models import Sequential
from keras.layers import Dense, Bidirectional, LSTM, BatchNormalization, Dropout, Flatten, Embedding
from keras.optimizers import SGD
from sklearn.metrics import confusion_matrix

def save_model(model_type):
    """Build a timestamped HDF5 filename for a model checkpoint.

    Despite the name, nothing is written to disk here; the function only
    returns a filename of the form
    ``<Weekday>_<Mon>_<day>_<HH-MM-SS>_<year>_<model_type>_.h5``.

    Args:
        model_type: Short tag identifying the model; interpolated with
            ``str.format``.

    Returns:
        str: The filename. The timestamp part contains no spaces or colons.
    """
    # `time.ctime()` pads single-digit days with an extra space
    # ("Sat Jun  6 ..."). A bare `split()` collapses whitespace runs, so the
    # name never contains an empty token (which showed up as "__").
    stamp = "_".join(time.ctime().split()).replace(":", "-")
    return "{}_{}_.h5".format(stamp, model_type)

Using TensorFlow backend.


In [2]:
# Directory containing the raw training clips (note the trailing slash).
files = 'audio_train/'
# Build the glob pattern from `files` so the directory is spelled out once.
all_files = glob.glob(os.path.join(files, '*.wav'))
print(len(all_files))
# Preview the first ten paths; `glob` returns them in arbitrary order.
all_files[:10]

9473


['audio_train/e6949d46.wav',
 'audio_train/e2e24e19.wav',
 'audio_train/121749b9.wav',
 'audio_train/03e88255.wav',
 'audio_train/497e8e00.wav',
 'audio_train/b4ef7f66.wav',
 'audio_train/113c5e2f.wav',
 'audio_train/973bef4c.wav',
 'audio_train/384f43cf.wav',
 'audio_train/673defaf.wav']

In [3]:
# Read the clip metadata and keep only the rows flagged as manually verified.
df_train_info = pd.read_csv("train.csv")
verified = df_train_info.loc[df_train_info['manually_verified'] == 1]
print(f'verified shape:{verified.shape}')
# Preview the first verified rows as the cell's displayed value.
verified.head()

verified shape:(3710, 3)


Unnamed: 0,fname,label,manually_verified
1,001ca53d.wav,Saxophone,1
3,0033e230.wav,Glockenspiel,1
4,00353774.wav,Cello,1
6,003da8e5.wav,Knock,1
7,0048fd00.wav,Gunshot_or_gunfire,1


In [4]:
# Plain Python list of the verified clips' file names.
files_to_load = verified['fname'].tolist()
print(len(files_to_load))
# Spot-check a single entry.
files_to_load[1]

3710


'0033e230.wav'

In [5]:
# Map each verified clip's file name (e.g. '001ca53d.wav') to its label.
mydict = dict(zip(verified.fname, verified.label))
# Sort the paths so the row order of `cleanup` (and everything derived from
# it, such as the label ids and the seeded train/valid sampling) does not
# depend on the arbitrary order in which `glob` lists the directory.
npy_files = sorted(glob.glob('single_label_npy2/*.npy'))

records = []
for f in tqdm(npy_files):
    data = np.load(f)
    # Clip id = file name up to its first dot; os.path.basename strips the
    # directory using whichever separator the platform uses.
    songid = os.path.basename(f).split(".")[0]
    genre = mydict.get(songid + '.wav')
    if genre is None:
        # Without this check the string concatenation below fails with an
        # opaque TypeError that does not name the offending file.
        raise KeyError(
            "no label in `verified` for feature file {!r} "
            "(looked up {!r})".format(f, songid + '.wav')
        )
    records.append({
        # Stored transposed relative to the array saved on disk.
        'data': data.T,
        'genre': genre,
        'songid': songid,
        'unique_song': songid + "," + genre
    })

# One row per feature file; `shape` records each transposed array's shape.
cleanup = pd.DataFrame.from_records(records)
cleanup['shape'] = cleanup['data'].map(lambda x: x.shape)

100%|██████████| 3709/3709 [00:03<00:00, 1129.02it/s]


In [6]:
# Confirm the assembled frame's dimensions, then preview its first rows.
print(cleanup.shape)
cleanup.head()

(3709, 5)


Unnamed: 0,data,genre,songid,unique_song,shape
0,"[[0.00033258431907888216, 0.000255535323233157...",Saxophone,001ca53d,"001ca53d,Saxophone","(432, 128)"
1,"[[0.00013988338396967164, 2.3834661453106825e-...",Glockenspiel,0033e230,"0033e230,Glockenspiel","(164, 128)"
2,"[[0.10736758412714446, 0.05325495966526957, 0....",Cello,00353774,"00353774,Cello","(195, 128)"
3,"[[6.98744115673468e-05, 4.795149790012115e-05,...",Knock,003da8e5,"003da8e5,Knock","(28, 128)"
4,"[[0.0013208101030349694, 0.0005771960559312855...",Gunshot_or_gunfire,0048fd00,"0048fd00,Gunshot_or_gunfire","(45, 128)"


In [7]:
def get_subarrays(row, size=3):
    """Split an array into consecutive chunks along its first axis.

    Args:
        row: Array (or other sliceable sequence) to split; it is sliced
            along its first axis.
        size: Number of first-axis entries per chunk. Defaults to 3, the
            value that was previously hard-coded. Must be positive.

    Returns:
        list: The slices ``row[0:size]``, ``row[size:2*size]``, ... in order.
        The last chunk is shorter when ``len(row)`` is not a multiple of
        ``size``; an empty input yields an empty list.

    Raises:
        ValueError: If ``size`` is smaller than 1.
    """
    if size < 1:
        raise ValueError("size must be a positive integer, got {!r}".format(size))
    return [row[start:start + size] for start in range(0, len(row), size)]

# Attach to every clip the list of chunks that get_subarrays cuts from its
# 'data' array (this adds a column to `cleanup` in place).
cleanup['batches'] = cleanup['data'].map(get_subarrays)

In [8]:
# Give every distinct label an integer id, numbered in order of first
# appearance in `cleanup`.
genres = {
    label: code
    for code, label in enumerate(cleanup['genre'].unique())
}
print(f'number of genres:{len(genres)}')
# Show the full label -> id mapping.
genres

number of genres:41


{'Saxophone': 0,
 'Glockenspiel': 1,
 'Cello': 2,
 'Knock': 3,
 'Gunshot_or_gunfire': 4,
 'Hi-hat': 5,
 'Laughter': 6,
 'Flute': 7,
 'Telephone': 8,
 'Bark': 9,
 'Scissors': 10,
 'Gong': 11,
 'Microwave_oven': 12,
 'Shatter': 13,
 'Harmonica': 14,
 'Bass_drum': 15,
 'Oboe': 16,
 'Bus': 17,
 'Tambourine': 18,
 'Keys_jangling': 19,
 'Electric_piano': 20,
 'Clarinet': 21,
 'Fireworks': 22,
 'Meow': 23,
 'Double_bass': 24,
 'Cough': 25,
 'Acoustic_guitar': 26,
 'Violin_or_fiddle': 27,
 'Snare_drum': 28,
 'Squeak': 29,
 'Finger_snapping': 30,
 'Writing': 31,
 'Trumpet': 32,
 'Drawer_open_or_close': 33,
 'Cowbell': 34,
 'Tearing': 35,
 'Fart': 36,
 'Chime': 37,
 'Burping_or_eructation': 38,
 'Computer_keyboard': 39,
 'Applause': 40}

In [9]:
# Per-label split: draw 80% of each label's clips for training (fixed seed)
# and validate on every clip that was not drawn.
per_genre = cleanup.groupby('genre', group_keys=False)
train_df = per_genre.apply(lambda grp: grp.sample(frac=0.8, random_state=1))
train_songs = train_df['unique_song'].tolist()
valid_df = cleanup[~cleanup['unique_song'].isin(train_songs)]
print(train_df.shape)
print(valid_df.shape)

(2970, 6)
(739, 6)


In [10]:
def get_features_labels(df, frame_shape=(3, 128)):
    """Flatten per-clip chunks into a feature array and one-hot labels.

    Reads the module-level ``genres`` mapping (label -> integer id).

    Args:
        df: DataFrame with a 'batches' column (list of arrays per row) and a
            'genre' column (label present in ``genres``).
        frame_shape: Shape a chunk must have to be kept. Defaults to
            ``(3, 128)``, the value that was previously hard-coded.

    Returns:
        tuple: ``(features, labels)`` where ``features`` stacks every kept
        chunk along a new first axis and ``labels`` is the matching one-hot
        matrix with ``len(genres)`` columns.

    Raises:
        KeyError: If a row's genre is missing from ``genres``.
    """
    frame_shape = tuple(frame_shape)
    xs = []
    ys = []

    # Walk the two needed columns directly; `iterrows` would build a full
    # Series for every row just to read these two fields.
    for batched_arrs, genre in zip(df['batches'], df['genre']):
        genre_id = genres[genre]

        for array_set in batched_arrs:
            # Chunks of any other shape (e.g. a short final chunk) are
            # dropped so that all kept chunks can be stacked.
            if array_set.shape == frame_shape:
                xs.append(array_set)
                ys.append(genre_id)

    features = np.asarray(xs)
    labels = np.asarray(ys)

    assert features.shape[0] == labels.shape[0]
    # Fix the one-hot width to the full label set. Left to infer it,
    # `to_categorical` uses max(label) + 1, so a split that happens to lack
    # the highest class id would get a narrower matrix than the other split.
    return features, np_utils.to_categorical(labels, num_classes=len(genres))

# Build chunk-level features and one-hot labels for both splits.
X_train, y_train = get_features_labels(train_df)
X_valid, y_valid = get_features_labels(valid_df)

# Shuffle features and labels in unison, with a fixed seed.
X_train, y_train = shuffle(X_train, y_train, random_state=42)
X_valid, y_valid = shuffle(X_valid, y_valid, random_state=42)

print(X_train.shape, y_train.shape, X_valid.shape, y_valid.shape)

# Number of chunks per class id, recovered from the one-hot rows.
print(np.bincount(y_train.argmax(axis=1)))
print(np.bincount(y_valid.argmax(axis=1)))

(191550, 3, 128) (191550, 41) (45339, 3, 128) (45339, 41)
[17718  1796  7221  1725  6835  1891  4894 10077  7979  3425  4094  3964
  2426  1798  5546   630  3165 10722  1335  2671  8985  9029  3482  2188
  2511  1868  7599 12882  2390  2504   845  5230  5705  2390   603  2018
  1556  6278  1141  6876  5558]
[5267  390 2378  457 1148  610 1547 2689 2208  444 1016  900  459  407
 1315  111 1073 2569  199  507 2133 1997  852  505  903  355 1230 2522
  445  314  168  639 1070  474  307  685  369 1734  355 1310 1278]


In [11]:
from keras.callbacks import EarlyStopping 

In [12]:
# Append a trailing channel axis of size 1, the layout Conv2D is given below.
xt = X_train.reshape(X_train.shape + (1,))
xv = X_valid.reshape(X_valid.shape + (1,))

print(xt.shape)

model = Sequential()
# Only the first layer of a Sequential model needs `input_shape`; later
# layers infer theirs, so the repeated kwargs were dropped. The shape is
# taken from the data instead of being retyped.
model.add(Conv2D(16, (3, 3), activation='relu', padding='same',
                 input_shape=xt.shape[1:]))
model.add(Conv2D(32, (3, 3), activation='relu', padding='same'))
# NOTE(review): 54 filters breaks the 16/32/.../128 progression and may have
# been meant as 64; left unchanged to keep the architecture as trained.
model.add(Conv2D(54, (3, 3), activation='relu', padding='same'))
model.add(Conv2D(128, (3, 3), activation='relu', padding='same'))
model.add(MaxPooling2D((2, 2)))
model.add(Dropout(0.5))

model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(64, activation='relu'))
# One softmax unit per label column, so the output width always matches
# the one-hot targets.
model.add(Dense(y_train.shape[1], activation='softmax'))
model.summary()

(191550, 3, 128, 1)
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 3, 128, 16)        160       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 3, 128, 32)        4640      
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 3, 128, 54)        15606     
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 3, 128, 128)       62336     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 1, 64, 128)        0         
_________________________________________________________________
d

In [13]:
# Early stopping is available but disabled. To enable it, create
# EarlyStopping(monitor='val_loss', mode='min', verbose=1) and pass it to
# `fit` through `callbacks=[...]`.
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

print('Train...')
# `epochs` replaces `nb_epoch`, the Keras 1 spelling: Keras 2 only accepts
# the old name with a deprecation warning and later releases reject it.
# shuffle=False keeps the order produced by the seeded shuffle done when
# the arrays were built.
model.fit(xt, y_train,
          batch_size=64,
          epochs=15,
          validation_data=(xv, y_valid),
          verbose=1,
          shuffle=False)

Train...
Instructions for updating:
Use tf.cast instead.


  if __name__ == '__main__':


Train on 191550 samples, validate on 45339 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x7f0dc4b0ea58>

In [123]:
# `Model.save` writes the file and returns None, so the target path gets its
# own variable instead of binding a name to the return value.
model_path = save_model('2dcnn_spec2')
model.save(model_path)
# NOTE(review): `my_model` used to be bound to the None returned by
# `Model.save`. It now refers to the trained model so the name stays defined
# and usable; confirm that nothing later relies on it being None.
my_model = model