# Interface 2018/12 Snack sound classification

A classification task on the shaking sounds of 6 kinds of snack bags.

https://shop.cqpub.co.jp/hanbai/books/MIF/MIF201812.html

This notebook downloads, converts, and preprocesses the dataset.

In [1]:
# Put the directory two levels up on the import path so that `lib_train`
# can be imported (presumably that is where it lives -- TODO confirm).
import sys
sys.path.append('../..')
# NOTE(review): `conf`, `Path`, `np`, `mels_build_multiplexed_X` and
# `mels_demux_XX_y` are used by this notebook but never defined in it, so they
# presumably arrive through this star import -- confirm against lib_train.
from lib_train import *
%matplotlib inline

# Folder that holds the extracted dataset; globbed for .wav files further down.
DATAROOT = Path('Snack/data')

{'sampling_rate': 44100, 'duration': 2, 'hop_length': 694, 'fmin': 20, 'fmax': 22050, 'n_mels': 128, 'n_fft': 2560, 'model': 'mobilenetv2', 'labels': ['babystar', 'bbq', 'corn', 'kappaebi', 'potechi', 'vegetable'], 'folder': PosixPath('.'), 'n_fold': 1, 'valid_limit': None, 'random_state': 42, 'test_size': 0.2, 'samples_per_file': 1, 'batch_size': 32, 'learning_rate': 0.0001, 'metric_save_ckpt': 'val_acc', 'epochs': 100, 'verbose': 2, 'best_weight_file': 'best_model_weight.h5', 'rt_process_count': 1, 'rt_oversamples': 10, 'pred_ensembles': 10, 'runtime_model_file': None, 'label2int': {'babystar': 0, 'bbq': 1, 'corn': 2, 'kappaebi': 3, 'potechi': 4, 'vegetable': 5}, 'num_classes': 6, 'samples': 88200, 'rt_chunk_samples': 4410, 'mels_onestep_samples': 4410, 'mels_convert_samples': 92610, 'dims': [128, 128, 1], 'metric_save_mode': 'auto', 'logdir': 'logs', 'data_balancing': 'over_sampling', 'X_train': 'X_train.npy', 'y_train': 'y_train.npy', 'X_test': 'X_test.npy', 'y_test': 'y_test.npy',

Using TensorFlow backend.


## Prepare dataset

In [7]:
! wget http://www.cqpub.co.jp/interface/download/2018/12/Snack-data.zip

--2018-10-30 14:44:06--  http://www.cqpub.co.jp/interface/download/2018/12/Snack-data.zip
Resolving www.cqpub.co.jp (www.cqpub.co.jp)... 219.101.148.16
Connecting to www.cqpub.co.jp (www.cqpub.co.jp)|219.101.148.16|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1008569782 (962M) [application/x-zip-compressed]
Saving to: ‘Snack-data.zip’


2018-10-30 14:45:52 (9.08 MB/s) - ‘Snack-data.zip’ saved [1008569782/1008569782]



In [8]:
! unzip -q Snack-data.zip
! rm Snack-data.zip
! echo Listing sub folders:
! ls Snack/data
! echo Listing some of files:
! ls Snack/data/jack | head

Listing sub folders:
jack  laptop  smartphone  usb  voicerecorder
Listing some of files:
babystar_B_1_0000.wav
babystar_B_1_0001.wav
babystar_B_1_0002.wav
babystar_B_1_0003.wav
babystar_B_1_0004.wav
babystar_B_1_0005.wav
babystar_B_1_0006.wav
babystar_B_1_0007.wav
babystar_B_1_0008.wav
babystar_B_1_0009.wav
ls: write error: Broken pipe


In [2]:
# Collect every recording matching DATAROOT/<sub folder>/<name ending in a digit>.wav.
# The result is sorted because glob order depends on the filesystem; sorting
# keeps `files`, `X_files` and `y` in the same order on every machine and run.
files = sorted(DATAROOT.glob('*/*[0-9].wav'))
X_files = [str(f) for f in files]
# The class name is the part of the file name before the first underscore,
# e.g. 'babystar_B_1_0000.wav' -> 'babystar'.
y_labels = [f.name.split('_')[0] for f in files]
print('Classes', sorted(set(y_labels)))
# Integer class id per file, looked up in the label -> index table on `conf`.
# A label missing from that table raises KeyError.
y = [conf.label2int[label] for label in y_labels]

Classes ['babystar', 'bbq', 'corn', 'kappaebi', 'potechi', 'vegetable']


In [3]:
# Train
train_files = sorted([f for f in files if f.stem[-6] in '1'])
y_labels = [str(f.name).split('_')[0] for f in train_files]
y_train = [conf.label2int[y] for y in y_labels]
XX = mels_build_multiplexed_X(conf, train_files)
X_train, y_train = mels_demux_XX_y(XX, y_train)
np.save('X_train.npy', X_train)
np.save('y_train.npy', y_train)
X_train.shape, y_train.shape

((4799, 128, 128, 1), (4799,))

In [4]:
# Valid
valid_files = sorted([f for f in files if f.stem[-6] in '2'])
y_labels = [str(f.name).split('_')[0] for f in valid_files]
y_valid = [conf.label2int[y] for y in y_labels]

XX = mels_build_multiplexed_X(conf, valid_files)
X_valid, y_valid = mels_demux_XX_y(XX, y_valid)
np.save('X_valid.npy', X_valid)
np.save('y_valid.npy', y_valid)
X_valid.shape, y_valid.shape

((1206, 128, 128, 1), (1206,))

In [5]:
# Validation set: the parent folder name of each file, which the author uses as
# the microphone label ('jack', 'laptop', ...).
# NOTE(review): this has one entry per file in `valid_files`; it lines up with
# the rows of X_valid only if each file yields exactly one sample -- confirm.
mic_valid = [wav.parent.name for wav in valid_files]
np.save('mic_valid.npy', mic_valid)
mic_valid[:5]

['jack', 'jack', 'jack', 'jack', 'jack']