In [None]:
import os
import soundfile as sf
from scipy import signal
import numpy as np
import matplotlib.pyplot as plt
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.models import Sequential,load_model
from keras.layers.core import Dense, Activation, Flatten
from sklearn.model_selection import train_test_split
from keras.optimizers import SGD, Adam
from keras.callbacks import ModelCheckpoint
import h5py
from pydub import AudioSegment
import os
import datetime
from scipy.io.wavfile import write
import matplotlib.pyplot as plt

## 1. Data Processing

#### Convert Stereo to Mono

In [126]:
def stereo_to_mono(input_dir, output_dir):
    """Down-mix every ``.wav`` file in ``input_dir`` to one channel.

    Args:
        input_dir: Directory scanned (non-recursively) for files whose name
            ends with ``.wav``.
        output_dir: Directory that receives the mono files. It is created if
            it does not exist; a trailing path separator is optional.

    Each output file keeps the base name of its input file.
    """
    os.makedirs(output_dir, exist_ok=True)
    for filename in os.listdir(input_dir):
        if not filename.endswith(".wav"):
            continue
        sound = AudioSegment.from_wav(os.path.join(input_dir, filename))
        mono = sound.set_channels(1)
        # Strip only the final extension so that dotted names such as
        # "take.1.wav" and "take.2.wav" do not collapse onto the same output.
        name = os.path.splitext(filename)[0]
        # Join path components instead of concatenating strings so the file
        # lands inside output_dir even when it has no trailing separator.
        mono.export(os.path.join(output_dir, name + ".wav"), format="wav")

In [127]:
# Down-mix the recordings found in data/ and write the mono copies to data_monowavs/.
stereo_to_mono('data/', 'data_monowavs/')

#### Compress the WAV files into a smaller format

Copy the `dataProcessing.py` file into the `data_monowavs/` folder, then run it to compress the WAV files.  
(bitrate = sample rate × number of channels × bits per sample)

#### Feature Extraction

In [5]:
#Feature extraction
def feature_extraction(x, fs, frame_length_s=0.04, segment_length_s=0.5):
    """Convert an audio signal into fixed-size STFT-magnitude segments.

    Args:
        x: Audio samples. Assumed to be a 1-D (mono) array; multi-channel
            input is not handled here.
        fs: Sampling rate of ``x`` in Hz.
        frame_length_s: Target STFT window length in seconds. The window
            length in samples is rounded up to the next power of two.
        segment_length_s: Target segment duration in seconds. The number of
            STFT frames per segment is ``segment_length_s / frame_length_s``
            rounded up to the next power of two (16 with the defaults).

    Returns:
        Array of shape ``(n_segments, n_freqs, frames_per_segment, 1)``; the
        last axis is the channel axis. Trailing frames that do not fill a
        whole segment are dropped, so ``n_segments`` may be 0 for short input.
    """
    # Window length in samples, rounded up to a power of two.
    frame_length = int(2 ** np.ceil(np.log2(fs * frame_length_s)))
    # 50 % overlap between consecutive windows.
    overlap = frame_length // 2

    _, _, spectrum = signal.stft(
        x, nfft=frame_length, noverlap=overlap, fs=fs, nperseg=frame_length
    )
    # Only the lowest (n_bins // 2 - 1) rows of the spectrum are kept.
    # NOTE(review): presumably because the content of interest is confined to
    # the lower half of the band -- confirm this is intentional.
    number_frequencies = spectrum.shape[0] // 2 - 1
    number_time_frames = spectrum.shape[1]
    magnitude = np.abs(spectrum[:number_frequencies, :])

    # Segment length measured in STFT frames (not samples).
    segment_length = int(2 ** np.ceil(np.log2(segment_length_s / frame_length_s)))

    # Drop the trailing frames that do not fill a whole segment. An explicit
    # end index is required: slicing with ``:-remainder`` would return an
    # empty array whenever the remainder is 0.
    usable_frames = number_time_frames - number_time_frames % segment_length
    trimmed = magnitude[:, :usable_frames]

    # Fortran-order reshape splits the time axis into consecutive segments:
    # (freqs, frames per segment, segments, channel).
    features = trimmed.reshape(
        (number_frequencies, segment_length, -1, 1), order='F'
    )
    # Reorder to (segments, freqs, frames per segment, channel).
    return features.transpose((2, 0, 1, 3))

#### Read all files and extract training and label data

In [6]:
# Directory holding the mono WAV files that the feature-loading cell reads.
data_folder = 'data_monowavs/'

In [7]:
# Build the network inputs (files whose name contains "down") and the
# ground-truth targets (all other .wav files) from the mono recordings.
input_features = []
groundtruth_features = []
# os.listdir returns names in arbitrary order. Sorting makes the run
# reproducible and keeps both lists in the same relative file order.
# NOTE(review): this assumes sorted order lines up each "down" file with its
# original -- verify against the actual naming scheme.
for filename in sorted(os.listdir(data_folder)):
    if not filename.endswith(".wav"):
        continue
    x, fs = sf.read(os.path.join(data_folder, filename))
    features = feature_extraction(x, fs)
    if "down" in filename:
        input_features.append(features)
    else:
        groundtruth_features.append(features)
# Stack the per-file segment arrays along the segment axis.
input_features = np.vstack(input_features)
groundtruth_features = np.vstack(groundtruth_features)
# Inputs and targets are used as element-wise pairs, so their shapes must agree.
if input_features.shape != groundtruth_features.shape:
    raise ValueError(
        "input/ground-truth feature shapes differ: {} vs {}".format(
            input_features.shape, groundtruth_features.shape
        )
    )

In [8]:
# Sanity check: (segments, freqs, frames per segment, channel) of the targets.
groundtruth_features.shape

(958, 511, 16, 1)

In [9]:
# Sanity check: the inputs should have the same shape as the targets.
input_features.shape

(958, 511, 16, 1)

In [10]:
# Hold out 40 % of the segments; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    input_features,
    groundtruth_features,
    test_size=0.4,
    random_state=0,
)

#### Save features into an HDF5 file

In [11]:
def save_features(X_train, X_test, y_train, y_test, path='data.hdf5'):
    """Write the train/test split to an HDF5 file.

    Args:
        X_train: Training inputs, stored as dataset ``X_train``.
        X_test: Held-out inputs, stored as dataset ``X_test``.
        y_train: Training targets, stored as dataset ``y_train``.
        y_test: Held-out targets, stored as dataset ``y_test``.
        path: Destination file. It is opened in ``'w'`` mode, so an existing
            file at that location is overwritten. Defaults to ``'data.hdf5'``.
    """
    datasets = (
        ('X_train', X_train),
        ('X_test', X_test),
        ('y_train', y_train),
        ('y_test', y_test),
    )
    with h5py.File(path, 'w') as f:
        for name, data in datasets:
            f.create_dataset(name, data=data)

In [12]:
# Persist the split to data.hdf5 so that training can reload it later.
save_features(X_train,X_test,y_train,y_test)

## 2. Model

In [3]:
def get_model(features_shape, learning_rate=0.0003):
    """Build and compile the three-layer convolutional network.

    Every convolution uses ``"same"`` padding, so the output has the same
    height and width as the input and a single channel.

    Args:
        features_shape: Shape of the feature array,
            ``(n_segments, n_freqs, n_frames, n_channels)``. Only entries 1
            and 2 are read; the channel count is fixed to 1.
        learning_rate: Step size passed to the Adam optimizer.

    Returns:
        The compiled ``Sequential`` model. Its summary is printed as a side
        effect.
    """
    # (number of freqs x number of frames in a segment x number of channels)
    input_shape = (features_shape[1], features_shape[2], 1)

    model = Sequential()
    model.add(Conv2D(32, (5, 5),
                     input_shape=input_shape,
                     activation="relu",
                     padding="same"))
    model.add(Conv2D(64, (5, 5),
                     activation="relu",
                     padding="same"))
    # A single output filter maps back to one channel.
    model.add(Conv2D(1, (10, 10),
                     activation="relu",
                     padding="same"))

    optimizer = Adam(lr=learning_rate)
    model.compile(optimizer=optimizer,
                  loss='mean_absolute_error',
                  metrics=['mean_absolute_error'])
    model.summary()
    return model

# Instantiate the network for the shape of the training features.
model = get_model(X_train.shape)

In [4]:
# Rebuild the network (fresh weights); this repeats the call that ends the previous cell.
model = get_model(X_train.shape)

Instructions for updating:
Colocations handled automatically by placer.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 511, 16, 32)       832       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 511, 16, 64)       51264     
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 511, 16, 1)        6401      
Total params: 58,497
Trainable params: 58,497
Non-trainable params: 0
_________________________________________________________________


In [None]:
def read_features(path):
    """Load the train/test split stored in an HDF5 file.

    Args:
        path: File containing the datasets ``X_train``, ``X_test``,
            ``y_train`` and ``y_test``.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test)``. Note that this order
        differs from the argument order of ``save_features``.

    Raises:
        KeyError: If one of the four datasets is missing from the file.
    """
    with h5py.File(path, 'r') as f:
        # ``dataset[()]`` reads the whole dataset into memory. It replaces the
        # ``.value`` attribute, which is no longer available in h5py 3.x.
        X_train = f['X_train'][()]
        X_test = f['X_test'][()]
        y_train = f['y_train'][()]
        y_test = f['y_test'][()]
    return X_train, y_train, X_test, y_test

# Reload the split from disk; read_features returns X_train, y_train, X_test, y_test in that order.
X_train, y_train, X_test, y_test = read_features('data.hdf5')

In [5]:
# First training run: 100 epochs, mini-batches of 32, validated on the held-out split.
model.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=32,
    shuffle=True,
    validation_data=(X_test, y_test),
)

Instructions for updating:
Use tf.cast instead.
Train on 574 samples, validate on 384 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch

<keras.callbacks.History at 0x7f2cf5bc4860>

In [6]:
# Checkpoint the model after the first run under a timestamped file name.
# NOTE(review): the name contains spaces and ':' characters, which are not valid in Windows file names.
model.save('model-{date:%Y-%m-%d %H:%M:%S}.h5'.format( date=datetime.datetime.now() ))

In [7]:
# Second run: `model` is not rebuilt, so training continues from the current
# weights, now with mini-batches of 16 for 200 epochs.
model.fit(
    X_train,
    y_train,
    epochs=200,
    batch_size=16,
    shuffle=True,
    validation_data=(X_test, y_test),
)

Train on 574 samples, validate on 384 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
E

<keras.callbacks.History at 0x7f2cec59b940>

In [8]:
# Checkpoint the model after the second run under a timestamped file name.
# NOTE(review): the name contains spaces and ':' characters, which are not valid in Windows file names.
model.save('model-{date:%Y-%m-%d %H:%M:%S}.h5'.format( date=datetime.datetime.now() ))

In [9]:
# Third run: keeps training the same `model` object, now with mini-batches
# of 64 for 300 epochs.
model.fit(
    X_train,
    y_train,
    epochs=300,
    batch_size=64,
    shuffle=True,
    validation_data=(X_test, y_test),
)

Train on 574 samples, validate on 384 samples
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
E

<keras.callbacks.History at 0x7f2cfdc3b400>

In [10]:
# Checkpoint the model after the third run under a timestamped file name.
# NOTE(review): the name contains spaces and ':' characters, which are not valid in Windows file names.
model.save('model-{date:%Y-%m-%d %H:%M:%S}.h5'.format( date=datetime.datetime.now() ))

In [None]:
def get_model(features_shape, learning_rate=0.05):
    """Build and compile the convolutional network for the high-learning-rate experiment.

    The architecture is three ``"same"``-padded ReLU convolutions (32, 64 and
    1 filters), so the output keeps the input's height and width with one
    channel.

    Args:
        features_shape: Shape of the feature array,
            ``(n_segments, n_freqs, n_frames, n_channels)``. Only entries 1
            and 2 are read; the channel count is fixed to 1.
        learning_rate: Step size passed to the Adam optimizer.
            NOTE(review): the 0.05 default is far larger than the 0.0003 used
            by the earlier definition -- confirm it is intended.

    Returns:
        The compiled ``Sequential`` model. Its summary is printed as a side
        effect.
    """
    # (number of freqs x number of frames in a segment x number of channels)
    input_shape = (features_shape[1], features_shape[2], 1)

    conv_options = dict(activation="relu", padding="same")
    model = Sequential([
        Conv2D(32, (5, 5), input_shape=input_shape, **conv_options),
        Conv2D(64, (5, 5), **conv_options),
        # A single output filter maps back to one channel.
        Conv2D(1, (10, 10), **conv_options),
    ])

    model.compile(optimizer=Adam(lr=learning_rate),
                  loss='mean_absolute_error',
                  metrics=['mean_absolute_error'])
    model.summary()
    return model

# Replace `model` with a freshly initialised network built by the get_model defined in this cell.
model = get_model(X_train.shape)

In [None]:
# Train the freshly built model: 100 epochs, mini-batches of 16, validated on the held-out split.
model.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=16,
    shuffle=True,
    validation_data=(X_test, y_test),
)

In [None]:
# Checkpoint the retrained model under a timestamped file name.
# NOTE(review): the name contains spaces and ':' characters, which are not valid in Windows file names.
model.save('model-{date:%Y-%m-%d %H:%M:%S}.h5'.format( date=datetime.datetime.now() ))