In [1]:
from glob import glob
import numpy as np
from keras import Sequential
from keras.callbacks import EarlyStopping
from keras.layers import Dense, Dropout, LSTM, ReLU, Bidirectional, CuDNNLSTM, ELU
from keras.optimizers import Adam
from keras.utils import to_categorical
from sklearn.preprocessing import StandardScaler, LabelEncoder
# Glob pattern handed to glob() later in the notebook to list the training clips.
# NOTE(review): this is an absolute, machine-specific path, while the
# single-clip check further down uses a relative "../raw_data/..." path;
# confirm both point at the same data set before running elsewhere.
data_path = "/Users/soltan/Programs/kaggle/raw_data/train/train/*"

import librosa

Using TensorFlow backend.


In [2]:
def normalize(img):
    """Standardize an array to zero mean and unit standard deviation.

    The mean and standard deviation are computed over all elements of
    ``img``. If the standard deviation is exactly zero (a constant array),
    the centered values are divided by a small constant (0.001) instead,
    so no division by zero occurs.

    Returns the standardized array.
    """
    fallback_scale = 0.001
    spread = np.std(img)
    centered = img - np.mean(img)
    # Only an exactly-zero spread triggers the fallback divisor.
    scale = spread if spread != 0 else fallback_scale
    return centered / scale


def extract_features(file_name):
    """Load an audio file and return its normalized log-mel spectrogram.

    Parameters
    ----------
    file_name : str
        Path of the audio file to read with ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        float32 mel spectrogram converted to decibels and then standardized
        with ``normalize``.
    """
    # sr=None asks librosa to keep the file's own sampling rate instead of
    # resampling it.
    sound_clip, sr = librosa.load(file_name, sr=None)
    # The signal is passed by keyword: recent librosa releases accept the
    # audio arguments of melspectrogram as keywords only, and older releases
    # accept the keyword form as well.
    spectrogram = librosa.feature.melspectrogram(y=sound_clip, sr=sr)
    raw = librosa.power_to_db(spectrogram).astype(np.float32)
    return normalize(raw)

In [3]:
# Sanity check: extract features from a single training clip and print the
# shape of the resulting array.
sample_clip = "../raw_data/train/train/100002-1.wav"
ft = extract_features(sample_clip)
print(ft.shape)

(128, 32)


In [4]:
import os

from tqdm import tqdm

# List every training file; sorting makes the order (and therefore any later
# seeded split) independent of the order in which glob returns the paths.
data_dir = np.array(sorted(glob(data_path)))
features, labels = [], []
for file in tqdm(data_dir):
    # File names look like "<name>-<label>.<ext>" (e.g. "100002-1.wav").
    # Strip the directory and the extension first so that a "-" or "."
    # anywhere in the directory part of the path cannot corrupt the label.
    file_name = os.path.splitext(os.path.basename(file))[0]
    parts = file_name.split("-")
    name, label = parts[0], parts[1]
    features.append(extract_features(file))
    labels.append(label)



100%|██████████| 9000/9000 [00:59<00:00, 151.28it/s]


In [5]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the clips for validation. A fixed random_state makes the
# split reproducible across kernel restarts, and stratifying on the labels
# keeps the class proportions the same in both parts.
inputs_train, inputs_test, targets_train, targets_test = train_test_split(
    features, labels, test_size=0.2, random_state=42, stratify=labels)

In [6]:

# NOTE(review): this scaler is created but never fitted and never applied to
# the training data in this cell; calling ss.transform() elsewhere will raise
# until it is fitted.
ss = StandardScaler()
X_train = np.array(inputs_train)
X_val = np.array(inputs_test)

# Learn the label -> integer mapping on the training labels only.
lb = LabelEncoder()
y_train = to_categorical(lb.fit_transform(targets_train))
# Reuse the training mapping for the validation labels. Refitting the encoder
# here could assign different integers (or a different one-hot width) if the
# validation labels are not the same set as the training labels.
y_val = to_categorical(lb.transform(targets_test), num_classes=y_train.shape[1])


In [7]:
# NOTE(review): Attention is not used anywhere in this cell, and it comes from
# tensorflow.keras while every other layer comes from standalone keras.
from tensorflow.keras.layers import Attention

# Keras recurrent layers read each sample as (timesteps, features), so the
# first axis of every feature array is treated as the time axis.
# NOTE(review): extract_features returns librosa's mel spectrogram layout;
# confirm that stepping along its first axis is what is intended.
input_shape = X_train[0].shape
optimizer = Adam()
# Take the number of output units from the one-hot targets so the softmax
# layer always matches the labels instead of relying on a hard-coded 2.
n_classes = y_train.shape[1]

model = Sequential()

# Two stacked LSTMs: the first returns the whole sequence for the second,
# which returns only its final state for the classifier.
model.add(LSTM(units=128, dropout=0.05, recurrent_dropout=0.35, return_sequences=True, input_shape=input_shape))
model.add(LSTM(units=32,  dropout=0.05, recurrent_dropout=0.35, return_sequences=False))
model.add(Dense(units=n_classes, activation="softmax"))
model.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=['acc'])
# Stop once val_loss has not improved for 10 epochs and roll the model back
# to the best epoch; without restore_best_weights the model would keep the
# weights of the last (worse) epoch.
early_stop = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1, mode='auto',
                           restore_best_weights=True)


In [None]:
# Fit the model on the training arrays, validating on the held-out arrays
# after every epoch; the early-stopping callback may end training before the
# epoch limit is reached.
batch_size, n_epochs = 200, 35
history = model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=n_epochs,
    validation_data=(X_val, y_val),
    callbacks=[early_stop],
)

Train on 7200 samples, validate on 1800 samples
Epoch 1/35
Epoch 2/35
Epoch 3/35
Epoch 4/35
Epoch 5/35
Epoch 6/35
Epoch 7/35
Epoch 8/35
Epoch 9/35
Epoch 10/35
Epoch 11/35
Epoch 12/35
Epoch 13/35
Epoch 14/35


In [None]:
import os

from tqdm import tqdm

test_path = "/Users/soltan/Programs/kaggle/raw_data/test/test/*"
# Sorted so the rows of the submission file come out in a stable order.
test_dir = np.array(sorted(glob(test_path)))

# The context manager closes the file even if feature extraction or
# prediction fails part-way through the loop.
with open("../submission.txt", "w") as fout:
    fout.write("name,label\n")
    for file in tqdm(test_dir):
        name = os.path.basename(file)
        ft = extract_features(file)
        # Add the batch axis the model expects. No scaler is applied here:
        # the model was trained on the output of extract_features directly,
        # and the StandardScaler `ss` is never fitted in this notebook.
        batch = np.expand_dims(ft, axis=0)
        # argmax over the softmax output gives the predicted class index.
        class_index = np.argmax(model.predict(batch), axis=-1)
        # Map the index back to the label string parsed from the training
        # file names, so the file contains labels rather than encoder indices.
        pred = lb.inverse_transform(class_index)[0]
        fout.write("{},{}\n".format(name, pred))

