In [None]:
import pandas as pd
import librosa
import numpy as np

In [None]:
# Load the training metadata table.
# NOTE(review): absolute, user-specific path; consider making this configurable.
folder = '/Users/davidlichacz/Downloads/stern/train/'
train = pd.read_csv(folder + 'train.csv')

# Derive the location of each sound file from its ID.
train['File'] = folder + 'Train/' + train['ID'].map(str) + '.wav'

# Integer-encode the class names. Rows 183 and 930 are left out because their
# sound files have difficulty being read.
skipped_rows = [183, 930]
kept_classes = train['Class'].drop(skipped_rows)
classes, labels = np.unique(kept_classes, return_inverse=True)

In [None]:
# Function that extracts data from sound files.
def func(file_name):
    """Extract time-averaged audio features from a single sound file.

    The file is loaded with librosa's default settings and five feature
    families are computed. Each feature matrix is averaged over its time
    frames, so every returned value is a 1-D vector.

    Parameters
    ----------
    file_name : str
        Path to the audio file to read.

    Returns
    -------
    tuple
        ``(mfccs, chroma, mel, contrast, tonnetz)``. ``mfccs`` has 40
        entries (``n_mfcc=40``); the other lengths follow librosa's
        defaults (presumably 12, 128, 7 and 6, which would match the
        193-wide rows used later in this notebook -- TODO confirm).
    """
    X, sample_rate = librosa.load(file_name)

    # Magnitude spectrogram shared by the chroma and spectral-contrast features.
    stft = np.abs(librosa.stft(y=X))

    # Transposing puts time frames on axis 0, so axis=0 averages over time.
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
    # The signal must be passed as ``y=``: recent librosa releases make the
    # feature arguments keyword-only and reject a positional signal.
    mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
    # Tonnetz is computed on the harmonic component of the signal only.
    tonnetz = np.mean(
        librosa.feature.tonnetz(y=librosa.effects.harmonic(y=X), sr=sample_rate).T,
        axis=0,
    )
    return mfccs, chroma, mel, contrast, tonnetz

In [None]:
# Collect one feature row per sound file in a list and stack once at the end;
# calling np.vstack inside the loop would re-copy the whole matrix every time.
# The first entry is an uninitialised placeholder row that is removed in a
# later cell.
feature_rows = [np.empty((193, ))]

# Extract features for every file except the two that cannot be read reliably.
for index, row in train.iterrows():
    if index not in [183, 930]:
        mfccs, chroma, mel, contrast, tonnetz = func(row.File)
        feature = np.hstack([mfccs, chroma, mel, contrast, tonnetz])
        feature_rows.append(feature)

# Shape: (number of processed files + 1 placeholder row, 193).
array = np.vstack(feature_rows)
        

In [None]:
# Remove the placeholder row at position 0 so only real feature rows remain.
# NOTE: not idempotent; every execution of this cell removes one more row.
array = np.delete(array, obj=0, axis=0)

In [None]:
# Write the feature matrix to disk so it can be imported again later.
np.save(file='array.npy', arr=array)

In [None]:
# Create train and test sets, 70/30 split.
# Sizes are derived from the data instead of being hard-coded; for the
# original 5433 samples this still yields 3803 training and 1630 test rows.
n_samples = len(array)
assert len(labels) == n_samples, "features and labels must have the same length"
cut = int(0.7 * n_samples)

# Seeded shuffle of the row indices (NumPy only, so no extra import is needed)
# keeps the split reproducible across kernel restarts.
x = np.random.default_rng(42).permutation(n_samples)
train_list = x[:cut]
test_list = x[cut:]

# Index with the flat index array itself. Wrapping it in another list
# (array[[train_list]]) is treated as a 2-D index by current NumPy and adds an
# unwanted leading axis to the result.
x_train = array[train_list]
x_test = array[test_list]

y_train = labels[train_list]
y_test = labels[test_list]

In [None]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Conv1D, MaxPooling1D, Flatten

In [None]:
# Reshape features for input into the neural network: one channel per
# feature, i.e. (samples, 193, 1). The sample count is inferred with -1 so
# the cell does not depend on a hard-coded split size.
X = x_train.reshape(-1, 193, 1)
X_test = x_test.reshape(-1, 193, 1)

In [None]:
from keras.utils import to_categorical

# One-hot encode labels. The width is fixed to the number of known classes so
# that Y and Y_test always have the same number of columns, even if a class
# happens to be absent from one of the splits.
n_classes = len(classes)
Y = to_categorical(y_train, num_classes=n_classes)
Y_test = to_categorical(y_test, num_classes=n_classes)

In [None]:
# Create model: two Conv1D + max-pooling stages over the 193-long feature
# vector, followed by a dense hidden layer and a 10-way softmax output.
# input_shape=(193, 1) is the per-sample equivalent of
# batch_input_shape=(None, 193, 1) and is the more widely supported spelling.
model = Sequential()
model.add(Conv1D(100, kernel_size=5, strides=1,
                 activation='relu',
                 input_shape=(193, 1)))
model.add(MaxPooling1D(pool_size=2, strides=2))
model.add(Conv1D(100, 5, activation='relu'))
model.add(MaxPooling1D(pool_size=2))
# Flatten the (steps, channels) feature maps before the dense layers.
model.add(Flatten())
model.add(Dense(200, activation='relu'))
model.add(Dense(10, activation='softmax'))

In [None]:
# Build the model and print its layer-by-layer summary.
model.build()
model.summary()

In [None]:
# Configure training: categorical cross-entropy loss (labels are one-hot),
# plain SGD with a 0.01 step size, and accuracy as the reported metric.
# `learning_rate` replaces the deprecated `lr` argument name.
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.SGD(learning_rate=0.01),
              metrics=['accuracy'])

In [None]:
# Train for 20 epochs in mini-batches of 50 samples, with progress output.
model.fit(
    x=X,
    y=Y,
    batch_size=50,
    epochs=20,
    verbose=1,
)

In [None]:
# Evaluate on the held-out test set and report the first two returned values
# (loss, then the accuracy metric requested at compile time).
score = model.evaluate(X_test, Y_test, verbose=0)
for caption, value in zip(('Test loss:', 'Test accuracy:'), score):
    print(caption, value)