In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split

from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Activation

In [2]:
# Demo only: the arrays below hold just 10,000 sample instances from the GMD set.
# The .npy files are generated by the code in the "Training Data Transformation" file.
X1, X2, X3 = (
    np.load(path)
    for path in ('gmd_stft_sample.npy', 'gmd_mel_sample.npy', 'gmd_mfcc_sample.npy')
)

# Labels are restricted to the three most common classes: "D", "S", "H".
y = np.load('gmd_3label.npy')

tuple(arr.shape for arr in (X1, X2, X3, y))

((10000, 1025, 11, 1), (10000, 128, 11, 1), (10000, 20, 11, 1), (10000, 3))

In [3]:
# Hold out 20% of X1 (loaded from gmd_stft_sample.npy) and its labels;
# the fixed random_state keeps the split reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    X1,
    y,
    test_size=0.2,
    random_state=42,
)
(x_train.shape, x_test.shape)

((8000, 1025, 11, 1), (2000, 1025, 11, 1))

### Primary Choice: ConvNet

In [4]:
model = Sequential()                   # initialize an empty network

# Convolutional layer: 10 kernels of size (3, 3) moving at stride (1, 1) over the
# entire input to convolve features. Leaky ReLU would be another activation choice.
model.add(Conv2D(10, kernel_size=3, strides=1, input_shape=(1025, 11, 1), activation='relu'))

# Pooling layer (dimension reduction)
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(20, kernel_size=3, strides=1, activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Optional regularisation at this point: model.add(Dropout(0.2))

model.add(Flatten())                          # flattens the 2d feature maps to 1d
model.add(Dense(128, activation='relu'))      # fully-connected layer
# Optional regularisation at this point: model.add(Dropout(0.2))

# Output layer, multi-label: one independent sigmoid unit per class.
model.add(Dense(3, activation='sigmoid'))

# Independent sigmoid outputs pair with binary cross-entropy (one Bernoulli term per
# label). Categorical cross-entropy expects a single probability distribution over the
# classes (softmax), which a sigmoid layer does not produce.
# 'categorical_accuracy' is kept so existing history keys stay available;
# 'binary_accuracy' scores every label independently.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['categorical_accuracy', 'binary_accuracy'],
)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 1023, 9, 10)       100       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 511, 4, 10)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 499, 2, 20)        7820      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 249, 1, 20)        0         
_________________________________________________________________
flatten (Flatten)            (None, 4980)              0         
_________________________________________________________________
dense (Dense)                (None, 128)               637568    
_________________________________________________________________
dense_1 (Dense)              (None, 3)                 3

In [5]:
# Demo only, so there are just 5 training iterations (epochs) here.
# Only `validation_data` is passed: Keras ignores `validation_split` whenever
# `validation_data` is supplied, so giving both is misleading.
history = model.fit(x_train, y_train,
                    validation_data=(x_test, y_test),
                    epochs=5,
                    batch_size=32)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


### 1D Conv Network

Feed the raw 1-dimensional audio waveform (`audio_wav`) directly into a 1D convolutional network. (Not a good choice.)

In [6]:
# Every audio_wav_resample row is turned into a 1d numpy array, and all rows are stacked
# together as one 3d array of (number of instances, length of audio_wav_resample, 1 channel).
X4 = np.load('gmd_1d_wav.npy')

tuple(arr.shape for arr in (X4, y))

((10000, 5513, 1), (10000, 3))

In [7]:
# Same 80/20 split and seed as before, now applied to the raw-waveform array X4.
x_train, x_test, y_train, y_test = train_test_split(
    X4,
    y,
    test_size=0.2,
    random_state=42,
)

(x_train.shape, y_train.shape)

((8000, 5513, 1), (8000, 3))

In [8]:
from tensorflow.keras.layers import Conv1D, MaxPooling1D
from tensorflow.keras.optimizers import Adam

In [9]:
model = Sequential()

# Two Conv1D + max-pooling stages applied directly to the (5513, 1) input.
model.add(Conv1D(24, 3, activation='relu', input_shape=(5513, 1)))
model.add(MaxPooling1D(2))

model.add(Conv1D(48, 3, activation='relu'))
model.add(MaxPooling1D(2))

model.add(Flatten())

model.add(Dense(64, activation='relu'))
# One independent sigmoid unit per label.
model.add(Dense(3, activation='sigmoid'))

# `learning_rate` replaces the deprecated `lr` keyword, which newer Keras releases reject.
# Sigmoid outputs pair with binary cross-entropy; categorical cross-entropy expects a
# softmax-style distribution over the classes.
# 'categorical_accuracy' is kept so existing history keys stay available.
model.compile(
    Adam(learning_rate=.01),
    loss='binary_crossentropy',
    metrics=['categorical_accuracy', 'binary_accuracy'],
)

model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d (Conv1D)              (None, 5511, 24)          96        
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 2755, 24)          0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 2753, 48)          3504      
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 1376, 48)          0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 66048)             0         
_________________________________________________________________
dense_2 (Dense)              (None, 64)                4227136   
_________________________________________________________________
dense_3 (Dense)              (None, 3)                

In [10]:
# Only `validation_data` is passed: Keras ignores `validation_split` whenever
# `validation_data` is supplied, so giving both is misleading.
history = model.fit(x_train, y_train,
                    validation_data=(x_test, y_test),
                    epochs=5,
                    batch_size=32)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


### Simple Linear Model

A network of multiple fully-connected layers: essentially a multilayer perceptron, i.e. a large probability regression model with ReLU non-linearities between the layers.

In [12]:
def Linear_Network(input_dim=5513):
    """Build and compile a small fully-connected multi-label classifier.

    The network is Dense(256) -> Dropout(0.2) -> Dense(64) -> Dropout(0.25)
    -> Dense(3, sigmoid), compiled with Adam (learning rate 0.001).

    Args:
        input_dim: Number of features per instance. Defaults to 5513, the
            value the model was originally hard-coded to.

    Returns:
        A compiled ``keras.Model``.
    """
    # `shape` must be a tuple; `(5513)` without the trailing comma is just an int.
    inputs = keras.layers.Input(shape=(input_dim,))

    x = keras.layers.Dense(256, activation="relu", name="dense_1")(inputs)
    x = keras.layers.Dropout(0.2, name="dropout_1")(x)

    x = keras.layers.Dense(64, activation="relu", name="dense_2")(x)
    x = keras.layers.Dropout(0.25, name="dropout_2")(x)

    # The layer name keeps its original spelling so lookups by name keep working.
    outputs = keras.layers.Dense(3, activation="sigmoid", name="ouput")(x)

    model = keras.Model(inputs=inputs, outputs=outputs)

    # Independent sigmoid outputs pair with binary cross-entropy; categorical
    # cross-entropy expects a softmax-style distribution over the classes.
    # "categorical_accuracy" is kept so existing history keys stay available.
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=0.001),
        loss=keras.losses.BinaryCrossentropy(),
        metrics=[
            "categorical_accuracy",
            "binary_accuracy",
            keras.metrics.AUC(name="auc"),
        ],
    )

    return model

# Build the fully-connected model (rebinding `model`) and print its layer summary.
model = Linear_Network()
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 5513)]            0         
_________________________________________________________________
dense_1 (Dense)              (None, 256)               1411584   
_________________________________________________________________
dropout_1 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 64)                16448     
_________________________________________________________________
dropout_2 (Dropout)          (None, 64)                0         
_________________________________________________________________
ouput (Dense)                (None, 3)                 195       
Total params: 1,428,227
Trainable params: 1,428,227
Non-trainable params: 0
___________________________________________________

In [13]:
# The input is the same data as for the 1D conv network above. The dense model is
# declared with a flat (features,) input, so each instance is flattened explicitly
# instead of relying on Keras to drop the trailing channel axis.
x_train_flat = x_train.reshape(len(x_train), -1)
x_test_flat = x_test.reshape(len(x_test), -1)

# Only `validation_data` is passed: Keras ignores `validation_split` whenever
# `validation_data` is supplied, so giving both is misleading.
history = model.fit(x_train_flat, y_train,
                    validation_data=(x_test_flat, y_test),
                    epochs=5,
                    batch_size=32)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
