In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split

from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Conv2D, MaxPooling2D
from tensorflow.keras.layers import Flatten, Dense, Dropout, Activation
from tensorflow.keras.optimizers import Adam

In [2]:
# Demo only: the arrays below hold just 10,000 sample instances from the gmd set.
# Both .npy files are produced by the code in the "Training Data Transformation" file.

X1, y = (
    np.load('gmd_stft_sample.npy'),     # model inputs
    np.load('gmd_label_sample.npy'),    # labels
)

(X1.shape, y.shape)

((10000, 1025, 8, 1), (10000, 10))

In [3]:
# Hold out 20% of the instances; the fixed random_state keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    X1,
    y,
    test_size=0.2,
    random_state=42,
)
(x_train.shape, x_test.shape)

((8000, 1025, 8, 1), (2000, 1025, 8, 1))

### Primary Choice: ConvNet

In [4]:
# 2D ConvNet over inputs of shape (1025, 8, 1): two conv/pool stages followed by a dense head.
model = Sequential()                   # initialize an empty network

# Convolutional layer: 10 kernels of size (3, 3) moving at stride (1, 1) over the entire
# input to convolve features. Leaky ReLU might be another choice of activation.
model.add(Conv2D(filters=10, kernel_size=3, strides=1, input_shape=(1025, 8, 1),
                 activation='relu', padding='same'))

# Pooling layer (dimension reduction for feature maps)
model.add(MaxPooling2D(pool_size=(2, 2)))

# Second conv/pool stage: 20 kernels of size (3, 3), default padding.
model.add(Conv2D(filters=20, kernel_size=3, strides=1, activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# model.add(Dropout(0.2))              # Dropout layer (currently disabled)

model.add(Flatten())                         # flattens 2D feature maps to 1D
model.add(Dense(1024, activation='relu'))    # fully-connected layers
model.add(Dense(1024, activation='relu'))
model.add(Dropout(0.2))

# Output layer: 10 nodes, one per class. Sigmoid is the multi-label activation.
model.add(Dense(10, activation='sigmoid'))

# The metric is spelled out instead of using the 'accuracy' string: Keras resolves that
# shortcut from the loss/output shape, and for a 10-unit output it may pick categorical
# (argmax) accuracy depending on the Keras version, which is misleading for multi-label
# targets. The metric keeps the name 'accuracy' so the history keys are unchanged.
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=[keras.metrics.BinaryAccuracy(name='accuracy')])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 1025, 8, 10)       100       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 512, 4, 10)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 510, 2, 20)        1820      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 255, 1, 20)        0         
_________________________________________________________________
flatten (Flatten)            (None, 5100)              0         
_________________________________________________________________
dense (Dense)                (None, 1024)              5223424   
_________________________________________________________________
dense_1 (Dense)              (None, 1024)              1

In [5]:
# This is just a demo, so there are only 4 training epochs here.
# x_test/y_test serve as the validation set. No `validation_split` is passed because
# Keras ignores it whenever `validation_data` is supplied.

history = model.fit(x_train, y_train,
                    validation_data=(x_test, y_test),
                    epochs=4,
                    batch_size=64)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


### 1D Conv Network

Feed the raw one-dimensional audio waveform (`audio_wav`) directly into the network as the input to convolve. (Not a good choice.)

In [6]:
# Every audio_wav row is turned into a 1D numpy array, and all rows are stacked together
# into one 3D array of shape (number of instances, length of audio_wav, 1 channel).

X2 = np.load('gmd_1d_sample.npy')

(X2.shape, y.shape)

((10000, 4000, 1), (10000, 10))

In [7]:
# 80/20 split of the 1D input with a fixed seed.
# Note: this rebinds x_train / x_test / y_train / y_test, replacing any earlier split.
x_train, x_test, y_train, y_test = train_test_split(
    X2,
    y,
    test_size=0.2,
    random_state=42,
)

(x_train.shape, y_train.shape)

((8000, 4000, 1), (8000, 10))

In [8]:
# 1D ConvNet over inputs of shape (4000, 1): two Conv1D/MaxPooling1D stages, then a dense
# head ending in 10 sigmoid outputs.
model = Sequential()

model.add(Conv1D(filters=10, kernel_size=3, activation='relu', input_shape=(4000, 1)))
model.add(MaxPooling1D(pool_size=2))

model.add(Conv1D(filters=20, kernel_size=3, activation='relu'))
model.add(MaxPooling1D(pool_size=2))

model.add(Flatten())

model.add(Dense(1024, activation='relu'))
model.add(Dense(1024, activation='relu'))
model.add(Dense(10, activation='sigmoid'))

# `learning_rate` replaces the deprecated `lr` alias, which newer Keras releases reject.
# The metric is explicit because the 'accuracy' string shortcut is resolved by Keras from
# the loss/output shape and may become categorical (argmax) accuracy for a 10-unit output,
# depending on the Keras version. It keeps the name 'accuracy' so history keys are unchanged.
model.compile(optimizer=Adam(learning_rate=.01),
              loss='binary_crossentropy',
              metrics=[keras.metrics.BinaryAccuracy(name='accuracy')])

model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d (Conv1D)              (None, 3998, 10)          40        
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 1999, 10)          0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 1997, 20)          620       
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 998, 20)           0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 19960)             0         
_________________________________________________________________
dense_3 (Dense)              (None, 1024)              20440064  
_________________________________________________________________
dense_4 (Dense)              (None, 1024)             

In [9]:
# x_test/y_test serve as the validation set. No `validation_split` is passed because
# Keras ignores it whenever `validation_data` is supplied.
history = model.fit(x_train, y_train,
                    validation_data=(x_test, y_test),
                    epochs=5,
                    batch_size=32)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


### Simple Linear Model

A network of multiple fully-connected layers, basically a very large probability regression model with sigmoid outputs. (Strictly speaking it is not linear, because the hidden layers use ReLU activations.)

In [10]:
def Linear_Network(input_dim=4000, n_classes=10, learning_rate=.001):
    """Build and compile a fully-connected network with sigmoid outputs.

    Two Dense(1024, relu) layers, followed by dropout of 0.2 and 0.3 respectively,
    feed a sigmoid output layer. The model is compiled with Adam and binary
    cross-entropy.

    Args:
        input_dim: Number of features per input instance (default 4000).
        n_classes: Number of sigmoid output units (default 10).
        learning_rate: Learning rate passed to Adam (default 0.001).

    Returns:
        The compiled `keras.Model`.
    """
    # `shape` must be a tuple: `(4000)` is just the int 4000, while `(4000,)` is a 1-tuple.
    inputs = keras.layers.Input(shape=(input_dim,))

    x = keras.layers.Dense(1024, activation="relu", name="dense_1")(inputs)
    x = keras.layers.Dropout(0.2, name="dropout_1")(x)

    x = keras.layers.Dense(1024, activation="relu", name="dense_2")(x)
    x = keras.layers.Dropout(0.3, name="dropout_2")(x)

    # The layer name is misspelled ("ouput") but kept as-is so lookups by name keep working.
    outputs = keras.layers.Dense(n_classes, activation="sigmoid", name="ouput")(x)

    network = keras.Model(inputs=inputs, outputs=outputs)

    # The metric is explicit because the 'accuracy' string shortcut is resolved by Keras
    # from the loss/output shape and may become categorical (argmax) accuracy for a
    # multi-unit output, depending on the Keras version. It keeps the name 'accuracy'
    # so history keys are unchanged.
    network.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='binary_crossentropy',
        metrics=[keras.metrics.BinaryAccuracy(name='accuracy')]
    )

    return network

model = Linear_Network()
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 4000)]            0         
_________________________________________________________________
dense_1 (Dense)              (None, 1024)              4097024   
_________________________________________________________________
dropout_1 (Dropout)          (None, 1024)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 1024)              1049600   
_________________________________________________________________
dropout_2 (Dropout)          (None, 1024)              0         
_________________________________________________________________
ouput (Dense)                (None, 10)                10250     
Total params: 5,156,874
Trainable params: 5,156,874
Non-trainable params: 0
___________________________________________________

In [11]:
# The input is the same as for the 1D conv network above.
# NOTE(review): x_train carries a trailing channel axis of size 1 while the model input is
# (4000,); presumably Keras squeezes that axis -- confirm on the Keras version in use.
# x_test/y_test serve as the validation set. No `validation_split` is passed because
# Keras ignores it whenever `validation_data` is supplied.

history = model.fit(x_train, y_train,
                    validation_data=(x_test, y_test),
                    epochs=5,
                    batch_size=32)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
