In [1]:
import tensorflow as tf
from tensorflow.keras.layers.experimental import preprocessing
from tensorflow.keras import layers
from tensorflow.keras import models
import tensorflow.keras.backend as K
import pathlib
import os
import pickle
import glob
import numpy as np
import pandas as pd
import time
import random
import matplotlib.pyplot as plt
from plotnine import *
import seaborn as sn
%matplotlib inline

import sys
sys.path.append("/tf/model/vggish")

import vggish_params 
import vggish_input
import mel_features

from kapre import STFT, Magnitude, MagnitudeToDecibel
from kapre.composed import get_melspectrogram_layer, get_log_frequency_spectrogram_layer

seed = 30
tf.random.set_seed(seed)
gpus = tf.config.list_physical_devices('GPU')

In [2]:
## need this if training on GPU
## tensorflow, get your shit together

if len(gpus)>0:

    from tensorflow.compat.v1 import ConfigProto
    from tensorflow.compat.v1 import InteractiveSession

    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)

In [3]:
filenames = list(glob.glob('/tf/data/ADReSS-IS2020/Full_wave_enhanced_audio/*/*'))
random.shuffle(filenames)

In [4]:
train_cutoff = int(len(filenames)*0.7)
val_cutoff = int(len(filenames)*0.85)

train_files = filenames[:train_cutoff]
val_files = filenames[train_cutoff:val_cutoff]
test_files = filenames[val_cutoff:]

In [5]:
meta_cc = pd.read_csv('/tf/data/ADReSS-IS2020/cc_meta_data.txt', sep=";", header=0, 
                  names = ['ID', 'Age', 'Gender', 'MMSE'])
meta_cd = pd.read_csv('/tf/data/ADReSS-IS2020/cd_meta_data.txt', sep=";", header=0, 
                      names = ['ID', 'Age', 'Gender', 'MMSE'])

meta = meta_cc.assign(Group = 'cc').append(meta_cd.assign(Group = 'cd')).reset_index()
meta.MMSE = pd.to_numeric(meta.MMSE.replace(' NA', 30))
meta.ID = meta.ID.str.strip()

In [6]:
def createDataset(files):
    
    allExamples = []
    allScores = []
    
    for filename in files:
        examples = vggish_input.wavfile_to_examples(filename)
        allExamples.append(examples)
        id = filename.split("/")[-1].split(".")[0]
        mmse = meta[meta["ID"]==id].iloc[0]["MMSE"]
        for i in range(examples.shape[0]):
            allScores.append(mmse)
    
    return tf.data.Dataset.from_tensor_slices((np.expand_dims(np.vstack(allExamples), axis=-1), np.array(allScores)))

train_dataset = createDataset(train_files)
val_dataset = createDataset(val_files)
test_dataset = createDataset(test_files)

In [7]:
for waveform, y in train_dataset.take(1):
    input_shape = waveform.shape
    print(input_shape)
    print(y)

(96, 64, 1)
tf.Tensor(26, shape=(), dtype=int64)


In [8]:
BATCH_SIZE = 256
SHUFFLE_BUFFER_SIZE = 100

train_dataset_b = train_dataset.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)
val_dataset_b = val_dataset.batch(BATCH_SIZE)
test_dataset_b = test_dataset.batch(BATCH_SIZE)

In [9]:
with open("vggish_model_weights.pkl", 'rb') as file:
    model_wts = pickle.load(file)

In [39]:
vggish = models.Sequential()

conv1  = layers.Conv2D(filters=64, kernel_size=(3,3), strides=1, padding='same', activation='relu', name='conv1',
                    input_shape=(vggish_params.NUM_FRAMES, vggish_params.NUM_BANDS, 1))

vggish.add(conv1)

# Set its weights, defined in model_vars
conv1.set_weights([model_wts['vggish/conv1/weights:0'], model_wts['vggish/conv1/biases:0']])
conv1.trainable = True # Freeze this layer

pool1  = layers.MaxPooling2D(pool_size=(2,2), strides=2, name='pool1')
vggish.add(pool1)
# Note: no weights for pooling layers

conv2  = layers.Conv2D(filters=128, kernel_size=(3,3), strides=1, padding='same', activation='relu', name='conv2')
vggish.add(conv2)
conv2.set_weights([model_wts['vggish/conv2/weights:0'], model_wts['vggish/conv2/biases:0']])
conv2.trainable = False

pool2  = layers.MaxPooling2D(pool_size=(2,2), strides=2, name='pool2')
vggish.add(pool2)

conv3_1= layers.Conv2D(filters=256, kernel_size=(3,3), strides=1, padding='same', activation='relu', name='conv3_1')
vggish.add(conv3_1)
conv3_1.set_weights([model_wts['vggish/conv3/conv3_1/weights:0'], model_wts['vggish/conv3/conv3_1/biases:0']])
conv3_1.trainable = False

conv3_2= layers.Conv2D(filters=256, kernel_size=(3,3), strides=1, padding='same', activation='relu', name='conv3_2')
vggish.add(conv3_2)
conv3_2.set_weights([model_wts['vggish/conv3/conv3_2/weights:0'], model_wts['vggish/conv3/conv3_2/biases:0']])
conv3_2.trainable = False

pool3  = layers.MaxPooling2D(pool_size=(2,2), strides=2, name='pool3')
vggish.add(pool3)

conv4_1= layers.Conv2D(filters=512, kernel_size=(3,3), strides=1, padding='same', activation='relu', name='conv4_1')
vggish.add(conv4_1)
conv4_1.set_weights([model_wts['vggish/conv4/conv4_1/weights:0'], model_wts['vggish/conv4/conv4_1/biases:0']])
conv4_1.trainable = False

conv4_2= layers.Conv2D(filters=512, kernel_size=(3,3), strides=1, padding='same', activation='relu', name='conv4_2')
vggish.add(conv4_2)
conv4_2.set_weights([model_wts['vggish/conv4/conv4_2/weights:0'], model_wts['vggish/conv4/conv4_2/biases:0']])
conv4_2.trainable = False

pool4  = layers.MaxPooling2D(pool_size=(2,2), strides=2, name='pool4')
vggish.add(pool4)

vggish.add(layers.Flatten())

fc1_1  = layers.Dense(4096, activation='relu', name='fc1_1')
vggish.add(fc1_1)
fc1_1.set_weights([model_wts['vggish/fc1/fc1_1/weights:0'], model_wts['vggish/fc1/fc1_1/biases:0']])
fc1_1.trainable = False

fc1_2  = layers.Dense(4096, activation='relu', name='fc1_2')
vggish.add(fc1_2)
fc1_2.set_weights([model_wts['vggish/fc1/fc1_2/weights:0'], model_wts['vggish/fc1/fc1_2/biases:0']])
fc1_2.trainable = False

# Note: first trainable layer in network
fc2 = layers.Dense(vggish_params.EMBEDDING_SIZE, activation='relu', name='fc2')
vggish.add(fc2)
fc2.set_weights([model_wts['vggish/fc2/weights:0'], model_wts['vggish/fc2/biases:0']])
fc2.trainable = True

vggish.add(layers.Dropout(0.25))

vggish.add(layers.Dense(128, activation='relu'))

vggish.add(layers.Dropout(0.25))

vggish.add(layers.Dense(64, activation='relu'))

vggish.add(layers.Dropout(0.25))

vggish.add(layers.Dense(64, activation='relu'))

vggish.add(layers.Dropout(0.25))

vggish.add(layers.Dense(1, activation='relu'))

vggish.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1 (Conv2D)               (None, 96, 64, 64)        640       
_________________________________________________________________
pool1 (MaxPooling2D)         (None, 48, 32, 64)        0         
_________________________________________________________________
conv2 (Conv2D)               (None, 48, 32, 128)       73856     
_________________________________________________________________
pool2 (MaxPooling2D)         (None, 24, 16, 128)       0         
_________________________________________________________________
conv3_1 (Conv2D)             (None, 24, 16, 256)       295168    
_________________________________________________________________
conv3_2 (Conv2D)             (None, 24, 16, 256)       590080    
_________________________________________________________________
pool3 (MaxPooling2D)         (None, 12, 8, 256)       

In [40]:
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=100)

vggish.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001, clipnorm=1.0),
    loss='mse',
    metrics='mse'
)

In [None]:
EPOCHS = 10000
history = vggish.fit(
    train_dataset_b, 
    validation_data=val_dataset_b,  
    epochs=EPOCHS,
    callbacks=[callback]
)

Epoch 1/10000
Epoch 2/10000
Epoch 3/10000
Epoch 4/10000
Epoch 5/10000
Epoch 6/10000
Epoch 7/10000
Epoch 8/10000
Epoch 9/10000
Epoch 10/10000
Epoch 11/10000
Epoch 12/10000
Epoch 13/10000
Epoch 14/10000
Epoch 15/10000
Epoch 16/10000
Epoch 17/10000
Epoch 18/10000
Epoch 19/10000
Epoch 20/10000
Epoch 21/10000
Epoch 22/10000
Epoch 23/10000
Epoch 24/10000
Epoch 25/10000
Epoch 26/10000
Epoch 27/10000
Epoch 28/10000
Epoch 29/10000
Epoch 30/10000
Epoch 31/10000
Epoch 32/10000
Epoch 33/10000
Epoch 34/10000
Epoch 35/10000
Epoch 36/10000
Epoch 37/10000
Epoch 38/10000
Epoch 39/10000
Epoch 40/10000
Epoch 41/10000
Epoch 42/10000
Epoch 43/10000
Epoch 44/10000
Epoch 45/10000
Epoch 46/10000
Epoch 47/10000
Epoch 48/10000
Epoch 49/10000
Epoch 50/10000
Epoch 51/10000
Epoch 52/10000
Epoch 53/10000
Epoch 54/10000
Epoch 55/10000
Epoch 56/10000
Epoch 57/10000
Epoch 58/10000
Epoch 59/10000
Epoch 60/10000
Epoch 61/10000
Epoch 62/10000
Epoch 63/10000
Epoch 64/10000
Epoch 65/10000
Epoch 66/10000
Epoch 67/10000
Epoc

Epoch 118/10000
Epoch 119/10000
Epoch 120/10000
 3/23 [==>...........................] - ETA: 1s - loss: 58.1916 - mse: 58.1916

In [None]:
metrics = history.history
plt.plot(history.epoch, np.sqrt(metrics['mse']), np.sqrt(metrics['val_mse']))
plt.legend(['rmse', 'val_rmse'])
plt.show()

In [None]:
np.sqrt(vggish.evaluate(train_dataset.batch(BATCH_SIZE)))

In [None]:
np.sqrt(vggish.evaluate(test_dataset.batch(BATCH_SIZE)))