In [None]:
# import libraries
!pip install tensorflow
# tensorflow_io 0.28 is compatible with TensorFlow 2.11
!pip install tensorflow_io

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
# import packages
import os

from IPython import display
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_io as tfio

# **Load the YAMNet model**

In [None]:
# Fetch the pretrained YAMNet model from TensorFlow Hub
yamnet_model_handle = 'https://tfhub.dev/google/yamnet/1'
yamnet_model = hub.load(handle=yamnet_model_handle)

In [None]:
# Read the 'display_name' column of YAMNet's class-map CSV and preview the
# first 20 classes the model can classify audio into.
class_map_path = yamnet_model.class_map_path().numpy().decode('utf-8')
class_map = pd.read_csv(class_map_path)
class_names = class_map['display_name'].tolist()

for name in class_names[:20]:
    print(name)
print('...')

Speech
Child speech, kid speaking
Conversation
Narration, monologue
Babbling
Speech synthesizer
Shout
Bellow
Whoop
Yell
Children shouting
Screaming
Whispering
Laughter
Baby laughter
Giggle
Snicker
Belly laugh
Chuckle, chortle
Crying, sobbing
...


# **Data preprocessing**

In [None]:
# Helper that reads a WAV file and brings it to the 16 kHz mono format.
@tf.function
def load_wav_16k_mono(filename):
    """Read a WAV file and return it as a 16 kHz single-channel float tensor.

    Args:
        filename: Path of the WAV file to read.

    Returns:
        The decoded audio with the channel axis squeezed out, resampled to 16 kHz.
    """
    raw_bytes = tf.io.read_file(filename)
    # desired_channels=1 asks the decoder for single-channel audio.
    audio, source_rate = tf.audio.decode_wav(raw_bytes, desired_channels=1)
    mono_audio = tf.squeeze(audio, axis=-1)
    # The resampler receives the input rate as an int64 tensor.
    source_rate = tf.cast(source_rate, dtype=tf.int64)
    return tfio.audio.resample(mono_audio, rate_in=source_rate, rate_out=16000)

In [None]:
# Fold column for the train/validation split: 400 recordings are divided 8:2
# (320 entries with fold 1 for training, 80 entries with fold 5 for validation).
# Only a subset of the wav files is used because extract_embedding emits several
# embedding frames per file, which multiplies the number of training examples.
import random

FOLD_SEED = 42  # fixed seed so the split is identical on every run
all_folds = [1] * 320 + [5] * 80
# A dedicated Random instance makes the shuffle reproducible without touching
# the global random state.
random.Random(FOLD_SEED).shuffle(all_folds)

In [None]:
# Collect the wav paths of the control (non-patient) and dementia (patient) datasets.
import glob

# glob returns matches in a filesystem-dependent order; sorting makes the
# index-based selections of these lists pick the same files on every run.
control_filenames = sorted(glob.glob("/content/drive/MyDrive/Colab Notebooks/dementia/English/Pitt/train/control/*.wav"))
control_labels = [0] * len(control_filenames[0:200])  # label 0 = control
demantia_filenames = sorted(glob.glob("/content/drive/MyDrive/Colab Notebooks/dementia/English/Pitt/train/dementia/*.wav"))
dementia_labels = [1] * len(demantia_filenames[0:200])  # label 1 = dementia

In [None]:
# Combine the two datasets into one list of files with matching labels.
# NOTE(review): the dementia files are taken from [200:400] while dementia_labels
# is sized from [0:200]; the labels are therefore derived from the files actually
# selected here, so both lists always have the same length.
selected_control = control_filenames[0:200]
selected_dementia = demantia_filenames[200:400]
all_filenames = selected_control + selected_dementia
all_labels = [0] * len(selected_control) + [1] * len(selected_dementia)

In [None]:
# Gather paths, labels and folds in one DataFrame so the training data can be inspected
training_columns = {
    'filename': all_filenames,
    'label': all_labels,
    'fold': all_folds,
}
df = pd.DataFrame(training_columns)
print(df)

                                              filename  label  fold
0    /content/drive/MyDrive/Colab Notebooks/dementi...      0     5
1    /content/drive/MyDrive/Colab Notebooks/dementi...      0     1
2    /content/drive/MyDrive/Colab Notebooks/dementi...      0     1
3    /content/drive/MyDrive/Colab Notebooks/dementi...      0     1
4    /content/drive/MyDrive/Colab Notebooks/dementi...      0     1
..                                                 ...    ...   ...
395  /content/drive/MyDrive/Colab Notebooks/dementi...      1     1
396  /content/drive/MyDrive/Colab Notebooks/dementi...      1     1
397  /content/drive/MyDrive/Colab Notebooks/dementi...      1     1
398  /content/drive/MyDrive/Colab Notebooks/dementi...      1     1
399  /content/drive/MyDrive/Colab Notebooks/dementi...      1     1

[400 rows x 3 columns]


In [None]:
# Shuffle the DataFrame rows so the files are no longer ordered by class.
# random_state pins the permutation so repeated runs produce the same order.
df = df.sample(frac=1, random_state=42)

In [None]:
# Wrap the file paths, labels and folds in a tf.data.Dataset object
filenames = df['filename']
labels = df['label']
folds = df['fold']

dataset_columns = (filenames, labels, folds)
main_ds = tf.data.Dataset.from_tensor_slices(dataset_columns)
main_ds.element_spec

(TensorSpec(shape=(), dtype=tf.string, name=None),
 TensorSpec(shape=(), dtype=tf.int64, name=None),
 TensorSpec(shape=(), dtype=tf.int64, name=None))

In [None]:
def load_wav_for_map(filename, label, fold):
    """Dataset.map adapter: load the wav via load_wav_16k_mono, pass label and fold through."""
    waveform = load_wav_16k_mono(filename)
    return waveform, label, fold

In [None]:
# Replace each file path in the dataset with its loaded, resampled waveform
main_ds = main_ds.map(map_func=load_wav_for_map)
main_ds.element_spec

(TensorSpec(shape=<unknown>, dtype=tf.float32, name=None),
 TensorSpec(shape=(), dtype=tf.int64, name=None),
 TensorSpec(shape=(), dtype=tf.int64, name=None))

In [None]:
# YAMNet is used as a feature extractor that turns an audio waveform into embeddings.
def extract_embedding(wav_data, label, fold, max_frames=20):
    """Run YAMNet on the wav data and return its leading embedding frames.

    Args:
        wav_data: Waveform tensor passed to yamnet_model.
        label: Label of the file; repeated once per returned frame.
        fold: Fold id of the file; repeated once per returned frame.
        max_frames: Upper bound on the number of embedding frames kept per
            file. Defaults to 20, the value that used to be hard-coded.

    Returns:
        A tuple (embeddings, labels, folds) whose first dimensions all equal
        the number of frames kept.
    """
    scores, embeddings, spectrogram = yamnet_model(wav_data)
    # Slice once and reuse the result; files with fewer frames than the cap
    # simply contribute fewer rows.
    kept_embeddings = embeddings[0:max_frames]
    num_embeddings = tf.shape(kept_embeddings)[0]
    return (kept_embeddings,
            tf.repeat(label, num_embeddings),
            tf.repeat(fold, num_embeddings))

In [None]:
# Run YAMNet on every waveform, then unbatch so each element is a single embedding frame
main_ds = main_ds.map(map_func=extract_embedding)
main_ds = main_ds.unbatch()
main_ds.element_spec

(TensorSpec(shape=(1024,), dtype=tf.float32, name=None),
 TensorSpec(shape=(), dtype=tf.int64, name=None),
 TensorSpec(shape=(), dtype=tf.int64, name=None))

In [None]:
# Split the embeddings into a training and a validation set by fold id
def _is_training_fold(embedding, label, fold):
    """Keep elements whose fold id is below 4."""
    return fold < 4


def _is_validation_fold(embedding, label, fold):
    """Keep elements whose fold id equals 5."""
    return fold == 5


def remove_fold_column(embedding, label, fold):
    """Drop the fold id, which is not needed once the split is done."""
    return (embedding, label)


cached_ds = main_ds.cache()

# Same pipeline for both sets; only the training set is shuffled.
train_ds = (
    cached_ds
    .filter(_is_training_fold)
    .map(remove_fold_column)
    .cache()
    .shuffle(1000)
    .batch(32)
    .prefetch(tf.data.AUTOTUNE)
)
val_ds = (
    cached_ds
    .filter(_is_validation_fold)
    .map(remove_fold_column)
    .cache()
    .batch(32)
    .prefetch(tf.data.AUTOTUNE)
)

## Create the model


In [None]:
# Build the classifier that sits on top of the 1024-dimensional YAMNet embeddings
my_model = tf.keras.Sequential([
    # shape must be a tuple: `(1024)` is just the int 1024, whereas `(1024,)`
    # is the one-element tuple describing a 1024-long feature vector.
    tf.keras.layers.Input(shape=(1024,), dtype=tf.float32,
                          name='input_embedding'),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(2)  # two output logits, one per label (0 and 1)
], name='my_model')

my_model.summary()

Model: "my_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_14 (Dense)            (None, 512)               524800    
                                                                 
 dense_15 (Dense)            (None, 2)                 1026      
                                                                 
Total params: 525,826
Trainable params: 525,826
Non-trainable params: 0
_________________________________________________________________


In [None]:
# The last Dense layer has no activation, so the loss works on raw logits.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
my_model.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])

# Stop once val_loss has not improved for 10 epochs and restore the best weights.
callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

In [None]:
# Train the classifier; early stopping may end training before 50 epochs.
# batch_size is deliberately not passed: train_ds and val_ds are tf.data
# Datasets that are already batched (32 per batch), and Keras expects
# batch_size to be left unset for dataset inputs.
history = my_model.fit(train_ds,
                       epochs=50,
                       validation_data=val_ds,
                       callbacks=[callback])  # callbacks is documented as a list

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50


# Test the model using the ADReSS 2021 data

In [None]:
# Build the test file list and labels the same way as for the training data
import glob
control_test_filenames = glob.glob('/content/drive/MyDrive/Colab Notebooks/dementia/English/ADReSS-2021/audio/control/*.wav')
dementia_test_filenames = glob.glob('/content/drive/MyDrive/Colab Notebooks/dementia/English/ADReSS-2021/audio/dementia/*.wav')

# Label 0 for every control file, label 1 for every dementia file.
control_test_labels = [0] * len(control_test_filenames)
dementia_test_labels = [1] * len(dementia_test_filenames)

all_test_filenames = [*control_test_filenames, *dementia_test_filenames]
all_test_labels = [*control_test_labels, *dementia_test_labels]

In [None]:
# One fold id per test file. The count is derived from the file list instead of
# being hard-coded, so the lists passed to from_tensor_slices stay the same length.
all_test_folds = [6] * len(all_test_filenames)

In [None]:
# Bundle test paths, labels and fold ids into a tf.data.Dataset
test_columns = (all_test_filenames, all_test_labels, all_test_folds)
test_ds = tf.data.Dataset.from_tensor_slices(test_columns)

In [None]:
# Load and resample every test wav file
test_ds = test_ds.map(map_func=load_wav_for_map)
test_ds.element_spec

(TensorSpec(shape=<unknown>, dtype=tf.float32, name=None),
 TensorSpec(shape=(), dtype=tf.int32, name=None),
 TensorSpec(shape=(), dtype=tf.int32, name=None))

In [None]:
# Extract YAMNet embeddings for the test set, one dataset element per frame
test_ds = test_ds.map(map_func=extract_embedding)
test_ds = test_ds.unbatch()
test_ds.element_spec

(TensorSpec(shape=(1024,), dtype=tf.float32, name=None),
 TensorSpec(shape=(), dtype=tf.int32, name=None),
 TensorSpec(shape=(), dtype=tf.int32, name=None))

In [None]:
def remove_fold_column(embedding, label, fold):
    """Return only (embedding, label), discarding the fold id."""
    return (embedding, label)

In [None]:
# Drop the fold id from every test element
test_ds = test_ds.map(map_func=remove_fold_column)

In [None]:
# Cache, batch (32 per batch) and prefetch the test set
test_ds = test_ds.cache()
test_ds = test_ds.batch(32)
test_ds = test_ds.prefetch(tf.data.AUTOTUNE)

In [None]:
# Measure the loss and accuracy of the model on the test set
evaluation = my_model.evaluate(test_ds, verbose=1)
loss, accuracy = evaluation
print(loss, accuracy, sep='\n')

1.1386741399765015
0.5


In [None]:
# Write the model to an .h5 file
model_save_path = '/content/drive/MyDrive/Colab Notebooks/dementia/tensorflow_model_0510.h5'
my_model.save(model_save_path)

In [None]:
# Load the model from the file written by the save cell.
# The path previously pointed at 'tensorflow_model.h5', which does not match the
# saved 'tensorflow_model_0510.h5'; on a fresh run that file is missing or stale.
# NOTE(review): revert if loading a different checkpoint was intentional.
my_model = tf.keras.models.load_model('/content/drive/MyDrive/Colab Notebooks/dementia/tensorflow_model_0510.h5')