In [12]:
from __future__ import print_function
import os
import tensorflow as tf
import numpy as np
import sklearn
from sklearn.model_selection import GridSearchCV
from sklearn.utils import class_weight
from sklearn.metrics import confusion_matrix, classification_report
# from plot_metrics import plot_accuracy, plot_loss, plot_roc_curve

from keras.models import Model
from keras.layers import Input, Conv1D, BatchNormalization, Activation, \
                         Dropout, MaxPooling1D, GlobalAveragePooling1D, \
                         GlobalMaxPooling1D, Lambda, Concatenate, Dense, regularizers
from keras.utils import np_utils
from keras import backend as K
from keras import optimizers, activations
from keras.optimizers import Adam
from keras.wrappers.scikit_learn import KerasClassifier
from keras.models import load_model
np.random.seed(15)  # seeds NumPy's global RNG only; no TensorFlow seed is set in this cell

# Expose only CUDA device index 0 to this process. This is set before the
# TensorFlow session below is created.
os.environ["CUDA_VISIBLE_DEVICES"]="0"

# TF1-style session configuration. allow_growth asks TensorFlow to grow its
# GPU memory allocation as needed instead of reserving it all up front
# (see the TensorFlow ConfigProto / GPUOptions documentation).
config = tf.ConfigProto()
config.gpu_options.allow_growth=True
sess = tf.Session(config=config)
# Register the configured session as the one the Keras backend uses.
K.set_session(sess)
# NOTE(review): _get_available_gpus is an underscore-prefixed (private) backend
# helper; it may not exist in other Keras versions.
print("Using GPU: ", K.tensorflow_backend._get_available_gpus())

# NOTE(review): 'tf' presumably selects TensorFlow-style (channels-last)
# dimension ordering -- confirm against the installed Keras version.
K.set_image_dim_ordering('tf')

Using GPU:  ['/job:localhost/replica:0/task:0/device:GPU:0']


In [7]:

def retrieve_file(file_name, path='../feedforward/'):
    """
    Load the array stored under the key 'arr_0' of an .npz archive.

    Args:
        file_name: Name of the .npz file, relative to `path`.
        path: Directory containing the archive. Defaults to '../feedforward/',
              the directory this function previously hard-coded.

    Returns:
        The numpy array saved under 'arr_0' in the archive.
    """
    outfile = os.path.join(path, file_name)
    # np.load on an .npz returns a lazily-read NpzFile that keeps the file
    # handle open; the context manager closes it once the array is in memory.
    with np.load(outfile) as archive:
        return archive['arr_0']

def retrieve_file_no_aug(file_name, path='../data/raw_data/processed_topic/'):
    """
    Load the array stored under the key 'arr_0' of an .npz archive.

    Args:
        file_name: Name of the .npz file, relative to `path`.
        path: Directory containing the archive. Defaults to
              '../data/raw_data/processed_topic/', the directory this
              function previously hard-coded.

    Returns:
        The numpy array saved under 'arr_0' in the archive.
    """
    outfile = os.path.join(path, file_name)
    # np.load on an .npz returns a lazily-read NpzFile that keeps the file
    # handle open; the context manager closes it once the array is in memory.
    with np.load(outfile) as archive:
        return archive['arr_0']

def preprocess(X, max_len, num_bins):
    """
    Stack variable-length samples into one zero-padded 3D array.

    Each sample is copied into a zero-initialised slot of `max_len` timesteps:
    shorter samples keep trailing zeros (padding in the time dimension only),
    longer samples are trimmed to their first `max_len` timesteps.

    Args:
        X: sequence of 2D arrays (a numpy object array or a plain list), each of
           shape (timesteps, num_bins) where timesteps may differ per sample
        max_len: Length up to which each sample is padded with 0s / trimmed
        num_bins: Constant size of the second (mel) dimension

    Returns:
        X_proc: single numpy array of shape (len(X), max_len, num_bins)
    """
    # len() instead of X.shape[0] so plain Python lists are accepted as well.
    X_proc = np.zeros([len(X), max_len, num_bins])
    for idx, x in enumerate(X):
        x = np.asarray(x)
        n_steps = min(x.shape[0], max_len)
        # Write straight into the preallocated zeros: rows past n_steps stay 0,
        # which avoids building a padded temporary copy of every sample.
        X_proc[idx, :n_steps, :] = x[:n_steps]
    return X_proc

In [8]:
def standard_confusion_matrix(y_test, y_test_pred):
    """
    Make confusion matrix with format:
                  -----------
                  | TP | FP |
                  -----------
                  | FN | TN |
                  -----------
    The sklearn matrix ([[TN, FP], [FN, TP]]) is printed first, then
    rearranged into the layout above.

    Parameters
    ----------
    y_test : ndarray - 1D
        True binary labels.
    y_test_pred : ndarray - 1D
        Predicted binary labels.

    Returns
    -------
    ndarray - 2D
        [[TP, FP], [FN, TN]]
    """
    cnf_matrix = confusion_matrix(y_test, y_test_pred)
    if cnf_matrix.shape == (1, 1):
        # Only one class occurs in both inputs, so sklearn collapsed the matrix
        # to 1x1 and the 2x2 unpacking below would fail. Recompute with the
        # binary label set pinned (labels are assumed to be encoded as 0/1).
        cnf_matrix = confusion_matrix(y_test, y_test_pred, labels=[0, 1])
    print("\nConfusion Matrix: (sklearn)\n")
    print(cnf_matrix)
    [[tn, fp], [fn, tp]] = cnf_matrix
    return np.array([[tp, fp], [fn, tn]])


def model_performance(model, X_train, X_test, y_train, y_test):
    """
    Print and return evaluation metrics for a trained network.

    Predicted classes are the argmax over the last axis of model.predict().
    The confusion matrix, classification report and F1-score are computed on
    the test split only; y_train is accepted but not used.

    Returns:
        (y_train_pred, y_test_pred, conf_matrix, clf_report)
    """
    # Class predictions: test split first, then train split.
    test_scores = model.predict(X_test)
    pred_test = np.argmax(test_scores, axis=-1)
    train_scores = model.predict(X_train)
    pred_train = np.argmax(train_scores, axis=-1)

    # Confusion matrix on the test split
    matrix = standard_confusion_matrix(y_test, pred_test)
    print("\nConfusion Matrix:\n")
    print(matrix)

    # Per-class precision / recall / F1 table
    class_names = ['non-depressed', 'depressed']
    report = classification_report(y_test, pred_test, target_names=class_names)
    print("\nClassification Report (sklearn):\n")
    print(report)

    f1_value = sklearn.metrics.f1_score(y_test, pred_test)
    print("\nF1-Score: {}".format(f1_value))
    return pred_train, pred_test, matrix, report

In [9]:
# Samples and labels read through retrieve_file_no_aug (train, then test)
X_train_no_aug, y_train_no_aug = (
    retrieve_file_no_aug(name)
    for name in ('train_samples.npz', 'train_labels.npz'))
X_test, y_test = (
    retrieve_file_no_aug(name)
    for name in ('test_samples.npz', 'test_labels.npz'))

# Additional training set ("_200" files) read through retrieve_file
X_train3, y_train3 = (
    retrieve_file(name)
    for name in ('train_samples_200.npz', 'train_labels_200.npz'))

# Final training set = both training sets stacked along the sample axis
X_train = np.concatenate([X_train_no_aug, X_train3], axis=0)
y_train = np.concatenate([y_train_no_aug, y_train3], axis=0)

NB_CLASSES = 2
# Longest time dimension found among the training samples
MAX_LEN = np.max([len(sample) for sample in X_train])
print("Maximum length of training X: {} (timesteps)".format(MAX_LEN))
# Size of the second (mel) dimension, taken from the first training sample
NUM_BINS = X_train[0].shape[1]
print("Number of mel bins: ", NUM_BINS)

# Pad / trim every sample of both splits to MAX_LEN timesteps
X_train, X_test = (
    preprocess(split, max_len=MAX_LEN, num_bins=NUM_BINS)
    for split in (X_train, X_test))
print(X_train.shape, X_test.shape)

# One-hot encode the class labels
Y_train, Y_test = (
    np_utils.to_categorical(labels, NB_CLASSES)
    for labels in (y_train, y_test))
print(Y_train.shape, Y_test.shape)

Maximum length of training X: 5499 (timesteps)
Number of mel bins:  128
(307, 5499, 128) (33, 5499, 128)
(307, 2) (33, 2)


In [11]:
from keras.optimizers import Adam

# Restore the saved CNN classifier and compile it with Adam, categorical
# cross-entropy loss and an accuracy metric.
cnn_model = load_model('cnn_augm_run500_f10.666666666667.h5')
cnn_model.compile(
    optimizer=Adam(lr=0.000625),
    loss='categorical_crossentropy',
    metrics=['accuracy'])

# Print the test-split metrics and keep the predictions for both splits.
y_train_pred, y_test_pred, conf_matrix, clf_report = model_performance(
    model=cnn_model,
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test)



Confusion Matrix: (sklearn)

[[19  2]
 [ 5  7]]

Confusion Matrix:

[[ 7  2]
 [ 5 19]]

Classification Report (sklearn):

               precision    recall  f1-score   support

non-depressed       0.79      0.90      0.84        21
    depressed       0.78      0.58      0.67        12

  avg / total       0.79      0.79      0.78        33


F1-Score: 0.666666666667


In [13]:
# Restore the saved featurizer network and compile it with the same settings
# used for the classifier (Adam, categorical cross-entropy, accuracy).
cnn_featurizer = load_model("cnn_featurizer_augm_run500_0.666666666667f1.h5")
cnn_featurizer.compile(
    optimizer=Adam(lr=0.000625),
    loss='categorical_crossentropy',
    metrics=['accuracy'])
# Uncomment to print the layer stack:
# cnn_featurizer.summary()



In [14]:
# Featurizer outputs for the training split
train_audio_features = cnn_featurizer.predict(X_train)
print(np.shape(train_audio_features))
# Featurizer outputs for the test split
test_audio_features = cnn_featurizer.predict(X_test)
print(np.shape(test_audio_features))

(307, 64)
(33, 64)


In [15]:
# Write both feature matrices to plain-text files in the working directory.
np.savetxt("train_audio_features_augm200.txt", train_audio_features)
np.savetxt("test_audio_features_augm200.txt", test_audio_features)

In [17]:
# Round-trip check: element-wise comparison of the saved training features,
# read back from disk, against the in-memory array.
np.equal(np.loadtxt("train_audio_features_augm200.txt"), train_audio_features)

array([[ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       ..., 
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True]], dtype=bool)