In [30]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [31]:
import gc
import nltk
import math
from smart_open import open
from nltk.corpus import stopwords
import sklearn
from sklearn import preprocessing
from sklearn.metrics import classification_report
from keras.layers import Dropout
from matplotlib import pyplot as plt
nltk.download('stopwords')
import numpy as np
import pandas as pd
from gensim.models.keyedvectors import KeyedVectors

import tensorflow as tf
from tensorflow import keras
from keras.layers import LSTM, Dense, Bidirectional
from keras import layers
from keras.utils import plot_model
from keras.models import Model
from keras.layers import Input
from keras.layers import concatenate


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [32]:
#Text + Audio 
class Highway(layers.Layer):

  def __init__(self):
    super(Highway, self).__init__()

  def build(self, input_shape):
    n_sentences = input_shape[1]
    n_features = input_shape[2]
    carry_bias = keras.initializers.Constant(value=-1.0)
    random_dist = keras.initializers.RandomNormal(mean=0.0, stddev=0.1, seed=42)

    carry_bias_2 = keras.initializers.Constant(value= 0.1)

    # Create weight matrices and bias vector. Transform Gate (transforms input feature) and bias.
    self.W_T = self.add_weight(shape=(n_features, n_features),initializer = random_dist,trainable=True)
    self.b_T = self.add_weight(shape=( n_sentences, n_features),initializer = carry_bias, trainable=True)
   
    #Update gate and bias
    self.W = self.add_weight(shape=( n_features, n_features),initializer = random_dist, trainable=True)
    self.b = self.add_weight(shape=( n_sentences, n_features),initializer = carry_bias_2, trainable=True)
   
  def call(self, inputs):
    x = inputs
    tensor_t = tf.sigmoid(tf.matmul(x, self.W_T) + self.b_T, name="transform_gate")
    tensor_h = tf.nn.relu(tf.matmul(x, self.W) + self.b, name="activation")
    #tensor C determines how much of the previous state should be carried over to the new state
    tensor_c = tf.subtract(1.0, tensor_t, name="carry_gate")
    
    return tf.add(tf.multiply(tensor_h, tensor_t), tf.multiply(x, tensor_c), "y")

class text_audio:
  #Define an input layer with 250 timesteps and 74 features
  input1 = Input(shape=(250,74), name = 'Audio_input')
  highway1 = Highway()(input1)
  #Applies another Highway layer to the output of the previous layer (highway1), 
  #in order to allow network to learn complex nature of the audio input data
  highway5 = Highway()(highway1)
  #Apply a third Highway layer
  highway6 = Highway()(highway5)
  #output layer with 74 units, same as number of input features
  dense_audio = Dense(74)(highway6)
  #print("shape of dense_audio ", dense_audio.shape)
  input3 = Input(shape = (250,5100), name = 'Text_input')
  # Dense layer with 1000 output units to the input layer
  dense4 = Dense(1000)(input3)
  dense5 = Dense(500)(dense4)
  #reduces the number of input features and extracts more important features
  dense6 = Dense(250)(dense5)
  #final Dense layer with 74 output units
  dense_text = Dense(74)(dense6)
  #print("shape of dense_text ", dense_text.shape)
  # merge input models
  merge_tensor = concatenate([dense_audio,dense_text], axis = 1)
  #128 is number of memory cells in the layer which is a hyperparameter
  #20% of the LSTM units will be randomly dropped out during training.
  #recurrent_dropout=0.2 (20% of the connections between memory cells in
  #the LSTM layer will be randomly dropped out during training)
  lstm = LSTM(128, dropout = 0.2, recurrent_dropout = 0.2)(merge_tensor)
  output = Dense(1, activation='sigmoid')(lstm)
  model = Model(inputs=[input1, input3], outputs=output)

  print(model.summary())
  
  optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)

  def run_model(self):
    self.model.compile(optimizer=self.optimizer, loss='binary_crossentropy')

    return self.model

Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 Audio_input (InputLayer)       [(None, 250, 74)]    0           []                               
                                                                                                  
 Text_input (InputLayer)        [(None, 250, 5100)]  0           []                               
                                                                                                  
 highway_6 (Highway)            (None, 250, 74)      47952       ['Audio_input[0][0]']            
                                                                                                  
 dense_13 (Dense)               (None, 250, 1000)    5101000     ['Text_input[0][0]']             
                                                                                            

In [None]:
dev_location = "dev_data"
test_location = "test_data"
train_location = "train_data"

devData = np.array(pd.read_csv('/content/drive/MyDrive/diacwoz/dev_split_Depression_AVEC2017.csv',delimiter=',',encoding='utf-8'))[:, 0:2]
testData = np.array(pd.read_csv('/content/drive/MyDrive/diacwoz/full_test_split.csv',delimiter=',',encoding='utf-8'))[:, 0:2]
trainData = np.array(pd.read_csv('/content/drive/MyDrive/diacwoz/train_split_Depression_AVEC2017.csv',delimiter=',',encoding='utf-8'))[:, 0:2]

dataset = np.concatenate((devData, np.concatenate((testData, trainData))))   
max_num_words = 17
pretrained_model = KeyedVectors.load_word2vec_format('/content/drive/MyDrive/GoogleNews-vectors-negative300.bin', binary=True)
stop_words = set(stopwords.words('english'))

def getData(patientID, data_loc):
  retData = [int(patientID)]
  text_data = getTextData(patientID, data_loc)
  audio_data = audioData(patientID, data_loc, text_data)
  return text_data, audio_data

def getTextData(patientID, data_loc):
  fileName = "/content/drive/MyDrive/diacwoz/"+ str(data_loc) + "/" + str(int(patientID)) + "_TRANSCRIPT.csv"
  #print("filename is ", fileName)
  arr_file = np.array(pd.read_csv(fileName, delimiter='\t',encoding='utf-8', engine='python'))

  for i in range(len(arr_file)):
    if(arr_file[i][2] != 'Participant'):
      np.delete(arr_file, i)
      i-=1

  # Remove Speaker Columnn
  arr_file = np.delete(arr_file, 2, 1)
  
  # Convert text into word vectors. 300 is dimension of
  # the word vectors being used
  word_vecs = np.zeros((1, max_num_words*300))
  for i in range(len(arr_file)):
    sentence = arr_file[i][2]
    word_vec = wordToVec(sentence)
    word_vecs = np.concatenate((word_vecs, word_vec), axis = 0)
  word_vecs = np.delete(word_vecs, 0, 0)  

  # Delete Sentences and Replace With W2Vs
  arr_file = np.delete(arr_file, 2, 1)
  arr_file = np.concatenate((arr_file, word_vecs), axis = 1)
  return arr_file

def dataExistence(patientID, data_type):
  for id in data_type:
    if(patientID == id[0]):
      return True
  return False

#Remove stopwords (and, is, of etc.) which reduces text data dimensionality
def remove_StopWords(sentence):
    filtered_sentence = [] 
    for w in sentence: 
        if w not in stop_words: 
            filtered_sentence.append(w)
    return filtered_sentence

def wordToVec(sentence):
  global max_num_words, stop_words, pretrained_model
  sentence = str(sentence).split(" ")
  sentence = remove_StopWords(sentence)
  index_word = 0
  wordMatrix = np.zeros(max_num_words*300)
  for j in range(min(max_num_words, len(sentence))):
    try:
      word = sentence[j]
      if(word[0] == '<'):
        if(word.find('>')!=-1):
          word = word[1:-1]
        else:
          word = word[1:]
      else:
        if(word.find('>')!=-1):
          word = word[0:-1]
      ss = np.array(pretrained_model[word])
      wordMatrix[index_word*300:(index_word+1)*300] = ss
      index_word+=1
    except Exception as e:
      continue
  wordMatrix = np.array(wordMatrix.reshape(1,-1))
  return wordMatrix

def audioDataHelper(X):
    for i in range(X.shape[0]):
        if(X[i,1] == 0):
            X[i,0] = 0
            for j in range(7):
                X[i,j+1] = 0
    X = np.array(X)
    return X
    
def audioData(patientID, location, textD):
  fileName = "/content/drive/MyDrive/diacwoz/"+ str(location) + "/" + str(int(patientID)) + "_COVAREP.csv"
  data = pd.read_csv(fileName,header = None)
  data = data.iloc[:,:].values
  data = audioDataHelper(data)
  # print("Audio Raw Data:" + str(data.shape))
  sentenceDatas = []
  for sentence in textD:
    sentenceStartime = sentence[0]
    sentenceEndTime = sentence[1]
    startIndex = math.floor(sentenceStartime/0.01)
    endIndex = math.ceil(sentenceEndTime/0.01)
    sentenceData = data[startIndex: endIndex]
    sentenceData = np.average(sentenceData, axis = 0)
    #reshape to 1D Array with one row and variable number of columns
    sentenceData = np.array(sentenceData.reshape(1, -1))
    sentenceDatas.append(sentenceData)
  
  sentenceDatas = np.array(sentenceDatas)
  sentenceDatas = np.reshape(sentenceDatas, (textD.shape[0],-1))
  return sentenceDatas

y_train = []
y_test = []
audio_train = []
text_train = []
audio_test = []
text_test = []

for datapoint in dataset:
  if(dataExistence(datapoint[0], devData)):
    text,audio = getData(datapoint[0], dev_location)
    audio_train.append(audio)
    text_train.append(text)
    y_train.append(datapoint[1])
  elif(dataExistence(datapoint[0], testData)):
    # Data Point in Test Set
    text,audio = getData(datapoint[0], test_location)
    audio_test.append(audio)
    text_test.append(text)
    y_test.append(datapoint[1])
  elif(dataExistence(datapoint[0], trainData)):
    text,audio = getData(datapoint[0], train_location)
    audio_train.append(audio)
    text_train.append(text)
    y_train.append(datapoint[1])

def refactor(arr, size):
  arrsize = arr.shape[0]
  temp = np.zeros((size, arr.shape[1]))
  for i in range(min(len(arr), size)):
    temp[i] = arr[i]
  return temp

numberOfSentences = 250

devData = []
trainData = []
testData = []
gc.collect()

for i in range(len(audio_train)):
  audio_train[i] = refactor(audio_train[i], numberOfSentences)
  text_train[i] = refactor(text_train[i], numberOfSentences)

for i in range(len(audio_test)):
  audio_test[i] = refactor(audio_train[i], numberOfSentences)
  text_test[i] = refactor(text_train[i], numberOfSentences)

audio_test = np.array(audio_test)
text_test = np.array(text_test)
text_test = text_test[:,:,2:]

audio_train = np.array(audio_train)
text_train = np.array(text_train)
text_train = text_train[:,:,2:]

dataset = []
gc.collect()

print(audio_train.shape,text_train.shape)

y_train = np.array(y_train)
y_test = np.array(y_test)
audio_train = np.nan_to_num(audio_train)
text_train = np.nan_to_num(text_train)
#print(audio_train.shape)
#print(text_train.shape)
#print(y_train.shape)

for i in range(audio_train.shape[0]):
  #normalize to avoid features with large values dominate
  audio_train[i] = sklearn.preprocessing.normalize(audio_train[i])
  text_train[i] = sklearn.preprocessing.normalize(text_train[i])

audio_test = np.nan_to_num(audio_test)
text_test = np.nan_to_num(text_test)

for i in range(audio_test.shape[0]):
  audio_test[i] = sklearn.preprocessing.normalize(audio_test[i])
  text_test[i] = sklearn.preprocessing.normalize(text_test[i])


In [35]:
print("With Gating sentence level")
text_audio_model = text_audio()
result_model = text_audio_model.run_model()

result_model.fit([audio_train,text_train], y_train, validation_split = 0.2, epochs=25, batch_size = 125)

pred = result_model.predict([audio_test,text_test])
pred2 = result_model.predict([audio_train,text_train])
#print("test prediction " ,pred)
#print("\ntrain prediction ", pred2)
y_pred_train = (pred2 >= 0.5).astype(int)
y_pred = (pred >= 0.5).astype(int)
print("Training score: ", classification_report(y_train, y_pred_train))
print("Validation score: ", classification_report(y_test, y_pred))


With Gating sentence level
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Training score:                precision    recall  f1-score   support

         0.0       0.82      0.99      0.90       100
         1.0       0.95      0.48      0.63        42

    accuracy                           0.84       142
   macro avg       0.89      0.73      0.77       142
weighted avg       0.86      0.84      0.82       142

Validation score:                precision    recall  f1-score   support

         0.0       0.75      0.82      0.78        33
         1.0       0.45      0.36      0.40        14

    accuracy                           0.68        47
   macro avg       0.60      0.59      0.59        47
weighted avg       0.66      0.68      0.67    