In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras import models
from tensorflow.keras.layers import Embedding, Conv1D, MaxPooling1D, Flatten,Dense, Dropout
from sklearn.metrics import classification_report
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import SMOTE
from sklearn.utils.class_weight import compute_class_weight

Load Dataset:<br>
train_dataset=original training set with no augmentation<br>
back_dataset=training set with back-translation<br>
aug_dataset=training set with eda-paraphrasing augmentation

In [2]:
# Forward slashes resolve on Windows as well as POSIX systems, and match the
# GloVe path used further down; the original backslash paths were Windows-only.
DATA_DIR='../DAIC/Preprocessed'
train_dataset=pd.read_csv(f'{DATA_DIR}/train_dataset.csv')  # original training set
back_dataset=pd.read_csv(f'{DATA_DIR}/back_dataset.csv')    # back-translation variant
aug_dataset=pd.read_csv(f'{DATA_DIR}/aug_dataset.csv')      # eda-paraphrasing variant
test_dataset=pd.read_csv(f'{DATA_DIR}/test_dataset.csv')
val_dataset=pd.read_csv(f'{DATA_DIR}/dev_dataset.csv')      # the "dev" file is the validation split

Checking the number of data points and the class balance in each dataset.

In [None]:
# One loop instead of five copy-pasted prints; the f-string now actually
# interpolates the dataset label. The printed text is unchanged.
for label,frame in (
    ('training',train_dataset),
    ('back',back_dataset),
    ('aug',aug_dataset),
    ('validation',val_dataset),
    ('test',test_dataset),
):
    # Sample count followed by the per-class counts of the PHQ8_Binary column.
    print(f'The samples in {label} dataset is: ',len(frame['response']),'and the distribution is ',frame['PHQ8_Binary'].value_counts())

Let's start with the training dataset. First, we will run the classification without under-/oversampling, using Word2Vec and GloVe embeddings. After that, we will balance the classes by resampling. We will evaluate on the validation set and subsequently on the test set. We will do the same for the back and aug datasets. Finally, we will also try incorporating the validation set into the training dataset, as we have a separate test dataset for testing.

In [3]:
# For every split: the 'response' text column is the model input and the
# 'PHQ8_Binary' column is the label.
X_train,y_train=train_dataset['response'],train_dataset['PHQ8_Binary']
X_back,y_back=back_dataset['response'],back_dataset['PHQ8_Binary']
X_aug,y_aug=aug_dataset['response'],aug_dataset['PHQ8_Binary']
X_val,y_val=val_dataset['response'],val_dataset['PHQ8_Binary']
X_test,y_test=test_dataset['response'],test_dataset['PHQ8_Binary']

<h2>Creating model</h2>

<h1>Word2Vec</h1>
<h3>Train_set</h3>

In [4]:
import gensim

# Forward slashes keep the path portable and consistent with the GloVe path
# below; the original non-raw literal relied on the invalid escape "\G".
word2vec_path='../GoogleNews-vectors-negative300.bin/GoogleNews-vectors-negative300.bin'
# Load the vectors from the binary word2vec format file.
word2vec=gensim.models.KeyedVectors.load_word2vec_format(word2vec_path,binary=True)

<h3>Train_Set</h3>

In [5]:
# Fit the vocabulary on the training responses only; the validation and test
# texts are encoded with that same tokenizer.
tokenizer=Tokenizer(num_words=6300)
tokenizer.fit_on_texts(train_dataset['response'])
X_train_seq=tokenizer.texts_to_sequences(train_dataset['response'])
X_val_seq=tokenizer.texts_to_sequences(X_val)
X_test_seq=tokenizer.texts_to_sequences(X_test)

# Every split is padded to the longest training sequence.
length=max(len(seq) for seq in X_train_seq)
X_train_pad=pad_sequences(X_train_seq,maxlen=length)
X_val_pad=pad_sequences(X_val_seq,maxlen=length)
X_test_pad=pad_sequences(X_test_seq,maxlen=length)

# `classes` is documented as an ndarray (newer scikit-learn releases validate
# the type), so pass an array rather than a plain list.
class_weights=compute_class_weight('balanced',classes=np.array([0,1]),y=train_dataset['PHQ8_Binary'])
class_weight_dict={0: class_weights[0], 1: class_weights[1]}

In [6]:
# Row i of the matrix holds the word2vec vector of the word with tokenizer
# index i; words missing from word2vec (and row 0) stay all-zero.
embedding_dim=300
word_index=tokenizer.word_index
vocab_rows=len(word_index)+1
embedding_matrix=np.zeros((vocab_rows,embedding_dim))

for token,row in word_index.items():
    if token not in word2vec:
        continue
    embedding_matrix[row]=word2vec[token]

In [16]:
def train_model():
    """Build the uncompiled 1-D CNN classifier on top of the frozen pretrained embeddings.

    Despite its name this function does not train anything: callers compile
    and fit the returned model. It reads the notebook globals ``length``,
    ``word_index``, ``embedding_dim`` and ``embedding_matrix`` at call time,
    so it must be called after the tokenizer/embedding cells of the current
    section have run.

    Returns:
        A ``tf.keras`` functional model mapping padded token-id sequences of
        width ``length`` to a single sigmoid output per sample.
    """
    # Pass the shape by keyword as a tuple: the second positional argument of
    # tf.keras.Input is batch_size, so Input(length,embedding_dim) declared a
    # fixed batch size of embedding_dim instead of a (length,) input.
    inputs=tf.keras.Input(shape=(length,),dtype='int32')
    # Pretrained vectors are loaded as the initial weights and kept frozen.
    x=Embedding(input_dim=len(word_index)+1,
                        output_dim=embedding_dim,
                        weights=[embedding_matrix],
                        input_length=length,
                        trainable=False)(inputs)
    x=Conv1D(filters=8,kernel_size=5,activation='relu',)(x)
    x=MaxPooling1D(pool_size=2)(x)
    x=Conv1D(filters=4,kernel_size=3,activation='relu')(x)
    x=Flatten()(x)
    x=Dropout(0.3)(x)
    # Single sigmoid unit for the binary label.
    outputs=Dense(1,activation='sigmoid')(x)

    model=models.Model(inputs,outputs)

    return model

In [17]:
# Fresh model for the class-weighted run on the original training set.
optimizer=tf.keras.optimizers.Adam(learning_rate=0.001)
cnn=train_model()
cnn.compile(
    loss='binary_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

In [None]:
# Train with per-class loss weights, monitoring the validation split each epoch.
cnn.fit(
    X_train_pad,
    y_train,
    batch_size=32,
    epochs=15,
    class_weight=class_weight_dict,
    validation_data=(X_val_pad,y_val),
)

# Threshold the sigmoid outputs at 0.5 and flatten to a 1-D label vector.
y_test_pred=(cnn.predict(X_test_pad)>=0.5).astype(int).reshape(-1)

print('Test Set Performance:')
print(classification_report(y_test,y_test_pred,zero_division=0.0,target_names=['Controlled','Depression']))

In [20]:
# Random undersampling of the padded training sequences, followed by a fresh
# model compiled for the undersampled run.
optimizer=tf.keras.optimizers.Adam(learning_rate=0.003)
rus=RandomUnderSampler(random_state=42)
X_train_pad_un,y_train_un=rus.fit_resample(X_train_pad,y_train)
cnn=train_model()
cnn.compile(
    loss='binary_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

In [None]:
# Train on the undersampled data (no class weights), monitoring the validation split.
cnn.fit(
    X_train_pad_un,
    y_train_un,
    batch_size=32,
    epochs=15,
    validation_data=(X_val_pad,y_val),
)

# Threshold the sigmoid outputs at 0.5 and flatten to a 1-D label vector.
y_test_pred=(cnn.predict(X_test_pad)>=0.5).astype(int).reshape(-1)

print('Test Set Performance:')
print(classification_report(y_test,y_test_pred,zero_division=0.0,target_names=['Controlled','Depression']))

In [22]:
# Oversampling with SMOTE, followed by a fresh model compiled for that run.
# NOTE(review): SMOTE is applied directly to padded token-id vectors here —
# confirm that synthetic rows built this way are meaningful for the embedding.
optimizer=tf.keras.optimizers.Adam(learning_rate=0.003)
smote=SMOTE(random_state=42)
X_train_pad_smote,y_train_smote=smote.fit_resample(X_train_pad,y_train)
cnn=train_model()
cnn.compile(
    loss='binary_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

In [None]:
# Train on the SMOTE-resampled data (no class weights), monitoring the validation split.
cnn.fit(
    X_train_pad_smote,
    y_train_smote,
    batch_size=32,
    epochs=15,
    validation_data=(X_val_pad,y_val),
)

# Threshold the sigmoid outputs at 0.5 and flatten to a 1-D label vector.
y_test_pred=(cnn.predict(X_test_pad)>=0.5).astype(int).reshape(-1)

print('Test Set Performance:')
print(classification_report(y_test,y_test_pred,zero_division=0.0,target_names=['Controlled','Depression']))

<h3>Back_set</h3>

In [24]:
# Fit the vocabulary on the back-translation responses only; the validation
# and test texts are encoded with that same tokenizer.
tokenizer=Tokenizer(num_words=6300)
tokenizer.fit_on_texts(back_dataset['response'])
X_back_seq=tokenizer.texts_to_sequences(back_dataset['response'])
X_val_seq=tokenizer.texts_to_sequences(X_val)
X_test_seq=tokenizer.texts_to_sequences(X_test)

# Every split is padded to the longest back-set sequence.
length=max(len(seq) for seq in X_back_seq)
X_back_pad=pad_sequences(X_back_seq,maxlen=length)
X_val_pad=pad_sequences(X_val_seq,maxlen=length)
X_test_pad=pad_sequences(X_test_seq,maxlen=length)

# `classes` is documented as an ndarray (newer scikit-learn releases validate
# the type), so pass an array rather than a plain list.
class_weights=compute_class_weight('balanced',classes=np.array([0,1]),y=back_dataset['PHQ8_Binary'])
class_weight_dict={0: class_weights[0], 1: class_weights[1]}

In [25]:
# Row i of the matrix holds the word2vec vector of the word with tokenizer
# index i; words missing from word2vec (and row 0) stay all-zero.
embedding_dim=300
word_index=tokenizer.word_index
vocab_rows=len(word_index)+1
embedding_matrix=np.zeros((vocab_rows,embedding_dim))

for token,row in word_index.items():
    if token not in word2vec:
        continue
    embedding_matrix[row]=word2vec[token]

In [26]:
def train_model():
    """Build the uncompiled 1-D CNN classifier on top of the frozen pretrained embeddings.

    Despite its name this function does not train anything: callers compile
    and fit the returned model. It reads the notebook globals ``length``,
    ``word_index``, ``embedding_dim`` and ``embedding_matrix`` at call time,
    so it must be called after the tokenizer/embedding cells of the current
    section have run.

    Returns:
        A ``tf.keras`` functional model mapping padded token-id sequences of
        width ``length`` to a single sigmoid output per sample.
    """
    # Pass the shape by keyword as a tuple: the second positional argument of
    # tf.keras.Input is batch_size, so Input(length,embedding_dim) declared a
    # fixed batch size of embedding_dim instead of a (length,) input.
    inputs=tf.keras.Input(shape=(length,),dtype='int32')
    # Pretrained vectors are loaded as the initial weights and kept frozen.
    x=Embedding(input_dim=len(word_index)+1,
                        output_dim=embedding_dim,
                        weights=[embedding_matrix],
                        input_length=length,
                        trainable=False)(inputs)
    x=Conv1D(filters=8,kernel_size=5,activation='relu',)(x)
    x=MaxPooling1D(pool_size=2)(x)
    x=Conv1D(filters=4,kernel_size=3,activation='relu')(x)
    x=Flatten()(x)
    x=Dropout(0.3)(x)
    # Single sigmoid unit for the binary label.
    outputs=Dense(1,activation='sigmoid')(x)

    model=models.Model(inputs,outputs)

    return model

In [27]:
# Fresh model for the class-weighted run on the back-translation set.
optimizer=tf.keras.optimizers.Adam(learning_rate=0.01)
cnn=train_model()
cnn.compile(
    loss='binary_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

In [None]:
# Train with per-class loss weights, monitoring the validation split each epoch.
cnn.fit(
    X_back_pad,
    y_back,
    batch_size=32,
    epochs=15,
    class_weight=class_weight_dict,
    validation_data=(X_val_pad,y_val),
)

# Threshold the sigmoid outputs at 0.5 and flatten to a 1-D label vector.
y_test_pred=(cnn.predict(X_test_pad)>=0.5).astype(int).reshape(-1)

print('Test Set Performance:')
print(classification_report(y_test,y_test_pred,zero_division=0.0,target_names=['Controlled','Depression']))

In [29]:
# Random undersampling of the padded back-set sequences, followed by a fresh
# model compiled for the undersampled run.
optimizer=tf.keras.optimizers.Adam(learning_rate=0.01)
rus=RandomUnderSampler(random_state=42)
X_back_pad_un,y_back_un=rus.fit_resample(X_back_pad,y_back)
cnn=train_model()
cnn.compile(
    loss='binary_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

In [None]:
# Train on the undersampled data (no class weights), monitoring the validation split.
cnn.fit(
    X_back_pad_un,
    y_back_un,
    batch_size=32,
    epochs=15,
    validation_data=(X_val_pad,y_val),
)

# Threshold the sigmoid outputs at 0.5 and flatten to a 1-D label vector.
y_test_pred=(cnn.predict(X_test_pad)>=0.5).astype(int).reshape(-1)

print('Test Set Performance:')
print(classification_report(y_test,y_test_pred,zero_division=0.0,target_names=['Controlled','Depression']))

In [31]:
# Oversampling with SMOTE, followed by a fresh model compiled for that run.
# NOTE(review): SMOTE is applied directly to padded token-id vectors here —
# confirm that synthetic rows built this way are meaningful for the embedding.
optimizer=tf.keras.optimizers.Adam(learning_rate=0.007)
smote=SMOTE(random_state=42)
X_back_pad_smote,y_back_smote=smote.fit_resample(X_back_pad,y_back)
cnn=train_model()
cnn.compile(
    loss='binary_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

In [None]:
# Train on the SMOTE-resampled data (no class weights), monitoring the validation split.
cnn.fit(
    X_back_pad_smote,
    y_back_smote,
    batch_size=32,
    epochs=15,
    validation_data=(X_val_pad,y_val),
)

# Threshold the sigmoid outputs at 0.5 and flatten to a 1-D label vector.
y_test_pred=(cnn.predict(X_test_pad)>=0.5).astype(int).reshape(-1)

print('Test Set Performance:')
print(classification_report(y_test,y_test_pred,zero_division=0.0,target_names=['Controlled','Depression']))

<h3>Aug_set</h3>

In [33]:
# Fit the vocabulary on the augmented responses only; the validation and test
# texts are encoded with that same tokenizer.
tokenizer=Tokenizer(num_words=6300)
tokenizer.fit_on_texts(aug_dataset['response'])
X_aug_seq=tokenizer.texts_to_sequences(aug_dataset['response'])
X_val_seq=tokenizer.texts_to_sequences(X_val)
X_test_seq=tokenizer.texts_to_sequences(X_test)

# Every split is padded to the longest aug-set sequence.
length=max(len(seq) for seq in X_aug_seq)
X_aug_pad=pad_sequences(X_aug_seq,maxlen=length)
X_val_pad=pad_sequences(X_val_seq,maxlen=length)
X_test_pad=pad_sequences(X_test_seq,maxlen=length)

# `classes` is documented as an ndarray (newer scikit-learn releases validate
# the type), so pass an array rather than a plain list.
class_weights=compute_class_weight('balanced',classes=np.array([0,1]),y=aug_dataset['PHQ8_Binary'])
class_weight_dict={0: class_weights[0], 1: class_weights[1]}

In [34]:
# Row i of the matrix holds the word2vec vector of the word with tokenizer
# index i; words missing from word2vec (and row 0) stay all-zero.
embedding_dim=300
word_index=tokenizer.word_index
vocab_rows=len(word_index)+1
embedding_matrix=np.zeros((vocab_rows,embedding_dim))

for token,row in word_index.items():
    if token not in word2vec:
        continue
    embedding_matrix[row]=word2vec[token]

In [35]:
def train_model():
    """Build the uncompiled 1-D CNN classifier on top of the frozen pretrained embeddings.

    Despite its name this function does not train anything: callers compile
    and fit the returned model. It reads the notebook globals ``length``,
    ``word_index``, ``embedding_dim`` and ``embedding_matrix`` at call time,
    so it must be called after the tokenizer/embedding cells of the current
    section have run.

    Returns:
        A ``tf.keras`` functional model mapping padded token-id sequences of
        width ``length`` to a single sigmoid output per sample.
    """
    # Pass the shape by keyword as a tuple: the second positional argument of
    # tf.keras.Input is batch_size, so Input(length,embedding_dim) declared a
    # fixed batch size of embedding_dim instead of a (length,) input.
    inputs=tf.keras.Input(shape=(length,),dtype='int32')
    # Pretrained vectors are loaded as the initial weights and kept frozen.
    x=Embedding(input_dim=len(word_index)+1,
                        output_dim=embedding_dim,
                        weights=[embedding_matrix],
                        input_length=length,
                        trainable=False)(inputs)
    x=Conv1D(filters=8,kernel_size=5,activation='relu',)(x)
    x=MaxPooling1D(pool_size=2)(x)
    x=Conv1D(filters=4,kernel_size=3,activation='relu')(x)
    x=Flatten()(x)
    x=Dropout(0.3)(x)
    # Single sigmoid unit for the binary label.
    outputs=Dense(1,activation='sigmoid')(x)

    model=models.Model(inputs,outputs)

    return model

In [36]:
# Fresh model for the class-weighted run on the augmented set.
optimizer=tf.keras.optimizers.Adam(learning_rate=0.01)
cnn=train_model()
cnn.compile(
    loss='binary_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

In [None]:
# Train with per-class loss weights, monitoring the validation split each epoch.
cnn.fit(
    X_aug_pad,
    y_aug,
    batch_size=32,
    epochs=15,
    class_weight=class_weight_dict,
    validation_data=(X_val_pad,y_val),
)

# Threshold the sigmoid outputs at 0.5 and flatten to a 1-D label vector.
y_test_pred=(cnn.predict(X_test_pad)>=0.5).astype(int).reshape(-1)

print('Test Set Performance:')
print(classification_report(y_test,y_test_pred,zero_division=0.0,target_names=['Controlled','Depression']))

In [38]:
# Random undersampling of the padded aug-set sequences, followed by a fresh
# model compiled for the undersampled run.
optimizer=tf.keras.optimizers.Adam(learning_rate=0.01)
rus=RandomUnderSampler(random_state=5)
X_aug_pad_un,y_aug_un=rus.fit_resample(X_aug_pad,y_aug)
cnn=train_model()
cnn.compile(
    loss='binary_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

In [None]:
# Train on the undersampled data (no class weights), monitoring the validation split.
cnn.fit(
    X_aug_pad_un,
    y_aug_un,
    batch_size=32,
    epochs=15,
    validation_data=(X_val_pad,y_val),
)

# Threshold the sigmoid outputs at 0.5 and flatten to a 1-D label vector.
y_test_pred=(cnn.predict(X_test_pad)>=0.5).astype(int).reshape(-1)

print('Test Set Performance:')
print(classification_report(y_test,y_test_pred,zero_division=0.0,target_names=['Controlled','Depression']))

In [40]:
# Oversampling with SMOTE, followed by a fresh model compiled for that run.
# NOTE(review): SMOTE is applied directly to padded token-id vectors here —
# confirm that synthetic rows built this way are meaningful for the embedding.
optimizer=tf.keras.optimizers.Adam(learning_rate=0.007)
smote=SMOTE(random_state=9)
X_aug_pad_smote,y_aug_smote=smote.fit_resample(X_aug_pad,y_aug)
cnn=train_model()
cnn.compile(
    loss='binary_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

In [None]:
# Train on the SMOTE-resampled data (no class weights), monitoring the validation split.
cnn.fit(
    X_aug_pad_smote,
    y_aug_smote,
    batch_size=32,
    epochs=15,
    validation_data=(X_val_pad,y_val),
)

# Threshold the sigmoid outputs at 0.5 and flatten to a 1-D label vector.
y_test_pred=(cnn.predict(X_test_pad)>=0.5).astype(int).reshape(-1)

print('Test Set Performance:')
print(classification_report(y_test,y_test_pred,zero_division=0.0,target_names=['Controlled','Depression']))

<h1>GloVe</h1>
<h2>Creating Model</h2>

In [42]:
# Parse the GloVe text file into {word: float32 vector}. Each line is split on
# whitespace: the first field is the word, the remaining fields its coefficients.
embedding_index={}
glove_path='../glove.6B.100d.txt'
with open(glove_path,encoding='utf-8') as glove_file:
    for row in glove_file:
        fields=row.split()
        embedding_index[fields[0]]=np.asarray(fields[1:],dtype='float32')

<h3>Train_Set</h3>

In [43]:
# Fit the vocabulary on the training responses only; the validation and test
# texts are encoded with that same tokenizer.
tokenizer=Tokenizer(num_words=6300)
tokenizer.fit_on_texts(train_dataset['response'])
X_train_seq=tokenizer.texts_to_sequences(train_dataset['response'])
X_val_seq=tokenizer.texts_to_sequences(X_val)
X_test_seq=tokenizer.texts_to_sequences(X_test)

# Every split is padded to the longest training sequence.
length=max(len(seq) for seq in X_train_seq)
X_train_pad=pad_sequences(X_train_seq,maxlen=length)
X_val_pad=pad_sequences(X_val_seq,maxlen=length)
X_test_pad=pad_sequences(X_test_seq,maxlen=length)

# `classes` is documented as an ndarray (newer scikit-learn releases validate
# the type), so pass an array rather than a plain list.
class_weights=compute_class_weight('balanced',classes=np.array([0,1]),y=train_dataset['PHQ8_Binary'])
class_weight_dict={0: class_weights[0], 1: class_weights[1]}

In [44]:
# Row i of the matrix holds the GloVe vector of the word with tokenizer
# index i; words without a GloVe entry (and row 0) stay all-zero.
embedding_dim=100
word_index=tokenizer.word_index
vocab_rows=len(word_index)+1
embedding_matrix=np.zeros((vocab_rows,embedding_dim))

for token,row in word_index.items():
    glove_vector=embedding_index.get(token)
    if glove_vector is None:
        continue
    embedding_matrix[row]=glove_vector

In [45]:
def train_model():
    """Build the uncompiled 1-D CNN classifier on top of the frozen pretrained embeddings.

    Despite its name this function does not train anything: callers compile
    and fit the returned model. It reads the notebook globals ``length``,
    ``word_index``, ``embedding_dim`` and ``embedding_matrix`` at call time,
    so it must be called after the tokenizer/embedding cells of the current
    section have run.

    Returns:
        A ``tf.keras`` functional model mapping padded token-id sequences of
        width ``length`` to a single sigmoid output per sample.
    """
    # Pass the shape by keyword as a tuple: the second positional argument of
    # tf.keras.Input is batch_size, so Input(length,embedding_dim) declared a
    # fixed batch size of embedding_dim instead of a (length,) input.
    inputs=tf.keras.Input(shape=(length,),dtype='int32')
    # Pretrained vectors are loaded as the initial weights and kept frozen.
    x=Embedding(input_dim=len(word_index)+1,
                        output_dim=embedding_dim,
                        weights=[embedding_matrix],
                        input_length=length,
                        trainable=False)(inputs)
    x=Conv1D(filters=8,kernel_size=5,activation='relu',)(x)
    x=MaxPooling1D(pool_size=2)(x)
    x=Conv1D(filters=4,kernel_size=3,activation='relu')(x)
    x=Flatten()(x)
    x=Dropout(0.3)(x)
    # Single sigmoid unit for the binary label.
    outputs=Dense(1,activation='sigmoid')(x)

    model=models.Model(inputs,outputs)

    return model

In [46]:
# Fresh model for the class-weighted run on the original training set.
optimizer=tf.keras.optimizers.Adam(learning_rate=0.007)
cnn=train_model()
cnn.compile(
    loss='binary_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

In [None]:
# Train with per-class loss weights, monitoring the validation split each epoch.
cnn.fit(
    X_train_pad,
    y_train,
    batch_size=32,
    epochs=15,
    class_weight=class_weight_dict,
    validation_data=(X_val_pad,y_val),
)

# Threshold the sigmoid outputs at 0.5 and flatten to a 1-D label vector.
y_test_pred=(cnn.predict(X_test_pad)>=0.5).astype(int).reshape(-1)

print('Test Set Performance:')
print(classification_report(y_test,y_test_pred,zero_division=0.0,target_names=['Controlled','Depression']))

In [48]:
# Random undersampling of the padded training sequences, followed by a fresh
# model compiled for the undersampled run.
optimizer=tf.keras.optimizers.Adam(learning_rate=0.003)
rus=RandomUnderSampler(random_state=42)
X_train_pad_un,y_train_un=rus.fit_resample(X_train_pad,y_train)
cnn=train_model()
cnn.compile(
    loss='binary_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

In [None]:
# Train on the undersampled data (no class weights), monitoring the validation split.
cnn.fit(
    X_train_pad_un,
    y_train_un,
    batch_size=32,
    epochs=15,
    validation_data=(X_val_pad,y_val),
)

# Threshold the sigmoid outputs at 0.5 and flatten to a 1-D label vector.
y_test_pred=(cnn.predict(X_test_pad)>=0.5).astype(int).reshape(-1)

print('Test Set Performance:')
print(classification_report(y_test,y_test_pred,zero_division=0.0,target_names=['Controlled','Depression']))

In [50]:
# Oversampling with SMOTE, followed by a fresh model compiled for that run.
# NOTE(review): SMOTE is applied directly to padded token-id vectors here —
# confirm that synthetic rows built this way are meaningful for the embedding.
optimizer=tf.keras.optimizers.Adam(learning_rate=0.003)
smote=SMOTE(random_state=42)
X_train_pad_smote,y_train_smote=smote.fit_resample(X_train_pad,y_train)
cnn=train_model()
cnn.compile(
    loss='binary_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

In [None]:
# Train on the SMOTE-resampled data (no class weights), monitoring the validation split.
cnn.fit(
    X_train_pad_smote,
    y_train_smote,
    batch_size=32,
    epochs=15,
    validation_data=(X_val_pad,y_val),
)

# Threshold the sigmoid outputs at 0.5 and flatten to a 1-D label vector.
y_test_pred=(cnn.predict(X_test_pad)>=0.5).astype(int).reshape(-1)

print('Test Set Performance:')
print(classification_report(y_test,y_test_pred,zero_division=0.0,target_names=['Controlled','Depression']))

<h3>Back_set</h3>

In [52]:
# Fit the vocabulary on the back-translation responses only; the validation
# and test texts are encoded with that same tokenizer.
tokenizer=Tokenizer(num_words=6300)
tokenizer.fit_on_texts(back_dataset['response'])
X_back_seq=tokenizer.texts_to_sequences(back_dataset['response'])
X_val_seq=tokenizer.texts_to_sequences(X_val)
X_test_seq=tokenizer.texts_to_sequences(X_test)

# Every split is padded to the longest back-set sequence.
length=max(len(seq) for seq in X_back_seq)
X_back_pad=pad_sequences(X_back_seq,maxlen=length)
X_val_pad=pad_sequences(X_val_seq,maxlen=length)
X_test_pad=pad_sequences(X_test_seq,maxlen=length)

# `classes` is documented as an ndarray (newer scikit-learn releases validate
# the type), so pass an array rather than a plain list.
class_weights=compute_class_weight('balanced',classes=np.array([0,1]),y=back_dataset['PHQ8_Binary'])
class_weight_dict={0: class_weights[0], 1: class_weights[1]}

In [53]:
# Row i of the matrix holds the GloVe vector of the word with tokenizer
# index i; words without a GloVe entry (and row 0) stay all-zero.
embedding_dim=100
word_index=tokenizer.word_index
vocab_rows=len(word_index)+1
embedding_matrix=np.zeros((vocab_rows,embedding_dim))

for token,row in word_index.items():
    glove_vector=embedding_index.get(token)
    if glove_vector is None:
        continue
    embedding_matrix[row]=glove_vector

In [54]:
def train_model():
    """Build the uncompiled 1-D CNN classifier on top of the frozen pretrained embeddings.

    Despite its name this function does not train anything: callers compile
    and fit the returned model. It reads the notebook globals ``length``,
    ``word_index``, ``embedding_dim`` and ``embedding_matrix`` at call time,
    so it must be called after the tokenizer/embedding cells of the current
    section have run.

    Returns:
        A ``tf.keras`` functional model mapping padded token-id sequences of
        width ``length`` to a single sigmoid output per sample.
    """
    # Pass the shape by keyword as a tuple: the second positional argument of
    # tf.keras.Input is batch_size, so Input(length,embedding_dim) declared a
    # fixed batch size of embedding_dim instead of a (length,) input.
    inputs=tf.keras.Input(shape=(length,),dtype='int32')
    # Pretrained vectors are loaded as the initial weights and kept frozen.
    x=Embedding(input_dim=len(word_index)+1,
                        output_dim=embedding_dim,
                        weights=[embedding_matrix],
                        input_length=length,
                        trainable=False)(inputs)
    x=Conv1D(filters=8,kernel_size=5,activation='relu',)(x)
    x=MaxPooling1D(pool_size=2)(x)
    x=Conv1D(filters=4,kernel_size=3,activation='relu')(x)
    x=Flatten()(x)
    x=Dropout(0.3)(x)
    # Single sigmoid unit for the binary label.
    outputs=Dense(1,activation='sigmoid')(x)

    model=models.Model(inputs,outputs)

    return model

In [55]:
# Fresh model for the class-weighted run on the back-translation set.
optimizer=tf.keras.optimizers.Adam(learning_rate=0.01)
cnn=train_model()
cnn.compile(
    loss='binary_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

In [None]:
# Train with per-class loss weights, monitoring the validation split each epoch.
cnn.fit(
    X_back_pad,
    y_back,
    batch_size=32,
    epochs=15,
    class_weight=class_weight_dict,
    validation_data=(X_val_pad,y_val),
)

# Threshold the sigmoid outputs at 0.5 and flatten to a 1-D label vector.
y_test_pred=(cnn.predict(X_test_pad)>=0.5).astype(int).reshape(-1)

print('Test Set Performance:')
print(classification_report(y_test,y_test_pred,zero_division=0.0,target_names=['Controlled','Depression']))

In [57]:
# Random undersampling of the padded back-set sequences, followed by a fresh
# model compiled for the undersampled run.
optimizer=tf.keras.optimizers.Adam(learning_rate=0.01)
rus=RandomUnderSampler(random_state=42)
X_back_pad_un,y_back_un=rus.fit_resample(X_back_pad,y_back)
cnn=train_model()
cnn.compile(
    loss='binary_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

In [None]:
# Train on the undersampled data (no class weights), monitoring the validation split.
cnn.fit(
    X_back_pad_un,
    y_back_un,
    batch_size=32,
    epochs=15,
    validation_data=(X_val_pad,y_val),
)

# Threshold the sigmoid outputs at 0.5 and flatten to a 1-D label vector.
y_test_pred=(cnn.predict(X_test_pad)>=0.5).astype(int).reshape(-1)

print('Test Set Performance:')
print(classification_report(y_test,y_test_pred,zero_division=0.0,target_names=['Controlled','Depression']))

In [59]:
# Oversampling with SMOTE, followed by a fresh model compiled for that run.
# NOTE(review): SMOTE is applied directly to padded token-id vectors here —
# confirm that synthetic rows built this way are meaningful for the embedding.
optimizer=tf.keras.optimizers.Adam(learning_rate=0.007)
smote=SMOTE(random_state=42)
X_back_pad_smote,y_back_smote=smote.fit_resample(X_back_pad,y_back)
cnn=train_model()
cnn.compile(
    loss='binary_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

In [None]:
# Train on the SMOTE-resampled data (no class weights), monitoring the validation split.
cnn.fit(
    X_back_pad_smote,
    y_back_smote,
    batch_size=32,
    epochs=15,
    validation_data=(X_val_pad,y_val),
)

# Threshold the sigmoid outputs at 0.5 and flatten to a 1-D label vector.
y_test_pred=(cnn.predict(X_test_pad)>=0.5).astype(int).reshape(-1)

print('Test Set Performance:')
print(classification_report(y_test,y_test_pred,zero_division=0.0,target_names=['Controlled','Depression']))

<h3>Aug_set</h3>

In [61]:
# Fit the vocabulary on the augmented responses only; the validation and test
# texts are encoded with that same tokenizer.
tokenizer=Tokenizer(num_words=6300)
tokenizer.fit_on_texts(aug_dataset['response'])
X_aug_seq=tokenizer.texts_to_sequences(aug_dataset['response'])
X_val_seq=tokenizer.texts_to_sequences(X_val)
X_test_seq=tokenizer.texts_to_sequences(X_test)

# Every split is padded to the longest aug-set sequence.
length=max(len(seq) for seq in X_aug_seq)
X_aug_pad=pad_sequences(X_aug_seq,maxlen=length)
X_val_pad=pad_sequences(X_val_seq,maxlen=length)
X_test_pad=pad_sequences(X_test_seq,maxlen=length)

# `classes` is documented as an ndarray (newer scikit-learn releases validate
# the type), so pass an array rather than a plain list.
class_weights=compute_class_weight('balanced',classes=np.array([0,1]),y=aug_dataset['PHQ8_Binary'])
class_weight_dict={0: class_weights[0], 1: class_weights[1]}

In [62]:
# Row i of the matrix holds the GloVe vector of the word with tokenizer
# index i; words without a GloVe entry (and row 0) stay all-zero.
embedding_dim=100
word_index=tokenizer.word_index
vocab_rows=len(word_index)+1
embedding_matrix=np.zeros((vocab_rows,embedding_dim))

for token,row in word_index.items():
    glove_vector=embedding_index.get(token)
    if glove_vector is None:
        continue
    embedding_matrix[row]=glove_vector

In [63]:
def train_model():
    """Build the uncompiled 1-D CNN classifier on top of the frozen pretrained embeddings.

    Despite its name this function does not train anything: callers compile
    and fit the returned model. It reads the notebook globals ``length``,
    ``word_index``, ``embedding_dim`` and ``embedding_matrix`` at call time,
    so it must be called after the tokenizer/embedding cells of the current
    section have run.

    Returns:
        A ``tf.keras`` functional model mapping padded token-id sequences of
        width ``length`` to a single sigmoid output per sample.
    """
    # Pass the shape by keyword as a tuple: the second positional argument of
    # tf.keras.Input is batch_size, so Input(length,embedding_dim) declared a
    # fixed batch size of embedding_dim instead of a (length,) input.
    inputs=tf.keras.Input(shape=(length,),dtype='int32')
    # Pretrained vectors are loaded as the initial weights and kept frozen.
    x=Embedding(input_dim=len(word_index)+1,
                        output_dim=embedding_dim,
                        weights=[embedding_matrix],
                        input_length=length,
                        trainable=False)(inputs)
    x=Conv1D(filters=8,kernel_size=5,activation='relu',)(x)
    x=MaxPooling1D(pool_size=2)(x)
    x=Conv1D(filters=4,kernel_size=3,activation='relu')(x)
    x=Flatten()(x)
    x=Dropout(0.3)(x)
    # Single sigmoid unit for the binary label.
    outputs=Dense(1,activation='sigmoid')(x)

    model=models.Model(inputs,outputs)

    return model

In [64]:
# Fresh model for the class-weighted run on the augmented set.
optimizer=tf.keras.optimizers.Adam(learning_rate=0.01)
cnn=train_model()
cnn.compile(
    loss='binary_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

In [None]:
# Train with per-class loss weights, monitoring the validation split each epoch.
cnn.fit(
    X_aug_pad,
    y_aug,
    batch_size=32,
    epochs=15,
    class_weight=class_weight_dict,
    validation_data=(X_val_pad,y_val),
)

# Threshold the sigmoid outputs at 0.5 and flatten to a 1-D label vector.
y_test_pred=(cnn.predict(X_test_pad)>=0.5).astype(int).reshape(-1)

print('Test Set Performance:')
print(classification_report(y_test,y_test_pred,zero_division=0.0,target_names=['Controlled','Depression']))

In [66]:
# Random undersampling of the padded aug-set sequences, followed by a fresh
# model compiled for the undersampled run.
optimizer=tf.keras.optimizers.Adam(learning_rate=0.01)
rus=RandomUnderSampler(random_state=5)
X_aug_pad_un,y_aug_un=rus.fit_resample(X_aug_pad,y_aug)
cnn=train_model()
cnn.compile(
    loss='binary_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

In [None]:
# Train on the undersampled data (no class weights), monitoring the validation split.
cnn.fit(
    X_aug_pad_un,
    y_aug_un,
    batch_size=32,
    epochs=15,
    validation_data=(X_val_pad,y_val),
)

# Threshold the sigmoid outputs at 0.5 and flatten to a 1-D label vector.
y_test_pred=(cnn.predict(X_test_pad)>=0.5).astype(int).reshape(-1)

print('Test Set Performance:')
print(classification_report(y_test,y_test_pred,zero_division=0.0,target_names=['Controlled','Depression']))

In [68]:
# Oversampling with SMOTE, followed by a fresh model compiled for that run.
# NOTE(review): SMOTE is applied directly to padded token-id vectors here —
# confirm that synthetic rows built this way are meaningful for the embedding.
optimizer=tf.keras.optimizers.Adam(learning_rate=0.007)
smote=SMOTE(random_state=9)
X_aug_pad_smote,y_aug_smote=smote.fit_resample(X_aug_pad,y_aug)
cnn=train_model()
cnn.compile(
    loss='binary_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

In [None]:
# Train on the SMOTE-resampled data (no class weights), monitoring the validation split.
cnn.fit(
    X_aug_pad_smote,
    y_aug_smote,
    batch_size=32,
    epochs=15,
    validation_data=(X_val_pad,y_val),
)

# Threshold the sigmoid outputs at 0.5 and flatten to a 1-D label vector.
y_test_pred=(cnn.predict(X_test_pad)>=0.5).astype(int).reshape(-1)

print('Test Set Performance:')
print(classification_report(y_test,y_test_pred,zero_division=0.0,target_names=['Controlled','Depression']))