<a href="https://colab.research.google.com/github/VarunB1234/Depression-Detection/blob/main/CNN_Audio.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
from google.colab import drive
drive.mount('/content/drive')

import numpy as np
import pandas as pd
import gc
import sklearn
from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

# -------------------------------
# Data Preprocessing Functions
# -------------------------------

def makedata(X):
    for i in range(X.shape[0]):
        if X[i, 1] == 0:
            X[i, 0] = 0
            for j in range(7):
                X[i, j + 1] = 0
    return X

def upsample(X_train, Y_train):
    X_train_0 = X_train[Y_train == 0]
    X_train_1 = X_train[Y_train == 1]
    Y_train_1 = Y_train[Y_train == 1]

    size = X_train_0.shape[0] - X_train_1.shape[0]
    X_train = list(X_train)
    Y_train = list(Y_train)

    while size > 0:
        size -= 1
        index = np.random.randint(0, X_train_1.shape[0])
        leave_index = np.random.randint(0, len(X_train))
        X_add = X_train_1[index]
        X_leave = X_train[leave_index]

        Y_add = Y_train_1[index]
        Y_leave = Y_train[leave_index]

        X_train[leave_index] = X_add
        X_train.append(X_leave)

        Y_train[leave_index] = Y_add
        Y_train.append(Y_leave)

    return np.array(X_train), np.array(Y_train)

# -------------------------------
# Load train/dev data
# -------------------------------

size = 40000

train = np.array(pd.read_csv('/content/drive/My Drive/MCA Dataset/train_split_Depression_AVEC2017.csv', delimiter=',', encoding='utf-8'))[:, 0:2]
dev = np.array(pd.read_csv('/content/drive/My Drive/MCA Dataset/dev_split_Depression_AVEC2017.csv', delimiter=',', encoding='utf-8'))[:, 0:2]

X_train, Y_train = [], []

# Load train
for i in range(len(train)):
    data = pd.read_csv(f'/content/drive/My Drive/MCA Dataset/train_data/{int(train[i][0])}_COVAREP.csv', header=None)
    X_temp = data.iloc[:, :].values
    X_temp = makedata(X_temp)
    X_temp = X_temp[X_temp.shape[0]-size:]
    X_train.append(X_temp)
    Y_train.append(train[i][1])

train = []
gc.collect()

# Load dev
for i in range(len(dev)):
    data = pd.read_csv(f'/content/drive/My Drive/MCA Dataset/dev_data/{int(dev[i][0])}_COVAREP.csv', header=None)
    X_temp = data.iloc[:, :].values
    X_temp = makedata(X_temp)
    X_temp = X_temp[X_temp.shape[0]-size:]
    X_train.append(X_temp)
    Y_train.append(dev[i][1])

dev = []
gc.collect()

# Stack to proper shape
X_train = np.stack(X_train, axis=0)
Y_train = np.array(Y_train)

# -------------------------------
# Upsample
# -------------------------------

X_upsample, Y_upsample = upsample(X_train, Y_train)

# -------------------------------
# CNN Model
# -------------------------------

class CNN_audio:
    def __init__(self):
        model = Sequential()
        model.add(Conv1D(60, 10, activation='relu', input_shape=(40000, 74)))
        model.add(MaxPooling1D(pool_size=3))
        model.add(Conv1D(30, 5, activation='relu'))
        model.add(MaxPooling1D(pool_size=3))
        model.add(Conv1D(15, 5, activation='relu'))
        model.add(MaxPooling1D(pool_size=3))
        model.add(Flatten())
        model.add(Dropout(0.5))
        model.add(Dense(128, activation='relu'))
        model.add(Dense(1, activation='sigmoid'))
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        self.classifier = model

    def modelFit(self, X, Y, epoch=7):
        self.classifier.fit(X, Y, epochs=epoch, validation_data=(X_val, Y_val))

    def modelPredict(self, X):
        return self.classifier.predict(X)

# -------------------------------
# Manual validation split
# -------------------------------

X_train_split, X_val, Y_train_split, Y_val = train_test_split(
    X_upsample, Y_upsample, test_size=0.1, random_state=42, stratify=Y_upsample
)

# -------------------------------
# Train the Model
# -------------------------------

model = CNN_audio()
model.modelFit(X_train_split, Y_train_split, epoch=7)

# -------------------------------
# Classification Report
# -------------------------------

Y_probs = model.modelPredict(X_val)
Y_pred = (Y_probs > 0.5).astype(int).flatten()

print("Classification Report:\n")
print(classification_report(Y_val, Y_pred))


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/7
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 3s/step - accuracy: 0.5334 - loss: 39.9625 - val_accuracy: 0.5000 - val_loss: 3.2751
Epoch 2/7
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 3s/step - accuracy: 0.5594 - loss: 2.7828 - val_accuracy: 0.5000 - val_loss: 2.9515
Epoch 3/7
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 3s/step - accuracy: 0.5244 - loss: 1.9478 - val_accuracy: 0.5000 - val_loss: 1.3192
Epoch 4/7
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 3s/step - accuracy: 0.6743 - loss: 0.7376 - val_accuracy: 0.8500 - val_loss: 0.5006
Epoch 5/7
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 3s/step - accuracy: 0.8520 - loss: 0.3512 - val_accuracy: 0.7500 - val_loss: 0.5858
Epoch 6/7
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 3s/step - accuracy: 0.8996 - loss: 0.2642 - val_accuracy: 0.7500 - val_loss: 0.4472
Epoch 7/7
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m