In [None]:
# Mount Google Drive at /content/drive; the data paths used in this notebook
# all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
import concurrent.futures
import librosa
import librosa.display
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import cv2
import tensorflow as tf
from tensorflow import keras
from keras.utils import np_utils
from keras import Sequential
from keras import regularizers
from keras.layers import Conv2D,Dense,MaxPooling2D,Flatten,Dropout,BatchNormalization
from keras.callbacks import Callback,EarlyStopping
from keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import classification_report
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
import xgboost
import warnings
warnings.filterwarnings('ignore')
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score,
    classification_report,
    confusion_matrix
)
import glob
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report
from sklearn.base import BaseEstimator, TransformerMixin
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.ensemble import VotingClassifier
from sklearn import model_selection

In [None]:
def combine_data(path):
    """Load every CSV recording in a directory into its own DataFrame.

    Parameters
    ----------
    path : str
        Directory containing the recordings. A trailing separator is optional.

    Returns
    -------
    list of pandas.DataFrame
        One frame per ``*.csv`` file, indexed by the parsed ``timestamp``
        column, with an extra ``source`` column holding the file name up to
        its first dot. Files are visited in sorted name order so the result
        does not depend on the filesystem's listing order.
    """
    combine_df = []

    for filepath in sorted(os.listdir(path)):
        # Skip anything that is not a recording before doing any work on it.
        if not filepath.endswith('.csv'):
            continue

        source = filepath.split('.')[0]
        # os.path.join works whether or not `path` ends with a separator.
        X = pd.read_csv(
            os.path.join(path, filepath),
            parse_dates=['timestamp'],
            index_col='timestamp',
        )
        X['source'] = source
        combine_df.append(X)

    return combine_df

In [None]:
# Load every condition-group recording. The name `combine_df` is rebound to the
# control-group recordings in a later cell.
combine_df = combine_data('/content/drive/MyDrive/Motion_Activity/data/condition/')

In [None]:
# Hourly summary statistics for each recording currently held in `combine_df`.
# Note: 'zero_activity_proportion' stores the *number* of zero readings in the
# hour (a count), not a fraction.
conditions = []
for condition in combine_df:
    hourly = condition.activity.resample('H')
    condition_df = pd.DataFrame({
        'mean_activity': hourly.mean(),
        'std_activity': hourly.std(),
    })
    condition_df['zero_activity_proportion'] = [
        chunk.tolist().count(0) for _, chunk in hourly
    ]
    # Aligned on the timestamp index of the raw recording.
    condition_df['source'] = condition.source
    conditions.append(condition_df)

In [None]:
# Rebind `combine_df` to the control-group recordings (it previously held the
# condition-group recordings).
combine_df = combine_data('/content/drive/MyDrive/Motion_Activity/data/control/')

In [None]:
# Hourly summary statistics for each recording currently held in `combine_df`.
# Note: 'zero_activity_proportion' stores the *number* of zero readings in the
# hour (a count), not a fraction.
controls = []
for control in combine_df:
    hourly = control.activity.resample('H')
    control_df = pd.DataFrame({
        'mean_activity': hourly.mean(),
        'std_activity': hourly.std(),
    })
    control_df['zero_activity_proportion'] = [
        chunk.tolist().count(0) for _, chunk in hourly
    ]
    # Aligned on the timestamp index of the raw recording.
    control_df['source'] = control.source
    controls.append(control_df)

In [None]:
def nextday(dates):
    """Generator that yields each element of ``dates`` in order."""
    yield from dates
def zero_count(series):
    """Return how many elements of ``series`` compare equal to 0."""
    return sum(1 for value in series if value == 0)
def extractfeatures(X, date):
    """Summarise the ``log_activity`` readings of a single day.

    Parameters
    ----------
    X : pandas.DataFrame
        Recording with at least ``date`` and ``log_activity`` columns.
    date :
        Value compared against ``X['date']`` to select the day's rows.

    Returns
    -------
    dict
        Mean, standard deviation, minimum and maximum of the day's
        ``log_activity``, plus ``zero_proportion_activity`` — which, despite
        its name, is the count of zero readings returned by ``zero_count``.
    """
    day_log_activity = X.loc[X['date'] == date, 'log_activity']
    return {
        'mean_log_activity': day_log_activity.mean(),
        'std_log_activity': day_log_activity.std(),
        'min_log_activity': day_log_activity.min(),
        'max_log_activity': day_log_activity.max(),
        'zero_proportion_activity': zero_count(day_log_activity),
    }

In [None]:
class ExtractData(BaseEstimator, TransformerMixin):
    """Build one row of daily log-activity features per (recording file, date).

    Parameters
    ----------
    path : str
        Directory holding the ``*.csv`` recordings. Each file must provide
        ``date`` and ``activity`` columns. A trailing separator is optional.

    Attributes
    ----------
    X : list of dict
        Feature rows produced by the most recent ``transform`` call.
    """

    def __init__(self, path):
        self.path = path
        # Kept for backward compatibility; rebuilt on every transform() call.
        self.X = []

    def fit(self, X, y=None):
        """No-op; present so the class can be used through ``fit_transform``."""
        return self

    def transform(self, X, y=None):
        """Read every CSV under ``self.path`` and return the daily features.

        The ``X`` and ``y`` arguments are ignored; the data comes from disk.

        Returns
        -------
        pandas.DataFrame
            One row per (file, date) with the fields produced by
            ``extractfeatures`` plus ``source`` (file name up to its first dot).
        """
        # Start from a fresh list: accumulating on the instance made a second
        # transform() call return every row twice.
        rows = []

        # The key is only the text before the first underscore, and sorted() is
        # stable, so files sharing a prefix keep the directory listing order.
        for filepath in sorted(os.listdir(self.path), key=lambda name: name.split('_')[0]):
            if not filepath.endswith('.csv'):
                continue

            condition = filepath.split('.')[0]
            recording = pd.read_csv(os.path.join(self.path, filepath))
            # +1 keeps zero activity finite under the logarithm.
            recording['log_activity'] = np.log(recording['activity'] + 1)

            for date in nextday(recording.date.unique()):
                d = extractfeatures(recording, date)
                d['source'] = condition
                rows.append(d)

        self.X = rows
        return pd.DataFrame(rows)

In [None]:
# Daily log-activity features for the condition recordings; every row is tagged
# state = 1. This rebinds `conditions`, replacing the list of hourly frames
# built in an earlier cell.
e = ExtractData(path='/content/drive/MyDrive/Motion_Activity/data/condition/')
conditions = e.fit_transform(X=None, y=None)
conditions['state'] = 1

In [None]:
# Daily log-activity features for the control recordings; every row is tagged
# state = 0. This rebinds `controls`, replacing the list of hourly frames built
# in an earlier cell.
e = ExtractData(path='/content/drive/MyDrive/Motion_Activity/data/control/')
controls = e.fit_transform(X=None, y=None)
controls['state'] = 0

In [None]:
# Stack controls (state 0) above conditions (state 1) with a fresh 0..n-1 index.
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0; the resulting frame is the same.
full_df = pd.concat([controls, conditions], ignore_index=True)
full_df.head()

Unnamed: 0,mean_log_activity,std_log_activity,min_log_activity,max_log_activity,zero_proportion_activity,source,state
0,5.232499,1.494106,0.0,7.976595,24,control_28,0
1,4.103117,2.658716,0.0,8.657129,359,control_28,0
2,4.057168,2.61779,0.0,8.007367,366,control_28,0
3,4.09446,2.587575,0.0,7.85205,350,control_28,0
4,4.118694,2.542314,0.0,8.100768,344,control_28,0


In [None]:
def custom_train_test_split(train_set, test_set):
    """Separate the ``label`` column from the features of two frames.

    Parameters
    ----------
    train_set, test_set : pandas.DataFrame
        Frames that each contain a ``label`` column. Neither is modified.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)`` — the frames without ``label``
        and the corresponding ``label`` series.
    """
    y_train = train_set['label']
    y_test = test_set['label']
    X_train = train_set.drop(columns='label')
    X_test = test_set.drop(columns='label')
    return X_train, X_test, y_train, y_test

In [None]:
class CustomClassifierCV(BaseEstimator, TransformerMixin):
    """Leave-one-participant-out evaluation of a row-level classifier.

    Each participant (distinct value of the ``source`` column) forms one fold.
    For every fold the wrapped classifier is trained on all other participants
    and the held-out participant receives a single label by majority vote over
    that participant's rows.

    Parameters
    ----------
    base_clf : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``. It is fitted in place
        and, after ``fit`` returns, holds the model of the last fold.

    Attributes
    ----------
    predictions_ : list of int
        Majority-vote label per participant, set by ``fit``.
    actuals_ : list
        True label per participant (taken from the fold's first row), set by
        ``fit``.
    """

    def __init__(self, base_clf):
        self.base_clf = base_clf

    def fit(self, X, y=None):
        """Run the leave-one-participant-out loop and print a single report.

        Parameters
        ----------
        X : pandas.DataFrame
            Feature rows including a ``source`` column. Not modified.
        y : array-like
            Row labels, aligned with ``X``.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``X`` contains fewer than two distinct ``source`` values.
        """
        # Work on a copy: writing `label` into the caller's frame leaks the
        # target into any later use of X as a feature matrix.
        data = X.copy()
        data['label'] = y

        folds = [data[data['source'] == p] for p in data['source'].unique()]
        if len(folds) < 2:
            raise ValueError(
                "CustomClassifierCV needs at least two distinct `source` values "
                "to form a training and a test fold"
            )

        predictions = []  # predicted label per participant
        actuals = []      # actual label per participant

        for i, test_set in enumerate(folds):
            train_set = pd.concat([fold for j, fold in enumerate(folds) if j != i])
            X_train, X_test, y_train, y_test = custom_train_test_split(
                train_set.drop(['source'], axis=1),
                test_set.drop(['source'], axis=1),
            )

            self.base_clf.fit(X_train, y_train)
            predictions.append(self.predict(X_test))
            # Only the first row's label is read for the participant.
            actuals.append(test_set.label.iloc[0])

        self.predictions_ = predictions
        self.actuals_ = actuals

        # Report once, after every participant has been scored.
        self.score(predictions, actuals)
        return self

    def predict(self, X):
        """Return 1 if strictly more rows are predicted 1 than 0, else 0."""
        predictions = self.base_clf.predict(X).tolist()
        ones = predictions.count(1)
        zeroes = predictions.count(0)

        return 1 if ones > zeroes else 0

    def score(self, predictions, actuals):
        """Print a classification report for participant-level predictions."""
        # classification_report expects (y_true, y_pred); the reversed order
        # swapped precision and recall.
        print(classification_report(actuals, predictions))

In [None]:
# Features are every column except the target `state`. `source` stays in X
# because CustomClassifierCV.fit groups the rows by it to build its folds.
X = full_df.drop(['state'], axis=1)
y = full_df.state
forest = RandomForestClassifier(n_estimators=100)
custom_clfCV = CustomClassifierCV(forest)
custom_clfCV.fit(X, y)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         1

    accuracy                           1.00         1
   macro avg       1.00      1.00      1.00         1
weighted avg       1.00      1.00      1.00         1

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         2

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3

    accuracy                           1.00         3
   macro avg       1.00      1.00      1.00         3
weighted avg       1.00      1.00      1.00         3

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         4

    accuracy                           1.00         4
   macro avg     

In [None]:
# Same per-participant evaluation with a 5-nearest-neighbours classifier.
# `custom_clfCV` is rebound; `knn` is reused later when the voting ensemble is built.
knn = KNeighborsClassifier(n_neighbors=5)
custom_clfCV = CustomClassifierCV(knn)
custom_clfCV.fit(X, y)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         1

    accuracy                           1.00         1
   macro avg       1.00      1.00      1.00         1
weighted avg       1.00      1.00      1.00         1

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         2

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3

    accuracy                           1.00         3
   macro avg       1.00      1.00      1.00         3
weighted avg       1.00      1.00      1.00         3

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         4

    accuracy                           1.00         4
   macro avg     

In [None]:
# Same per-participant evaluation with an XGBoost classifier at its default settings.
# `custom_clfCV` is rebound; `xgb_classifier` is reused later when the voting ensemble is built.
xgb_classifier = xgboost.XGBClassifier()
custom_clfCV = CustomClassifierCV(xgb_classifier)
custom_clfCV.fit(X, y)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         1

    accuracy                           1.00         1
   macro avg       1.00      1.00      1.00         1
weighted avg       1.00      1.00      1.00         1

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         2

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3

    accuracy                           1.00         3
   macro avg       1.00      1.00      1.00         3
weighted avg       1.00      1.00      1.00         3

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         4

    accuracy                           1.00         4
   macro avg     

In [None]:
# Attach the target to a copy so the shared feature frame `X` is not altered;
# cells further down reuse `X` as model input.
labelled = X.copy()
labelled['label'] = y
participants = labelled.source.unique()
folds = [labelled[labelled['source'] == p] for p in participants]

# Hold out the last participant. The earlier loop only ever kept its final
# iteration, so this is the split it actually produced.
test_set = folds[-1]
train_set = pd.concat(folds[:-1])

# Participant id of each training row, used to keep a participant's days
# together during cross-validation.
groups = train_set['source']

X_train, X_test, y_train, y_test = custom_train_test_split(train_set.drop(['source'], axis=1),test_set.drop(['source'], axis=1))

model1, model2, model3 = forest, knn, xgb_classifier
estimators = [("forest", model1), ("knn", model2), ("xgb_classifier", model3)]

ensemble = VotingClassifier(estimators)
# GroupKFold never places rows of one participant in both the training and the
# validation part of a fold; plain row-wise folds did, which inflates the
# score. It requires at least n_splits distinct participants.
results = model_selection.cross_val_score(
    ensemble,
    X_train,
    y_train,
    groups=groups,
    cv=model_selection.GroupKFold(n_splits=5),
)
ml_model=results.mean()
print(ml_model)

0.6773647984267452


In [None]:
# Use only the numeric daily features. `source` is the participant id, and
# `label` (attached to X by earlier cells) is the target itself: leaving it in
# would hand the answer to the network.
lstm_features = X.drop(columns=['source', 'label'], errors='ignore')
X_train, X_test, y_train, y_test = train_test_split(lstm_features, y, test_size=0.2, random_state=42)

y_train = y_train.astype(int)
y_test = y_test.astype(int)

# Reshape to (samples, 1, features): every row is fed as a one-step sequence,
# matching the input_shape given to the LSTM layer below.
X_train = X_train.values.astype(float).reshape(X_train.shape[0], 1, -1)
X_test = X_test.values.astype(float).reshape(X_test.shape[0], 1, -1)

model = Sequential()
model.add(LSTM(64, input_shape=(1, X_train.shape[2])))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train,y_train,validation_data=(X_test,y_test),batch_size=32,epochs=15)

# Sigmoid outputs for the held-out rows. This rebinds `model1`.
model1 = model.predict(X_test)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [None]:
import IPython.display as ipd

audio_fpath = '/content/drive/MyDrive/audio_files/'
# Keep only .wav entries so the printed count matches its label; the
# spectrogram cell below applies the same filter.
audio_clips = [name for name in os.listdir(audio_fpath) if name.endswith('.wav')]
print("No. of .wav files in audio folder = ",len(audio_clips))

No. of .wav files in audio folder =  188


In [None]:
# def load_audio_files(directory_path, sr=11025):
#     audio_data_list = []
#     sampling_rate_list = []

#     audio_files = [file for file in os.listdir(directory_path) if file.endswith(".wav")]

#     for audio_file in audio_files:
#         audio_fpath = os.path.join(directory_path, audio_file)
#         audio_data, sr = librosa.load(audio_fpath, sr=sr)
#         audio_data_list.append(audio_data)
#         sampling_rate_list.append(sr)

#     return audio_data_list, sampling_rate_list

# if __name__ == "__main__":
#     audio_directory = '/content/drive/MyDrive/audio_files/'
#     sample_rate = 11025

#     audio_data_list, sampling_rate_list = load_audio_files(audio_directory, sr=sample_rate)

#     for i, (audio_data, sr) in enumerate(zip(audio_data_list, sampling_rate_list)):
#         print(f"Audio File {i + 1}:")
#         print(f"Type of audio_data: {type(audio_data)}")
#         print(f"Shape of audio_data: {audio_data.shape}")
#         print(f"Sampling Rate (sr): {sr}\n")


In [None]:
def plot_spectrogram_for_audio(audio_fpath, output_dir, sr=11025):
    """Render the dB-scaled STFT spectrogram of one audio file to a PNG.

    Parameters
    ----------
    audio_fpath : str
        Path of the audio file to load.
    output_dir : str
        Existing directory that receives ``<basename>_spectrogram.png``.
    sr : int, optional
        Sampling rate passed to ``librosa.load``.

    Returns
    -------
    str
        Path of the PNG that was written.
    """
    x, sr = librosa.load(audio_fpath, sr=sr)

    Xdb = librosa.amplitude_to_db(np.abs(librosa.stft(x)))

    filename = os.path.splitext(os.path.basename(audio_fpath))[0]
    output_file_path = os.path.join(output_dir, f'{filename}_spectrogram.png')

    # NOTE(review): a later cell loads these PNGs into 500x1400 arrays, which
    # presumably relies on this 14x5 inch figure being saved at 100 dpi —
    # confirm before changing figsize or dpi.
    fig = plt.figure(figsize=(14, 5))
    try:
        librosa.display.specshow(Xdb, sr=sr, x_axis='time', y_axis='hz')
        plt.colorbar()
        plt.savefig(output_file_path)
    finally:
        # Always release the figure, even if rendering or saving fails, so a
        # long batch does not pile up open figures.
        plt.close(fig)

    return output_file_path

if __name__ == "__main__":
    audio_directory = '/content/drive/MyDrive/audio_files/'
    output_directory = '/content/drive/MyDrive/spectrogram_output/'
    sample_rate = 11025

    # Create the output directory if needed; exist_ok avoids the separate
    # exists-then-create check.
    os.makedirs(output_directory, exist_ok=True)

    # Sorted so the files are processed (and logged) in a stable order.
    audio_files = sorted(file for file in os.listdir(audio_directory) if file.endswith(".wav"))

    for audio_file in audio_files:
        audio_fpath = os.path.join(audio_directory, audio_file)
        print(f"Plotting and saving spectrogram for: {audio_fpath}")
        plot_spectrogram_for_audio(audio_fpath, output_directory, sr=sample_rate)

In [None]:
# Label tables for the three splits, indexed by participant id.
train_csv1 = pd.read_csv('/content/drive/MyDrive/train_split.csv', index_col='Participant_ID')
test_csv = pd.read_csv('/content/drive/MyDrive/full_test_split.csv', index_col='Participant_ID')
dev_csv = pd.read_csv('/content/drive/MyDrive/dev_split_Depression_AVEC2017.csv', index_col='Participant_ID')

# Train on the train and dev splits stacked together.
train_csv = pd.concat([train_csv1, dev_csv])

In [None]:
def _load_spectrogram_split(split_df, label_column, image_dir, image_shape=(500, 1400, 3)):
    """Load the spectrogram PNGs belonging to one split.

    A file belongs to the split when the first three characters of its name
    parse as an integer that appears in ``split_df.index``.

    Parameters
    ----------
    split_df : pandas.DataFrame
        Label table indexed by participant id.
    label_column : str
        Column of ``split_df`` holding the binary label.
    image_dir : str
        Directory containing the spectrogram images.
    image_shape : tuple, optional
        Shape of one sample in the returned array. Each image is read as a
        single grayscale channel and broadcast across the last axis.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Images with shape ``(n_loaded,) + image_shape`` and integer labels
        with shape ``(n_loaded,)``. Only files that were actually read are
        included, so no row is left uninitialised.
    """
    index_by_id = {int(pid): pid for pid in split_df.index}
    images, labels = [], []

    for filename in sorted(os.listdir(image_dir)):
        try:
            participant_id = int(filename[:3])
        except ValueError:
            # Not a "<participant id>..." file name; ignore it.
            continue
        if participant_id not in index_by_id:
            continue

        image = cv2.imread(os.path.join(image_dir, filename), 0)
        if image is None:
            # cv2.imread signals an unreadable file by returning None.
            continue

        images.append(image)
        labels.append(int(split_df.loc[index_by_id[participant_id], label_column]))

    features = np.empty(shape=(len(images),) + tuple(image_shape))
    for row, image in enumerate(images):
        features[row] = np.expand_dims(image, axis=2)

    return features, np.array(labels, dtype='int')


input_path="/content/drive/MyDrive/spectrogram_output/"

X_train, y_train = _load_spectrogram_split(train_csv, 'PHQ8_Binary', input_path)
X_test, y_test = _load_spectrogram_split(test_csv, 'PHQ_Binary', input_path)

# Kept for compatibility with the earlier version of this cell: the number of
# rows in the test label table and the number of test images loaded.
total = test_csv.shape[0]
idx = X_test.shape[0]

In [None]:
# CNN over 500x1400x3 spectrogram inputs ending in a single sigmoid unit.
# Layers are listed in the order they are applied.
model = Sequential([
    Conv2D(32, (5,5), activation='relu', input_shape=(500, 1400, 3)),
    MaxPooling2D((4, 4), strides=4),
    Conv2D(32, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.01)),
    MaxPooling2D((1, 3), strides=(1,3)),
    Flatten(),
    Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
    Dropout(0.6),
    Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.0001)),
    Dropout(0.8),
    Dense(1, activation='sigmoid'),
])

In [None]:
# Binary cross-entropy pairs with the single sigmoid output unit of the model.
model.compile(optimizer='adam',
            loss='binary_crossentropy',
              metrics=['accuracy'])

In [None]:
# Scale pixel values by 1/255.
# NOTE: not idempotent — re-running this cell divides the already-scaled
# arrays by 255 again.
X_train=X_train/255
X_test=X_test/255

In [None]:
# Early stopping on validation loss: stop after 12 epochs without improvement
# and restore the weights from the best epoch seen.
callbacks=EarlyStopping(
    monitor="val_loss",
    min_delta=0,
    patience=12,
    verbose=1,
    baseline=None,
    restore_best_weights=True,
)

In [None]:
# datagen = keras.preprocessing.image.ImageDataGenerator(rescale=1/255)
# train_generator = datagen.flow_from_directory(
#         X_train,
#         batch_size=10,
#         target_size=(512,512),
#         class_mode='binary')
# val_generator = datagen.flow_from_directory(
#         X_test,
#         color_mode='rgb',
#         target_size=(512,512),
#         class_mode='binary')

In [None]:
# NOTE(review): the test split is passed as validation data, so the early-stopping
# callback (which monitors val_loss) picks its best weights using test data.
model.fit(X_train,y_train,validation_data=(X_test,y_test),batch_size=10,epochs=15,callbacks=callbacks)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15

In [None]:
# Model outputs for the test split — assuming `model` is still the spectrogram
# CNN at this point, one sigmoid value per sample. This rebinds `model2`, which
# earlier referred to the KNN classifier.
model2=model.predict(X_test)

In [None]:
# Count the files in every sub-directory of the CK+48 folder.
input_path = "/content/drive/MyDrive/CK+48/"
total = 0
for dir_ in os.listdir(input_path):
    total += len(os.listdir(input_path + dir_ + "/"))

total

In [None]:
# Emotion folders to load, and the weight applied to each class output when
# predict() computes its weighted score.
# NOTE(review): dep_emotion_score is positional — predict() multiplies model
# output j by dep_emotion_score[j] — so it only lines up with TOP_EMOTIONS if
# class labels are assigned in this same order; verify against the loading cell.
TOP_EMOTIONS = ["happy", "surprise", "anger", "sadness", "fear"]
dep_emotion_score=[-0.1,-0.45,0.5,0.2,0.1]

In [None]:
# `total` counts the files of every emotion folder, so it is an upper bound on
# the number of images loaded here; the arrays are trimmed to `idx` afterwards.
img_arr = np.empty(shape=(total, 48, 48, 1))
img_label=np.empty(shape=(total))
idx=0
label_to_text={}
label=0

available_dirs = set(os.listdir(input_path))

# Walk TOP_EMOTIONS in its own order (not directory-listing order) so class k
# is TOP_EMOTIONS[k] and therefore matches dep_emotion_score[k] whenever all
# of the listed folders exist.
for dir_ in TOP_EMOTIONS:
    if dir_ not in available_dirs:
        continue
    for f in os.listdir(input_path + dir_ + "/"):
        # Read as grayscale and add the trailing channel axis.
        img_arr[idx] = np.expand_dims(cv2.imread(input_path + dir_ + "/" + f, 0), axis=2)
        img_label[idx] = label
        idx += 1
    label_to_text[label] = dir_
    label += 1

# Drop the rows that were allocated but never filled: np.empty leaves them with
# arbitrary contents, which would otherwise be used as images and labels.
img_arr = img_arr[:idx]
img_label = np_utils.to_categorical(img_label[:idx])

In [None]:
# Stratified split of the emotion images. random_state pins the shuffle so a
# re-run produces the same split (42 matches the earlier train_test_split call).
X_train,X_test,y_train,y_test=train_test_split(img_arr,img_label,stratify=img_label,shuffle=True,random_state=42)

In [None]:
# Scale pixel values by 1/255.
# NOTE: not idempotent — re-running this cell divides the already-scaled
# arrays by 255 again.
X_train=X_train/255
X_test=X_test/255

In [None]:
# Emotion CNN over 48x48x1 inputs: three pairs of same-padded 3x3 convolutions
# (64, 128, 256 filters) with batch normalisation inside each pair and max
# pooling after the first two, then a dense head with one softmax unit per
# loaded emotion class.
model = Sequential([
    Conv2D(64, kernel_size=(3,3), activation='elu', padding='same', input_shape=(48,48,1)),
    BatchNormalization(),
    Conv2D(64, kernel_size=(3,3), activation='elu', padding='same'),
    MaxPooling2D(pool_size=(2,2), strides=2),
    Conv2D(128, kernel_size=(3,3), activation='elu', padding='same'),
    BatchNormalization(),
    Conv2D(128, kernel_size=(3,3), activation='elu', padding='same'),
    MaxPooling2D(pool_size=(2,2), strides=2),
    Conv2D(256, kernel_size=(3,3), activation='elu', padding='same'),
    BatchNormalization(),
    Conv2D(256, kernel_size=(3,3), activation='elu', padding='same'),
    Dropout(0.3),
    Flatten(),
    Dense(128, activation='elu'),
    Dense(len(label_to_text), activation='softmax'),
])

In [None]:
# Categorical cross-entropy pairs with the one-hot labels and the softmax output layer.
model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])

In [None]:
# Early stopping on validation loss: stop after 12 epochs without improvement
# and restore the weights from the best epoch seen. Rebinds `callbacks` with
# the same settings as the earlier early-stopping cell.
callbacks=EarlyStopping(
    monitor="val_loss",
    min_delta=0,
    patience=12,
    verbose=1,
    baseline=None,
    restore_best_weights=True,
)

In [None]:
# Random augmentation settings for the training images: rotations up to 15
# degrees, shifts / shear / zoom of 0.15, and horizontal flips.
train_datagen = ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.15,
    height_shift_range=0.15,
    shear_range=0.15,
    zoom_range=0.15,
    horizontal_flip=True,
)
# NOTE(review): per the Keras docs, fit() is only required for the
# featurewise_* / zca_whitening options, none of which are set above — confirm
# whether this call is needed.
train_datagen.fit(X_train)

In [None]:
# Train on batches drawn through `train_datagen` so the configured
# augmentations are actually applied; passing the raw arrays bypassed them.
# The batch size now belongs to the generator rather than to fit().
model.fit(
    train_datagen.flow(X_train, y_train, batch_size=32),
    validation_data=(X_test,y_test),
    epochs=20,
    callbacks=callbacks,
)

In [None]:
def predict(arr, classifier=None, scores=None, threshold=0.5):
    """Turn per-class outputs into one binary label per sample.

    For every sample the class outputs are combined into a weighted sum using
    ``scores``; the sample is labelled 1 when that sum is strictly greater
    than ``threshold`` and 0 otherwise.

    Parameters
    ----------
    arr : array-like
        Input batch handed to ``classifier.predict``.
    classifier : object, optional
        Anything with a ``predict(arr)`` method returning a 2-D array of
        per-class outputs. Defaults to the notebook-level ``model``.
    scores : sequence of float, optional
        One weight per class output. Defaults to the notebook-level
        ``dep_emotion_score``.
    threshold : float, optional
        Decision threshold on the weighted sum.

    Returns
    -------
    list of int
        Exactly one 0/1 label per sample, in input order.

    Raises
    ------
    ValueError
        If the number of class outputs differs from ``len(scores)``.

    Notes
    -----
    With the notebook's default weights the largest weight is 0.5, so when the
    class outputs are probabilities (softmax) the weighted sum cannot exceed
    0.5 and the default threshold yields 0 for every sample. Pass a lower
    ``threshold`` to obtain a meaningful decision.
    """
    if classifier is None:
        classifier = model
    if scores is None:
        scores = dep_emotion_score

    class_outputs = np.asarray(classifier.predict(arr), dtype=float)
    if class_outputs.size == 0:
        return []

    weights = np.asarray(scores, dtype=float)
    if class_outputs.ndim != 2 or class_outputs.shape[1] != weights.shape[0]:
        raise ValueError(
            "expected class outputs of shape (n_samples, %d), got %r"
            % (weights.shape[0], class_outputs.shape)
        )

    # One weighted sum per sample. The previous loop carried a single running
    # total across samples and appended a label for every class, giving
    # order-dependent results and five labels per sample.
    weighted_sums = class_outputs @ weights
    return [1 if value > threshold else 0 for value in weighted_sums]

# Raw outputs of `model.predict` on the current test split; the module-level
# predict() helper defined above is not used here. This rebinds `model3`, which
# earlier referred to the XGBoost classifier.
model3=model.predict(X_test)

In [None]:
# NOTE(review): model1, model2 and model3 are bound in earlier cells to the
# results of `model.predict(X_test)` — prediction arrays, not estimator
# objects — yet they are handed to VotingClassifier as estimators under the
# names "forest", "knn" and "xgb_classifier". X_train / y_train were also last
# assigned by the emotion-image split.
# FIXME: this cell presumably cannot run as written; confirm, and replace it
# with a combination step that works on predictions for the same samples.
estimators = []
model4 = model1; estimators.append(("forest", model4))
model5 = model2; estimators.append(("knn", model5))
model6 = model3; estimators.append(("xgb_classifier", model6))

ensemble = VotingClassifier(estimators)
results = model_selection.cross_val_score(ensemble, X_train, y_train)
dl_model=results.mean()
print(dl_model)

In [None]:
# model3= np.round(model3).astype(int)
# model2= np.round(model2).astype(int)
# model1= np.round(model1).astype(int)
# # ml_model=np.round(ml_model).astype(int)
# model1 = model2.reshape((-1,))  # Reshape to a 1D array
# model2 = model2.reshape((-1,))
# model3 = model3.reshape((-1,))

# ensemble_predictions = np.round( model1 + model2 + model3) / 3

# # Make final prediction based on majority voting
# final_predictions = np.where(ensemble_predictions >= 0.5, 1, 0)

# # Calculate ensemble accuracy
# ensemble_accuracy = accuracy_score(y_test, final_predictions)
# print("Ensemble Accuracy:", ensemble_accuracy)