Step 1 Data Preprocessing:

In [266]:
import pandas as pd

# Load the dataset from a CSV file
data = pd.read_csv('trainmodelfeature.csv')
# Check the structure of the dataset
# print(data.head())

# Separate features (X) and labels (y)
X = data[['MFCC Mean', 'MFCC VAR', 'F0 mean']]
y = data['Depressed']

# Check the first few rows of features and labels
# print("Features (X):")
# print(X.head())

# print("Labels (y):")
# print(y.head())

Step 2 Data Splitting:

In [267]:
from sklearn.model_selection import train_test_split

# Split the dataset into training (70%), validation (15%), and test (15%) sets
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=.4, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=.5, random_state=42)

# Check the sizes of the splits
print("Training set size:", len(X_train))
# print(X_train)
print("Validation set size:", len(X_val))
# print(X_val)
print("Test set size:", len(X_test))
# print(X_test)

Training set size: 25
Validation set size: 8
Test set size: 9


step 3 Feature Scaling

In [268]:
from sklearn.preprocessing import StandardScaler

# Initialize the StandardScaler
scaler = StandardScaler()

# Fit the scaler to the training data and transform it
X_train_scaled = scaler.fit_transform(X_train)

# Transform the validation and test sets using the same scaler
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

# Check the scaled data
print("Scaled Training Data:")
print(X_train_scaled[:5])  # Display the first 5 rows
# print(X_test_scaled[:5])
# print(X_val_scaled)

# Note: You don't need to scale the target labels (y) in a classification problem.


Scaled Training Data:
[[-0.05241798 -0.03978329  0.66430586]
 [-0.97069874 -0.97036359 -0.54157756]
 [-1.19996659 -1.21719515 -1.21110021]
 [ 0.98352651  0.96258016  0.53925594]
 [ 1.11186427  1.13386696 -2.05508721]]


Step 4 data sequencing

In [269]:
import numpy as np

def create_sequences(data, sequence_length):
    sequences = []
    labels = []

    for i in range(len(data) - sequence_length):
        sequence = data[i:i+sequence_length]
        label = data[i+sequence_length]
        sequences.append(sequence)
        labels.append(label)

    return np.array(sequences), np.array(labels)

# Set the sequence length (number of time steps in each sequence)
sequence_length = 2

# Create sequences for training, validation, and test sets
X_train_sequences, y_train_sequences = create_sequences(X_train_scaled, sequence_length)
X_val_sequences, y_val_sequences = create_sequences(X_val_scaled, sequence_length)
X_test_sequences, y_test_sequences = create_sequences(X_test_scaled, sequence_length)

# Check the shape of the created sequences
print("Shape of Training Sequences:", X_train_sequences.shape)
print("Shape of Validation Sequences:", X_val_sequences.shape)
print("Shape of Test Sequences:", X_test_sequences.shape)
print("Shape of y val Sequences:", y_val_sequences.shape)


Shape of Training Sequences: (23, 2, 3)
Shape of Validation Sequences: (6, 2, 3)
Shape of Test Sequences: (7, 2, 3)
Shape of y val Sequences: (6, 3)


Step 5 Model Architecture:

In [270]:
import tensorflow as tf
from tensorflow import keras
from keras import layers, Sequential,models
from keras.layers import LSTM, Dense 
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import SimpleRNN, Dense
# Define the RNN model
model = Sequential()
num_features= 3

# Add the first LSTM layer with return_sequences=True to output sequences for the subsequent layers
model.add(LSTM(64, return_sequences=True, input_shape=(sequence_length, num_features)))

# Add the second LSTM layer
model.add(LSTM(64, return_sequences=True))

# Add the third LSTM layer
model.add(LSTM(64, return_sequences=True))

# Add the fourth LSTM layer
model.add(LSTM(64))

# Add the first fully connected (FC) layer
model.add(Dense(32, activation='relu'))

# Add the second FC layer with a single output unit for binary classification
model.add(Dense(3, activation='softmax')) # For multi-class classification with 3 classes

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Print a summary of the model architecture
model.summary()


Model: "sequential_471"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_182 (LSTM)             (None, 2, 64)             17408     
                                                                 
 lstm_183 (LSTM)             (None, 2, 64)             33024     
                                                                 
 lstm_184 (LSTM)             (None, 2, 64)             33024     
                                                                 
 lstm_185 (LSTM)             (None, 64)                33024     
                                                                 
 dense_1365 (Dense)          (None, 32)                2080      
                                                                 
 dense_1366 (Dense)          (None, 3)                 99        
                                                                 
Total params: 118659 (463.51 KB)
Trainable params: 1

step 6 Compile the Model:

In [275]:
# Compile the model

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
batch_size = 32
epochs = 10
history = model.fit(X_train_sequences, y_train_sequences, batch_size=32, epochs=25, validation_data=(X_val_sequences, y_val_sequences))
# model.fit(X_train_sequences, y_train_sequences,epochs=10, validation_data=(X_val_sequences, y_val_sequences))

AttributeError: 'KerasClassifier' object has no attribute 'compile'

Step 7 Training the Model:

In [272]:
# Evaluate the model on the test sequences
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

def decode_label(label):
    return np.argmax(label)

def encode_label(label):
    one_hot_encoded = np.zeros(len(label))
    one_hot_encoded[label] = 1
    return one_hot_encoded

test_loss, test_accuracy = model.evaluate(X_test_sequences, y_test_sequences, verbose=0)

# Print the test results
print("Test Loss:", test_loss)
print("Test Accuracy:", test_accuracy)

# Make predictions on the test set
y_pred = model.predict(X_test_sequences)

# Convert predictions from one-hot encoding to integers
y_pred_decoded = np.array([decode_label(label) for label in y_pred])

# Convert the ground truth labels from one-hot encoding to integers
y_test_sequences_decoded = np.array([decode_label(label) for label in y_test_sequences])

threshold = 0.5
y_pred_binary = (y_pred_decoded > threshold).astype(int)

accuracy = accuracy_score(y_test_sequences_decoded, y_pred_binary)
precision = precision_score(y_test_sequences_decoded, y_pred_binary, average='weighted') # Changed the average setting here
recall = recall_score(y_test_sequences_decoded, y_pred_binary, average='weighted') # Changed the average setting here
f1 = f1_score(y_test_sequences_decoded, y_pred_binary, average='weighted') # Changed the average setting here

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)

Test Loss: 0.6429865956306458
Test Accuracy: 0.4285714328289032
Accuracy: 0.2857142857142857
Precision: 0.08163265306122448
Recall: 0.2857142857142857
F1 Score: 0.126984126984127


  _warn_prf(average, modifier, msg_start, len(result))


Step 8 Evaluate the Model:

In [273]:
from keras.callbacks import EarlyStopping, ModelCheckpoint

# Define the RNN model
model = Sequential()
model.add(LSTM(64, return_sequences=True, input_shape=(sequence_length, num_features)))
model.add(LSTM(64, return_sequences=True))
model.add(LSTM(64, return_sequences=True))
model.add(LSTM(64))
model.add(Dense(32, activation='relu'))
model.add(Dense(3, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Define early stopping to prevent overfitting
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Define model checkpoint to save the best model during training
model_checkpoint = ModelCheckpoint('best_model.h5', save_best_only=True, save_weights_only=True, monitor='val_loss', mode='min')

# Train the model
history = model.fit(X_train_sequences, y_train_sequences, epochs=10, batch_size=32, validation_data=(X_val_sequences, y_val_sequences), callbacks=[early_stopping, model_checkpoint])

# Save the trained model
model.save('final_model.h5')

# Evaluate the model on the test data
test_loss, test_accuracy = model.evaluate(X_test_sequences, y_test_sequences)
print(f"Test Loss: {test_loss}, Test Accuracy: {test_accuracy}")


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


  saving_api.save_model(


Test Loss: 0.6740748286247253, Test Accuracy: 0.2857142984867096


Step 9 Hyperparameter Tuning:



In [274]:
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from keras.models import Sequential
from keras.layers import Dense
from scikeras.wrappers import KerasClassifier

# Generate synthetic data for classification
X, y = make_classification(n_samples=1000, n_features=20, random_state=42)

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define a function to create the Keras model
def create_model(activation='relu', dropout_rate=0.2, optimizer='adam'):
    model = Sequential()
    model.add(Dense(32, input_dim=X_train.shape[1], activation=activation))
    model.add(Dense(16, activation=activation))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

# Create a KerasClassifier based on the model creation function
model = KerasClassifier(build_fn=create_model, verbose=0)

# Define hyperparameters and values to search
param_grid = {
    'model__activation': ['relu', 'tanh'],
    'model__dropout_rate': [0.2, 0.4],
    'model__optimizer': ['adam', 'sgd']
}

# Use GridSearchCV for hyperparameter tuning
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=3, scoring='accuracy')
grid_search.fit(X_train, y_train)
# grid_result = grid_search.fit(X_train_sequences, y_train_sequences)


# Get the best model and hyperparameters
best_model = grid_search.best_estimator_
# print(f"Best Parameters: {grid_search.best_params_}")

# Evaluate the best model on the test set
y_pred = best_model.predict(X_test)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
test_accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)
print("Test Accuracy:", test_accuracy)
print("Test Precision:", precision)
print("Test Recall:", recall)
print("Test F1 Score:", f1)

  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)


Confusion Matrix:
[[84  9]
 [25 82]]
Test Accuracy: 0.83
Test Precision: 0.9010989010989011
Test Recall: 0.7663551401869159
Test F1 Score: 0.8282828282828283


Step 10 Model Optimization:

Step 11 Data Augmentation:

step 12 Post-processing: