In [30]:
!pip install librosa tensorflow numpy pandas scikit-learn



In [31]:
import os
import librosa
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from google.colab import drive
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, LSTM, Dense, Dropout, TimeDistributed, Reshape

In [32]:
# Make Google Drive visible inside the Colab runtime
drive.mount(mountpoint='/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [33]:
# Define dataset directory
# Root folder that is scanned for per-class sub-folders of .wav recordings.
# It lives under the Drive mount point, and the trailing slash is part of the value.
# NOTE(review): "METAL" may be a typo for "MENTAL" — the string must match the
# actual Drive folder name, so confirm before changing it.
dataset_path = "/content/drive/MyDrive/AI POWER METAL HEALTH/voice data/"  # Change path if needed

In [35]:
# Function to extract MFCC features
def extract_features(audio_file, max_pad=300, n_mfcc=40):
    """Load an audio file and return a fixed-width MFCC matrix.

    Parameters
    ----------
    audio_file : str or path-like
        Audio file to read; passed straight to ``librosa.load``.
    max_pad : int, default 300
        Number of frames (columns) in the result. Shorter clips are
        right-padded with zeros, longer clips are truncated.
    n_mfcc : int, default 40
        Number of MFCC coefficients (rows) to compute.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_mfcc, max_pad)``.
    """
    # sr=None keeps the file's native sampling rate instead of resampling.
    signal, sample_rate = librosa.load(audio_file, sr=None)
    mfcc = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=n_mfcc)

    # Truncate first (a no-op for short clips), then zero-pad whatever is missing.
    mfcc = mfcc[:, :max_pad]
    missing_frames = max_pad - mfcc.shape[1]
    if missing_frames > 0:
        mfcc = np.pad(mfcc, ((0, 0), (0, missing_frames)), mode='constant')
    return mfcc

In [36]:
# Prepare dataset
# class_labels / label_mapping are not defined anywhere in this notebook, so a
# fresh "Restart & Run All" would fail here. Fall back to deriving them from the
# folder layout (each sub-directory of dataset_path is treated as one class);
# definitions made by an earlier cell are left untouched.
# NOTE(review): confirm the derived order matches the label ids the saved model expects.
if "class_labels" not in globals():
    class_labels = sorted(
        entry for entry in os.listdir(dataset_path)
        if os.path.isdir(os.path.join(dataset_path, entry))
    )
if "label_mapping" not in globals():
    label_mapping = {label: index for index, label in enumerate(class_labels)}

X, y = [], []
for folder in class_labels:
    folder_path = os.path.join(dataset_path, folder)
    # os.listdir order is arbitrary; sorting keeps the sample order (and therefore
    # the seeded train/validation split) reproducible between runs.
    for file in sorted(os.listdir(folder_path)):
        # Ensure it's an audio file; compare case-insensitively so ".WAV" is kept.
        if not file.lower().endswith(".wav"):
            continue
        X.append(extract_features(os.path.join(folder_path, file)))
        y.append(label_mapping[folder])

In [37]:

# Convert to NumPy arrays
X = np.array(X)
y = np.array(y)

# The model is compiled with categorical_crossentropy, which expects one-hot
# targets, but no cell in this notebook encodes the labels. Encode 1-D class ids
# here; the ndim guard keeps the cell idempotent and leaves labels that are
# already 2-D untouched.
if y.ndim == 1:
    y = to_categorical(y, num_classes=len(class_labels))


In [39]:
# Hold out 20% of the samples for validation; the fixed seed makes the split repeatable
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [42]:
# Reshape X for CNN-LSTM input (add timesteps and channel dimensions).
# Each sample is fed as a single timestep holding one 40 x 300 MFCC matrix with
# one channel.
INPUT_SHAPE = (1, 40, 300, 1)  # (timesteps, n_mfcc, frames, channels)

# reshape() instead of stacking np.newaxis: re-running this cell can no longer
# keep adding axes, and a feature-size mismatch fails here rather than in fit().
X_train = X_train.reshape((-1, *INPUT_SHAPE))
X_val = X_val.reshape((-1, *INPUT_SHAPE))

# Define CNN-LSTM Model
model = Sequential([
    # Explicit Input layer instead of input_shape= on the first layer, which
    # Keras warns about for Sequential models.
    tf.keras.Input(shape=INPUT_SHAPE),
    # Convolutional feature extractor applied to every timestep.
    TimeDistributed(Conv2D(32, (3, 3), activation='relu')),
    TimeDistributed(MaxPooling2D((2, 2))),
    TimeDistributed(Conv2D(64, (3, 3), activation='relu')),
    TimeDistributed(MaxPooling2D((2, 2))),
    TimeDistributed(Flatten()),
    # Recurrent layers over the timestep axis.
    LSTM(64, return_sequences=True),
    LSTM(32),
    # Classification head.
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(len(class_labels), activation='softmax')  # one unit per class (14 per the original note)
])

  super().__init__(**kwargs)


In [43]:

# Configure training: Adam optimizer, categorical cross-entropy loss, accuracy metric
model.compile(
    loss='categorical_crossentropy',
    metrics=['accuracy'],
    optimizer='adam',
)

# Show the layer-by-layer architecture
model.summary()

In [44]:
# Fit the model, evaluating on the validation split after every epoch
history = model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=50,
    validation_data=(X_val, y_val),
)

Epoch 1/50
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 37ms/step - accuracy: 0.1082 - loss: 2.6203 - val_accuracy: 0.2411 - val_loss: 2.4115
Epoch 2/50
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 30ms/step - accuracy: 0.2376 - loss: 2.2621 - val_accuracy: 0.3518 - val_loss: 1.8771
Epoch 3/50
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 29ms/step - accuracy: 0.4090 - loss: 1.6892 - val_accuracy: 0.6625 - val_loss: 1.2129
Epoch 4/50
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 28ms/step - accuracy: 0.5867 - loss: 1.1567 - val_accuracy: 0.6589 - val_loss: 0.8498
Epoch 5/50
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 30ms/step - accuracy: 0.6709 - loss: 0.9248 - val_accuracy: 0.7250 - val_loss: 0.8643
Epoch 6/50
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 30ms/step - accuracy: 0.7001 - loss: 0.8223 - val_accuracy: 0.8214 - val_loss: 0.5565
Epoch 7/50
[1m70/70[0m [32m━━━━

In [45]:
# Save Model
model_path = "/content/drive/My Drive/OAF_Audio/mental_health_cnn_lstm.h5"
# Create the target folder first: nothing earlier in the notebook creates it, and
# the save is expected to fail if the parent directory is missing.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
model.save(model_path)



In [46]:

# Report the last epoch's accuracy on both splits, as percentages
train_acc, val_acc = (
    history.history[metric][-1] * 100
    for metric in ('accuracy', 'val_accuracy')
)
print(f"Final Training Accuracy: {train_acc:.2f}%")
print(f"Final Validation Accuracy: {val_acc:.2f}%")

Final Training Accuracy: 84.73%
Final Validation Accuracy: 79.11%


Final Training Accuracy: 99.44%
Final Validation Accuracy: 99.75%
