In [None]:
import pandas as pd
import numpy as np
import glob
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Bidirectional, LSTM, Dense, Flatten, Dropout, Input
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_squared_error
from tensorflow.compat.v1.keras.layers import CuDNNLSTM
from tensorflow.keras.utils import plot_model

from google.colab import drive

# Mount Google Drive so the dataset directory below is reachable from the runtime.
drive.mount('/content/drive')

# Define the path to the directory containing the feature files
directory_path = '/content/drive/MyDrive/FYP_dataset/features'

# Get a list of all feature file paths in the directory.
# glob returns paths in arbitrary (filesystem) order; sorting fixes the row
# order of the concatenated arrays, which in turn makes the seeded KFold
# splits further down reproducible between runs.
feature_files = sorted(glob.glob(directory_path + '/*.csv'))
if not feature_files:
    # Fail with a clear message instead of numpy's
    # "need at least one array to concatenate".
    raise FileNotFoundError(f'No feature CSV files found in {directory_path}')

# Define lists to store the per-file feature and target arrays
X = []
y_a = []

# Iterate over each feature file
for file in feature_files:
    # Read the feature file into a DataFrame
    df = pd.read_csv(file)

    # Every column except the target and the 'frameTime' column is a feature
    features = df.drop(columns=['Arousal_Value', 'frameTime']).values
    arousal = df['Arousal_Value'].values

    # Append the data to the lists
    X.append(features)
    y_a.append(arousal)

# Stack the rows of all files into one feature matrix / one target vector
X = np.concatenate(X)
y_a = np.concatenate(y_a)

# Scale every feature column to [0, 1].
# NOTE: this scaler is fit on ALL rows, so any train/test evaluation that
# consumes X_scaled / X_reshaped directly has seen test-row statistics.
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Reshape to (rows, n_features, 1): each feature column becomes one step of a
# length-n_features sequence with a single channel, as the LSTM input expects.
X_reshaped = X_scaled.reshape(X_scaled.shape[0], X_scaled.shape[1], 1)

# Number of folds used for cross-validation
num_folds = 5

# Evaluation results: one entry per fold, each entry being the list of MSE
# values obtained for every DNN combination on that fold
mse_results = []

# DNN head configurations to compare: 'layers' lists the Dense layer widths in
# order, 'dropout' is the dropout rate applied after each of those layers
dnn_combinations = [
    {'layers': [512, 256], 'dropout': 0.3},
    {'layers': [512, 256, 128], 'dropout': 0.5},
    {'layers': [512, 256, 128, 64], 'dropout': 0.5},
]

# Create the directory used to save model diagrams.
# exist_ok=True replaces the separate exists() check, so there is no window
# between checking and creating, and re-running the cell is harmless.
import os
os.makedirs('model_diagrams', exist_ok=True)

# Perform k-fold cross-validation (k = num_folds) for the BiLSTM + DNN models.
# For every fold, each configuration in dnn_combinations is trained from
# scratch on the training rows and scored by MSE on the held-out rows.
from tensorflow.keras import backend as K

kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)
for fold, (train_index, test_index) in enumerate(kf.split(X_reshaped)):
    # Fit the scaler on the training rows of the raw feature matrix only and
    # apply it to the held-out rows. Scaling with statistics computed over the
    # whole dataset would leak test-fold min/max values into training and make
    # the reported MSE optimistic.
    fold_scaler = MinMaxScaler()
    X_train = fold_scaler.fit_transform(X[train_index])[..., np.newaxis]
    X_test = fold_scaler.transform(X[test_index])[..., np.newaxis]
    y_a_train, y_a_test = y_a[train_index], y_a[test_index]

    # MSE of every DNN combination on this fold, in dnn_combinations order
    mse_combinations = []

    for i, dnn_config in enumerate(dnn_combinations):
        # Many models are built in this loop; drop the Keras global state left
        # behind by the previous one so memory use does not keep growing.
        K.clear_session()

        # Build the BiLSTM model for Arousal
        # NOTE(review): CuDNNLSTM comes from the tf.compat.v1 API and is
        # documented as GPU-only -- confirm the runtime has a GPU attached.
        input_layer = Input(shape=(X_train.shape[1], 1))
        lstm_layer = Bidirectional(CuDNNLSTM(64, return_sequences=True))(input_layer)
        flat_layer = Flatten()(lstm_layer)

        # Stack Dense -> Dropout pairs on top of the flattened BiLSTM output,
        # one pair per entry in the configuration
        hidden = flat_layer
        for layer_size in dnn_config['layers']:
            hidden = Dense(layer_size, activation='relu')(hidden)
            hidden = Dropout(dnn_config['dropout'])(hidden)

        # Single linear output unit for the regression target
        output_layer = Dense(1)(hidden)

        model = Model(inputs=input_layer, outputs=output_layer)
        model.compile(loss='mse', optimizer=Adam(learning_rate=0.001))

        # Train the Arousal model
        model.fit(X_train, y_a_train, epochs=25, batch_size=32, verbose=0)

        # Evaluate the model on the held-out rows
        predictions_a = model.predict(X_test)

        # model.predict returns one column per output unit; flatten it to the
        # 1-D shape of y_a_test before computing the error
        predictions_a = predictions_a.reshape(y_a_test.shape)

        # Calculate the MSE for this combination
        mse_combinations.append(mean_squared_error(y_a_test, predictions_a))

        # Save the model diagram as an image
        model_name = f'model_{fold}_combination_{i}.png'
        plot_model(model, to_file=f'model_diagrams/{model_name}', show_shapes=True, show_layer_names=True)

    mse_results.append(mse_combinations)

# Calculate the mean MSE for each combination across all folds
# (mse_results has one row per fold and one column per combination)
mean_mse_results = np.mean(mse_results, axis=0)

# Plot the mean MSE of each combination as one bar per combination.
# Each combination has a single mean MSE value, so it cannot be drawn against
# its list of layer sizes: plt.plot(layers, scalar) raises
# "x and y must have same first dimension".
plt.figure(figsize=(10, 6))
for i, dnn_config in enumerate(dnn_combinations):
    plt.bar(i, mean_mse_results[i], label=f'Layers: {dnn_config["layers"]}, Dropout: {dnn_config["dropout"]}')
# Label each bar with its layer widths, e.g. "512-256"
plt.xticks(
    range(len(dnn_combinations)),
    ['-'.join(str(size) for size in dnn_config['layers']) for dnn_config in dnn_combinations],
)
plt.xlabel('DNN Layers')
plt.ylabel('Mean Squared Error (MSE)')
plt.title('Mean Squared Error for Different DNN Combinations')
plt.legend()
plt.grid(True)
plt.show()

# Find the best DNN combination based on the lowest mean MSE
best_combination_index = np.argmin(mean_mse_results)
best_dnn_config = dnn_combinations[best_combination_index]

print('Best DNN Combination:')
print(f'Layers: {best_dnn_config["layers"]}')
print(f'Dropout: {best_dnn_config["dropout"]}')
print(f'Mean MSE for Best DNN Combination: {mean_mse_results[best_combination_index]:.4f}')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
