In [None]:
import os
import scipy.io
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.preprocessing.sequence import pad_sequences  # Correct import

# Create the output folder for this experiment.
# NOTE(review): this is a raw string, so every `\\` stays as two backslash
# characters in the path (the printed message shows them doubled). Windows
# appears to tolerate repeated separators here -- confirm before reusing the
# path elsewhere.
experiment_folder = r'D:\\thesis24\\EEG_ANTENATAL_OPD\\mat_eeg_final\\bp_eeg\\aftr_amp\\experiment5'
# exist_ok=True makes this a single idempotent call: no separate existence
# check that could race with another process creating the same folder.
os.makedirs(experiment_folder, exist_ok=True)
# The message is printed whether the folder was just created or already existed.
print(f"Folder created at {experiment_folder}")

class EEGDataset:
    """Collection of EEG trials read from the .mat files in one folder.

    After construction:
        data: list with one array per loaded file (the file's 'data'
            variable). It stays a plain list, so trials may differ in shape.
        labels: integer class codes produced by LabelEncoder, aligned
            index-for-index with ``data``.
        label_encoder: the fitted LabelEncoder; ``label_encoder.classes_``
            maps the integer codes back to 'RS' / 'M' / 'AM'.
    """

    # (filename marker, label) pairs, checked in this order; the first
    # marker found in the filename decides the label.
    _LABEL_MARKERS = (('_RS_', 'RS'), ('_M_', 'M'), ('_AM_', 'AM'))

    def __init__(self, data_folder):
        """Load every labelled .mat file found directly inside ``data_folder``."""
        self.data = []
        self.labels = []
        self.label_encoder = None
        self.load_data(data_folder)

    def load_data(self, folder):
        """Read the .mat files in ``folder`` and encode their labels.

        Files whose name carries no known class marker are skipped (with a
        printed notice) instead of being stored with a ``None`` label.

        Raises:
            KeyError: if a .mat file has no variable named 'data'.
        """
        # Sort the listing: os.listdir returns names in arbitrary order, and a
        # stable sample order is needed for any seeded split downstream to be
        # reproducible.
        for file in sorted(os.listdir(folder)):
            if not file.endswith('.mat'):
                continue

            label = self.get_label_from_filename(file)
            if label is None:
                # A None label would otherwise reach LabelEncoder, where it
                # either fails or turns into a spurious extra class
                # (depending on the scikit-learn version).
                print(f"Skipping {file}: no _RS_/_M_/_AM_ marker in the filename")
                continue

            filepath = os.path.join(folder, file)
            mat = scipy.io.loadmat(filepath)
            # Assuming 'data' is the variable name in .mat files
            self.data.append(mat['data'])
            self.labels.append(label)

        # Encode the string labels as integers. self.data is left as a list
        # so that variable-length trials are still allowed. The encoder is
        # kept so the code -> class-name mapping is not lost.
        self.label_encoder = LabelEncoder()
        self.labels = self.label_encoder.fit_transform(self.labels)

    def get_label_from_filename(self, filename):
        """Return 'RS', 'M' or 'AM' based on the marker in ``filename``.

        Returns ``None`` when the filename contains none of the markers.
        """
        for marker, label in self._LABEL_MARKERS:
            if marker in filename:
                return label
        return None

# Load the dataset
data_folder = r'D:\\thesis24\\EEG_ANTENATAL_OPD\\mat_eeg_final\\bp_eeg\\aftr_amp'
dataset = EEGDataset(data_folder)

# Fail early with a clear message; otherwise max() below raises an opaque
# "empty sequence" error when the folder holds no usable trials.
if not dataset.data:
    raise ValueError(f"No .mat trials were loaded from {data_folder}")

# Padding all sequences to have the same number of time points
# Find the maximum sequence length
# (axis 1 is treated as the time axis -- presumably trials are
# (channels, time); TODO confirm against the .mat files)
max_length = max(trial.shape[1] for trial in dataset.data)


def _pad_time_axis(trial, target_length):
    """Zero-pad ``trial`` at the end of axis 1 up to ``target_length``."""
    trial = np.asarray(trial, dtype='float32')
    pad_width = [(0, 0)] * trial.ndim
    pad_width[1] = (0, target_length - trial.shape[1])
    return np.pad(trial, pad_width)


# Pad the data so that all sequences are of the same length.
# The padding is applied on axis 1, the same axis max_length was measured on.
# (pad_sequences([trial], maxlen=...) pads/truncates axis 0 of each trial
# instead, which leaves the time axis ragged.)
# np.stack raises if the remaining dimensions differ between trials.
padded_data = np.stack([_pad_time_axis(trial, max_length) for trial in dataset.data])

# Flatten the data for traditional machine learning models
num_samples = len(padded_data)
flattened_data = padded_data.reshape(num_samples, -1)  # Reshape 3D EEG data to 2D

# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(flattened_data, dataset.labels, test_size=0.2, random_state=42)

# Standardize the features.
# The scaler is fitted on the training split only and then applied to the test
# split, so no test-set statistics leak into preprocessing.
# flattened_data itself is left unscaled.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train a random forest classifier
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Predict on the test set
y_pred = clf.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")


Folder created at D:\\thesis24\\EEG_ANTENATAL_OPD\\mat_eeg_final\\bp_eeg\\aftr_amp\\experiment5
