In [1]:
import numpy as np
import scipy.io as sio
from scipy.stats import kurtosis, skew
from scipy.signal import welch
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense

# Load the data
file_path = 'C:\\Users\\UC\\Documents\\NeuMa\\22117124\\new.mat'
new = sio.loadmat(file_path)
Label = new['label_list'].flatten()
EEG = new['EEG']

# Feature extraction for ML
def extract_ml_features(data):
    features = []
    for i in range(data.shape[2]):  # Iterate over samples
        sample = data[:, :, i]
        sample_features = []
        for j in range(data.shape[0]):  # Iterate over channels
            channel_data = sample[j, :]
            # Statistical features
            mean = np.mean(channel_data)
            var = np.var(channel_data)
            skewness = skew(channel_data)
            kurt = kurtosis(channel_data)
            # Frequency domain features using Welch's method
            freqs, psd = welch(channel_data)
            psd_mean = np.mean(psd)
            psd_std = np.std(psd)
            # Combine all features
            sample_features.extend([mean, var, skewness, kurt, psd_mean, psd_std])
        features.append(sample_features)
    return np.array(features)

ml_features = extract_ml_features(EEG)

# Feature extraction for DL using CNN
input_shape = (EEG.shape[0], EEG.shape[1], 1)

input_layer = Input(shape=input_shape)
conv1 = Conv2D(32, (3, 3), activation='relu')(input_layer)
pool1 = MaxPooling2D((2, 2))(conv1)
conv2 = Conv2D(64, (3, 3), activation='relu')(pool1)
pool2 = MaxPooling2D((2, 2))(conv2)
flatten = Flatten()(pool2)
dense1 = Dense(128, activation='relu')(flatten)
output_layer = Dense(64, activation='relu')(dense1)  # Output for feature extraction

cnn_model = Model(inputs=input_layer, outputs=output_layer)

# Reshape data for CNN input
data_cnn = EEG.reshape(EEG.shape[2], EEG.shape[0], EEG.shape[1], 1)

cnn_features = cnn_model.predict(data_cnn)

# Combine ML and DL features
combined_features = np.concatenate((ml_features, cnn_features), axis=1)

# Handle imbalanced data using SMOTE
smote = SMOTE()
combined_features_resampled, labels_resampled = smote.fit_resample(combined_features, Label)

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(combined_features_resampled, labels_resampled, test_size=0.2, random_state=42)

# Train ensemble classifiers
# Random Forest
rf_clf = RandomForestClassifier(n_estimators=100)
rf_clf.fit(X_train, y_train)

# Gradient Boosting
gb_clf = GradientBoostingClassifier(n_estimators=100)
gb_clf.fit(X_train, y_train)

# Custom Ensemble (Voting)
ensemble_clf = VotingClassifier(estimators=[('rf', rf_clf), ('gb', gb_clf)], voting='soft')
ensemble_clf.fit(X_train, y_train)

# Evaluate the ensemble classifier
y_pred = ensemble_clf.predict(X_test)
print(classification_report(y_test, y_pred))

# Hyperparameter tuning using GridSearchCV
param_grid = {
    'rf__n_estimators': [50, 100, 200],
    'gb__n_estimators': [50, 100, 200],
}

grid_search = GridSearchCV(estimator=ensemble_clf, param_grid=param_grid, cv=3, scoring='accuracy')
grid_search.fit(X_train, y_train)

# Best parameters
print(grid_search.best_params_)

# Evaluate the best model
best_model = grid_search.best_estimator_
y_pred_best = best_model.predict(X_test)
print(classification_report(y_test, y_pred_best))


[1m346/346[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step
              precision    recall  f1-score   support

           0       0.67      0.77      0.71      1409
           1       0.71      0.60      0.65      1340

    accuracy                           0.69      2749
   macro avg       0.69      0.68      0.68      2749
weighted avg       0.69      0.69      0.68      2749

{'gb__n_estimators': 50, 'rf__n_estimators': 200}
              precision    recall  f1-score   support

           0       0.67      0.78      0.72      1409
           1       0.72      0.59      0.65      1340

    accuracy                           0.69      2749
   macro avg       0.69      0.69      0.69      2749
weighted avg       0.69      0.69      0.69      2749



In [5]:
import numpy as np
import scipy.io as sio
from scipy.stats import kurtosis, skew
from scipy.signal import welch
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, StackingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Reshape, LSTM
import optuna
import pywt
from xgboost import XGBClassifier  # Make sure to import XGBClassifier

# Load the data
file_path = 'C:\\Users\\UC\\Documents\\NeuMa\\22117124\\new.mat'
new = sio.loadmat(file_path)
Label = new['label_list'].flatten()
EEG = new['EEG']

# Feature extraction for ML
def extract_ml_features(data):
    features = []
    for i in range(data.shape[2]):  # Iterate over samples
        sample = data[:, :, i]
        sample_features = []
        for j in range(data.shape[0]):  # Iterate over channels
            channel_data = sample[j, :]
            # Statistical features
            mean = np.mean(channel_data)
            var = np.var(channel_data)
            skewness = skew(channel_data)
            kurt = kurtosis(channel_data)
            # Frequency domain features using Welch's method
            freqs, psd = welch(channel_data)
            psd_mean = np.mean(psd)
            psd_std = np.std(psd)
            # Combine all features
            sample_features.extend([mean, var, skewness, kurt, psd_mean, psd_std])
        features.append(sample_features)
    return np.array(features)

# Wavelet transform features
def extract_wavelet_features(data):
    features = []
    for i in range(data.shape[2]):  # Iterate over samples
        sample = data[:, :, i]
        sample_features = []
        for j in range(data.shape[0]):  # Iterate over channels
            channel_data = sample[j, :]
            coeffs = pywt.wavedec(channel_data, 'db4', level=4)
            for coeff in coeffs:
                sample_features.extend([np.mean(coeff), np.std(coeff)])
        features.append(sample_features)
    return np.array(features)

ml_features = extract_ml_features(EEG)
wavelet_features = extract_wavelet_features(EEG)
ml_features = np.concatenate((ml_features, wavelet_features), axis=1)

# Feature extraction for DL using CNN and LSTM
input_shape = (EEG.shape[0], EEG.shape[1], 1)

input_layer = Input(shape=input_shape)
conv1 = Conv2D(32, (3, 3), activation='relu')(input_layer)
pool1 = MaxPooling2D((2, 2))(conv1)
conv2 = Conv2D(64, (3, 3), activation='relu')(pool1)
pool2 = MaxPooling2D((2, 2))(conv2)
flatten = Flatten()(pool2)

# Reshape for LSTM layer
reshape_layer = Reshape((flatten.shape[1], 1))(flatten)
lstm_layer = LSTM(64)(reshape_layer)
dense1 = Dense(128, activation='relu')(lstm_layer)
output_layer = Dense(64, activation='relu')(dense1)  # Output for feature extraction

cnn_rnn_model = Model(inputs=input_layer, outputs=output_layer)
cnn_rnn_model.compile(optimizer='adam', loss='mse')

# Reshape data for CNN input
data_cnn = EEG.reshape(EEG.shape[2], EEG.shape[0], EEG.shape[1], 1)

cnn_rnn_features = cnn_rnn_model.predict(data_cnn)

# Combine ML and DL features
combined_features = np.concatenate((ml_features, cnn_rnn_features), axis=1)

# Handle imbalanced data using SMOTE
smote = SMOTE()
combined_features_resampled, labels_resampled = smote.fit_resample(combined_features, Label)

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(combined_features_resampled, labels_resampled, test_size=0.2, random_state=42)

# Advanced Ensemble with Stacking and Hyperparameter Optimization
def objective(trial):
    rf_n_estimators = trial.suggest_int('rf_n_estimators', 50, 300)
    gb_n_estimators = trial.suggest_int('gb_n_estimators', 50, 300)
    xgb_n_estimators = trial.suggest_int('xgb_n_estimators', 50, 300)

    rf_clf = RandomForestClassifier(n_estimators=rf_n_estimators)
    gb_clf = GradientBoostingClassifier(n_estimators=gb_n_estimators)
    xgb_clf = XGBClassifier(n_estimators=xgb_n_estimators)

    stacking_clf = StackingClassifier(estimators=[
        ('rf', rf_clf), 
        ('gb', gb_clf),
        ('xgb', xgb_clf)
    ], final_estimator=RandomForestClassifier(n_estimators=100))

    stacking_clf.fit(X_train, y_train)
    y_pred = stacking_clf.predict(X_test)
    accuracy = np.mean(y_pred == y_test)
    return accuracy

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)
print("Best parameters found:")
print(study.best_params)

# Train the best model found by Optuna
best_params = study.best_params
rf_clf = RandomForestClassifier(n_estimators=best_params['rf_n_estimators'])
gb_clf = GradientBoostingClassifier(n_estimators=best_params['gb_n_estimators'])
xgb_clf = XGBClassifier(n_estimators=best_params['xgb_n_estimators'])

stacking_clf = StackingClassifier(estimators=[
    ('rf', rf_clf), 
    ('gb', gb_clf),
    ('xgb', xgb_clf)
], final_estimator=RandomForestClassifier(n_estimators=100))

stacking_clf.fit(X_train, y_train)

# Evaluate the optimized model
y_pred_optimized = stacking_clf.predict(X_test)
print("Optimized model evaluation:")
print(classification_report(y_test, y_pred_optimized))


[1m346/346[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m741s[0m 2s/step


[I 2024-05-19 23:11:20,870] A new study created in memory with name: no-name-457f96df-a829-4a48-92be-92b86714731f
[I 2024-05-19 23:55:56,765] Trial 0 finished with value: 0.7169879956347763 and parameters: {'rf_n_estimators': 283, 'gb_n_estimators': 276, 'xgb_n_estimators': 172}. Best is trial 0 with value: 0.7169879956347763.
[I 2024-05-20 00:15:01,426] Trial 1 finished with value: 0.7169879956347763 and parameters: {'rf_n_estimators': 292, 'gb_n_estimators': 122, 'xgb_n_estimators': 224}. Best is trial 0 with value: 0.7169879956347763.
[I 2024-05-20 00:22:26,178] Trial 2 finished with value: 0.6911604219716261 and parameters: {'rf_n_estimators': 86, 'gb_n_estimators': 52, 'xgb_n_estimators': 80}. Best is trial 0 with value: 0.7169879956347763.
[I 2024-05-20 00:45:58,749] Trial 3 finished with value: 0.7093488541287741 and parameters: {'rf_n_estimators': 199, 'gb_n_estimators': 176, 'xgb_n_estimators': 293}. Best is trial 0 with value: 0.7169879956347763.
[I 2024-05-20 01:22:44,915] T

Best parameters found:
{'rf_n_estimators': 265, 'gb_n_estimators': 89, 'xgb_n_estimators': 300}
Optimized model evaluation:
              precision    recall  f1-score   support

           0       0.71      0.79      0.75      1409
           1       0.75      0.66      0.70      1340

    accuracy                           0.73      2749
   macro avg       0.73      0.72      0.72      2749
weighted avg       0.73      0.73      0.72      2749

