In [1]:
import numpy as np
import scipy.io as sio
from sklearn.model_selection import KFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from scipy.signal import welch
from scipy.stats import skew, kurtosis

# Load data from .mat file
file_path = 'C:\\Users\\UC\\Documents\\NeuMa\\22117124\\featuress.mat'
feet = sio.loadmat(file_path)
Data_array = np.transpose(feet['data_array'], (2, 0, 1))
Label_array = feet['label_array']

# Define feature extraction functions
def extract_mean(data):
    return np.mean(data, axis=-1)

def extract_std(data):
    return np.std(data, axis=-1)

def extract_skewness(data):
    return skew(data, axis=-1)

def extract_kurtosis(data):
    return kurtosis(data, axis=-1)

def extract_max(data):
    return np.max(data, axis=-1)

def extract_min(data):
    return np.min(data, axis=-1)

def extract_range(data):
    return np.max(data, axis=-1) - np.min(data, axis=-1)

def extract_energy(data):
    return np.sum(data**2, axis=-1)  # Energy of the signal

def extract_frequency_domain_features(data, fs=250):
    f, Pxx = welch(data, fs=fs, axis=-1)
    max_power_idx = np.argmax(Pxx, axis=-1)
    max_power_freq = f[max_power_idx]
    return max_power_freq, np.max(Pxx, axis=-1)  # Frequency with max power and max power value

def extract_zero_crossing_rate(data):
    return np.sum(np.diff(np.sign(data), axis=-1) != 0, axis=-1) / (data.shape[-1] - 1)

# Combine all feature extraction functions into a list
feature_extractors = [
    extract_mean,
    extract_std,
    extract_skewness,
    extract_kurtosis,
    extract_max,
    extract_min,
    extract_range,
    extract_energy,
    extract_zero_crossing_rate
]

# Extract features from data array using all defined feature extractors
def extract_features(data):
    features = []
    for extractor in feature_extractors:
        extracted = extractor(data)
        if isinstance(extracted, tuple):
            features.extend(extracted)  # Unpack tuple if returned
        else:
            features.append(extracted)
    return np.concatenate(features)

# Extract features from data array
featuress = np.array([extract_features(data) for data in Data_array])

# Define the number of splits for KFold cross-validation
n_splits = 10
kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

# Initialize lists to store evaluation metrics across folds
accuracy_scores = []
precision_scores = []
recall_scores = []
f1_scores = []
confusion_matrices = []

# Perform KFold cross-validation
for train_index, val_index in kf.split(featuress):
    X_train, X_val = featuress[train_index], featuress[val_index]
    y_train, y_val = Label_array[train_index], Label_array[val_index]
    
    # Apply SMOTE to balance classes in training data
    smote = SMOTE(sampling_strategy='auto', random_state=42)
    X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)
    
    # Scale features using StandardScaler
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train_resampled)
    X_val_scaled = scaler.transform(X_val)
    
    # Initialize individual classifiers
    svm_clf = SVC(kernel='rbf', random_state=42)  # SVM classifier
    rf_clf = RandomForestClassifier(n_estimators=100, random_state=42)  # Random Forest classifier
    
    # Create a voting classifier with SVM and RF
    ensemble_clf = VotingClassifier(estimators=[('svm', svm_clf), ('rf', rf_clf)], voting='hard')
    
    # Train the ensemble classifier
    ensemble_clf.fit(X_train_scaled, y_train_resampled)
    
    # Evaluate the ensemble classifier on the validation data
    y_pred = ensemble_clf.predict(X_val_scaled)
    
    # Compute evaluation metrics
    accuracy = accuracy_score(y_val, y_pred)
    precision = precision_score(y_val, y_pred, average='weighted')
    recall = recall_score(y_val, y_pred, average='weighted')
    f1 = f1_score(y_val, y_pred, average='weighted')
    confusion = confusion_matrix(y_val, y_pred)
    
    # Store metrics in lists
    accuracy_scores.append(accuracy)
    precision_scores.append(precision)
    recall_scores.append(recall)
    f1_scores.append(f1)
    confusion_matrices.append(confusion)

# Compute mean of evaluation metrics across folds
mean_accuracy = np.mean(accuracy_scores)
mean_precision = np.mean(precision_scores)
mean_recall = np.mean(recall_scores)
mean_f1 = np.mean(f1_scores)

# Print mean evaluation metrics
print("Mean Accuracy:", mean_accuracy)
print("Mean Precision:", mean_precision)
print("Mean Recall:", mean_recall)
print("Mean F1-score:", mean_f1)


Mean Accuracy: 0.6438775726505761
Mean Precision: 0.6249395781837802
Mean Recall: 0.6438775726505761
Mean F1-score: 0.6142465587961585


In [2]:
X_train.shape. y_train.shape

AttributeError: 'tuple' object has no attribute 'y_train'