In [1]:
# Importing libraries
import os
import pandas as pd
import numpy as np
import pywt
import scipy.stats as stats

In [2]:
# Subject metadata, keyed by subject ID of the form "sub-NNN".
# Each value is a 3-tuple that the file-processing loop unpacks as
# (gender, age, mmse): gender is "F" or "M", the other two are integers.
# NOTE(review): "mmse" is presumably the Mini-Mental State Examination score and
# age is presumably in years -- confirm against the dataset description.
subject_metadata = {
    "sub-001": ("F", 57, 16), "sub-002": ("F", 78, 22), "sub-003": ("M", 70, 14), "sub-004": ("F", 67, 20),
    "sub-005": ("M", 70, 22), "sub-006": ("F", 61, 14), "sub-007": ("F", 79, 20), "sub-008": ("M", 62, 16),
    "sub-009": ("F", 77, 23), "sub-010": ("M", 69, 20), "sub-011": ("M", 71, 22), "sub-012": ("M", 63, 18),
    "sub-013": ("F", 64, 20), "sub-014": ("M", 77, 14), "sub-015": ("M", 61, 18), "sub-016": ("F", 68, 14),
    "sub-017": ("F", 61, 6),  "sub-018": ("F", 73, 23), "sub-019": ("F", 62, 14), "sub-020": ("M", 71, 4),
    "sub-021": ("M", 79, 22), "sub-022": ("F", 68, 20), "sub-023": ("M", 60, 16), "sub-024": ("F", 69, 20),
    "sub-025": ("F", 79, 20), "sub-026": ("F", 61, 18), "sub-027": ("F", 67, 16), "sub-028": ("M", 49, 20),
    "sub-029": ("F", 53, 16), "sub-030": ("F", 56, 20), "sub-031": ("F", 67, 22), "sub-032": ("F", 59, 20),
    "sub-033": ("F", 72, 20), "sub-034": ("F", 75, 18), "sub-035": ("F", 57, 22), "sub-036": ("F", 58, 9),
    "sub-037": ("M", 57, 30), "sub-038": ("M", 62, 30), "sub-039": ("M", 70, 30), "sub-040": ("M", 61, 30),
    "sub-041": ("F", 77, 30), "sub-042": ("M", 74, 30), "sub-043": ("M", 72, 30), "sub-044": ("F", 64, 30),
    "sub-045": ("F", 70, 30), "sub-046": ("M", 63, 30), "sub-047": ("F", 70, 30), "sub-048": ("M", 65, 30),
    "sub-049": ("F", 62, 30), "sub-050": ("M", 68, 30), "sub-051": ("F", 75, 30), "sub-052": ("F", 73, 30),
    "sub-053": ("M", 70, 30), "sub-054": ("M", 78, 30), "sub-055": ("M", 67, 30), "sub-056": ("F", 64, 30),
    "sub-057": ("M", 64, 30), "sub-058": ("M", 62, 30), "sub-059": ("M", 77, 30), "sub-060": ("F", 71, 30),
    "sub-061": ("F", 63, 30), "sub-062": ("M", 67, 30), "sub-063": ("M", 66, 30), "sub-064": ("M", 66, 30),
    "sub-065": ("F", 71, 30), "sub-066": ("M", 73, 20), "sub-067": ("M", 66, 24), "sub-068": ("M", 78, 25), 
    "sub-069": ("M", 70, 22), "sub-070": ("F", 67, 22), "sub-071": ("M", 62, 20), "sub-072": ("M", 65, 18), 
    "sub-073": ("F", 57, 22), "sub-074": ("F", 53, 20), "sub-075": ("F", 71, 22), "sub-076": ("M", 44, 24), 
    "sub-077": ("M", 61, 22), "sub-078": ("M", 62, 22), "sub-079": ("F", 60, 18), "sub-080": ("F", 71, 20), 
    "sub-081": ("F", 61, 18), "sub-082": ("M", 63, 27), "sub-083": ("F", 68, 20), "sub-084": ("F", 71, 24), 
    "sub-085": ("M", 64, 26), "sub-086": ("M", 49, 26), "sub-087": ("M", 73, 24), "sub-088": ("M", 55, 24)
}

In [3]:
# Function to calculate Discrete Wavelet Transform coefficients
def calculate_dwt_coefficients(signal, wavelet='db4', level=4):
    """Run a multilevel discrete wavelet decomposition on ``signal``.

    Args:
        signal: Samples to decompose; handed to ``pywt.wavedec`` as its data.
        wavelet: Wavelet identifier forwarded to ``pywt.wavedec``.
        level: Decomposition depth forwarded to ``pywt.wavedec``.

    Returns:
        The value returned by ``pywt.wavedec`` for these arguments (per the
        PyWavelets documentation, a list of coefficient arrays with the
        approximation band first).
    """
    return pywt.wavedec(signal, wavelet, level=level)

In [4]:
# Function to calculate Hjorth parameters
def calculate_hjorth_parameters(signal):
    """Compute the Hjorth activity, mobility and complexity of a 1-D signal.

    With ``x'`` and ``x''`` denoting the first and second discrete
    differences of ``x``:

    * activity   = var(x)
    * mobility   = sqrt(var(x') / var(x))
    * complexity = mobility(x') / mobility(x)
                 = sqrt(var(x'') / var(x')) / mobility(x)

    Args:
        signal: 1-D array-like of numeric samples.

    Returns:
        dict with keys ``'hjorth_activity'``, ``'hjorth_mobility'`` and
        ``'hjorth_complexity'``. Mobility and complexity are NaN when a
        variance in a denominator is zero (e.g. a constant signal).
    """
    signal = np.asarray(signal, dtype=float)
    first_deriv = np.diff(signal)
    second_deriv = np.diff(first_deriv)

    var_zero = np.var(signal)
    var_d1 = np.var(first_deriv)
    var_d2 = np.var(second_deriv)

    # Degenerate signals produce 0/0; let that propagate as NaN without
    # flooding the output with RuntimeWarnings.
    with np.errstate(divide='ignore', invalid='ignore'):
        mobility = np.sqrt(var_d1 / var_zero)
        # Complexity is the ratio of two mobilities, so the division by the
        # signal's mobility must happen outside the square root.
        complexity = np.sqrt(var_d2 / var_d1) / mobility

    return {
        'hjorth_activity': var_zero,
        'hjorth_mobility': mobility,
        'hjorth_complexity': complexity,
    }

In [5]:
# Function to calculate Shannon entropy
def calculate_shannon_entropy(signal, bins=None):
    """Estimate the Shannon entropy, in bits, of a signal's value distribution.

    Args:
        signal: Array-like of samples.
        bins: Optional bin specification forwarded to ``np.histogram``.
            When ``None`` (the default) the probabilities are the relative
            frequencies of exactly equal values. For continuous-valued data
            every sample is usually distinct, in which case that estimate is
            simply ``log2(len(signal))`` regardless of the signal's shape;
            pass ``bins`` to get a histogram-based estimate instead.

    Returns:
        The entropy ``-sum(p * log2(p))`` over the non-empty values/bins.
    """
    signal = np.asarray(signal)
    if bins is None:
        _, counts = np.unique(signal, return_counts=True)
    else:
        counts, _ = np.histogram(signal, bins=bins)
        # Empty bins contribute nothing and would produce log2(0).
        counts = counts[counts > 0]

    # Normalise by the number of counted samples so probabilities sum to 1.
    prob = counts / counts.sum()
    return -np.sum(prob * np.log2(prob))

In [6]:
# Function to calculate Sure entropy
def calculate_sure_entropy(signal, threshold=0.1):
    """Compute the SURE (threshold-based) entropy of a signal.

    The value is ``N - #{i : |s_i| <= threshold} + sum_i min(s_i**2, threshold**2)``
    where ``N`` is the number of samples.

    Args:
        signal: NumPy array of samples (it is squared element-wise).
        threshold: Magnitude threshold used both for counting small samples
            and for clipping the squared samples.

    Returns:
        The SURE entropy as a scalar.
    """
    within_threshold = np.abs(signal) <= threshold
    clipped_energy = np.minimum(signal**2, threshold**2)
    return len(signal) - np.sum(within_threshold) + np.sum(clipped_energy)

In [7]:
# Function to calculate statistical features for DWT coefficients
def calculate_statistical_features(coeffs):
    """Build a flat feature dictionary from a sequence of coefficient arrays.

    For the array at position ``i`` the following keys are added, in this
    order, each suffixed with ``dwt_{i}``: ``hjorth_activity``,
    ``hjorth_mobility``, ``hjorth_complexity``, ``shannon_entropy``,
    ``sure_entropy``, ``var``, ``skew``, ``kurt``.

    Args:
        coeffs: Iterable of 1-D coefficient arrays.

    Returns:
        dict mapping ``"<feature>_dwt_<i>"`` to the computed value; insertion
        order is band-major, then feature type as listed above.
    """
    features = {}
    for band_idx, band in enumerate(coeffs):
        suffix = f"dwt_{band_idx}"
        hjorth = calculate_hjorth_parameters(band)
        features.update({
            f"hjorth_activity_{suffix}": hjorth['hjorth_activity'],
            f"hjorth_mobility_{suffix}": hjorth['hjorth_mobility'],
            f"hjorth_complexity_{suffix}": hjorth['hjorth_complexity'],
            f"shannon_entropy_{suffix}": calculate_shannon_entropy(band),
            f"sure_entropy_{suffix}": calculate_sure_entropy(band),
            f"var_{suffix}": np.var(band),
            f"skew_{suffix}": stats.skew(band),
            f"kurt_{suffix}": stats.kurtosis(band),
        })
    return features

In [8]:
# Function to extract features
def extract_features(signals):
    """Compute the flattened DWT-based feature values for several signals.

    Each signal is decomposed with ``calculate_dwt_coefficients`` and the
    resulting coefficients are summarised by
    ``calculate_statistical_features``; only the dictionary *values* are kept.

    Args:
        signals: Iterable of 1-D signals.

    Returns:
        A single flat list: the feature values of the first signal, followed
        by those of the second, and so on.
    """
    return [
        value
        for channel_signal in signals
        for value in calculate_statistical_features(
            calculate_dwt_coefficients(channel_signal)
        ).values()
    ]

In [9]:
# Function to truncate data
def truncate_data(data, max_samples):
    """Return the leading part of ``data``, i.e. ``data[:max_samples]``.

    Args:
        data: Any object supporting slice indexing.
        max_samples: Upper bound of the slice.

    Returns:
        The sliced object; shorter inputs are returned in full.
    """
    leading = slice(None, max_samples)
    return data[leading]

# Function to segment data into epochs
def segment_data(data, epoch_length):
    """Split ``data`` into consecutive, non-overlapping epochs.

    Args:
        data: Object exposing ``shape[0]`` and slice indexing along its first
            axis.
        epoch_length: Number of samples/rows per epoch.

    Returns:
        List of ``data.shape[0] // epoch_length`` slices, each of length
        ``epoch_length``; a trailing partial epoch is dropped.
    """
    full_epochs = data.shape[0] // epoch_length
    return [
        data[k * epoch_length:(k + 1) * epoch_length]
        for k in range(full_epochs)
    ]

In [10]:
# Define channel labels
channels = ['Fp1', 'Fp2', 'F7', 'F3', 'Fz', 'F4', 'F8', 'T3', 'C3', 'Cz', 'C4', 'T4', 'T5', 'P3', 'Pz', 'P4', 'T6', 'O1', 'O2']

# Feature types, listed in the order calculate_statistical_features inserts
# them for each DWT band.
selected_feature_types = ['hjorth_activity', 'hjorth_mobility', 'hjorth_complexity', 'shannon_entropy', 'sure_entropy', 'var', 'skew', 'kurt']

# One label per coefficient array produced by the level-4 decomposition
# (calculate_dwt_coefficients defaults to level=4).
dwt_labels = [f"dwt_{i}" for i in range(5)]

# The label order MUST mirror the order in which feature values are appended
# to each row: channel (outermost) -> DWT band -> feature type (innermost).
# Iterating in any other order attaches the wrong column name to the values.
selected_feature_labels = [
    f"{feature_type}_{dwt}_{channel}"
    for channel in channels
    for dwt in dwt_labels
    for feature_type in selected_feature_types
]

# Additional metadata labels (same order as the metadata prefix of each row)
additional_labels = ['epoch', 'subject', 'gender', 'age', 'class', 'mmse']
final_labels = additional_labels + selected_feature_labels

In [11]:
# Recording parameters shared by the truncation and epoching settings below.
# NOTE(review): the sampling rate must match the recordings being read -- confirm.
sampling_rate_hz = 500        # samples per second
epoch_duration_seconds = 2    # length of one epoch

# Portion of each recording to keep, in minutes
desired_duration_minutes = 5.1

# Convert the desired duration to the number of samples (minutes -> seconds -> samples)
max_samples = int(desired_duration_minutes * 60 * sampling_rate_hz)

# Samples per epoch
epoch_length = epoch_duration_seconds * sampling_rate_hz

In [12]:
# Folder containing all subjects' data.
# The default below is specific to one machine; set the EEG_DATA_DIR
# environment variable to point the notebook at a different location
# without editing the code.
folder_path = os.environ.get(
    "EEG_DATA_DIR",
    r"C:\Users\Izwan\Desktop\alzheimer-prediction\converted-files",
)

In [13]:
# Initialize a list to store all features (one row per subject epoch)
all_features = []

# Each channel contributes one value per (DWT band, feature type) pair, so a
# missing channel must be padded with exactly this many NaNs to keep every
# row the same length as the column labels.
features_per_channel = len(dwt_labels) * len(selected_feature_types)

# Iterate over all files and directories in the folder
for root, dirs, files in os.walk(folder_path):
    # Sorting in place makes the top-down traversal order deterministic.
    dirs.sort()
    for file in sorted(files):
        if not file.endswith(".csv"):
            continue

        # Extract subject ID from filename (text before the first underscore)
        subject_id = file.split("_")[0]

        # Extract metadata; raises KeyError for a subject that is not listed
        gender, age, mmse = subject_metadata[subject_id]

        # Class label is derived from the numeric part of the subject ID
        subject_number = int(subject_id.split('-')[1])
        if 1 <= subject_number <= 36:
            classification = 'A'
        elif 37 <= subject_number <= 65:
            classification = 'C'
        elif 66 <= subject_number <= 88:
            classification = 'F'
        else:
            classification = 'Unknown'

        # Read the file
        file_path = os.path.join(root, file)
        print(f"Reading file: {file}")
        data = pd.read_csv(file_path)

        # Truncate the data to the maximum number of samples
        data = truncate_data(data, max_samples)

        # Segment the data into epochs
        epochs = segment_data(data, epoch_length)

        # Report missing channels once per file rather than once per epoch
        missing_channels = {channel for channel in channels if channel not in data.columns}
        for channel in channels:
            if channel in missing_channels:
                print(f"Channel {channel} missing in file {file}.")

        # Process each epoch
        for epoch_idx, epoch in enumerate(epochs):
            subject_features = []
            for channel in channels:
                if channel in missing_channels:
                    # Placeholder for this channel's missing features
                    subject_features.extend([np.nan] * features_per_channel)
                else:
                    # Extract features for the channel
                    subject_features.extend(extract_features([epoch[channel].values]))

            # Combine metadata, epoch (1-based), and features into a single list
            combined_features = [epoch_idx + 1, subject_id, gender, age, classification, mmse] + subject_features
            all_features.append(combined_features)

Reading file: sub-001_task-eyesclosed_eeg.csv
Reading file: sub-002_task-eyesclosed_eeg.csv
Reading file: sub-003_task-eyesclosed_eeg.csv
Reading file: sub-004_task-eyesclosed_eeg.csv
Reading file: sub-005_task-eyesclosed_eeg.csv
Reading file: sub-006_task-eyesclosed_eeg.csv
Reading file: sub-007_task-eyesclosed_eeg.csv
Reading file: sub-008_task-eyesclosed_eeg.csv
Reading file: sub-009_task-eyesclosed_eeg.csv
Reading file: sub-010_task-eyesclosed_eeg.csv
Reading file: sub-011_task-eyesclosed_eeg.csv
Reading file: sub-012_task-eyesclosed_eeg.csv
Reading file: sub-013_task-eyesclosed_eeg.csv
Reading file: sub-014_task-eyesclosed_eeg.csv
Reading file: sub-015_task-eyesclosed_eeg.csv
Reading file: sub-016_task-eyesclosed_eeg.csv
Reading file: sub-017_task-eyesclosed_eeg.csv
Reading file: sub-018_task-eyesclosed_eeg.csv
Reading file: sub-019_task-eyesclosed_eeg.csv
Reading file: sub-020_task-eyesclosed_eeg.csv
Reading file: sub-021_task-eyesclosed_eeg.csv
Reading file: sub-022_task-eyesclo

In [14]:
# Destination of the extracted feature table
output_csv_file = "extracted-features.csv"

# Assemble all rows into a DataFrame using the prepared column labels,
# then write it out as CSV without the index column
df_features = pd.DataFrame(data=all_features, columns=final_labels)
df_features.to_csv(output_csv_file, index=False)
print(f"Features extracted and saved to {output_csv_file}")

Features extracted and saved to extracted-features-5min.csv
