### Importing Libraries

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_theme()

from scipy.stats import skew, kurtosis
from scipy.fft import fft
from scipy.signal import welch
from scipy.integrate import simpson
from scipy.stats import entropy
import pywt
from joblib import Parallel, delayed  # For parallel processing

#### Renaming all channels to follow the proper naming convention

```python
# Directory holding the per-participant CSV files
folder_path = "data_csv"

# Electrode labels, listed in the order of the generic Channel_1 ... Channel_32 headers
new_column_names = [
    "Fp1", "AF3", "F7", "F3", "FC1", "FC5", "T7", "C3", "CP1", "CP5",
    "P7", "P3", "Pz", "PO3", "O1", "Oz", "O2", "PO4", "P4", "P8",
    "CP6", "CP2", "C4", "T8", "FC6", "FC2", "F4", "F8", "AF4", "Fp2",
    "Fz", "Cz"
]

# Rewrite every CSV in the folder with electrode labels as column headers
for filename in os.listdir(folder_path):
    if not filename.endswith(".csv"):
        continue

    file_path = os.path.join(folder_path, filename)

    # "Channel_<n>" -> n-th electrode label; columns are matched by name,
    # so headers that are absent from a file are simply left untouched
    column_mapping = {
        f"Channel_{position}": label
        for position, label in enumerate(new_column_names[:32], start=1)
    }

    # Load, relabel, and write back over the same file
    df = pd.read_csv(file_path).rename(columns=column_mapping)
    df.to_csv(file_path, index=False)

print("Column renaming completed for all CSV files!")

*These channels can be grouped into five brain regions:*

| **Brain Region**  | **Electrode Channels**                                      |
|------------------|------------------------------------------------------------|
| **Frontal (F)**  | Fp1, Fp2, AF3, AF4, F7, F3, Fz, F4, F8                      |
| **Central (C)**  | FC1, FC2, FC5, FC6, C3, Cz, C4                              |
| **Temporal (T)** | T7, T8                                                      |
| **Parietal (P)** | CP1, CP2, CP5, CP6, P3, P4, P7, P8, Pz                      |
| **Occipital (O)**| O1, O2, Oz, PO3, PO4                                        |


## Feature Extraction

In [2]:
# Function to calculate time-domain features
def extract_time_features(signal):
    """
    Compute time-domain descriptors of a single 1-D signal.

    Parameters:
    signal (array-like): Samples of one channel. At least three samples are
        required, because the maximum of the second difference is taken.

    Returns:
    dict: Feature name -> value, containing summary statistics (MEAN, STD,
        MAX_VALUE, MIN_VALUE, SKEWNESS, KURTOSIS, MEDIAN), the mean and
        maximum of the first and second differences, and the three Hjorth
        parameters (activity, mobility, complexity).

    Raises:
    ValueError: If the signal is too short for the difference maxima
        (np.max is called on an empty array).
    """
    features = {}

    # Statistical features
    features['MEAN'] = np.mean(signal)
    features['STD'] = np.std(signal)
    features['MAX_VALUE'] = np.max(signal)
    features['MIN_VALUE'] = np.min(signal)
    features['SKEWNESS'] = skew(signal)
    features['KURTOSIS'] = kurtosis(signal)
    features['MEDIAN'] = np.median(signal)

    # First derivative features
    first_diff = np.diff(signal)
    features['1st_DIFF_MEAN'] = np.mean(first_diff)
    features['1st_DIFF_MAX'] = np.max(first_diff)

    # Second derivative features
    second_diff = np.diff(first_diff)
    features['2nd_DIFF_MEAN'] = np.mean(second_diff)
    features['2nd_DIFF_MAX'] = np.max(second_diff)

    # Hjorth parameters, reusing the differences computed above
    activity = np.var(signal)
    first_diff_var = np.var(first_diff)
    second_diff_var = np.var(second_diff)

    # A flat signal has zero variance; dividing by it would yield NaN/inf and
    # contaminate every average built on top of these features. Report 0.0
    # instead. The checks use `== 0` so that NaN inputs still propagate as NaN.
    if activity == 0:
        mobility = 0.0
    else:
        mobility = np.sqrt(first_diff_var / activity)

    if first_diff_var == 0 or mobility == 0:
        complexity = 0.0
    else:
        complexity = np.sqrt(second_diff_var / first_diff_var) / mobility

    features['HJORTH_ACTIVITY'] = activity
    features['HJORTH_MOBILITY'] = mobility
    features['HJORTH_COMPLEXITY'] = complexity

    return features

# Function to calculate frequency-domain features
def extract_frequency_features(signal, fs=128):
    """
    Compute spectral, wavelet and entropy descriptors of a single 1-D signal.

    Parameters:
    signal (array-like): Samples of one channel.
    fs (float): Sampling frequency passed to the Welch estimator (default 128).

    Returns:
    dict: Feature name -> value, in this insertion order:
        FFT_DELTA, FFT_THETA, FFT_ALPHA, FFT_BETA, FFT_GAMMA, FFT_WHOLE:
            band power integrated from the Welch power spectral density.
        MIN_WAV_VALUE ... KURTOSIS_WAV_VALUE, WAV_BAND:
            statistics and mean energy of the first coefficient array
            returned by a level-5 'db4' wavelet decomposition.
        ENTROPY_SPECTRAL: entropy of the Welch PSD.
        ENTROPY_SHANNON: entropy of a 20-bin histogram of the samples.
    """
    features = {}

    # Work on a fresh, writable, C-contiguous float64 copy of the input
    signal = np.asarray(signal, dtype=np.float64, order='C').copy()

    # Spectral features from a Welch PSD estimate (feature keys keep the
    # historical "FFT_" prefix so downstream column names do not change)
    freqs, psd = welch(signal, fs=fs, nperseg=1024)
    bands = {
        'FFT_DELTA': (0.1, 3),
        'FFT_THETA': (3, 7),
        'FFT_ALPHA': (7, 12),
        'FFT_BETA': (12, 30),
        'FFT_GAMMA': (30, 40),
        'FFT_WHOLE': (0.1, 40),
    }

    def bandpower(band):
        # Both band edges are inclusive
        idx = np.logical_and(freqs >= band[0], freqs <= band[1])
        # `x` must be passed by keyword: recent SciPy releases no longer
        # accept it as a positional argument
        return simpson(psd[idx], x=freqs[idx])

    for feature_name, band in bands.items():
        features[feature_name] = bandpower(band)

    # Wavelet-based features.
    # Only the first array of the decomposition ever reached the output
    # (the eight feature names were zipped against a longer flat list of
    # statistics, truncating everything after the first array), so the
    # statistics are computed for that array alone. Results are unchanged.
    # NOTE(review): if per-level features were intended, they need distinct
    # key names per decomposition level.
    coeff = pywt.wavedec(signal, 'db4', level=5)[0]
    features['MIN_WAV_VALUE'] = np.min(coeff)
    features['MAX_WAV_VALUE'] = np.max(coeff)
    features['MEAN_WAV_VALUE'] = np.mean(coeff)
    features['MEDIAN_WAV_VALUE'] = np.median(coeff)
    features['STD_WAV_VALUE'] = np.std(coeff)
    features['SKEWNESS_WAV_VALUE'] = skew(coeff)
    features['KURTOSIS_WAV_VALUE'] = kurtosis(coeff)
    features['WAV_BAND'] = np.sum(coeff**2) / len(coeff)  # mean energy

    # Entropy-based features
    features['ENTROPY_SPECTRAL'] = entropy(psd)
    features['ENTROPY_SHANNON'] = entropy(np.histogram(signal, bins=20)[0])

    return features

#### Define Channel Groups

In [3]:
# Electrode labels belonging to each brain region, keyed by region name
channel_groups = {
    region: labels.split()
    for region, labels in (
        ('FRONTAL', 'Fp1 Fp2 AF3 AF4 F7 F3 Fz F4 F8'),
        ('CENTRAL', 'FC1 FC2 FC5 FC6 C3 Cz C4'),
        ('TEMPORAL', 'T7 T8'),
        ('PARIETAL', 'CP1 CP2 CP5 CP6 P3 P4 P7 P8 Pz'),
        ('OCCIPITAL', 'O1 O2 Oz PO3 PO4'),
    )
}

#### Function to Compute Region-Averaged Features

In [4]:
def compute_region_averages(trial_data):
    """
    Average per-channel features over the channels of each brain region.

    Parameters:
    trial_data (DataFrame): Samples of one trial, with one column per
        electrode label listed in `channel_groups`.

    Returns:
    dict: '<REGION>_<FEATURE>' -> mean of that feature across the
        region's channels.
    """
    averaged = {}

    for region, channels in channel_groups.items():
        # Pull the raw sample arrays for every electrode of this region
        signals = [trial_data[name].values for name in channels]

        # One combined time + frequency feature dict per electrode
        per_channel = [
            {**extract_time_features(samples), **extract_frequency_features(samples)}
            for samples in signals
        ]

        # Collapse the electrodes into a single mean value per feature
        averaged.update({
            f'{region}_{feature}': np.mean([feats[feature] for feats in per_channel])
            for feature in per_channel[0]
        })

    return averaged

#### Function to Process a Single Trial

In [5]:
def process_trial(trial_data, trial, subject_id):
    """
    Build the feature record of one trial.

    Parameters:
    trial_data (DataFrame): Rows belonging to a single trial.
    trial: Trial identifier stored in the record.
    subject_id: Participant identifier stored in the record.

    Returns:
    dict: Participant and trial ids, the region-averaged features, and the
        four rating labels taken from the first row of `trial_data`.
    """
    label_columns = ('Valence', 'Arousal', 'Dominance', 'Liking')

    # Identification first, then the averaged features
    record = {'Participant_ID': subject_id, 'Trial': trial}
    record.update(compute_region_averages(trial_data))

    # The labels are read from the first row of the trial
    for label in label_columns:
        record[label] = trial_data[label].values[0]

    return record

#### Main function to extract the features

In [6]:
def extract_features_from_csv(input_folder, output_file):
    """
    Extracts region-averaged features from all CSV files in the input folder and saves them to a single output file.

    Parameters:
    input_folder (str): Folder whose '*.csv' files are processed; each file
        must contain a 'Trial' column.
    output_file (str): Path of the CSV file the feature table is written to.

    Files are visited in sorted name order and trials in ascending id order,
    so the row order of the output is reproducible between runs.
    """
    all_trials = []  # List to store all trials' features

    # os.listdir returns names in arbitrary order; sort for a stable output
    for filename in sorted(os.listdir(input_folder)):
        if not filename.endswith(".csv"):
            continue

        file_path = os.path.join(input_folder, filename)
        subject_id = filename.split('.')[0]  # Extract subject ID (e.g., 's01')

        # Load CSV file
        df = pd.read_csv(file_path)

        # Process each trial in parallel. Iterating over the trials that are
        # actually present (instead of a fixed 1..40 range) avoids handing an
        # empty frame to process_trial when a trial id is missing from a file.
        trials = Parallel(n_jobs=-1)(
            delayed(process_trial)(trial_data, trial, subject_id)
            for trial, trial_data in df.groupby('Trial')
        )
        all_trials.extend(trials)

    # Convert to DataFrame and save
    df_features = pd.DataFrame(all_trials)
    df_features.to_csv(output_file, index=False)
    print(f"Features extracted and saved to {output_file}")

In [7]:
# Where the participant CSV files live, and where the feature table is written
input_folder = "data_csv"
output_file = "extracted_features.csv"

# Extract features from every participant file into the single output CSV
extract_features_from_csv(input_folder=input_folder, output_file=output_file)

Features extracted and saved to extracted_features.csv


#### Labelling the Emotions

In [8]:
def map_emotion(valence, arousal):
    """
    Assign an emotion category to a valence/arousal pair.

    The plane is split into four quadrants at the value 5 on each axis;
    a score of exactly 5 counts as "low".

    Parameters:
    valence (float): Valence score (1 to 9).
    arousal (float): Arousal score (1 to 9).

    Returns:
    str: Emotion category, or "Unknown" when the scores cannot be compared
        against the threshold (e.g. NaN).
    """
    if valence > 5:
        if arousal > 5:
            return "Happy/Excited"
        if arousal <= 5:
            return "Relaxed/Content"
    elif valence <= 5:
        if arousal > 5:
            return "Angry/Stressed"
        if arousal <= 5:
            return "Sad/Bored"

    # Reached only when a comparison above is false in both directions
    return "Unknown"

In [9]:
# Read back the per-trial feature table
df = pd.read_csv("extracted_features.csv")

# Derive the Emotion column row by row from the Valence and Arousal ratings
df = df.assign(
    Emotion=df.apply(
        lambda row: map_emotion(row['Valence'], row['Arousal']),
        axis=1,
    )
)

# Write the labelled table to its own file
df.to_csv("extracted_features_with_emotion.csv", index=False)