<a href="https://colab.research.google.com/github/inhamjchoi/SafetyDataClass/blob/main/Ex06_1_Practice_2_ACC_Feature_Extraction_Advanced.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from scipy import stats
from scipy.signal import find_peaks
import warnings
warnings.filterwarnings('ignore')

In [None]:
def _next_power_of_two(length):
    """Return the smallest power of two that is >= ``length`` (``length`` >= 1)."""
    # Pure integer arithmetic; avoids the float round-trip of 2 ** ceil(log2(n)).
    return 1 << (length - 1).bit_length()


def _one_sided_power(signal, n_fft):
    """Return ``|FFT|**2 / n_fft`` for the first ``n_fft // 2`` bins of ``signal``.

    ``np.fft.fft`` zero-pads ``signal`` up to ``n_fft`` points before the
    transform, so the result always has ``n_fft // 2`` entries.
    """
    spectrum = np.fft.fft(signal, n_fft)
    return np.abs(spectrum[:n_fft // 2]) ** 2 / n_fft


def _spectral_energy(signal, n_fft):
    """Return the summed one-sided power of ``signal`` after removing its mean."""
    return np.sum(_one_sided_power(signal - np.mean(signal), n_fft))


def _spectral_entropy(signal, n_fft):
    """Return an entropy-style score of the min-max scaled power spectrum.

    The one-sided power spectrum is rescaled to the range [0, 1] (it is NOT
    normalised to sum to 1, so this is not the Shannon entropy of a
    probability distribution). The mean is not removed, so the DC bin takes
    part in the scaling. Returns 0 when every bin carries the same power.
    """
    power = _one_sided_power(signal, n_fft)
    shifted = power - np.min(power)
    span = np.max(shifted) - np.min(shifted)
    if span == 0:
        # Flat spectrum: scaling would divide by zero.
        return 0
    # The two 1e-12 offsets keep log2 away from zero for the minimum bin.
    scaled = 1e-12 + shifted / span
    return -np.sum(scaled * np.log2(scaled + 1e-12))


def extract_features_from_window(data_x, data_y, data_z):
    """
    Extract time- and frequency-domain features from one X/Y/Z window.

    Args:
        data_x, data_y, data_z: 1D array-likes holding the samples of the
            window for each axis. All three must have the same length and
            contain at least 2 samples.

    Returns:
        dict: 30 features keyed ``<Feature><Axis>``, inserted in this order:
            Mean, Skew, Max, Min, Range, Std (sample, ddof=1),
            Kurtosis (Pearson, i.e. fisher=False) for X, Y, Z;
            CorrXY, CorrXZ, CorrYZ;
            Energy and Entropy for X, Y, Z.

    Raises:
        ValueError: if the axes differ in length or hold fewer than 2 samples.
    """
    signals = {
        'X': np.asarray(data_x),
        'Y': np.asarray(data_y),
        'Z': np.asarray(data_z),
    }

    window_length = len(signals['X'])
    if any(len(sig) != window_length for sig in signals.values()):
        raise ValueError("data_x, data_y and data_z must have the same length")
    if window_length < 2:
        # With fewer than 2 samples the one-sided spectrum is empty and the
        # sample statistics are undefined.
        raise ValueError("each window must contain at least 2 samples")

    features = {}

    # Time domain features: one value per statistic and axis.
    time_domain = (
        ('Mean', np.mean),
        ('Skew', stats.skew),
        ('Max', np.max),
        ('Min', np.min),
        ('Range', lambda sig: np.max(sig) - np.min(sig)),
        ('Std', lambda sig: np.std(sig, ddof=1)),
        ('Kurtosis', lambda sig: stats.kurtosis(sig, fisher=False)),
    )
    for name, compute in time_domain:
        for axis, sig in signals.items():
            features[name + axis] = compute(sig)

    # Pairwise Pearson correlations between axes.
    for first, second in (('X', 'Y'), ('X', 'Z'), ('Y', 'Z')):
        features['Corr' + first + second] = np.corrcoef(
            signals[first], signals[second])[0, 1]

    # Frequency domain features share one FFT length: the window length
    # zero-padded up to the next power of two.
    n_fft = _next_power_of_two(window_length)
    frequency_domain = (
        ('Energy', _spectral_energy),
        ('Entropy', _spectral_entropy),
    )
    for name, compute in frequency_domain:
        for axis, sig in signals.items():
            features[name + axis] = compute(sig, n_fft)

    return features


In [None]:
# Script entry point: load ACC2_RawData.xlsx, cut the X/Y/Z columns into
# 50%-overlapping windows of 88 samples, extract the features of every window
# with extract_features_from_window(), label each window with its most
# frequent activity label and write the resulting table to finaldata.csv.
if __name__ == "__main__":

    print("=== ACC2_RawData.xlsx Sliding Window Feature Extraction ===\n")

    # Read accelerometer data from ACC2_RawData.xlsx
    # - Column 1: Time index (not used)
    # - Columns 2-4: X, Y, Z acceleration data
    # - Column 5: Activity label
    # header=None: the first spreadsheet row is read as data, not column names.
    df = pd.read_excel('ACC2_RawData.xlsx', header=None)

    print("Raw data preview:")
    print(df.head())
    print(f"Data shape: {df.shape}")

    # Extract data columns (positional, 0-based) as NumPy arrays
    rawdata_x = df.iloc[:, 1].values  # Column 2 -> index 1
    rawdata_y = df.iloc[:, 2].values  # Column 3 -> index 2
    rawdata_z = df.iloc[:, 3].values  # Column 4 -> index 3
    rawdata_labels = df.iloc[:, 4].values  # Column 5 -> index 4

    print(f"\nData lengths:")
    print(f"X data: {len(rawdata_x)} points")
    print(f"Y data: {len(rawdata_y)} points")
    print(f"Z data: {len(rawdata_z)} points")
    print(f"Labels: {len(rawdata_labels)} points")

    print(f"\nData ranges:")
    print(f"X: {np.min(rawdata_x)} to {np.max(rawdata_x)}")
    print(f"Y: {np.min(rawdata_y)} to {np.max(rawdata_y)}")
    print(f"Z: {np.min(rawdata_z)} to {np.max(rawdata_z)}")
    print(f"Unique labels: {np.unique(rawdata_labels)}")

    # Sliding window parameters
    window_size = 88  # nwind: samples per window
    overlap = window_size // 2  # 50% overlap

    print(f"\nSliding window parameters:")
    print(f"Window size: {window_size}")
    print(f"Overlap: {overlap}")
    print(f"Step size: {window_size - overlap}")

    # Generate sliding window indices
    nx = len(rawdata_x)  # Assume all axes have same length
    step = window_size - overlap
    # Because step == window_size - overlap, this equals
    # (nx - window_size) // step + 1, i.e. the number of complete windows.
    num_windows = (nx - overlap) // step

    print(f"Total possible windows: {num_windows}")

    # Create sliding windows
    print(f"\nCreating sliding windows...")

    # One list entry per window; the four lists stay index-aligned.
    sliding_windows_x = []
    sliding_windows_y = []
    sliding_windows_z = []
    sliding_windows_labels = []

    for i in range(num_windows):
        start_idx = i * step
        end_idx = start_idx + window_size

        # Safety guard: only keep windows that lie completely inside the data.
        if end_idx <= nx:
            # Extract window data
            window_x = rawdata_x[start_idx:end_idx]
            window_y = rawdata_y[start_idx:end_idx]
            window_z = rawdata_z[start_idx:end_idx]
            window_labels = rawdata_labels[start_idx:end_idx]

            sliding_windows_x.append(window_x)
            sliding_windows_y.append(window_y)
            sliding_windows_z.append(window_z)
            sliding_windows_labels.append(window_labels)

    num_data = len(sliding_windows_x)
    print(f"Created {num_data} sliding windows")

    # Initialize results storage (one feature dict per window)
    results = []

    print(f"\nExtracting features from each window...")

    # Process each sliding window
    for i in range(num_data):
        # Progress message every 50 windows
        if (i + 1) % 50 == 0:
            print(f"Processing window {i+1}/{num_data}...")

        # Get window data
        data_x = sliding_windows_x[i]
        data_y = sliding_windows_y[i]
        data_z = sliding_windows_z[i]
        data_labels = sliding_windows_labels[i]

        # Extract features for this window
        features = extract_features_from_window(data_x, data_y, data_z)

        # Get the mode (most frequent) label for this window;
        # element [0] of the result is the mode value, [1] would be its count.
        window_label = stats.mode(data_labels, keepdims=False)[0]
        features['Action'] = window_label

        results.append(features)

    # Convert results to DataFrame; feature_names fixes the column order and
    # must match the keys stored in each feature dict.
    feature_names = ['MeanX', 'MeanY', 'MeanZ', 'SkewX', 'SkewY', 'SkewZ',
                     'MaxX', 'MaxY', 'MaxZ', 'MinX', 'MinY', 'MinZ',
                     'RangeX', 'RangeY', 'RangeZ', 'StdX', 'StdY', 'StdZ',
                     'KurtosisX', 'KurtosisY', 'KurtosisZ',
                     'CorrXY', 'CorrXZ', 'CorrYZ',
                     'EnergyX', 'EnergyY', 'EnergyZ',
                     'EntropyX', 'EntropyY', 'EntropyZ',
                     'Action']

    all_result = pd.DataFrame(results, columns=feature_names)

    print(f"\n=== Feature Extraction Complete ===")
    print(f"Total windows processed: {len(all_result)}")
    # "- 1" excludes the 'Action' label column from the feature count.
    print(f"Features per window: {len(feature_names)-1} + Action label")

    # Display sample results
    print(f"\nSample of extracted features:")
    print(all_result.head().round(4))

    print(f"\nAction label distribution:")
    print(all_result['Action'].value_counts())

    # Display some statistics
    print(f"\nFeature statistics (first few features):")
    stats_cols = ['MeanX', 'MeanY', 'MeanZ', 'StdX', 'StdY', 'StdZ']
    print(all_result[stats_cols].describe().round(4))

    # Save results to CSV (matching MATLAB output); index=False omits the
    # DataFrame row index from the file.
    output_file = 'finaldata.csv'
    all_result.to_csv(output_file, index=False)
    print(f"\nResults saved to: {output_file}")

    # Additional summary
    print(f"\n=== Summary ===")
    print(f"Original data points: {nx}")
    print(f"Window size: {window_size}")
    print(f"Overlap: {overlap}")
    print(f"Windows created: {num_data}")
    # NOTE(review): the 30 is hard-coded here, while the earlier message derives
    # the count from len(feature_names) - 1; keep both in sync if features change.
    print(f"Features per window: 30 + 1 action label")
    print(f"Output file: {output_file}")

    print("\n=== Processing Complete ===")

=== ACC2_RawData.xlsx Sliding Window Feature Extraction ===

Raw data preview:
   0    1    2   3  4
0  1    7   93 -23  1
1  1 -110  127 -23  1
2  1  -42   98  32  1
3  1  -31   64  24  1
4  1  -13   25  11  1
Data shape: (13617, 5)

Data lengths:
X data: 13617 points
Y data: 13617 points
Z data: 13617 points
Labels: 13617 points

Data ranges:
X: -128 to 127
Y: -128 to 127
Z: -128 to 127
Unique labels: [1 2 3 4]

Sliding window parameters:
Window size: 88
Overlap: 44
Step size: 44
Total possible windows: 308

Creating sliding windows...
Created 308 sliding windows

Extracting features from each window...
Processing window 50/308...
Processing window 100/308...
Processing window 150/308...
Processing window 200/308...
Processing window 250/308...
Processing window 300/308...

=== Feature Extraction Complete ===
Total windows processed: 308
Features per window: 30 + Action label

Sample of extracted features:
     MeanX    MeanY    MeanZ   SkewX   SkewY   SkewZ  MaxX  MaxY  MaxZ  MinX  