In [None]:
import numpy as np
from scipy.stats import skew, kurtosis
from scipy.fft import fft
from typing import Tuple

class FeatureExtractor:
    """Compute per-channel statistical features from segmented sensor data.

    Every feature method reduces its input along axis 0, so a segment of
    shape (n_timesteps, n_features) yields one value per feature column.
    ``extract_features`` applies all registered feature methods to each
    segment and concatenates the results into one flat vector per segment.
    """

    def __init__(self):
        # Features computed directly on the raw samples.
        self.time_domain_features = [
            self.mean,
            self.std,
            self.energy,
            self.entropy,
            self.skewness,
            self.kurtosis,
        ]
        # Features computed on the magnitude of the FFT of the samples.
        self.frequency_domain_features = [
            self.fft_mean,
            self.fft_std,
        ]

    def mean(self, data: np.ndarray) -> np.ndarray:
        """Return the arithmetic mean of ``data`` along axis 0."""
        return np.mean(data, axis=0)

    def std(self, data: np.ndarray) -> np.ndarray:
        """Return the standard deviation of ``data`` along axis 0 (NumPy default ddof)."""
        return np.std(data, axis=0)

    def energy(self, data: np.ndarray) -> np.ndarray:
        """Return the sum of squared values of ``data`` along axis 0."""
        return np.sum(np.square(data), axis=0)

    def entropy(self, data: np.ndarray) -> np.ndarray:
        """Return the Shannon entropy (natural log) of normalized magnitudes.

        Absolute values are normalized along axis 0 so each column sums to 1
        and is treated as a probability distribution.

        Args:
            data: Array whose first axis is reduced.

        Returns:
            Entropy per column. A column whose magnitudes sum to zero carries
            no distribution and is reported as 0.0 instead of NaN.
        """
        magnitude = np.abs(data)
        total = np.sum(magnitude, axis=0, keepdims=True)
        # An all-zero column would give 0/0 -> NaN; divide by 1 there and
        # overwrite the result for those columns below.
        degenerate = total == 0
        prob = magnitude / np.where(degenerate, 1.0, total)
        prob[prob == 0] = 1e-10  # To avoid log(0)
        result = -np.sum(prob * np.log(prob), axis=0)
        return np.where(np.squeeze(degenerate, axis=0), 0.0, result)

    def skewness(self, data: np.ndarray) -> np.ndarray:
        """Return the skewness of ``data`` along axis 0 (scipy.stats.skew defaults)."""
        return skew(data, axis=0)

    def kurtosis(self, data: np.ndarray) -> np.ndarray:
        """Return the kurtosis of ``data`` along axis 0 (scipy.stats.kurtosis defaults)."""
        # Inside the method body the bare name resolves to the module-level
        # scipy function, not to this method.
        return kurtosis(data, axis=0)

    def fft_mean(self, data: np.ndarray) -> np.ndarray:
        """Return the mean FFT magnitude of ``data`` along axis 0."""
        freq_data = np.abs(fft(data, axis=0))
        return np.mean(freq_data, axis=0)

    def fft_std(self, data: np.ndarray) -> np.ndarray:
        """Return the standard deviation of the FFT magnitude along axis 0."""
        freq_data = np.abs(fft(data, axis=0))
        return np.std(freq_data, axis=0)

    def extract_features(self, X: np.ndarray) -> np.ndarray:
        """
        Extract features from raw data.

        Each segment is processed independently. Its output row is the
        concatenation of every time-domain feature followed by every
        frequency-domain feature, each contributing ``n_features`` values.

        Args:
            X: Raw sensor data of shape (n_segments, n_timesteps, n_features).

        Returns:
            Feature array of shape (n_segments, n_features * n_extracted_features).

        Raises:
            ValueError: If ``X`` is not 3-dimensional.
        """
        X = np.asarray(X)
        if X.ndim != 3:
            raise ValueError(
                "X must have shape (n_segments, n_timesteps, n_features); "
                f"got an array with {X.ndim} dimension(s)"
            )
        n_segments, _, n_features = X.shape
        feature_funcs = [
            *self.time_domain_features,
            *self.frequency_domain_features,
        ]

        if n_segments == 0:
            # Keep the documented 2-D shape even when there is nothing to do.
            return np.empty((0, n_features * len(feature_funcs)))

        # Segments are processed one at a time so that intermediate arrays
        # (e.g. the complex FFT output) stay small.
        return np.array([
            np.concatenate([func(segment) for func in feature_funcs])
            for segment in X
        ])


if __name__ == "__main__":
    # Imported locally so this script section carries its own dependencies.
    import os
    from pathlib import Path

    # Load preprocessed data.
    # The directory can be overridden with the PROCESSED_DATA_DIR environment
    # variable; when it is unset the original location is used. An environment
    # variable is used rather than sys.argv because a notebook kernel fills
    # sys.argv with its own arguments.
    data_path = Path(os.environ.get(
        "PROCESSED_DATA_DIR",
        '/Users/divyam/Desktop/MSSE/CMPE255/Group Project/255GroupProject/processed_data',
    ))
    X_train = np.load(data_path / 'X_train.npy')
    X_test = np.load(data_path / 'X_test.npy')

    print(f"Loaded training data shape: {X_train.shape}")
    print(f"Loaded test data shape: {X_test.shape}")

    # Feature extraction
    extractor = FeatureExtractor()
    print("Extracting features for training data...")
    X_train_features = extractor.extract_features(X_train)
    print("Extracting features for test data...")
    X_test_features = extractor.extract_features(X_test)

    print(f"Feature data shape (train): {X_train_features.shape}")
    print(f"Feature data shape (test): {X_test_features.shape}")

    # Save extracted features next to the inputs they were derived from.
    np.save(data_path / 'X_train_features.npy', X_train_features)
    np.save(data_path / 'X_test_features.npy', X_test_features)
    print("Feature data saved successfully!")


Loaded training data shape: (6384, 125, 45)
Loaded test data shape: (2736, 125, 45)
Extracting features for training data...
Extracting features for test data...
Feature data shape (train): (6384, 360)
Feature data shape (test): (2736, 360)
Feature data saved successfully!
