# In [None]:
import numpy as np
import pandas as pd
from scipy.fftpack import fft
from scipy.stats import entropy


# Number of consecutive samples that make up one rolling window.
window_size = 50

# Input table for feature extraction; the file name suggests Kalman
# filtering and outlier handling were applied upstream (not verified here).
preprocessed_data = pd.read_csv(
    'preprocessed_data_kalman_outliers.csv'
)

# Time domain features
def extract_time_domain_features(df, window_size):
    """Return rolling-window time-domain statistics for the sensor columns.

    Args:
        df: DataFrame containing ``X_kalman``, ``Y_kalman`` and ``Z_kalman``
            columns.
        window_size: Number of consecutive rows in each rolling window.

    Returns:
        DataFrame indexed like ``df`` with columns ``mean_x``, ``std_x``,
        ``min_x`` and ``max_x`` (rolling statistics of ``X_kalman``) and
        ``sma`` (rolling sum of ``|X| + |Y| + |Z|``). Rows that do not yet
        have a full window behind them are NaN.
    """
    # One lazy rolling view of the X axis serves all four statistics.
    x_rolling = df['X_kalman'].rolling(window=window_size)

    # Per-row sum of absolute values across the three axes.
    axes = ['X_kalman', 'Y_kalman', 'Z_kalman']
    abs_total = df[axes].abs().sum(axis=1)

    return pd.DataFrame({
        'mean_x': x_rolling.mean(),
        'std_x': x_rolling.std(),
        'min_x': x_rolling.min(),
        'max_x': x_rolling.max(),
        'sma': abs_total.rolling(window=window_size).sum(),
    })

# Frequency domain features
def extract_frequency_domain_features(df, window_size):
    """Compute rolling-window spectral features of the ``X_kalman`` column.

    For every trailing window of ``window_size`` consecutive samples, the
    one-sided FFT amplitude spectrum (bins ``0 .. window_size // 2 - 1``) is
    computed and summarised. Results are aligned with ``df.index`` the same
    way pandas rolling statistics are: the value at row ``i`` describes the
    window ending at row ``i``, and the first ``window_size - 1`` rows are NaN.

    Previously the scalars were assigned into an empty DataFrame, which
    produced a frame with zero rows, and ``window_size`` was ignored.

    Args:
        df: DataFrame containing an ``X_kalman`` column.
        window_size: Number of consecutive samples per window (at least 2).

    Returns:
        DataFrame indexed like ``df`` with columns:
        ``dominant_frequency`` - frequency (cycles per sample, since the
        sample spacing is taken as 1.0) of the largest-amplitude bin;
        ``spectral_entropy`` - Shannon entropy of the normalised amplitudes;
        ``energy`` - sum of squared amplitudes.

    Raises:
        ValueError: If ``window_size`` is smaller than 2, because the
            one-sided spectrum would then contain no bins.

    Note:
        The DC bin is included, so a window with a large non-zero mean will
        report ``0.0`` as its dominant frequency. Detrend the input first if
        that is not wanted.
    """
    if window_size < 2:
        raise ValueError("window_size must be at least 2")

    signal = df['X_kalman'].to_numpy(dtype=float)
    n_samples = len(signal)
    n_windows = n_samples - window_size + 1
    half = window_size // 2

    # Start from all-NaN so rows without a full window stay NaN.
    dominant = np.full(n_samples, np.nan)
    spectral = np.full(n_samples, np.nan)
    energy = np.full(n_samples, np.nan)

    if n_windows > 0:
        frequencies = np.fft.fftfreq(window_size, d=1.0)[:half]
        offsets = np.arange(window_size)
        # Process windows in chunks of roughly 2**20 samples so the
        # intermediate (windows x window_size) arrays stay small.
        chunk = max(1, 2 ** 20 // window_size)

        for start in range(0, n_windows, chunk):
            stop = min(start + chunk, n_windows)
            # Row k of `block` holds samples [start + k, start + k + window_size).
            block = signal[np.arange(start, stop)[:, None] + offsets]
            amplitudes = np.abs(fft(block, axis=-1)[:, :half])

            peak = frequencies[np.argmax(amplitudes, axis=1)]
            # argmax returns 0 for an all-NaN spectrum; report NaN instead.
            peak[np.isnan(block).any(axis=1)] = np.nan

            # A window starting at row s ends at row s + window_size - 1.
            rows = slice(start + window_size - 1, stop + window_size - 1)
            dominant[rows] = peak
            spectral[rows] = entropy(amplitudes, axis=1)
            energy[rows] = np.sum(amplitudes ** 2, axis=1)

    return pd.DataFrame(
        {
            'dominant_frequency': dominant,
            'spectral_entropy': spectral,
            'energy': energy,
        },
        index=df.index,
    )


# Derive both feature tables from the same input and window length.
time_domain_features = extract_time_domain_features(
    df=preprocessed_data,
    window_size=window_size,
)
frequency_domain_features = extract_frequency_domain_features(
    df=preprocessed_data,
    window_size=window_size,
)

# Place the original columns and both feature tables side by side.
all_features = pd.concat(
    [
        preprocessed_data,
        time_domain_features,
        frequency_domain_features,
    ],
    axis='columns',
)

# Persist the combined table without the index column, then preview it.
all_features.to_csv('feature_engineered_data.csv', index=False)
print(all_features.head(5))
