### Imports

In [23]:
import pandas as pd

### Configuration

In [24]:
EXPERIMENT = "tara"
MODE = "test"
ACTIVITIES = ["relaxing", "eating", "walking", "studying"]

### Dataset

In [25]:
dataset: pd.DataFrame = pd.read_csv(f"state/{EXPERIMENT}_{MODE}_interpolated.csv")
dataset

Unnamed: 0,accel_z,accel_y,accel_x,gyro_z,gyro_y,gyro_x,gravity_z,gravity_y,gravity_x,ori_qz,...,bar_relativeAltitude,bar_pressure,gps_altitude,gps_latitude,gps_longitude,time,walking,relaxing,eating,studying
0,-0.023840,0.018390,0.008858,-0.054503,-0.005479,0.011691,-7.563114,-5.831582,-2.216261,0.663099,...,0.015858,1018.030701,40.507936,52.047900,4.382954,2023-06-16 15:39:53.073135400,1,0,0,0
1,0.002951,0.022089,-0.041490,-0.070068,0.088273,-0.000520,-7.004715,-6.675822,-1.343320,0.653123,...,0.026424,1018.029251,40.643834,52.047898,4.382956,2023-06-16 15:39:54.073135400,1,0,0,0
2,0.083199,-0.117540,0.250466,0.099236,-0.050622,-0.041580,-7.546371,-6.011996,-1.740925,0.642579,...,0.022198,1018.027916,40.755786,52.047895,4.382956,2023-06-16 15:39:55.073135400,1,0,0,0
3,-0.024142,-0.041310,-0.124078,0.268303,0.292325,0.254841,-7.262949,-6.160909,-2.257083,0.731410,...,0.065531,1018.026657,40.815552,52.047892,4.382956,2023-06-16 15:39:56.073135400,1,0,0,0
4,-0.343633,0.325876,0.097991,-0.121707,-0.284418,-0.060837,-6.545389,-6.867339,-2.387059,0.749090,...,0.122605,1018.028030,40.865129,52.047893,4.382956,2023-06-16 15:39:57.073135400,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3540,-0.027468,-0.002052,-0.012776,0.062467,0.000044,-0.000010,-9.806233,0.083322,0.035022,-0.157359,...,5.814162,1017.307434,41.526821,52.045305,4.384527,2023-06-16 16:38:53.073135400,0,0,0,1
3541,-0.029051,0.008580,0.018632,0.031754,-0.000316,-0.000033,-9.806149,0.079065,0.040456,-0.117536,...,6.261243,1017.307434,41.526821,52.045305,4.384527,2023-06-16 16:38:54.073135400,0,0,0,1
3542,-0.028269,0.002606,0.002662,-0.000234,-0.000159,0.000208,-9.806189,0.082902,0.039185,-0.113216,...,6.271812,1017.306366,41.526821,52.045305,4.384527,2023-06-16 16:38:55.073135400,0,0,0,1
3543,-0.028832,0.002296,0.002630,0.023920,0.000002,-0.000178,-9.806191,0.083961,0.039061,-0.108622,...,6.271812,1017.298370,41.526821,52.045305,4.384527,2023-06-16 16:38:56.073135400,0,0,0,1


## Feature engineering part 1: rolling mean/median/std/max/min/slope

In [26]:
window_sizes = [2, 5, 8]

columns_to_featurize = [
    'accel_z', 'accel_y', 'accel_x',
    'gyro_z', 'gyro_y', 'gyro_x',
    'gravity_z', 'gravity_y', 'gravity_x',
    'ori_qz', 'ori_qy', 'ori_qx', 'ori_qw',
    'magne_z', 'magne_y', 'magne_x',
    'bar_relativeAltitude', 'bar_pressure',
    'gps_altitude', 'gps_latitude', 'gps_longitude'
]

# Exclude time and label columns
for column in dataset[columns_to_featurize]:
    for window_size in window_sizes:
        rolling_mean = dataset[column].rolling(window_size).mean()
        rolling_median = dataset[column].rolling(window_size).median()
        rolling_std = dataset[column].rolling(window_size).std()
        rolling_max = dataset[column].rolling(window_size).max()
        rolling_min = dataset[column].rolling(window_size).min()

        new_column_mean_name = f"{column}_rolling_mean_{window_size}"
        new_column_median_name = f"{column}_rolling_median_{window_size}"
        new_column_std_name = f"{column}_rolling_std_{window_size}"
        new_column_max_name = f"{column}_rolling_max_{window_size}"
        new_column_min_name = f"{column}_rolling_min_{window_size}"

        dataset[new_column_mean_name] = rolling_mean
        dataset[new_column_median_name] = rolling_median
        dataset[new_column_std_name] = rolling_std
        dataset[new_column_max_name] = rolling_max
        dataset[new_column_min_name] = rolling_min

dataset

Unnamed: 0,accel_z,accel_y,accel_x,gyro_z,gyro_y,gyro_x,gravity_z,gravity_y,gravity_x,ori_qz,...,gps_longitude_rolling_mean_5,gps_longitude_rolling_median_5,gps_longitude_rolling_std_5,gps_longitude_rolling_max_5,gps_longitude_rolling_min_5,gps_longitude_rolling_mean_8,gps_longitude_rolling_median_8,gps_longitude_rolling_std_8,gps_longitude_rolling_max_8,gps_longitude_rolling_min_8
0,-0.023840,0.018390,0.008858,-0.054503,-0.005479,0.011691,-7.563114,-5.831582,-2.216261,0.663099,...,,,,,,,,,,
1,0.002951,0.022089,-0.041490,-0.070068,0.088273,-0.000520,-7.004715,-6.675822,-1.343320,0.653123,...,,,,,,,,,,
2,0.083199,-0.117540,0.250466,0.099236,-0.050622,-0.041580,-7.546371,-6.011996,-1.740925,0.642579,...,,,,,,,,,,
3,-0.024142,-0.041310,-0.124078,0.268303,0.292325,0.254841,-7.262949,-6.160909,-2.257083,0.731410,...,,,,,,,,,,
4,-0.343633,0.325876,0.097991,-0.121707,-0.284418,-0.060837,-6.545389,-6.867339,-2.387059,0.749090,...,4.382955,4.382956,5.360602e-07,4.382956,4.382954,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3540,-0.027468,-0.002052,-0.012776,0.062467,0.000044,-0.000010,-9.806233,0.083322,0.035022,-0.157359,...,4.384527,4.384527,0.000000e+00,4.384527,4.384527,4.384527,4.384527,0.0,4.384527,4.384527
3541,-0.029051,0.008580,0.018632,0.031754,-0.000316,-0.000033,-9.806149,0.079065,0.040456,-0.117536,...,4.384527,4.384527,0.000000e+00,4.384527,4.384527,4.384527,4.384527,0.0,4.384527,4.384527
3542,-0.028269,0.002606,0.002662,-0.000234,-0.000159,0.000208,-9.806189,0.082902,0.039185,-0.113216,...,4.384527,4.384527,0.000000e+00,4.384527,4.384527,4.384527,4.384527,0.0,4.384527,4.384527
3543,-0.028832,0.002296,0.002630,0.023920,0.000002,-0.000178,-9.806191,0.083961,0.039061,-0.108622,...,4.384527,4.384527,0.000000e+00,4.384527,4.384527,4.384527,4.384527,0.0,4.384527,4.384527


## PCA

In [27]:
from book.Chapter3.DataTransformation import PrincipalComponentAnalysis

dataset = PrincipalComponentAnalysis().apply_pca(dataset, columns_to_featurize, len(columns_to_featurize))
dataset

Unnamed: 0,accel_z,accel_y,accel_x,gyro_z,gyro_y,gyro_x,gravity_z,gravity_y,gravity_x,ori_qz,...,pca_12,pca_13,pca_14,pca_15,pca_16,pca_17,pca_18,pca_19,pca_20,pca_21
0,-0.023840,0.018390,0.008858,-0.054503,-0.005479,0.011691,-7.563114,-5.831582,-2.216261,0.663099,...,-0.010103,0.027760,-0.018532,-0.017443,-0.046898,0.031689,0.063796,0.131823,0.002484,-0.014856
1,0.002951,0.022089,-0.041490,-0.070068,0.088273,-0.000520,-7.004715,-6.675822,-1.343320,0.653123,...,0.008352,0.032153,-0.061326,-0.021201,0.000380,0.043054,0.049280,0.129422,0.011905,-0.008786
2,0.083199,-0.117540,0.250466,0.099236,-0.050622,-0.041580,-7.546371,-6.011996,-1.740925,0.642579,...,-0.092605,0.096222,-0.015881,0.062878,-0.063570,-0.006964,0.138489,0.121110,-0.016193,-0.022343
3,-0.024142,-0.041310,-0.124078,0.268303,0.292325,0.254841,-7.262949,-6.160909,-2.257083,0.731410,...,-0.107571,-0.140638,-0.255344,-0.003400,-0.004964,-0.080230,0.054262,0.113783,-0.001250,-0.025104
4,-0.343633,0.325876,0.097991,-0.121707,-0.284418,-0.060837,-6.545389,-6.867339,-2.387059,0.749090,...,0.089057,0.037180,0.149664,-0.085108,-0.073065,0.064698,0.154808,0.130672,0.011380,0.002727
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3540,-0.027468,-0.002052,-0.012776,0.062467,0.000044,-0.000010,-9.806233,0.083322,0.035022,-0.157359,...,-0.005713,-0.019440,0.005114,0.004124,-0.009706,-0.031207,-0.002569,0.001509,0.001316,0.016968
3541,-0.029051,0.008580,0.018632,0.031754,-0.000316,-0.000033,-9.806149,0.079065,0.040456,-0.117536,...,-0.010504,-0.018953,0.013085,0.001474,-0.010792,-0.006504,0.001604,0.005187,0.001775,-0.024875
3542,-0.028269,0.002606,0.002662,-0.000234,-0.000159,0.000208,-9.806189,0.082902,0.039185,-0.113216,...,-0.006161,-0.012409,0.011700,-0.004962,-0.006347,0.003179,-0.003190,0.004852,0.000989,-0.023785
3543,-0.028832,0.002296,0.002630,0.023920,0.000002,-0.000178,-9.806191,0.083961,0.039061,-0.108622,...,-0.007025,-0.017162,0.010308,-0.002687,-0.009410,-0.007179,-0.000638,0.006287,0.000851,-0.018241


## Fourier

In [28]:
import numpy as np

window_sizes = [30]
num_fourier_features = 5

columns_to_featurize = [
    'accel_z', 'accel_y', 'accel_x',
    'gyro_z', 'gyro_y', 'gyro_x',
    'gravity_z', 'gravity_y', 'gravity_x',
    'ori_qz', 'ori_qy', 'ori_qx', 'ori_qw',
    'magne_z', 'magne_y', 'magne_x',
    'bar_relativeAltitude', 'bar_pressure',
    'gps_altitude', 'gps_latitude', 'gps_longitude'
]

# Exclude time and label columns
for column in dataset[columns_to_featurize]:
    for window_size in window_sizes:
        rolling_mean = dataset[column].rolling(window_size).mean().fillna(0)
        new_column_mean_name = f"{column}_rolling_mean_{window_size}"
        dataset[new_column_mean_name] = rolling_mean
        
        data = rolling_mean.values
        
        # Compute FFT
        fft_result = np.fft.fft(data)
        
        # Mean Power Spectrum
        power_spectrum = np.abs(fft_result) ** 2
        new_column_power_name = f"{column}_power_window_{window_size}"
        dataset[new_column_power_name] = power_spectrum.mean()
        
        # Mean Amplitude Spectrum
        amplitude_spectrum = np.abs(fft_result)
        new_column_amplitude_name = f"{column}_amplitude_window_{window_size}"
        dataset[new_column_amplitude_name] = amplitude_spectrum.mean()
        
        # Total Power or Energy
        total_power = np.sum(power_spectrum)
        new_column_energy_name = f"{column}_total_power_{window_size}"
        dataset[new_column_energy_name] = total_power
        
        # Mean Frequency
        frequencies = np.fft.fftfreq(len(data))
        mean_frequency = np.sum(power_spectrum * frequencies) / total_power
        new_column_mean_freq_name = f"{column}_mean_frequency_{window_size}"
        dataset[new_column_mean_freq_name] = mean_frequency
dataset

Unnamed: 0,accel_z,accel_y,accel_x,gyro_z,gyro_y,gyro_x,gravity_z,gravity_y,gravity_x,ori_qz,...,gps_latitude_rolling_mean_30,gps_latitude_power_window_30,gps_latitude_amplitude_window_30,gps_latitude_total_power_30,gps_latitude_mean_frequency_30,gps_longitude_rolling_mean_30,gps_longitude_power_window_30,gps_longitude_amplitude_window_30,gps_longitude_total_power_30,gps_longitude_mean_frequency_30
0,-0.023840,0.018390,0.008858,-0.054503,-0.005479,0.011691,-7.563114,-5.831582,-2.216261,0.663099,...,0.000000,9.523891e+06,173.719028,3.376219e+10,-1.379239e-20,0.000000,67590.018085,14.633503,2.396066e+08,-1.518313e-20
1,0.002951,0.022089,-0.041490,-0.070068,0.088273,-0.000520,-7.004715,-6.675822,-1.343320,0.653123,...,0.000000,9.523891e+06,173.719028,3.376219e+10,-1.379239e-20,0.000000,67590.018085,14.633503,2.396066e+08,-1.518313e-20
2,0.083199,-0.117540,0.250466,0.099236,-0.050622,-0.041580,-7.546371,-6.011996,-1.740925,0.642579,...,0.000000,9.523891e+06,173.719028,3.376219e+10,-1.379239e-20,0.000000,67590.018085,14.633503,2.396066e+08,-1.518313e-20
3,-0.024142,-0.041310,-0.124078,0.268303,0.292325,0.254841,-7.262949,-6.160909,-2.257083,0.731410,...,0.000000,9.523891e+06,173.719028,3.376219e+10,-1.379239e-20,0.000000,67590.018085,14.633503,2.396066e+08,-1.518313e-20
4,-0.343633,0.325876,0.097991,-0.121707,-0.284418,-0.060837,-6.545389,-6.867339,-2.387059,0.749090,...,0.000000,9.523891e+06,173.719028,3.376219e+10,-1.379239e-20,0.000000,67590.018085,14.633503,2.396066e+08,-1.518313e-20
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3540,-0.027468,-0.002052,-0.012776,0.062467,0.000044,-0.000010,-9.806233,0.083322,0.035022,-0.157359,...,52.045305,9.523891e+06,173.719028,3.376219e+10,-1.379239e-20,4.384527,67590.018085,14.633503,2.396066e+08,-1.518313e-20
3541,-0.029051,0.008580,0.018632,0.031754,-0.000316,-0.000033,-9.806149,0.079065,0.040456,-0.117536,...,52.045305,9.523891e+06,173.719028,3.376219e+10,-1.379239e-20,4.384527,67590.018085,14.633503,2.396066e+08,-1.518313e-20
3542,-0.028269,0.002606,0.002662,-0.000234,-0.000159,0.000208,-9.806189,0.082902,0.039185,-0.113216,...,52.045305,9.523891e+06,173.719028,3.376219e+10,-1.379239e-20,4.384527,67590.018085,14.633503,2.396066e+08,-1.518313e-20
3543,-0.028832,0.002296,0.002630,0.023920,0.000002,-0.000178,-9.806191,0.083961,0.039061,-0.108622,...,52.045305,9.523891e+06,173.719028,3.376219e+10,-1.379239e-20,4.384527,67590.018085,14.633503,2.396066e+08,-1.518313e-20


## Saving experiment state

In [29]:
from pathlib import Path

# saving the state for faster further experimentation
target_directory = "final_datasets"
Path(target_directory).mkdir(exist_ok=True, parents=True)
dataset.to_csv(f"{target_directory}/{EXPERIMENT}_{MODE}_features.csv", index=False)