# Feature Extraction I

In [None]:
import os

import numpy as np
import pandas as pd
import scipy as sp
import scipy.signal
import scipy.stats

import activity_classifier_utils

Load the data

In [None]:
# Sampling rate in samples per second; passed to Featurize as `fs` and used
# below to size a 10 second window as fs * 10 samples.
fs = 256
# Load the dataset through the project helper.
# NOTE(review): the return structure is not visible here; later cells index it
# as data[0][2] and treat that element as a DataFrame — confirm against
# activity_classifier_utils.LoadWristPPGDataset.
data = activity_classifier_utils.LoadWristPPGDataset()

### Features
Time Domain:
* mean
* std
* 5th, 10th, 15th, 20th, and 25th percentiles
* cross-correlation of all pairs of channels
* total energy

Frequency Domain:
* dominant frequency
* fraction of energy in each 1 Hz bin from 0 to 6 Hz
* spectral entropy of each channel (to be implemented)

Low-pass filter at 12 Hz

In [None]:
def LowpassFilter(signal, fs, cutoff=12, order=3):
    """Apply a zero-phase Butterworth low-pass filter to a signal.

    The filter is designed with ``scipy.signal.butter`` and applied with
    ``scipy.signal.filtfilt``, which runs it forward and then backward so the
    output has no phase shift relative to the input.

    Args:
        signal: (np.array) the samples to filter.
        fs: (number) the sampling rate of the signal.
        cutoff: (number) the cutoff frequency, in the same units as ``fs``.
            Defaults to 12, the value this notebook uses.
        order: (int) the order of the Butterworth design. Defaults to 3.

    Returns:
        (np.array) the filtered signal, the same shape as the input.
    """
    b, a = sp.signal.butter(order, cutoff, btype='lowpass', fs=fs)
    return sp.signal.filtfilt(b, a, signal)

Compute Features

In [None]:
def Featurize(accx, accy, accz, fs):
    """A partial featurization of the accelerometer signal.

    Every channel is low-pass filtered with ``LowpassFilter`` before any
    feature is computed. Only x-channel features and the x/y correlation are
    produced by this partial implementation.

    Args:
        accx: (np.array) x-channel of the accelerometer.
        accy: (np.array) y-channel of the accelerometer.
        accz: (np.array) z-channel of the accelerometer.
        fs: (number) the sampling rate of the accelerometer

    Returns:
        7-tuple of accelerometer features, in this order:
            mn_x: mean of the filtered x-channel
            std_x: standard deviation of the filtered x-channel
            p5_x: 5th percentile of the filtered x-channel
            corr_xy: Pearson correlation coefficient between the filtered
                x and y channels
            energy_x: total AC energy (sum of squared, mean-removed samples)
                of the filtered x-channel
            dominant_frequency_x: frequency with the most spectral power
                between 0.25 and 12 (inclusive) in the x-channel
            energy_23_x: fraction of the x-channel spectral energy that lies
                between 2 and 3 (inclusive)
    """

    accx = LowpassFilter(accx, fs)
    accy = LowpassFilter(accy, fs)
    # The z-channel is filtered like the others even though no z feature is
    # computed yet in this partial featurization.
    accz = LowpassFilter(accz, fs)

    # The mean of the x-channel
    mn_x = np.mean(accx)

    # The standard deviation of the x-channel
    std_x = np.std(accx)

    # The 5th percentile of the x-channel
    p5_x = np.percentile(accx, 5)

    # The pearson correlation coefficient between the x and y channels
    # (pearsonr returns the coefficient first, then the p-value)
    corr_xy = sp.stats.pearsonr(accx, accy)[0]

    # Remove the mean once; the AC energy and the FFT both use the centered signal
    centered_x = accx - mn_x

    # The total AC energy of the x-axis
    energy_x = np.sum(np.square(centered_x))

    # Take an FFT of the signal. If the signal is too short, 0-pad it so we have at least 2046 points in the FFT.
    fft_len = max(len(accx), 2046)

    # Create an array of frequency bins
    fft_freqs = np.fft.rfftfreq(fft_len, 1 / fs)

    # Take an FFT of the centered signal
    fft_x = np.fft.rfft(centered_x, fft_len)
    spectral_energy_x = np.square(np.abs(fft_x))

    # The frequency with the most power between 0.25 and 12 Hz
    band = (fft_freqs >= 0.25) & (fft_freqs <= 12)
    dominant_frequency_x = fft_freqs[band][np.argmax(spectral_energy_x[band])]

    # The fraction of energy between 2 and 3 Hz in the x-channel
    in_23 = (fft_freqs >= 2) & (fft_freqs <= 3)
    energy_23_x = np.sum(spectral_energy_x[in_23]) / np.sum(spectral_energy_x)

    return (mn_x,
            std_x,
            p5_x,
            corr_xy,
            energy_x,
            dominant_frequency_x,
            energy_23_x)

## Check The Code

Extract a 10 second window of the DataFrame

In [None]:
# Take the first fs * 10 rows, i.e. 10 seconds at fs samples per second.
# NOTE(review): presumably data[0] is one recording and index 2 holds its
# DataFrame of samples — confirm against LoadWristPPGDataset.
seg = data[0][2].iloc[:fs * 10]

In [None]:
# Pull the three accelerometer columns out of the window as raw arrays.
accx, accy, accz = seg.accx.values, seg.accy.values, seg.accz.values

In [None]:
# Compute the feature tuple for the extracted window as a quick check of Featurize.
Featurize(accx, accy, accz, fs)