# Access the Labeled Raw IMU Data and Extract Statistical Features on Windowed Segments

In [1]:
# Generate features
import matplotlib as plt
import pandas as pd
import os
import numpy as np
import math
import csv
from astropy.stats import median_absolute_deviation
from statsmodels import robust
from scipy.stats import variation, kurtosis, skew
from __future__ import division

In [2]:
# Directory the notebook is running from.
home_url = os.getcwd()

# Sibling "Extracted Data Files" directory, as an absolute path with a
# trailing slash so file names can simply be appended to it.
_data_dir = os.path.join(home_url, '..', 'Extracted Data Files')
url = os.path.abspath(_data_dir) + "/"

# Base names (without extension) of the raw inputs and the feature outputs,
# one entry per participant P1..P4.
file_name = ['P{}_raw_labeled'.format(n) for n in range(1, 5)]
feature_file = ['P{}_feature_labeled'.format(n) for n in range(1, 5)]

In [3]:
def calculate_magnitude_mean(dataframe):
    """Return the Euclidean norm of the per-column means of X, Y and Z.

    The mean of each of the 'X', 'Y' and 'Z' columns is taken first, and the
    magnitude of that mean vector is returned (i.e. magnitude of the means,
    not the mean of the per-row magnitudes).
    """
    mean_x, mean_y, mean_z = (dataframe[axis].mean() for axis in ('X', 'Y', 'Z'))
    return np.sqrt(mean_x ** 2 + mean_y ** 2 + mean_z ** 2)

def cal_mean_mag(win_df):
    """Return the mean of the per-row magnitudes sqrt(X^2 + Y^2 + Z^2).

    Rows are taken positionally, so the result does not depend on the
    frame's index labels.

    Parameters
    ----------
    win_df : pandas.DataFrame
        Window of samples with numeric 'X', 'Y' and 'Z' columns.

    Returns
    -------
    float
        Average row magnitude, or NaN when the window has no rows
        (instead of dividing by zero).
    """
    components = np.asarray(win_df[['X', 'Y', 'Z']], dtype=float)
    if components.shape[0] == 0:
        return float('nan')

    # One magnitude per row, computed in a single vectorised pass rather
    # than a Python loop over .iloc lookups.
    row_magnitudes = np.sqrt((components ** 2).sum(axis=1))
    return row_magnitudes.mean()

def cal_cov(a, b):
    """Return the covariance between `a` and `b`.

    `np.cov` yields the 2x2 covariance matrix of the two inputs; the
    off-diagonal entry at row 0, column 1 is the cross-covariance.
    """
    cov_matrix = np.cov(a, b)
    return cov_matrix[0, 1]

def avg_power(sx):
    """Return the signal's sum of squares in the time and frequency domains.

    The spectrum is computed with the full complex FFT (`np.fft.fft`, both
    positive and negative frequencies) and scaled by 1/sqrt(N), so by
    Parseval's theorem the two returned values agree up to rounding.

    Note that despite the function name, both values are sums of squares
    (total energy); neither is divided by the number of samples.

    Parameters
    ----------
    sx : array-like
        One-dimensional real-valued signal (list, NumPy array or pandas
        Series).

    Returns
    -------
    tuple of (float, float)
        `(sum(sx**2), sum(|FFT(sx)/sqrt(N)|**2))`.

    Raises
    ------
    ValueError
        If `sx` is empty.
    """
    # Coerce once so plain Python sequences work with ** and the FFT alike.
    samples = np.asarray(sx, dtype=float)
    n_samples = len(samples)
    if n_samples == 0:
        raise ValueError("avg_power requires at least one sample")

    # Full FFT with 1/sqrt(N) scaling.
    spectrum = np.fft.fft(samples) / math.sqrt(n_samples)

    time_energy = np.sum(samples ** 2)
    spectral_energy = np.sum(np.abs(spectrum) ** 2)

    return float(time_energy), float(spectral_energy)

In [4]:
# Sliding-window parameters; constant for every file, so set once up front.
win_size = 20   # rows per window
step_size = 10  # rows between consecutive window starts


def _window_features(win_df):
    """Return the feature dict for one window of X/Y/Z/Label rows."""
    feature = {}

    # Per-axis mean, variance and standard deviation.
    feature['x_mean'] = win_df['X'].mean()
    feature['y_mean'] = win_df['Y'].mean()
    feature['z_mean'] = win_df['Z'].mean()

    feature['x_Var'] = win_df['X'].var()
    feature['y_Var'] = win_df['Y'].var()
    feature['z_Var'] = win_df['Z'].var()

    feature['x_sd'] = win_df['X'].std()
    feature['y_sd'] = win_df['Y'].std()
    feature['z_sd'] = win_df['Z'].std()

    # 'mean_mag' is the magnitude of the mean vector; 'mag_mean' is the mean
    # of the per-row magnitudes (computed positionally, hence reset_index).
    feature['mean_mag'] = calculate_magnitude_mean(win_df)
    feature['mag_mean'] = cal_mean_mag(win_df.reset_index(drop=True))

    # Pairwise covariances between the axes.
    feature['cov_xy'] = cal_cov(win_df['X'], win_df['Y'])
    feature['cov_yz'] = cal_cov(win_df['Y'], win_df['Z'])
    feature['cov_zx'] = cal_cov(win_df['Z'], win_df['X'])

    # Energy features are computed on the X axis only.
    feature['avg_power'], feature['power_spec'] = avg_power(win_df['X'])

    # The window is labeled with its most frequent 'Label' value.
    feature['Label'] = win_df['Label'].value_counts().idxmax()
    return feature


# For every raw input file: slide a window over its rows, compute one feature
# row per window, tag the rows with the file's position as 'User', and write
# the result to the matching feature CSV.
for index, (raw_name, feature_name) in enumerate(zip(file_name, feature_file)):
    df = pd.read_csv(url + raw_name + ".csv")
    # Drop the unnamed leading column present in the input CSV.
    df.drop(['Unnamed: 0'], axis=1, inplace=True)

    len_df = len(df)

    instances = []
    # NOTE: window starts run all the way to len_df, so the final windows of a
    # file hold fewer than win_size rows; a one-row window yields NaN for the
    # variance, standard deviation and covariance features.
    for i in range(0, len_df, step_size):
        win_df = df[i:i + win_size]
        feature = _window_features(win_df)
        instances.append(feature)

    feature_df = pd.DataFrame(instances)
    feature_df['User'] = index

    feature_df.to_csv(url + feature_name + '.csv', float_format='%.6f', index=False)
        
