# <h1>Feature extraction for subject P03</h1>

In [184]:
import numpy as np
import pandas as pd
pd.set_option('html',False)

from os import path

In [185]:
# Locations of the windowed recordings for subject P03: one CSV per sensor
# (chest, thigh) and axis (x, y, z), plus one label file.
# NOTE(review): the axis files are prefixed 'PO3' (letter O) whereas the
# directory and the label file use 'P03' (digit zero) -- confirm that these
# match the names on disk.
CHEST_X = path.relpath('data/P03_DATA_WINDOW/PO3_CHEST_X.csv')
CHEST_Y = path.relpath('data/P03_DATA_WINDOW/PO3_CHEST_Y.csv')
CHEST_Z = path.relpath('data/P03_DATA_WINDOW/PO3_CHEST_Z.csv')

THIGH_X = path.relpath('data/P03_DATA_WINDOW/PO3_THIGH_X.csv')
THIGH_Y = path.relpath('data/P03_DATA_WINDOW/PO3_THIGH_Y.csv')
THIGH_Z = path.relpath('data/P03_DATA_WINDOW/PO3_THIGH_Z.csv')

LABEL = path.relpath('data/P03_DATA_WINDOW/P03_LABEL_L.csv')

# The files carry no header row. A plain one-character separator is used on
# purpose: a multi-character string such as a backslash-escaped comma is
# interpreted by pandas as a regular expression, and the backslash forms are
# invalid escape sequences in a normal Python string literal.
df_chest_x = pd.read_csv(CHEST_X, header=None, sep=',')
df_chest_y = pd.read_csv(CHEST_Y, header=None, sep=',')
df_chest_z = pd.read_csv(CHEST_Z, header=None, sep=',')

df_thigh_x = pd.read_csv(THIGH_X, header=None, sep=',')
df_thigh_y = pd.read_csv(THIGH_Y, header=None, sep=',')
df_thigh_z = pd.read_csv(THIGH_Z, header=None, sep=',')

# The label file is space-separated rather than comma-separated.
df_label = pd.read_csv(LABEL, header=None, sep=' ')


In [186]:
# Number of rows loaded for the chest and the thigh sensor. The single-argument
# print(...) form produces the same text on Python 2 and Python 3.
print("%d %d" % (len(df_chest_x), len(df_thigh_x)))
# Scratch check of the built-in min().
print(min(3, 4))


36180 31115
3


<h3>Zero crossing rate</h3>

In [187]:
#Zero crossing
def zero_crossing_rate(l, size=None):
    """Return the number of sign changes in ``l`` divided by ``size``.

    Parameters
    ----------
    l : sequence of numbers (list, numpy array or pandas Series)
        The samples to inspect.
    size : number, optional
        Normaliser for the count. Defaults to ``len(l)`` when omitted.

    Returns
    -------
    float
        ``crossings / size``.

    Raises
    ------
    ZeroDivisionError
        If ``size`` is 0 (or ``l`` is empty and ``size`` is omitted).
    """
    signs = np.sign(np.asarray(l))
    # Exact zeros have sign 0. Comparing them with their neighbours would count
    # 1, 0, -1 as two changes and 1, 0, 1 as two changes although the signal
    # crosses zero once and never, respectively, so they are dropped first.
    signs = signs[signs != 0]
    if size is None:
        size = len(l)
    return np.count_nonzero(np.diff(signs)) * 1.0 / size

# Hand-made sequence of 17 samples used as a quick check of zero_crossing_rate.
a = [1, 2, 1, 1, -3, -4, 7, 8, 9, 10, -2, 1, -3, 5, 6, 7, -10]
# len(a) is 17; the single-argument print(...) form works on Python 2 and 3.
print(zero_crossing_rate(a, len(a)))

0.411764705882


<h3>Correlation </h3>

In [188]:
def correlation(data_frame_one, data_frame_two):
    """Correlate two pandas objects by delegating to ``corr``.

    Calls ``data_frame_one.corr(data_frame_two)`` and returns whatever that
    call produces. Elsewhere in this notebook it is invoked with two rows
    (pandas Series) taken from the per-axis frames.
    """
    coefficient = data_frame_one.corr(data_frame_two)
    return coefficient

#df_chest_x_frame = df_chest_x.iloc[100]
#df_chest_y_frame = df_chest_y.iloc[100]
#print correlation(df_chest_y_frame, df_chest_x_frame)

def extractCorrelation(data_frame_one, data_frame_two, start, size):
    """Correlate matching rows of two frames.

    For every row position in ``range(start, start + size)`` the row of
    ``data_frame_one`` is correlated with the row at the same position in
    ``data_frame_two`` through ``correlation``.

    Returns a pandas DataFrame built from the array of collected values, in
    row order.
    """
    row_positions = range(start, start + size)
    coefficients = [
        correlation(data_frame_one.iloc[row], data_frame_two.iloc[row])
        for row in row_positions
    ]
    return pd.DataFrame(np.array(coefficients))

<h3>Energy</h3>

In [189]:
def sumOfSqr(axis):
    """Return the square root of the summed squared deviations from the mean.

    Every element of ``axis`` is converted with ``float`` first. Despite the
    name, the result is ``sqrt(sum((v - mean) ** 2))``, not a plain sum of
    squares.

    Raises ZeroDivisionError when ``axis`` is empty.
    """
    values = [float(item) for item in axis]
    centre = sum(values) / len(values)
    # Accumulate in input order, starting from 0.
    squared_deviation = sum((value - centre) ** 2 for value in values)
    return squared_deviation ** 0.5
    
def extractEnergy(x ,y ,z ,n_sample ,start, size):
    """Compute one combined value per row from the three axis frames.

    For each row position in ``range(start, start + size)`` the ``sumOfSqr``
    results of the x, y and z rows are added, divided by 3 and then divided by
    ``n_sample``.

    Returns a pandas DataFrame built from the array of per-row values.
    """
    energies = []
    for row in range(start, start + size):
        per_axis = [sumOfSqr(frame.iloc[row]) for frame in (x, y, z)]
        combined = per_axis[0] + per_axis[1] + per_axis[2]
        # Average over the three axes, then scale by the sample count.
        energies.append(combined / 3 / n_sample)

    return pd.DataFrame(np.array(energies))

<h3>Root mean square</h3>

In [190]:
#df_chest_x_frame = df_chest_x.iloc[100]
#print np.sqrt(df_chest_x_frame.dot(df_chest_x_frame)*1.0/df_chest_x_frame.size)

<h2> Function to extract basic information for each window </h2>

In [191]:
def extract(method, data_frame, start, size):
    """Compute one per-window statistic for a range of rows.

    Each row of ``data_frame`` is treated as one window. Information about the
    pandas reductions used (mean, median, ...):
    http://pandas.pydata.org/pandas-docs/stable/api.html#id5

    Parameters
    ----------
    method : str
        One of 'mean', 'median', 'max', 'min', 'std', 'zero-crossing' or
        'rms'. Any other value appends the raw row itself.
    data_frame : pandas.DataFrame
        Frame whose rows are the windows.
    start : int
        Position of the first row to process.
    size : int
        Number of rows (windows) to process.

    Returns
    -------
    pandas.DataFrame
        Built from the array of collected results, in row order.
    """
    res = []
    for i in range(start, start + size):
        window = data_frame.iloc[i]
        # Number of samples in this window. The zero-crossing rate and the RMS
        # are normalised by this value; `size` counts windows and must not be
        # used for that, otherwise the result depends on how many rows are
        # processed.
        n_samples = len(window)

        if method == 'mean':
            res.append(window.mean())
        elif method == 'median':
            res.append(window.median())
        elif method == 'max':
            res.append(window.max())
        elif method == 'min':
            res.append(window.min())
        elif method == 'std':
            res.append(window.std())
        elif method == 'zero-crossing':
            # zero_crossing_rate is defined in an earlier cell of the notebook.
            res.append(zero_crossing_rate(window, n_samples))
        elif method == 'rms':
            res.append(np.sqrt(window.dot(window) * 1.0 / n_samples))
        else:
            res.append(window)

    return pd.DataFrame(np.array(res))




        

In [192]:


def _compute_feature_columns(feature_type, df_x, df_y, df_z, sensor, start, size):
    """Build the named feature column(s) of one feature type for one sensor.

    Returns a list of single-column DataFrames:
    - "energy": one column ``<feature>_<sensor>`` combining all three axes;
    - "correlation": three columns with suffixes ``_xy``, ``_xz``, ``_yz``;
    - anything else: three columns with suffixes ``_x``, ``_y``, ``_z``
      computed by ``extract``.
    """
    prefix = feature_type + '_' + sensor

    # This is for all axis
    if feature_type == "energy":
        n_sample = len(df_x.iloc[0].values)
        energy = extractEnergy(df_x, df_y, df_z, n_sample, start, size)
        energy.columns = [prefix]
        return [energy]

    if feature_type == "correlation":
        axis_pairs = [("_xy", df_x, df_y), ("_xz", df_x, df_z), ("_yz", df_y, df_z)]
        columns = []
        for suffix, first, second in axis_pairs:
            pair_correlation = extractCorrelation(first, second, start, size)
            pair_correlation.columns = [prefix + suffix]
            columns.append(pair_correlation)
        return columns

    # Only for one axis
    columns = []
    for suffix, axis_frame in [('_x', df_x), ('_y', df_y), ('_z', df_z)]:
        axis_feature = extract(feature_type, axis_frame, start, size)
        axis_feature.columns = [prefix + suffix]
        columns.append(axis_feature)
    return columns


def add_feature(feature_type,df_x,df_y,df_z,path,sensor):
    """Compute one feature for one sensor and append it to the features CSV.

    Parameters
    ----------
    feature_type : str
        "energy", "correlation", or any method name understood by ``extract``.
    df_x, df_y, df_z : pandas.DataFrame
        Per-axis frames of the sensor; each row is processed as one window.
    path : str
        Location of the features CSV. It is read if it can be opened and is
        (re)written with the new column(s) appended.
    sensor : str
        Sensor name used in the generated column names.

    Returns
    -------
    None

    Notes
    -----
    The number of rows processed is ``min(len(df_chest_x), len(df_thigh_x)) - 1``
    and is taken from the notebook-level frames ``df_chest_x`` and
    ``df_thigh_x``, not from the arguments, so both must already be loaded.
    A feature counts as already computed when an existing column name starts
    with ``<feature_type>_<sensor>``; in that case the file is left untouched.
    """
    start = 0
    size = min(len(df_chest_x), len(df_thigh_x)) - 1

    # Only the read is guarded, so an IOError raised while computing features
    # is not mistaken for a missing features file.
    try:
        df_features = pd.read_csv(path, sep=',')
    except IOError:
        print('file is empty')
        df_features = None

    if df_features is not None:
        # Get all the features already computed: "<feature>_<sensor>" is the
        # first two underscore-separated tokens of each column name. Joining a
        # slice keeps a column without any underscore from raising IndexError.
        computed = set()
        for column in df_features.columns.values:
            computed.add('_'.join(str(column).split('_')[:2]))

        # Create the string format of the feature: feature and sensor
        t = feature_type + '_' + sensor

        if t in computed:
            print(t + "  is already computed")
            # Nothing new to store, so the CSV is not rewritten.
            return
        print("Adding feature:  " + t)

    new_columns = _compute_feature_columns(
        feature_type, df_x, df_y, df_z, sensor, start, size)

    if df_features is None:
        frames = new_columns
    else:
        frames = [df_features] + new_columns

    pd.concat(frames, axis=1).to_csv(path, index=False)
    

In [193]:
# Location of the features CSV. It is stored under its own name so that the
# `path` alias imported from `os` stays bound to the module; rebinding `path`
# to this string would make any later `path.relpath(...)` call fail, including
# a re-run of this cell.
FEATURES_PATH = path.relpath('data/FEATURES_P03.csv')
features = ['mean', 'min', 'max', 'median','std', 'energy', 'zero-crossing', 'correlation', 'rms']


# Every feature is computed for the thigh sensor first, then for the chest.
for f in features:
    add_feature(f, df_thigh_x, df_thigh_y, df_thigh_z, FEATURES_PATH, 'thigh')
    add_feature(f, df_chest_x, df_chest_y, df_chest_z, FEATURES_PATH, 'chest')

mean_thigh  is already computed
mean_chest  is already computed
min_thigh  is already computed
min_chest  is already computed
max_thigh  is already computed
max_chest  is already computed
median_thigh  is already computed
median_chest  is already computed
std_thigh  is already computed
std_chest  is already computed
energy_thigh  is already computed
energy_chest  is already computed
zero-crossing_thigh  is already computed
zero-crossing_chest  is already computed
correlation_thigh  is already computed
correlation_chest  is already computed
Adding feature:  rms_thigh
Adding feature:  rms_chest
