In [201]:
%matplotlib notebook
import pandas as pd
import numpy as np
import math
import statistics
from datetime import datetime
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, AdaBoostClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import SGDClassifier
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, LinearSVC
from sklearn import svm
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler, LabelEncoder, normalize
import matplotlib.pyplot as plt
import seaborn as sn
import matplotlib
from mlxtend.plotting import plot_confusion_matrix
import warnings
from scipy.signal import savgol_filter
from sklearn.decomposition import PCA
import scipy.fftpack as FFT
warnings.filterwarnings('ignore')
matplotlib.rcParams['figure.figsize'] = [12,5]

## help functions

In [212]:
def convert_to_float(x):
    try:
        return np.float(x)
    except:
        return np.nan

def convert_to_datetime(x):
    dt = datetime.fromtimestamp(x // 1000000000)
    s = dt.strftime('%Y-%m-%d %H:%M:%S')
    s += '.' + str(int(x % 1000000000)).zfill(9)
    return s

def heatmap(cm, labels):
    df_cm = pd.DataFrame(cm, index=labels, columns=labels)
    plt.figure(figsize = (10,7))
    sn.heatmap(df_cm, annot=True)

    plt.show()
    
def preprocess(data, activity):
    #convert nanoseconds to date 
    data['time'] = data['time'].apply(convert_to_datetime) 
    data['time'] = pd.to_datetime(data['time'])

#convert to same data type
    for i in range(1, 4): 
        data[column_names_org[i]] = data[column_names_org[i]].apply(convert_to_float)
    
#drop rows with NaN values 
    data.dropna(axis=0, how='any', inplace=True) #TODO: invullen met mean, mod of median / interpolatie

#drop duplicates 
    data.drop_duplicates(subset=None, keep='first', inplace = True)
    
    #drop first and last 3 sec
    indexFirst = data[ (data['time'].iloc[0]+ pd.to_timedelta(3, unit='s')) > data['time'] ].index
    data.drop(indexFirst , inplace=True)
    indexLast = data[ (data['time'].iloc[-1]- pd.to_timedelta(3, unit='s')) < data['time'] ].index
    data.drop(indexLast , inplace=True)
    
    #add activity label
    data['activity'] = activity
    
    return data

#feature extraction
def get_mean_window(df):
    return df.mean()

def get_min_window(df):
    return df.min()

def get_max_window(df):
    return df.max()

def get_std_window(df):
    return df.std()

def get_med_window(df):
    return df.median()

#TODO: scipy integrate
#TODO: voor elke as apart??
#measure of activity level (m/s²)
def get_signal_magnitude_area(df):
        sum = 0
        for i in range(0, len(df)):
            sum += (abs(df['x'].iloc[i]) + abs(df['y'].iloc[i]) + abs(df['z'].iloc[i]))
        return sum /len(df)
    #result = integrate.quad(lambda t: df['x'].apply(lambda n : abs(n)) + df['y'].apply(lambda n : abs(n)) + df['z'].apply(lambda n : abs(n)), 0, len(df))

#TODO: voor elke as apart??
#degree of movement intensity (m/s²)
def get_signal_magnitude_vector(df):
    sum = 0
    for i in range(0, len(df)):
        sum += math.sqrt(df['x'].iloc[i] * df['x'].iloc[i] + df['y'].iloc[i] * df['y'].iloc[i] + df['z'].iloc[i] * df['z'].iloc[i])
    return sum

#average angle (radian) between accelerometer vector and x as (parallel with arm)
def get_tilt_angle(df):
    df_cos = pd.DataFrame(columns=["tilt_ang"])
    df_dot = df['x']
    for i in range(0, len(df)):
        s = pd.Series({"tilt_ang" : (df_dot.iloc[i])/(math.sqrt(df['x'].iloc[i]*df['x'].iloc[i] + df['y'].iloc[i]*df['y'].iloc[i] + df['z'].iloc[i]*df['z'].iloc[i]))})
        df_cos=df_cos.append(s, ignore_index=True)
    df_angle = np.arccos(df_cos)
    return df_angle.mean()['tilt_ang']

def get_power_spectral_density(df):
    df_psd = np.abs(df)**2
    return df_psd.sum()

#TODO: datatype is object en niet compex nr
def get_entropy(df):
    entropy = []
    pdf = df / df.sum()
    for i in range (1, len(pdf.columns)):
        entropy.append(np.complex(-np.nansum(pdf.iloc[:,i] * np.log2(pdf.iloc[:,i]))))
    return entropy

#generate windows with 50% overlap
def windows(df, time, overlap):
    start = df.iloc[0]
    while  (start + pd.to_timedelta(time, unit='s')) < df.iloc[-1]:
        yield start, (start + pd.to_timedelta(time, unit='s'))
        if overlap:
            start += pd.to_timedelta(time/2, unit='s')
        else:
            start += pd.to_timedelta(time, unit='s')
        
def feature_extraction_segmentation(data, window, overlap):
    column_names = ["x_mean", "y_mean", "z_mean", "x_min", "y_min", "z_min", "x_max", "y_max", "z_max",
                "x_std", "y_std", "z_std", "x_med", "y_med", "z_med", "activity"]
    df = pd.DataFrame(columns = column_names)

    for (start, end) in windows(data['time'], window, overlap):
        vw1 = data['time'] >= start
        vw2 = data['time'] < end
        mean = get_mean_window(data[vw1 & vw2])
        min = get_min_window(data[vw1 & vw2])
        max = get_max_window(data[vw1 & vw2])
        std = get_std_window(data[vw1 & vw2])
        med = get_med_window(data[vw1 & vw2])
        sma = get_signal_magnitude_area(data[vw1 & vw2])
        smv = get_signal_magnitude_vector(data[vw1 & vw2])
        tilt = get_tilt_angle(data[vw1 & vw2])
        #fourrier transform
        t_x = data[vw1 & vw2][['time','x']].set_index('time')
        t_y = data[vw1 & vw2][['time','y']].set_index('time')
        t_z = data[vw1 & vw2][['time', 'z']].set_index('time')

        df_f = pd.DataFrame(columns=['f', 'x_f', 'y_f', 'z_f'])

    #TODO: determine sampling rate (datapoints per second)
        sampling_rate = 18

        df_f['x_f'] = FFT.fft(t_x).ravel()
        df_f['y_f'] = FFT.fft(t_y).ravel()
        df_f['z_f'] = FFT.fft(t_z).ravel()
        df_f['f'] = FFT.fftfreq(len(df_f['x_f'])) * sampling_rate

        psd = get_power_spectral_density(df_f)
        entropy = get_entropy(df_f)

        df = df.append(pd.Series({'x_mean': mean['x'], 'y_mean': mean['y'], 'z_mean': mean['z'], "x_min" : min['x'],
                              "y_min" : min['y'], "z_min" : min['z'], "x_max" : max['x'], "y_max" : max['y'], "z_max" : max['z'],
                              "x_std" : std['x'], "y_std" : std['y'], "z_std" : std['z'], "x_med" : med['x'], "y_med" : med['y'], "z_med" : med['z'],
                              "sma" : sma, "smv" : smv, "tilt" : tilt, "x_psd" : psd['x_f'], "y_psd" : psd['y_f'], "z_psd" : psd['z_f']
                              , "x_entropy" : entropy[0], "y_entropy" : entropy[1], "z_entropy" : entropy[2], "activity" : data['activity'].iloc[0]}), ignore_index=True)
    return df


# preprocessing

## jump_mixed: slow and fast jumping

In [213]:
column_names_org = ['time', 'x', 'y', 'z']

jump_mixed = pd.read_csv(r"C:\Users\Elise\Documents\unif\master\semester2\masterproef\gitProject\thesis\data\normal2.csv", sep=';', header=0, skipinitialspace=True)

jump_mixed.shape

(2730, 4)

In [214]:
jump_mixed = preprocess(jump_mixed, "normal")

jump_mixed.tail()

Unnamed: 0,time,x,y,z,activity
2514,1970-01-01 01:21:37.145528320,2.012364,9.985249,-16.204195,normal
2515,1970-01-01 01:21:37.164926976,1.706083,-5.216353,-16.08934,normal
2516,1970-01-01 01:21:37.184325632,-2.914458,-4.486542,-14.821143,normal
2517,1970-01-01 01:21:37.203462144,-5.053641,-9.008977,-9.745967,normal
2518,1970-01-01 01:21:37.224171520,-8.075776,-15.165709,-5.962914,normal


In [215]:
jump_mixed.plot(x='time', subplots=True)

<IPython.core.display.Javascript object>

array([<matplotlib.axes._subplots.AxesSubplot object at 0x000002E91AC98708>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x000002E91E3EABC8>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x000002E91E3C78C8>],
      dtype=object)

#### number of turns

In [216]:
#smoothing signal (eig niet nodig)
period = pd.DataFrame(jump_mixed)
period['x'] = savgol_filter(period[ 'x'].to_numpy(), 31, 3)
period['y'] = savgol_filter(period[ 'y'].to_numpy(), 31, 3)
period['z'] = savgol_filter(period[ 'z'].to_numpy(), 31, 3)
period.plot(x='time', subplots=True)

<IPython.core.display.Javascript object>

array([<matplotlib.axes._subplots.AxesSubplot object at 0x000002E922CEFE48>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x000002E922D3D648>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x000002E922D6EEC8>],
      dtype=object)

In [217]:
def get_number_of_turns(df, col):
    duration = df['time'].iloc[-1] - df['time'].iloc[0] 
    local_min = pd.DataFrame()
    local_min = df[(df[col].shift(1) > df[col]) & (df[col].shift(-1) > df[col])][['time', col]].reset_index()
    local_min['deltaT'] = local_min.time - local_min.time.shift(1)
    return duration/(local_min['deltaT'].mean())

turns_x = get_number_of_turns(jump_mixed[['time','x']], "x")
turns_y = get_number_of_turns(jump_mixed[['time','y']], "y")
turns_z = get_number_of_turns(jump_mixed[['time','z']], "z")

(turns_x + turns_y + turns_z)/3

133.01574784235063

## jump_fast

In [218]:
jump_fast = pd.read_csv(r"C:\Users\Elise\Documents\unif\master\semester2\masterproef\gitProject\thesis\data\jump_fast.csv", sep=';', header=0, skipinitialspace=True)
jump_fast2 = pd.read_csv(r"C:\Users\Elise\Documents\unif\master\semester2\masterproef\gitProject\thesis\data\jump_fast (meting2).csv", sep=';', header=0, skipinitialspace=True)

jump_fast = preprocess(jump_fast, "jump_fast")
jump_fast2 = preprocess(jump_fast2, "jump_fast")

jump_fast2

Unnamed: 0,time,x,y,z,activity
207,1970-01-01 04:38:03.000963072,-9.779466,-14.309079,-16.500904,jump_fast
208,1970-01-01 04:38:03.032420352,0.303888,-16.752151,-21.851257,jump_fast
209,1970-01-01 04:38:03.043954688,0.303888,-16.752151,-21.851257,jump_fast
210,1970-01-01 04:38:03.053391872,0.332602,-10.947164,-21.942184,jump_fast
211,1970-01-01 04:38:03.071217664,-0.923630,-5.259424,-17.481962,jump_fast
...,...,...,...,...,...
2198,1970-01-01 04:38:41.416593408,0.835095,-9.114262,-10.918449,jump_fast
2199,1970-01-01 04:38:41.436516352,-0.040678,5.474778,-4.292724,jump_fast
2200,1970-01-01 04:38:41.457487872,3.043670,-4.094120,-4.974678,jump_fast
2201,1970-01-01 04:38:41.475313664,1.694118,-4.326223,-6.343373,jump_fast


In [219]:
jump_fast.plot(x='time', subplots=True)

<IPython.core.display.Javascript object>

array([<matplotlib.axes._subplots.AxesSubplot object at 0x000002E922E51A48>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x000002E922E64DC8>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x000002E922EA3688>],
      dtype=object)

In [220]:
jump_fast2.plot(x='time', subplots=True)

<IPython.core.display.Javascript object>

array([<matplotlib.axes._subplots.AxesSubplot object at 0x000002E925B51088>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x000002E925B86908>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x000002E925BC0548>],
      dtype=object)

In [221]:
#segmentation + feature extraction
jump_fast = feature_extraction_segmentation(jump_fast,10, True)
jump_fast2 = feature_extraction_segmentation(jump_fast2,10, True)
jump_fast2

Unnamed: 0,x_mean,y_mean,z_mean,x_min,y_min,z_min,x_max,y_max,z_max,x_std,...,activity,sma,smv,tilt,x_entropy,x_psd,y_entropy,y_psd,z_entropy,z_psd
0,-4.744691,-9.403931,-2.639542,-32.013573,-36.191441,-27.768707,16.943577,10.650454,19.118652,9.427057,...,jump_fast,25.96551,8825.162806,1.805726,6.976298-1.490642j,57718.129532,8.475273-0.155703j,77212.911355,6.795743-4.644998j,45733.579711
1,-5.161254,-9.53829,-1.73888,-32.38446,-29.343184,-19.235901,13.773088,10.650454,21.824936,9.223164,...,jump_fast,25.569721,8622.012711,1.838948,7.161561-1.215104j,57778.2719,8.619021-0.106609j,70539.160451,6.566882-7.708892j,40302.926441
2,-5.429781,-9.660199,-1.580376,-32.38446,-29.17808,-20.181067,10.535598,13.96212,21.824936,9.539299,...,jump_fast,25.980857,8716.163456,1.837307,7.162744-1.142727j,62318.025752,8.670342-0.111750j,69200.02711,6.447791-8.741579j,41248.83859
3,-5.095638,-9.90683,-1.794013,-29.608788,-34.913673,-20.446669,11.722438,13.96212,17.599211,9.725307,...,jump_fast,26.144589,8800.194663,1.796696,6.952547-1.352507j,62469.37975,8.650846-0.135871j,75903.095133,6.547467-7.216537j,39705.989246
4,-5.095262,-10.12637,-1.405372,-36.258442,-34.913673,-23.150558,11.722438,13.576876,18.857836,9.907339,...,jump_fast,26.218315,8842.828749,1.798779,6.897072-1.398831j,64194.484393,8.691095-0.099853j,75185.898838,6.076857-9.675071j,39261.212621
5,-5.339242,-10.214119,-1.028641,-36.258442,-29.970104,-23.150558,14.093725,6.431907,18.857836,10.005281,...,jump_fast,26.346908,8868.743554,1.824236,6.981252-1.328452j,66521.508996,8.717283-0.066552j,73338.65134,5.722787-13.956606j,38519.5289


In [222]:
jump_fast = pd.concat([jump_fast, jump_fast2], ignore_index=True)
jump_fast

Unnamed: 0,x_mean,y_mean,z_mean,x_min,y_min,z_min,x_max,y_max,z_max,x_std,...,activity,sma,smv,tilt,x_entropy,x_psd,y_entropy,y_psd,z_entropy,z_psd
0,-7.583159,-8.936538,-3.239117,-34.935211,-34.614571,-27.349962,8.568698,13.466805,20.130817,8.14481,...,jump_fast,28.340647,9663.305053,1.985967,8.129320-0.313836j,64207.783263,8.649463-0.104227j,60427.133638,6.091882-4.981816j,76029.470886
1,-8.196665,-8.206268,-2.434212,-39.397823,-33.413376,-27.349962,9.669396,19.688145,21.080767,8.056542,...,jump_fast,28.576395,9686.469888,2.011383,8.240772-0.213433j,68359.361844,8.566078-0.233188j,55280.738295,5.557564-7.562142j,75308.758138
2,-8.169064,-7.995196,-2.028957,-39.397823,-33.413376,-28.922047,9.669396,19.688145,21.080767,8.146689,...,jump_fast,28.15828,9531.324195,2.008742,8.211869-0.212921j,68880.549589,8.527814-0.284395j,54461.103041,5.325624-9.152224j,70276.635797
3,-8.246004,-8.140373,-2.292107,-34.262829,-22.076181,-29.675787,10.4351,16.457834,20.298315,8.213103,...,jump_fast,28.494259,9684.6715,2.01067,8.212756-0.237921j,70096.49801,8.519909-0.293052j,56013.024228,5.587894-7.808038j,71294.505128
4,-8.280305,-8.198424,-1.687567,-34.262829,-23.212772,-29.675787,10.983056,24.086153,23.105095,8.262989,...,jump_fast,28.250489,9646.31169,2.011202,8.243778-0.236742j,70951.912004,8.514085-0.312486j,57924.308969,5.204730-10.862033j,66171.493162
5,-8.309037,-7.785291,-1.006954,-34.858639,-24.878178,-30.606594,10.983056,24.086153,24.16033,8.374299,...,jump_fast,28.473746,9674.101919,2.007731,8.240005-0.223238j,72158.57688,8.483120-0.434166j,57964.743141,4.371055-20.395216j,69372.292039
6,-4.744691,-9.403931,-2.639542,-32.013573,-36.191441,-27.768707,16.943577,10.650454,19.118652,9.427057,...,jump_fast,25.96551,8825.162806,1.805726,6.976298-1.490642j,57718.129532,8.475273-0.155703j,77212.911355,6.795743-4.644998j,45733.579711
7,-5.161254,-9.53829,-1.73888,-32.38446,-29.343184,-19.235901,13.773088,10.650454,21.824936,9.223164,...,jump_fast,25.569721,8622.012711,1.838948,7.161561-1.215104j,57778.2719,8.619021-0.106609j,70539.160451,6.566882-7.708892j,40302.926441
8,-5.429781,-9.660199,-1.580376,-32.38446,-29.17808,-20.181067,10.535598,13.96212,21.824936,9.539299,...,jump_fast,25.980857,8716.163456,1.837307,7.162744-1.142727j,62318.025752,8.670342-0.111750j,69200.02711,6.447791-8.741579j,41248.83859
9,-5.095638,-9.90683,-1.794013,-29.608788,-34.913673,-20.446669,11.722438,13.96212,17.599211,9.725307,...,jump_fast,26.144589,8800.194663,1.796696,6.952547-1.352507j,62469.37975,8.650846-0.135871j,75903.095133,6.547467-7.216537j,39705.989246


## jump_slow

In [223]:
jump_slow = pd.read_csv(r"C:\Users\Elise\Documents\unif\master\semester2\masterproef\gitProject\thesis\data\jump_slow.csv", sep=';', header=0, skipinitialspace=True)
jump_slow2 = pd.read_csv(r"C:\Users\Elise\Documents\unif\master\semester2\masterproef\gitProject\thesis\data\jump_slow1 (meting2).csv", sep=';', header=0, skipinitialspace=True)

jump_slow = preprocess(jump_slow, "jump_slow")
jump_slow2 = preprocess(jump_slow2, "jump_slow")

jump_slow2

Unnamed: 0,time,x,y,z,activity
219,1970-01-01 04:35:49.926182912,-5.946164,-9.353543,-11.636296,jump_slow
220,1970-01-01 04:35:49.942960128,0.550349,-10.636097,-6.003592,jump_slow
221,1970-01-01 04:35:49.960785920,2.251646,-7.819745,-4.366901,jump_slow
222,1970-01-01 04:35:49.979660288,-2.684747,-5.158926,-8.037491,jump_slow
223,1970-01-01 04:35:50.001680384,-3.192025,-4.608576,-11.894721,jump_slow
...,...,...,...,...,...
1380,1970-01-01 04:36:12.319571968,1.411765,-0.784846,2.653640,jump_slow
1381,1970-01-01 04:36:12.338446336,-0.392423,1.339981,0.430708,jump_slow
1382,1970-01-01 04:36:12.357320704,-2.203790,1.002593,0.145962,jump_slow
1383,1970-01-01 04:36:12.377243648,-3.278167,-0.617348,1.067199,jump_slow


In [224]:
jump_slow_right = pd.read_csv(r"C:\Users\Elise\Documents\unif\master\semester2\masterproef\gitProject\thesis\data\jump_slow_right2.csv", sep=';', header=0, skipinitialspace=True)

jump_slow_right = preprocess(jump_slow_right, "jump_slow")

jump_slow_right

Unnamed: 0,time,x,y,z,activity
156,1970-01-01 06:03:48.898168832,-4.953143,-14.878572,1.787439,jump_slow
157,1970-01-01 06:03:48.914946048,-4.240082,-13.222738,3.012564,jump_slow
158,1970-01-01 06:03:48.933820416,-8.599805,-9.298509,2.076970,jump_slow
159,1970-01-01 06:03:48.956889088,-8.432307,-6.472585,1.373480,jump_slow
160,1970-01-01 06:03:48.973666304,-3.282953,-5.137390,1.887937,jump_slow
...,...,...,...,...,...
2574,1970-01-01 06:04:35.526246400,12.643675,-17.874386,2.098505,jump_slow
2575,1970-01-01 06:04:35.557703680,15.239887,-17.785851,2.883351,jump_slow
2576,1970-01-01 06:04:35.568189440,15.187245,-21.001804,1.741975,jump_slow
2577,1970-01-01 06:04:35.584966656,14.062618,-20.618952,0.533599,jump_slow


In [225]:
jump_slow.plot(x='time', subplots=True)

<IPython.core.display.Javascript object>

array([<matplotlib.axes._subplots.AxesSubplot object at 0x000002E9273858C8>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x000002E927395288>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x000002E91A8E1BC8>],
      dtype=object)

In [226]:
jump_slow_right.plot(x='time', subplots=True)

<IPython.core.display.Javascript object>

array([<matplotlib.axes._subplots.AxesSubplot object at 0x000002E927376288>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x000002E91AA0F0C8>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x000002E927541688>],
      dtype=object)

In [227]:
jump_slow2.plot(x='time', subplots=True)

<IPython.core.display.Javascript object>

array([<matplotlib.axes._subplots.AxesSubplot object at 0x000002E92762C548>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x000002E927674E88>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x000002E918D943C8>],
      dtype=object)

### differences between hands

In [228]:
jump_slow = jump_slow.reset_index(drop=True)
jump_slow2 = jump_slow2.reset_index(drop=True)
diff_left = jump_slow.loc[:,'x':'z'] - jump_slow2.loc[:,'x':'z']
diff_left['time'] =  jump_slow['time']
diff_left

Unnamed: 0,x,y,z,time
0,11.758330,2.084148,6.635297,1970-01-01 01:22:43.384205312
1,-1.603191,5.201997,3.110669,1970-01-01 01:22:43.403997184
2,-7.085148,-0.375672,1.024128,1970-01-01 01:22:43.422347264
3,-5.491528,-4.807180,2.000399,1970-01-01 01:22:43.442663424
4,-4.113262,-3.314060,2.636890,1970-01-01 01:22:43.461931008
...,...,...,...,...
1768,,,,1970-01-01 01:23:17.489008640
1769,,,,1970-01-01 01:23:17.509193728
1770,,,,1970-01-01 01:23:17.528461312
1771,,,,1970-01-01 01:23:17.548122112


### segmentation

In [229]:
#segmentation + feature extraction
jump_slow =  feature_extraction_segmentation(jump_slow,10, True)
jump_slow2 =  feature_extraction_segmentation(jump_slow2,10, True)

jump_slow2

Unnamed: 0,x_mean,y_mean,z_mean,x_min,y_min,z_min,x_max,y_max,z_max,x_std,...,activity,sma,smv,tilt,x_entropy,x_psd,y_entropy,y_psd,z_entropy,z_psd
0,-1.824405,-9.533217,-3.602798,-16.036697,-27.680172,-21.025732,9.276974,3.086741,6.668797,4.710011,...,jump_slow,18.735621,6455.45509,1.75748,6.927388-2.519347j,13218.883864,8.591395-0.024817j,71247.711933,7.475235-1.033194j,21842.564175
1,-1.77528,-9.38714,-3.656141,-16.036697,-27.620352,-15.314065,9.276974,3.086741,6.668797,4.65525,...,jump_slow,18.536185,6373.607493,1.755715,6.873089-2.660032j,12836.626288,8.577956-0.027926j,69371.199271,7.523464-0.940980j,21074.26285
2,-1.444872,-9.102247,-3.89655,-16.204195,-27.620352,-17.553747,10.059426,2.708675,5.233103,4.566257,...,jump_slow,18.086007,6251.563921,1.736782,6.615563-3.475978j,11861.219217,8.548570-0.026424j,67521.456391,7.623902-0.767687j,22009.136444


In [230]:
jump_slow = pd.concat([jump_slow, jump_slow2], ignore_index=True)
jump_slow

Unnamed: 0,x_mean,y_mean,z_mean,x_min,y_min,z_min,x_max,y_max,z_max,x_std,...,activity,sma,smv,tilt,x_entropy,x_psd,y_entropy,y_psd,z_entropy,z_psd
0,-5.481726,-8.859617,-4.460138,-25.50749,-23.327627,-20.379671,6.252446,3.897908,12.57189,6.594594,...,jump_slow,22.073203,7278.048807,1.94817,7.986258-0.363403j,38122.729864,8.574833-0.067170j,60289.15891,7.575196-1.082282j,35914.763428
1,-5.246818,-8.616536,-4.756654,-23.976084,-23.99044,-20.152351,5.816952,3.471986,9.698111,6.474394,...,jump_slow,21.577906,7110.217192,1.934285,7.934561-0.378116j,35931.563222,8.551447-0.074236j,58457.163989,7.632201-0.857656j,36102.697376
2,-4.962876,-8.616176,-5.142804,-24.428326,-25.562525,-20.06621,8.379665,4.733003,9.657433,6.412345,...,jump_slow,21.371412,7097.985079,1.907326,7.862530-0.425318j,34016.504689,8.577708-0.068448j,57634.635965,7.818568-0.642414j,35710.669209
3,-5.084023,-8.429078,-5.237145,-24.428326,-25.562525,-19.283758,9.126225,4.733003,9.657433,6.499475,...,jump_slow,21.42459,7141.333982,1.914784,7.875967-0.432694j,35228.614305,8.575680-0.062869j,55974.492685,7.853880-0.635995j,35909.547769
4,-4.821293,-7.710523,-5.441408,-24.7705,-23.468805,-20.764915,9.126225,3.445665,7.984849,6.393653,...,jump_slow,20.570133,6903.279449,1.918249,7.851224-0.501422j,33239.305649,8.504974-0.066321j,50018.198748,7.871661-0.543276j,37528.72241
5,-1.824405,-9.533217,-3.602798,-16.036697,-27.680172,-21.025732,9.276974,3.086741,6.668797,4.710011,...,jump_slow,18.735621,6455.45509,1.75748,6.927388-2.519347j,13218.883864,8.591395-0.024817j,71247.711933,7.475235-1.033194j,21842.564175
6,-1.77528,-9.38714,-3.656141,-16.036697,-27.620352,-15.314065,9.276974,3.086741,6.668797,4.65525,...,jump_slow,18.536185,6373.607493,1.755715,6.873089-2.660032j,12836.626288,8.577956-0.027926j,69371.199271,7.523464-0.940980j,21074.26285
7,-1.444872,-9.102247,-3.89655,-16.204195,-27.620352,-17.553747,10.059426,2.708675,5.233103,4.566257,...,jump_slow,18.086007,6251.563921,1.736782,6.615563-3.475978j,11861.219217,8.548570-0.026424j,67521.456391,7.623902-0.767687j,22009.136444


## side swing

In [231]:
side_swing = pd.read_csv(r"C:\Users\Elise\Documents\unif\master\semester2\masterproef\gitProject\thesis\data\side_swing.csv", sep=';', header=0, skipinitialspace=True)
side_swing2 = pd.read_csv(r"C:\Users\Elise\Documents\unif\master\semester2\masterproef\gitProject\thesis\data\side_swing2 (meting2).csv", sep=';', header=0, skipinitialspace=True)
side_swing3 = pd.read_csv(r"C:\Users\Elise\Documents\unif\master\semester2\masterproef\gitProject\thesis\data\side_swing3 (meting2).csv", sep=';', header=0, skipinitialspace=True)

side_swing = preprocess(side_swing, "side_swing")
side_swing2 = preprocess(side_swing2, "side_swing")
side_swing3 = preprocess(side_swing3, "side_swing")

side_swing2

Unnamed: 0,time,x,y,z,activity
193,1970-01-01 04:39:14.579906560,0.765703,-8.961121,-14.287544,side_swing
194,1970-01-01 04:39:14.593538048,1.122234,-5.434100,-15.062819,side_swing
195,1970-01-01 04:39:14.610315264,3.814159,-6.367301,-14.864214,side_swing
196,1970-01-01 04:39:14.637578240,2.388037,-4.969893,-14.768501,side_swing
197,1970-01-01 04:39:14.652258304,4.084548,-7.415358,-16.027126,side_swing
...,...,...,...,...,...
1913,1970-01-01 04:39:47.784114176,1.907080,-1.591227,6.159125,side_swing
1914,1970-01-01 04:39:47.785162752,1.907080,-1.591227,6.159125,side_swing
1915,1970-01-01 04:39:47.812425728,2.086541,-1.450050,5.422136,side_swing
1916,1970-01-01 04:39:47.823960064,2.024328,-1.215554,4.934000,side_swing


In [232]:
side_swing.plot(x='time', subplots=True)

<IPython.core.display.Javascript object>

array([<matplotlib.axes._subplots.AxesSubplot object at 0x000002E929CE46C8>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x000002E929D2BD08>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x000002E9273F3708>],
      dtype=object)

In [233]:
#segmentation + feature extraction
side_swing =  feature_extraction_segmentation(side_swing,10, True)
side_swing2 =  feature_extraction_segmentation(side_swing2,10, True)
side_swing3 =  feature_extraction_segmentation(side_swing3,10, True)

side_swing2

Unnamed: 0,x_mean,y_mean,z_mean,x_min,y_min,z_min,x_max,y_max,z_max,x_std,...,activity,sma,smv,tilt,x_entropy,x_psd,y_entropy,y_psd,z_entropy,z_psd
0,-0.151709,-6.735745,-2.689505,-7.707282,-20.054247,-19.812571,5.441278,3.445665,11.026127,2.470456,...,side_swing,15.407214,5574.114436,1.549403,4.525310-27.209617j,3167.251403,8.666862-0.029336j,34378.244416,6.318350-3.304279j,35625.158775
1,0.408326,-6.751972,-2.342814,-5.000999,-20.054247,-19.812571,7.118648,3.445665,10.956735,2.465428,...,side_swing,15.369452,5536.824829,1.483986,5.331905+8.867549j,3222.621232,8.711268-0.024255j,33179.816714,6.027978-4.059036j,34434.954131
2,0.888883,-6.827416,-1.94797,-4.484149,-19.032511,-17.283358,7.910672,0.674776,13.655839,2.638613,...,side_swing,15.672656,5557.878739,1.443965,6.432724+3.189724j,4008.776453,8.762667-0.003370j,32398.248217,5.790473-5.522562j,34136.322436
3,1.04449,-6.387672,-1.738607,-3.981657,-16.835899,-19.101904,7.910672,2.29711,14.155939,2.676115,...,side_swing,15.670249,5536.158342,1.439645,6.874849+2.604656j,4267.661262,8.718190-0.012570j,29405.171217,5.558489-6.860756j,37774.929849
4,1.260824,-6.066055,-1.517077,-3.981657,-16.835899,-19.101904,7.649854,2.29711,14.155939,2.417659,...,side_swing,15.3555,5408.641743,1.423693,7.433112+1.674245j,3845.356011,8.773783-0.011897j,24986.818539,5.509884-8.287802j,37414.376029


In [234]:
side_swing  = pd.concat([side_swing, side_swing2, side_swing3], ignore_index=True)
side_swing

Unnamed: 0,x_mean,y_mean,z_mean,x_min,y_min,z_min,x_max,y_max,z_max,x_std,...,activity,sma,smv,tilt,x_entropy,x_psd,y_entropy,y_psd,z_entropy,z_psd
0,-0.297017,-7.481204,-2.936394,-7.003792,-21.140589,-15.529419,9.937392,0.744168,9.710074,3.233253,...,side_swing,16.149872,5931.453638,1.538744,5.751464-17.842151j,5661.218557,8.784970-0.001541j,40965.941825,7.041395-2.386029j,27314.741982
1,0.280569,-6.708557,-2.569527,-5.958128,-18.163918,-15.529419,9.937392,1.062413,9.126225,2.96806,...,side_swing,15.883275,5614.337201,1.496308,4.638135+17.184240j,4604.114809,8.731746-0.002713j,32128.194691,6.547081-3.694520j,30911.602132
2,0.236631,-6.702495,-2.280679,-7.12104,-16.917255,-14.562718,6.273981,1.423729,10.47817,2.637663,...,side_swing,15.539203,5518.384103,1.501259,4.783465+18.598461j,3625.912632,8.698856-0.009902j,32395.90232,6.575127-4.320450j,29435.690642
3,-0.151709,-6.735745,-2.689505,-7.707282,-20.054247,-19.812571,5.441278,3.445665,11.026127,2.470456,...,side_swing,15.407214,5574.114436,1.549403,4.525310-27.209617j,3167.251403,8.666862-0.029336j,34378.244416,6.318350-3.304279j,35625.158775
4,0.408326,-6.751972,-2.342814,-5.000999,-20.054247,-19.812571,7.118648,3.445665,10.956735,2.465428,...,side_swing,15.369452,5536.824829,1.483986,5.331905+8.867549j,3222.621232,8.711268-0.024255j,33179.816714,6.027978-4.059036j,34434.954131
5,0.888883,-6.827416,-1.94797,-4.484149,-19.032511,-17.283358,7.910672,0.674776,13.655839,2.638613,...,side_swing,15.672656,5557.878739,1.443965,6.432724+3.189724j,4008.776453,8.762667-0.003370j,32398.248217,5.790473-5.522562j,34136.322436
6,1.04449,-6.387672,-1.738607,-3.981657,-16.835899,-19.101904,7.910672,2.29711,14.155939,2.676115,...,side_swing,15.670249,5536.158342,1.439645,6.874849+2.604656j,4267.661262,8.718190-0.012570j,29405.171217,5.558489-6.860756j,37774.929849
7,1.260824,-6.066055,-1.517077,-3.981657,-16.835899,-19.101904,7.649854,2.29711,14.155939,2.417659,...,side_swing,15.3555,5408.641743,1.423693,7.433112+1.674245j,3845.356011,8.773783-0.011897j,24986.818539,5.509884-8.287802j,37414.376029
8,1.841067,-5.996649,-1.293461,-4.031906,-15.060426,-13.947763,8.822337,-0.502493,13.79223,2.264052,...,side_swing,15.064704,5306.47471,1.351988,8.032801+0.652333j,4414.397056,8.815064-0.000000j,24009.843811,5.919007-9.467196j,31898.496686


## cross over

In [235]:
cross_over = pd.read_csv(r"C:\Users\Elise\Documents\unif\master\semester2\masterproef\gitProject\thesis\data\cross_over_slecht.csv", sep=';', header=0, skipinitialspace=True)

cross_over = preprocess(cross_over, "cross_over")

cross_over

Unnamed: 0,time,x,y,z,activity
156,1970-01-01 04:25:32.993347584,-1.050449,-9.116654,-3.194418,cross_over
157,1970-01-01 04:25:33.013270528,-0.842274,-9.178867,-3.792624,cross_over
158,1970-01-01 04:25:33.035290624,-0.756132,-7.417750,-3.524628,cross_over
159,1970-01-01 04:25:33.056262144,-0.428315,-6.266802,-4.051049,cross_over
160,1970-01-01 04:25:33.070942208,0.083749,-5.869594,-4.685147,cross_over
...,...,...,...,...,...
707,1970-01-01 04:25:43.621713920,4.096512,-8.039884,-2.553142,cross_over
708,1970-01-01 04:25:43.640588288,3.637090,-6.678368,-1.792224,cross_over
709,1970-01-01 04:25:43.659462656,2.816352,-4.902894,-1.229911,cross_over
710,1970-01-01 04:25:43.678337024,2.289931,-3.732803,-0.483350,cross_over


In [236]:
cross_over.plot(x='time', subplots=True)

<IPython.core.display.Javascript object>

array([<matplotlib.axes._subplots.AxesSubplot object at 0x000002E92A5C6248>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x000002E92A5DF4C8>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x000002E929D3FB48>],
      dtype=object)

In [237]:
#segmentation + feature extraction
cross_over =  feature_extraction_segmentation(cross_over,10, True)
cross_over

Unnamed: 0,x_mean,y_mean,z_mean,x_min,y_min,z_min,x_max,y_max,z_max,x_std,...,activity,sma,smv,tilt,x_entropy,x_psd,y_entropy,y_psd,z_entropy,z_psd
0,2.387115,-6.742209,-2.422781,-2.199004,-19.003798,-19.546968,8.080562,0.526421,10.533205,2.164344,...,cross_over,15.469096,5339.241244,1.278915,8.341107+0.171180j,5383.938508,8.785078-0.001020j,30668.462521,7.125474-3.492311j,25365.199715


## merge all activities

In [238]:
#merge different activity dataframes
merged = pd.concat([jump_fast, jump_slow, side_swing], ignore_index=True) #, cross_over
target = merged['activity']
merged

Unnamed: 0,x_mean,y_mean,z_mean,x_min,y_min,z_min,x_max,y_max,z_max,x_std,...,activity,sma,smv,tilt,x_entropy,x_psd,y_entropy,y_psd,z_entropy,z_psd
0,-7.583159,-8.936538,-3.239117,-34.935211,-34.614571,-27.349962,8.568698,13.466805,20.130817,8.14481,...,jump_fast,28.340647,9663.305053,1.985967,8.129320-0.313836j,64207.783263,8.649463-0.104227j,60427.133638,6.091882-4.981816j,76029.470886
1,-8.196665,-8.206268,-2.434212,-39.397823,-33.413376,-27.349962,9.669396,19.688145,21.080767,8.056542,...,jump_fast,28.576395,9686.469888,2.011383,8.240772-0.213433j,68359.361844,8.566078-0.233188j,55280.738295,5.557564-7.562142j,75308.758138
2,-8.169064,-7.995196,-2.028957,-39.397823,-33.413376,-28.922047,9.669396,19.688145,21.080767,8.146689,...,jump_fast,28.15828,9531.324195,2.008742,8.211869-0.212921j,68880.549589,8.527814-0.284395j,54461.103041,5.325624-9.152224j,70276.635797
3,-8.246004,-8.140373,-2.292107,-34.262829,-22.076181,-29.675787,10.4351,16.457834,20.298315,8.213103,...,jump_fast,28.494259,9684.6715,2.01067,8.212756-0.237921j,70096.49801,8.519909-0.293052j,56013.024228,5.587894-7.808038j,71294.505128
4,-8.280305,-8.198424,-1.687567,-34.262829,-23.212772,-29.675787,10.983056,24.086153,23.105095,8.262989,...,jump_fast,28.250489,9646.31169,2.011202,8.243778-0.236742j,70951.912004,8.514085-0.312486j,57924.308969,5.204730-10.862033j,66171.493162
5,-8.309037,-7.785291,-1.006954,-34.858639,-24.878178,-30.606594,10.983056,24.086153,24.16033,8.374299,...,jump_fast,28.473746,9674.101919,2.007731,8.240005-0.223238j,72158.57688,8.483120-0.434166j,57964.743141,4.371055-20.395216j,69372.292039
6,-4.744691,-9.403931,-2.639542,-32.013573,-36.191441,-27.768707,16.943577,10.650454,19.118652,9.427057,...,jump_fast,25.96551,8825.162806,1.805726,6.976298-1.490642j,57718.129532,8.475273-0.155703j,77212.911355,6.795743-4.644998j,45733.579711
7,-5.161254,-9.53829,-1.73888,-32.38446,-29.343184,-19.235901,13.773088,10.650454,21.824936,9.223164,...,jump_fast,25.569721,8622.012711,1.838948,7.161561-1.215104j,57778.2719,8.619021-0.106609j,70539.160451,6.566882-7.708892j,40302.926441
8,-5.429781,-9.660199,-1.580376,-32.38446,-29.17808,-20.181067,10.535598,13.96212,21.824936,9.539299,...,jump_fast,25.980857,8716.163456,1.837307,7.162744-1.142727j,62318.025752,8.670342-0.111750j,69200.02711,6.447791-8.741579j,41248.83859
9,-5.095638,-9.90683,-1.794013,-29.608788,-34.913673,-20.446669,11.722438,13.96212,17.599211,9.725307,...,jump_fast,26.144589,8800.194663,1.796696,6.952547-1.352507j,62469.37975,8.650846-0.135871j,75903.095133,6.547467-7.216537j,39705.989246


## dimensionality reduction

In [239]:
pca = PCA(n_components=2)
merged = pd.DataFrame(pca.fit_transform(merged.drop('activity', axis=1)))
merged['activity'] = target
merged

ValueError: Complex data not supported
      x_mean     y_mean    z_mean      x_min      y_min      z_min      x_max  \
0  -7.583159  -8.936538 -3.239117 -34.935211 -34.614571 -27.349962   8.568698   
1  -8.196665  -8.206268 -2.434212 -39.397823 -33.413376 -27.349962   9.669396   
2  -8.169064  -7.995196 -2.028957 -39.397823 -33.413376 -28.922047   9.669396   
3  -8.246004  -8.140373 -2.292107 -34.262829 -22.076181 -29.675787  10.435100   
4  -8.280305  -8.198424 -1.687567 -34.262829 -23.212772 -29.675787  10.983056   
5  -8.309037  -7.785291 -1.006954 -34.858639 -24.878178 -30.606594  10.983056   
6  -4.744691  -9.403931 -2.639542 -32.013573 -36.191441 -27.768707  16.943577   
7  -5.161254  -9.538290 -1.738880 -32.384460 -29.343184 -19.235901  13.773088   
8  -5.429781  -9.660199 -1.580376 -32.384460 -29.178080 -20.181067  10.535598   
9  -5.095638  -9.906830 -1.794013 -29.608788 -34.913673 -20.446669  11.722438   
10 -5.095262 -10.126370 -1.405372 -36.258442 -34.913673 -23.150558  11.722438   
11 -5.339242 -10.214119 -1.028641 -36.258442 -29.970104 -23.150558  14.093725   
12 -5.481726  -8.859617 -4.460138 -25.507490 -23.327627 -20.379671   6.252446   
13 -5.246818  -8.616536 -4.756654 -23.976084 -23.990440 -20.152351   5.816952   
14 -4.962876  -8.616176 -5.142804 -24.428326 -25.562525 -20.066210   8.379665   
15 -5.084023  -8.429078 -5.237145 -24.428326 -25.562525 -19.283758   9.126225   
16 -4.821293  -7.710523 -5.441408 -24.770500 -23.468805 -20.764915   9.126225   
17 -1.824405  -9.533217 -3.602798 -16.036697 -27.680172 -21.025732   9.276974   
18 -1.775280  -9.387140 -3.656141 -16.036697 -27.620352 -15.314065   9.276974   
19 -1.444872  -9.102247 -3.896550 -16.204195 -27.620352 -17.553747  10.059426   
20 -0.297017  -7.481204 -2.936394  -7.003792 -21.140589 -15.529419   9.937392   
21  0.280569  -6.708557 -2.569527  -5.958128 -18.163918 -15.529419   9.937392   
22  0.236631  -6.702495 -2.280679  -7.121040 -16.917255 -14.562718   6.273981   
23 -0.151709  -6.735745 -2.689505  -7.707282 -20.054247 -19.812571   5.441278   
24  0.408326  -6.751972 -2.342814  -5.000999 -20.054247 -19.812571   7.118648   
25  0.888883  -6.827416 -1.947970  -4.484149 -19.032511 -17.283358   7.910672   
26  1.044490  -6.387672 -1.738607  -3.981657 -16.835899 -19.101904   7.910672   
27  1.260824  -6.066055 -1.517077  -3.981657 -16.835899 -19.101904   7.649854   
28  1.841067  -5.996649 -1.293461  -4.031906 -15.060426 -13.947763   8.822337   

        y_max      z_max      x_std  ...     z_med        sma          smv  \
0   13.466805  20.130817   8.144810  ... -2.969493  28.340647  9663.305053   
1   19.688145  21.080767   8.056542  ... -1.135394  28.576395  9686.469888   
2   19.688145  21.080767   8.146689  ... -1.366302  28.158280  9531.324195   
3   16.457834  20.298315   8.213103  ... -1.877169  28.494259  9684.671500   
4   24.086153  23.105095   8.262989  ... -0.830309  28.250489  9646.311690   
5   24.086153  24.160330   8.374299  ... -0.179462  28.473746  9674.101919   
6   10.650454  19.118652   9.427057  ... -3.503092  25.965510  8825.162806   
7   10.650454  21.824936   9.223164  ... -3.060420  25.569721  8622.012711   
8   13.962120  21.824936   9.539299  ... -2.727818  25.980857  8716.163456   
9   13.962120  17.599211   9.725307  ... -2.663212  26.144589  8800.194663   
10  13.576876  18.857836   9.907339  ... -2.294717  26.218315  8842.828749   
11   6.431907  18.857836  10.005281  ... -2.099702  26.346908  8868.743554   
12   3.897908  12.571890   6.594594  ... -3.842873  22.073203  7278.048807   
13   3.471986   9.698111   6.474394  ... -4.521238  21.577906  7110.217192   
14   4.733003   9.657433   6.412345  ... -5.484349  21.371412  7097.985079   
15   4.733003   9.657433   6.499475  ... -5.827720  21.424590  7141.333982   
16   3.445665   7.984849   6.393653  ... -5.661418  20.570133  6903.279449   
17   3.086741   6.668797   4.710011  ... -3.993621  18.735621  6455.455090   
18   3.086741   6.668797   4.655250  ... -3.979264  18.536185  6373.607493   
19   2.708675   5.233103   4.566257  ... -4.071388  18.086007  6251.563921   
20   0.744168   9.710074   3.233253  ... -2.818745  16.149872  5931.453638   
21   1.062413   9.126225   2.968060  ... -2.416751  15.883275  5614.337201   
22   1.423729  10.478170   2.637663  ... -2.426322  15.539203  5518.384103   
23   3.445665  11.026127   2.470456  ... -1.472782  15.407214  5574.114436   
24   3.445665  10.956735   2.465428  ... -0.727418  15.369452  5536.824829   
25   0.674776  13.655839   2.638613  ... -0.764506  15.672656  5557.878739   
26   2.297110  14.155939   2.676115  ... -0.689133  15.670249  5536.158342   
27   2.297110  14.155939   2.417659  ... -0.778863  15.355500  5408.641743   
28  -0.502493  13.792230   2.264052  ... -1.095913  15.064704  5306.474710   

        tilt            x_entropy         x_psd           y_entropy  \
0   1.985967   8.129320-0.313836j  64207.783263  8.649463-0.104227j   
1   2.011383   8.240772-0.213433j  68359.361844  8.566078-0.233188j   
2   2.008742   8.211869-0.212921j  68880.549589  8.527814-0.284395j   
3   2.010670   8.212756-0.237921j  70096.498010  8.519909-0.293052j   
4   2.011202   8.243778-0.236742j  70951.912004  8.514085-0.312486j   
5   2.007731   8.240005-0.223238j  72158.576880  8.483120-0.434166j   
6   1.805726   6.976298-1.490642j  57718.129532  8.475273-0.155703j   
7   1.838948   7.161561-1.215104j  57778.271900  8.619021-0.106609j   
8   1.837307   7.162744-1.142727j  62318.025752  8.670342-0.111750j   
9   1.796696   6.952547-1.352507j  62469.379750  8.650846-0.135871j   
10  1.798779   6.897072-1.398831j  64194.484393  8.691095-0.099853j   
11  1.824236   6.981252-1.328452j  66521.508996  8.717283-0.066552j   
12  1.948170   7.986258-0.363403j  38122.729864  8.574833-0.067170j   
13  1.934285   7.934561-0.378116j  35931.563222  8.551447-0.074236j   
14  1.907326   7.862530-0.425318j  34016.504689  8.577708-0.068448j   
15  1.914784   7.875967-0.432694j  35228.614305  8.575680-0.062869j   
16  1.918249   7.851224-0.501422j  33239.305649  8.504974-0.066321j   
17  1.757480   6.927388-2.519347j  13218.883864  8.591395-0.024817j   
18  1.755715   6.873089-2.660032j  12836.626288  8.577956-0.027926j   
19  1.736782   6.615563-3.475978j  11861.219217  8.548570-0.026424j   
20  1.538744  5.751464-17.842151j   5661.218557  8.784970-0.001541j   
21  1.496308  4.638135+17.184240j   4604.114809  8.731746-0.002713j   
22  1.501259  4.783465+18.598461j   3625.912632  8.698856-0.009902j   
23  1.549403  4.525310-27.209617j   3167.251403  8.666862-0.029336j   
24  1.483986   5.331905+8.867549j   3222.621232  8.711268-0.024255j   
25  1.443965   6.432724+3.189724j   4008.776453  8.762667-0.003370j   
26  1.439645   6.874849+2.604656j   4267.661262  8.718190-0.012570j   
27  1.423693   7.433112+1.674245j   3845.356011  8.773783-0.011897j   
28  1.351988   8.032801+0.652333j   4414.397056  8.815064-0.000000j   

           y_psd            z_entropy         z_psd  
0   60427.133638   6.091882-4.981816j  76029.470886  
1   55280.738295   5.557564-7.562142j  75308.758138  
2   54461.103041   5.325624-9.152224j  70276.635797  
3   56013.024228   5.587894-7.808038j  71294.505128  
4   57924.308969  5.204730-10.862033j  66171.493162  
5   57964.743141  4.371055-20.395216j  69372.292039  
6   77212.911355   6.795743-4.644998j  45733.579711  
7   70539.160451   6.566882-7.708892j  40302.926441  
8   69200.027110   6.447791-8.741579j  41248.838590  
9   75903.095133   6.547467-7.216537j  39705.989246  
10  75185.898838   6.076857-9.675071j  39261.212621  
11  73338.651340  5.722787-13.956606j  38519.528900  
12  60289.158910   7.575196-1.082282j  35914.763428  
13  58457.163989   7.632201-0.857656j  36102.697376  
14  57634.635965   7.818568-0.642414j  35710.669209  
15  55974.492685   7.853880-0.635995j  35909.547769  
16  50018.198748   7.871661-0.543276j  37528.722410  
17  71247.711933   7.475235-1.033194j  21842.564175  
18  69371.199271   7.523464-0.940980j  21074.262850  
19  67521.456391   7.623902-0.767687j  22009.136444  
20  40965.941825   7.041395-2.386029j  27314.741982  
21  32128.194691   6.547081-3.694520j  30911.602132  
22  32395.902320   6.575127-4.320450j  29435.690642  
23  34378.244416   6.318350-3.304279j  35625.158775  
24  33179.816714   6.027978-4.059036j  34434.954131  
25  32398.248217   5.790473-5.522562j  34136.322436  
26  29405.171217   5.558489-6.860756j  37774.929849  
27  24986.818539   5.509884-8.287802j  37414.376029  
28  24009.843811   5.919007-9.467196j  31898.496686  

[29 rows x 24 columns]


## train-test split

In [175]:
#TODO: NIET NODIG!!!
#standardize
#scaler = preprocessing.StandardScaler().fit(df)
#df = pd.DataFrame(scaler.transform(df), columns=column_names[2:])

#label activity
le = preprocessing.LabelEncoder()
le.fit(merged['activity'])
merged['activity'] = le.transform(merged['activity'])
labels = le.inverse_transform([0,1,2])

#train test split
X_train, X_test, y_train, y_test = train_test_split(merged.drop('activity', axis=1), merged['activity'], test_size = 0.33, random_state = 42)

#normalize X
xNormalizer = preprocessing.Normalizer().fit(X_train)
X_train = xNormalizer.transform(X_train)
X_test = xNormalizer.transform(X_test)

# machine learning algorithms

## SVC

In [176]:
#hyperparameter tuning
param_grid = {'C': [0.1,1, 10, 100], 'gamma': [1,0.1,0.01,0.001],'kernel': ['linear','rbf', 'poly', 'sigmoid']}
grid = GridSearchCV(SVC(),param_grid,refit=True) #verbose=2
grid.fit(X_train,y_train)

clf = grid.best_estimator_
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
accuracy = clf.score(X_test, y_test)
cm = confusion_matrix(y_test, y_pred)

heatmap(cm, labels)

<IPython.core.display.Javascript object>

In [177]:
y_pred = clf.predict(merged.drop('activity', axis=1).to_numpy())
cm = confusion_matrix( merged['activity'].to_numpy(), y_pred)

heatmap(cm, labels)

<IPython.core.display.Javascript object>

## linear SVC

In [None]:
#hyperparameter tuning
param_grid = {'C': [0.1,1, 10, 100]}
grid = GridSearchCV(LinearSVC(),param_grid,refit=True)
grid.fit(X_train,y_train)

clf =  grid.best_estimator_
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
cm = confusion_matrix(y_test, y_pred)

heatmap(cm,labels)

## random forest

In [None]:
#hyperparameter tuning
param_grid = {'n_estimators': [100, 1000],
              'max_depth': [10, 20],'min_samples_split': np.arange(0.10, 1, 0.4) ,
              'max_leaf_nodes': [10, 100, 1000, None]}
grid = GridSearchCV(RandomForestClassifier(),param_grid,refit=True,verbose=2)
grid.fit(X_train,y_train)

clf = grid.best_estimator_
clf = clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
cm = confusion_matrix(y_test, y_pred)

heatmap(cm, labels)

## extra trees classifier

In [None]:
#hyperparameter tuning
param_grid = {'n_estimators': [10, 100, 1000],
              'max_depth': [1,10,20,30],'min_samples_split': np.arange(0.10, 1, 0.2) ,
                'max_leaf_nodes': [10, 100, 1000, None]}
grid = GridSearchCV(ExtraTreesClassifier(),param_grid,refit=True,verbose=2)
grid.fit(X_train,y_train)

clf = grid.best_estimator_
clf = clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
cm = confusion_matrix(y_test, y_pred)

heatmap(cm)

## AdaBoost

In [None]:
#hyperparameter tuning
param_grid = {'learning_rate': np.arange(0.10, 1, 0.2), 'n_estimators': [10, 50, 100, 1000],
              'base_estimator': [RandomForestClassifier(), ExtraTreesClassifier()], 
              'algorithm' : ['SAMME', 'SAMME.R']}
grid = GridSearchCV(AdaBoostClassifier(),param_grid,refit=True,verbose=2)
grid.fit(X_train,y_train)

clf = grid.best_estimator_
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
cm = confusion_matrix(y_test, y_pred)

heatmap(cm)

## naive bayes

In [None]:
gnb = GaussianNB()
gnb.fit(X_train, y_train)
y_pred = gnb.predict(X_test)
cm = confusion_matrix(y_test, y_pred)

heatmap(cm)

## kneighbors 

In [None]:
#hyperparameter tuning
param_grid = {'n_neighbors': [1,2,4,8,10], 'weights': ['uniform', 'distance'],
              'algorithm': ['ball_tree', 'kd_tree', 'brute'], 'leaf_size': range(1,60,10), 
              'metric' : ['minkowski']}
grid = GridSearchCV(KNeighborsClassifier(),param_grid,refit=True,verbose=2)
grid.fit(X_train,y_train)

clf = grid.best_estimator_
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
cm = confusion_matrix(y_test, y_pred)

heatmap(cm)

## SGD classifier

In [None]:
#hyperparameter tuning
param_grid = {'alpha': [1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1], 'learning_rate': ['constant', 'optimal', 'invscaling', 'adaptive'], 
              'max_iter': [10, 100, 1000], #np.ceil(10**6 / n) 
             'shuffle' : [True], 'eta0' : [1]}
grid = GridSearchCV(SGDClassifier(),param_grid,refit=True,verbose=2)
grid.fit(X_train,y_train)

clf = grid.best_estimator_
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
cm = confusion_matrix(y_test, y_pred)

heatmap(cm)

## MLP classifier

In [None]:
#hyperparameter tuning
param_grid = {'solver': ['lbfgs', 'sgd', 'adam'], 'alpha': [1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3], 
    'hidden_layer_sizes': [(50,50,50), (50,100,50), (100,)], 'activation': ['identity', 'logistic','tanh', 'relu'],
    'learning_rate': ['invscaling', 'constant','adaptive']}
grid = GridSearchCV(MLPClassifier(),param_grid,refit=True,verbose=2)
grid.fit(X_train,y_train)

clf = grid.best_estimator_
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
cm = confusion_matrix(y_test, y_pred)

heatmap(cm)