In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from ipywidgets import widgets
import numpy as np
%pylab osx

Populating the interactive namespace from numpy and matplotlib


In [2]:
# normalise the feature values to the [0, 1] range (min-max scaling)
from sklearn import preprocessing
def norm(features):
    """Min-max scale the values of ``features`` with scikit-learn's MinMaxScaler.

    Parameters
    ----------
    features : pandas.DataFrame
        Frame whose ``.values`` array is handed to ``MinMaxScaler.fit_transform``.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the scaled values, labelled with the same index
        and columns as ``features``.
    """
    scaler = preprocessing.MinMaxScaler()
    scaled_values = scaler.fit_transform(features.values)

    # Re-attach the original row and column labels, which the plain numpy
    # array returned by the scaler does not carry.
    scaled = pd.DataFrame(
        scaled_values,
        index=features.index,
        columns=features.columns,
    )

    # Return an independent deep copy of the labelled result.
    return scaled.copy(deep=True)

In [3]:
# Load the eight tab-separated sensor logs (indexed by 'Timestamp'), then for
# each one remove the 'Type' column and add a 'magnitude' column holding the
# Euclidean norm of the (X, Y, Z) readings.
raw_logs = [
    pd.read_csv(log_path, sep='\t', index_col='Timestamp')
    for log_path in (
        'sensor_log/right_knee.txt',
        'sensor_log/left_knee.txt',
        'sensor_log/right_hip.txt',
        'sensor_log/left_hip.txt',
        'sensor_log/right_arm.txt',
        'sensor_log/left_arm.txt',
        'sensor_log/right_foot.txt',
        'sensor_log/left_foot.txt',
    )
]
# The individual names refer to the very same frames that sit in raw_logs,
# so the in-place edits below are visible through both.
(right_knee, left_knee, right_hip, left_hip,
 right_arm, left_arm, right_foot, left_foot) = raw_logs
for log in raw_logs:
    del log['Type']
    squared = log[['X', 'Y', 'Z']] ** 2
    log['magnitude'] = np.sqrt(squared['X'] + squared['Y'] + squared['Z'])

In [4]:
# Right knee: keep the samples whose timestamp lies strictly between
# 250739278 and 250850000, min-max normalise them with norm(), replace the
# timestamps by a 0-based positional index so every sensor shares the same
# index, and prefix each column name with 'right_knee_'.
in_window = (right_knee.index > 250739278) & (right_knee.index < 250850000)
right_knee = (
    norm(right_knee[in_window])
    .reset_index(drop=True)
    .add_prefix('right_knee_')
)

In [5]:
# Left knee: keep the samples whose timestamp lies strictly between
# 16630000 and 16756000, min-max normalise them with norm(), replace the
# timestamps by a 0-based positional index so every sensor shares the same
# index, and prefix each column name with 'left_knee_'.
in_window = (left_knee.index > 16630000) & (left_knee.index < 16756000)
left_knee = (
    norm(left_knee[in_window])
    .reset_index(drop=True)
    .add_prefix('left_knee_')
)

In [6]:
# Right hip: keep the samples whose timestamp lies strictly between
# 9116132 and 9230000, min-max normalise them with norm(), replace the
# timestamps by a 0-based positional index so every sensor shares the same
# index, and prefix each column name with 'right_hip_'.
in_window = (right_hip.index > 9116132) & (right_hip.index < 9230000)
right_hip = (
    norm(right_hip[in_window])
    .reset_index(drop=True)
    .add_prefix('right_hip_')
)

In [7]:
# Left hip: keep the samples whose timestamp lies strictly between
# 879227949 and 879337949, min-max normalise them with norm(), replace the
# timestamps by a 0-based positional index so every sensor shares the same
# index, and prefix each column name with 'left_hip_'.
in_window = (left_hip.index > 879227949) & (left_hip.index < 879337949)
left_hip = (
    norm(left_hip[in_window])
    .reset_index(drop=True)
    .add_prefix('left_hip_')
)

In [8]:
# Right arm: keep the samples whose timestamp lies strictly between
# 194779513 and 194909513, min-max normalise them with norm(), replace the
# timestamps by a 0-based positional index so every sensor shares the same
# index, and prefix each column name with 'right_arm_'.
in_window = (right_arm.index > 194779513) & (right_arm.index < 194909513)
right_arm = (
    norm(right_arm[in_window])
    .reset_index(drop=True)
    .add_prefix('right_arm_')
)

In [9]:
# Left arm: keep the samples whose timestamp lies strictly between
# 1188100056 and 1188240056, min-max normalise them with norm(), replace the
# timestamps by a 0-based positional index so every sensor shares the same
# index, and prefix each column name with 'left_arm_'.
in_window = (left_arm.index > 1188100056) & (left_arm.index < 1188240056)
left_arm = (
    norm(left_arm[in_window])
    .reset_index(drop=True)
    .add_prefix('left_arm_')
)

In [10]:
# Right foot: keep the samples whose timestamp lies strictly between
# 140800245 and 140910245, min-max normalise them with norm(), replace the
# timestamps by a 0-based positional index so every sensor shares the same
# index, and prefix each column name with 'right_foot_'.
in_window = (right_foot.index > 140800245) & (right_foot.index < 140910245)
right_foot = (
    norm(right_foot[in_window])
    .reset_index(drop=True)
    .add_prefix('right_foot_')
)

In [11]:
# Left foot: keep the samples whose timestamp lies strictly between
# 244818069 and 244929597, min-max normalise them with norm(), replace the
# timestamps by a 0-based positional index so every sensor shares the same
# index, and prefix each column name with 'left_foot_'.
in_window = (left_foot.index > 244818069) & (left_foot.index < 244929597)
left_foot = (
    norm(left_foot[in_window])
    .reset_index(drop=True)
    .add_prefix('left_foot_')
)

In [12]:
# Join the eight per-sensor frames side by side on their shared positional
# index, then drop every row that contains a NaN.
apa = pd.concat(
    [right_knee, left_knee, right_hip, left_hip,
     right_arm, left_arm, right_foot, left_foot],
    axis=1
).dropna()

In [22]:
# Plot the normalised magnitude of every sensor against the shared sample
# index, one line per sensor.
bepa = ['right_knee_magnitude', 'left_knee_magnitude', 'right_hip_magnitude', 'left_hip_magnitude', 'right_arm_magnitude', 'left_arm_magnitude', 'right_foot_magnitude', 'left_foot_magnitude']
for b in bepa:
    # Select the single column `b`; indexing with the whole list `bepa` here
    # would redraw all eight columns on every pass of the loop.
    plt.plot(apa.index, apa[b], label=b)
# The labels passed above let the legend identify each sensor's line.
plt.legend()
plt.show()

In [14]:
# Persist the combined, NaN-free frame `apa` to 'accelerometer_data_all.pkl'
# so it can be reloaded later without repeating the preprocessing.
apa.to_pickle('accelerometer_data_all.pkl')

In [15]:
# Preview only the first ten rows instead of rendering the whole frame.
apa.head(10)

Unnamed: 0,right_knee_X,right_knee_Y,right_knee_Z,right_knee_magnitude,left_knee_X,left_knee_Y,left_knee_Z,left_knee_magnitude,right_hip_X,right_hip_Y,...,left_arm_Z,left_arm_magnitude,right_foot_X,right_foot_Y,right_foot_Z,right_foot_magnitude,left_foot_X,left_foot_Y,left_foot_Z,left_foot_magnitude
0,0.037729,1.000000,0.093661,0.980646,0.355095,0.592563,0.566948,0.287249,0.991814,1.000000,...,0.224080,0.850012,0.449734,0.537152,0.161312,0.396302,0.562478,0.517113,0.741939,0.282916
1,0.000000,0.848288,0.183334,0.859315,0.361495,0.593900,0.555222,0.277147,0.594142,0.952794,...,0.175627,0.694509,0.151820,0.000000,0.360903,0.720527,0.326574,0.000000,0.591119,0.616252
2,0.000000,0.431788,0.475287,0.650893,0.354071,0.593098,0.553041,0.283342,0.480225,0.776108,...,0.067492,0.643168,0.165390,0.000000,0.535072,0.694562,0.216312,0.000000,0.489509,0.660919
3,0.000489,0.054483,0.497723,0.840854,0.346902,0.593365,0.547314,0.289204,0.559170,0.773004,...,0.063221,0.539066,0.218601,0.608446,0.485159,0.347129,0.062623,0.631128,0.429526,0.532115
4,0.384432,0.000000,0.638613,0.643060,0.343830,0.590958,0.541860,0.289006,0.552381,0.966517,...,0.127296,0.447889,0.000000,1.000000,0.515223,0.816659,0.000000,0.966674,0.272465,0.831746
5,0.432753,0.164835,0.508905,0.406844,0.348950,0.592830,0.540224,0.284497,0.544702,1.000000,...,0.195582,0.390131,0.334565,0.851333,0.632562,0.472547,0.364828,0.812955,0.395819,0.411113
6,0.157647,0.473483,0.396282,0.454051,0.357655,0.591493,0.541314,0.274514,0.538497,1.000000,...,0.276256,0.359972,0.477103,0.536418,0.472608,0.056654,0.628885,0.538293,0.528267,0.155382
7,0.237195,0.687132,0.494285,0.419891,0.370968,0.576511,0.551404,0.252011,0.503725,1.000000,...,0.371636,0.347763,0.535841,0.504742,0.489740,0.040431,0.564645,0.521675,0.525322,0.080236
8,0.268590,0.857445,0.547362,0.551882,0.379160,0.580524,0.551950,0.247078,0.477538,1.000000,...,0.435223,0.345045,0.480339,0.610095,0.491801,0.127351,0.434669,0.616083,0.454566,0.162179
9,0.294246,0.980124,0.522870,0.668446,0.383513,0.583735,0.549223,0.244410,0.473637,0.960308,...,0.475377,0.326418,0.400644,0.666712,0.505130,0.222310,0.395666,0.643381,0.455177,0.210542


In [16]:
# Extract one walking cycle: the rows of `apa` whose index lies strictly
# between 10 and 2000, saved to their own pickle file.
cycle_rows = (apa.index > 10) & (apa.index < 2000)
cepa = apa[cycle_rows]
# Optional preview of the extracted segment:
#plt.plot(cepa.index, cepa)
#plt.show()
cepa.to_pickle('accelerometer_data_cycle.pkl')

In [21]:
# plot data
#plt.figure()
#plt.title('X, Y, Z acceleration')
#plt.plot(a.index, a['X'])
#plt.plot(a.index, a['Y'])
#plt.plot(a.index, a['Z'])
#plt.figure()
#plt.title('magnitude acceleration')
#plt.plot(a.index, a['magnitude'])
#plt.show()