# CX4240 Health Activity Classification Task

## Imports

In [10]:
pip install pydot

Note: you may need to restart the kernel to use updated packages.


In [None]:
pip install tensorflow

In [11]:
pip install graphviz

Collecting graphviz
  Downloading graphviz-0.20.1-py3-none-any.whl (47 kB)
Installing collected packages: graphviz
Successfully installed graphviz-0.20.1
Note: you may need to restart the kernel to use updated packages.


In [1]:
from matplotlib import pyplot as plt
%matplotlib inline
import pandas as pd
import numpy as np
import math
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.utils import plot_model

## CD Into Dataset Location

In [2]:
%cd C:\Users\6finl\CX4240-Project\PAMAP2_Dataset

C:\Users\6finl\CX4240-Project\PAMAP2_Dataset


## Create Column Names and Activity IDs as according to the UCI ML pdf

In [3]:
# Load data
list_of_files = ["Protocol\subject101.dat",
                 "Protocol\subject102.dat",
                 "Protocol\subject103.dat",
                 "Protocol\subject104.dat",
                 "Protocol\subject105.dat",
                 "Protocol\subject106.dat",
                 "Protocol\subject107.dat",
                 "Protocol\subject108.dat",
                 "Protocol\subject109.dat" ]

subjectID = [1,2,3,4,5,6,7,8,9]

activityIDdict = {0: 'transient',
              1: 'lying',
              2: 'sitting',
              3: 'standing',
              4: 'walking',
              5: 'running',
              6: 'cycling',
              7: 'Nordic_walking',
              9: 'watching_TV',
              10: 'computer_work',
              11: 'car driving',
              12: 'ascending_stairs',
              13: 'descending_stairs',
              16: 'vacuum_cleaning',
              17: 'ironing',
              18: 'folding_laundry',
              19: 'house_cleaning',
              20: 'playing_soccer',
              24: 'rope_jumping' }

colNames = ["timestamp", "activityID","heartrate"]

IMUhand = ['handTemperature', 
           'handAcc16_1', 'handAcc16_2', 'handAcc16_3', 
           'handAcc6_1', 'handAcc6_2', 'handAcc6_3', 
           'handGyro1', 'handGyro2', 'handGyro3', 
           'handMagne1', 'handMagne2', 'handMagne3',
           'handOrientation1', 'handOrientation2', 'handOrientation3', 'handOrientation4']

IMUchest = ['chestTemperature', 
           'chestAcc16_1', 'chestAcc16_2', 'chestAcc16_3', 
           'chestAcc6_1', 'chestAcc6_2', 'chestAcc6_3', 
           'chestGyro1', 'chestGyro2', 'chestGyro3', 
           'chestMagne1', 'chestMagne2', 'chestMagne3',
           'chestOrientation1', 'chestOrientation2', 'chestOrientation3', 'chestOrientation4']

IMUankle = ['ankleTemperature', 
           'ankleAcc16_1', 'ankleAcc16_2', 'ankleAcc16_3', 
           'ankleAcc6_1', 'ankleAcc6_2', 'ankleAcc6_3', 
           'ankleGyro1', 'ankleGyro2', 'ankleGyro3', 
           'ankleMagne1', 'ankleMagne2', 'ankleMagne3',
           'ankleOrientation1', 'ankleOrientation2', 'ankleOrientation3', 'ankleOrientation4']

columns = colNames + IMUhand + IMUchest + IMUankle  #all columns in one list

len(columns)

54

## Read Data in Pandas Dataset

In [4]:
data = pd.DataFrame()
for file in list_of_files:
    procData = pd.read_table(file, header=None, sep='\s+')
    procData.columns = columns
    procData['subject_id'] = int(file[-5])
    data = data.append(procData, ignore_index=True)

data.reset_index(drop=True, inplace=True)
data.head()

Unnamed: 0,timestamp,activityID,heartrate,handTemperature,handAcc16_1,handAcc16_2,handAcc16_3,handAcc6_1,handAcc6_2,handAcc6_3,...,ankleGyro2,ankleGyro3,ankleMagne1,ankleMagne2,ankleMagne3,ankleOrientation1,ankleOrientation2,ankleOrientation3,ankleOrientation4,subject_id
0,8.38,0,104.0,30.0,2.37223,8.60074,3.51048,2.43954,8.76165,3.35465,...,0.00925,-0.01758,-61.1888,-38.9599,-58.1438,1.0,0.0,0.0,0.0,1
1,8.39,0,,30.0,2.18837,8.5656,3.66179,2.39494,8.55081,3.64207,...,-0.004638,0.000368,-59.8479,-38.8919,-58.5253,1.0,0.0,0.0,0.0,1
2,8.4,0,,30.0,2.37357,8.60107,3.54898,2.30514,8.53644,3.7328,...,0.000148,0.022495,-60.7361,-39.4138,-58.3999,1.0,0.0,0.0,0.0,1
3,8.41,0,,30.0,2.07473,8.52853,3.66021,2.33528,8.53622,3.73277,...,-0.020301,0.011275,-60.4091,-38.7635,-58.3956,1.0,0.0,0.0,0.0,1
4,8.42,0,,30.0,2.22936,8.83122,3.7,2.23055,8.59741,3.76295,...,-0.014303,-0.002823,-61.5199,-39.3879,-58.2694,1.0,0.0,0.0,0.0,1


## Clean Data

In [6]:
def dataCleaning(data):
        data = data.drop(['handTemperature','heartrate', 'handOrientation1', 'handOrientation2', 'handOrientation3', 'handOrientation4',
                                             'chestOrientation1', 'chestOrientation2', 'chestOrientation3', 'chestOrientation4',
                                             'ankleOrientation1', 'ankleOrientation2', 'ankleOrientation3', 'ankleOrientation4'],
                                             axis = 1)  # removal of unecessary columns
        data = data.drop(data[data.activityID == 0].index) #removal of any row of activity 0 as it is transient activity which it is not used
        data = data.interpolate() #removal of any remaining NaN value cells by constructing new data points in known set of data points
        
        return data

In [7]:
dataClean = dataCleaning(data)

In [8]:

dataClean.head(10)

Unnamed: 0,timestamp,activityID,handAcc16_1,handAcc16_2,handAcc16_3,handAcc6_1,handAcc6_2,handAcc6_3,handGyro1,handGyro2,...,ankleAcc6_1,ankleAcc6_2,ankleAcc6_3,ankleGyro1,ankleGyro2,ankleGyro3,ankleMagne1,ankleMagne2,ankleMagne3,subject_id
2928,37.66,1,2.2153,8.27915,5.58753,2.24689,8.55387,5.77143,-0.00475,0.037579,...,9.63162,-1.76757,0.265761,0.002908,-0.027714,0.001752,-61.1081,-36.8636,-58.3696,1
2929,37.67,1,2.29196,7.67288,5.74467,2.27373,8.14592,5.78739,-0.17171,0.025479,...,9.58649,-1.75247,0.250816,0.020882,0.000945,0.006007,-60.8916,-36.3197,-58.3656,1
2930,37.68,1,2.2909,7.1424,5.82342,2.26966,7.66268,5.78846,-0.238241,0.011214,...,9.60196,-1.73721,0.356632,-0.035392,-0.052422,-0.004882,-60.3407,-35.7842,-58.6119,1
2931,37.69,1,2.218,7.14365,5.8993,2.22177,7.25535,5.88,-0.192912,0.019053,...,9.58674,-1.78264,0.311453,-0.032514,-0.018844,0.02695,-60.7646,-37.1028,-57.8799,1
2932,37.7,1,2.30106,7.25857,6.09259,2.2072,7.24042,5.95555,-0.069961,-0.018328,...,9.64677,-1.7524,0.295902,0.001351,-0.048878,-0.006328,-60.204,-37.1225,-57.8847,1
2933,37.71,1,2.07165,7.25965,6.01218,2.19238,7.21038,6.01604,0.063895,0.007175,...,9.60177,-1.75239,0.311276,0.003793,-0.026906,0.004125,-61.3257,-36.9744,-57.7501,1
2934,37.72,1,2.41148,7.5978,5.93915,2.23988,7.46679,6.03053,0.190837,0.003116,...,9.67694,-1.76748,0.32606,0.036814,-0.032277,-0.006866,-61.552,-36.9632,-57.9957,1
2935,37.73,1,2.32815,7.63431,5.70686,2.31663,7.64745,6.01495,0.200328,-0.009266,...,9.61685,-1.76749,0.32638,-0.010352,-0.016621,0.006548,-61.5738,-36.1724,-59.3487,1
2936,37.74,1,2.25096,7.78598,5.62821,2.28637,7.70801,5.93935,0.204098,-0.068256,...,9.61686,-1.72212,0.326234,0.039346,0.020393,-0.01188,-61.7741,-37.1744,-58.1199,1
2937,37.75,1,2.14107,7.52262,5.78141,2.31538,7.72276,5.78828,0.171291,-0.055411,...,9.63189,-1.70699,0.326105,0.029874,-0.010763,0.005133,-60.768,-37.4206,-58.8735,1


In [9]:
dataClean.isnull().sum()

timestamp           0
activityID          0
handAcc16_1         0
handAcc16_2         0
handAcc16_3         0
handAcc6_1          0
handAcc6_2          0
handAcc6_3          0
handGyro1           0
handGyro2           0
handGyro3           0
handMagne1          0
handMagne2          0
handMagne3          0
chestTemperature    0
chestAcc16_1        0
chestAcc16_2        0
chestAcc16_3        0
chestAcc6_1         0
chestAcc6_2         0
chestAcc6_3         0
chestGyro1          0
chestGyro2          0
chestGyro3          0
chestMagne1         0
chestMagne2         0
chestMagne3         0
ankleTemperature    0
ankleAcc16_1        0
ankleAcc16_2        0
ankleAcc16_3        0
ankleAcc6_1         0
ankleAcc6_2         0
ankleAcc6_3         0
ankleGyro1          0
ankleGyro2          0
ankleGyro3          0
ankleMagne1         0
ankleMagne2         0
ankleMagne3         0
subject_id          0
dtype: int64

## Autoencoder Approach

In [10]:
y = dataClean[['activityID']]
X = dataClean.drop(['activityID'], axis =1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=1)
n_inputs = X.shape[1]
X_train

Unnamed: 0,timestamp,handAcc16_1,handAcc16_2,handAcc16_3,handAcc6_1,handAcc6_2,handAcc6_3,handGyro1,handGyro2,handGyro3,...,ankleAcc6_1,ankleAcc6_2,ankleAcc6_3,ankleGyro1,ankleGyro2,ankleGyro3,ankleMagne1,ankleMagne2,ankleMagne3,subject_id
441134,652.81,-8.535430,4.672780,0.204375,-8.455050,4.759940,0.429913,-0.005022,0.034622,0.002961,...,9.68665,-1.617810,-0.960966,0.105228,0.002605,-0.011086,-18.84000,-17.40930,35.44030,2
523383,1475.30,-8.795760,4.761390,1.393290,-7.530370,5.600480,2.027810,-0.474665,-0.133971,-0.857582,...,9.50332,-0.877688,-1.749400,0.163930,-0.060279,0.357141,-6.53717,-9.56027,17.97060,2
2011757,2317.37,-12.678300,1.943700,1.113710,-12.522700,2.207720,1.406110,-1.126180,-1.071370,2.646580,...,9.22863,-2.073790,-2.697620,-0.457819,0.143704,-1.226720,-25.51460,-3.30379,28.27050,6
1839311,592.91,-4.460820,1.640340,8.319500,-4.253960,1.626140,8.571790,0.008714,-0.032396,-0.009886,...,9.54624,-1.150540,-2.248220,-0.019183,0.027471,-0.002515,-18.44610,-2.80701,10.70760,6
2567044,1116.08,1.553230,-6.750000,-3.878070,1.950660,-7.264250,-2.936960,-0.551014,1.156070,0.228398,...,9.05025,-0.288674,-2.369440,0.093784,0.142136,0.074481,-36.16820,-20.00700,-9.47498,8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
767854,3920.01,0.578358,5.267460,2.571150,0.508042,5.452570,2.805150,0.165054,-0.457209,-0.888072,...,9.65957,1.604410,-0.471726,0.079997,-0.059947,-0.371735,-35.20980,8.79766,24.46600,2
2695867,2404.31,-7.785550,0.858687,-1.349510,-7.822220,0.827393,-1.041990,-0.168163,-2.048040,-2.125310,...,13.10320,1.359810,-3.228700,0.259332,0.900903,1.991510,-28.45940,31.30100,12.76030,8
739868,3640.15,-4.966830,3.344690,7.536420,-5.562300,3.582250,6.937960,-0.498393,-0.108093,-0.184401,...,9.56019,2.014440,0.889652,0.033467,0.764591,-1.741880,-27.08260,-20.21920,-5.74062,2
768346,3924.93,-3.996340,15.562767,1.415911,-4.985490,17.301500,1.979433,0.156208,-1.761497,-1.351430,...,5.34266,1.163240,-1.764140,0.520026,-2.915240,4.909640,-33.77970,-2.60167,14.92560,2


In [10]:
# scale data
t = MinMaxScaler()
t.fit(X_train)
X_train = t.transform(X_train)
X_test = t.transform(X_test)
# define encoder
visible = Input(shape=(n_inputs,))
# encoder level 1
e = Dense(n_inputs*2)(visible)
e = BatchNormalization()(e)
e = LeakyReLU()(e)
# encoder level 2
e = Dense(n_inputs)(e)
e = BatchNormalization()(e)
e = LeakyReLU()(e)
# bottleneck
n_bottleneck = round(float(n_inputs)/2.0)
bottleneck = Dense(n_bottleneck)(e)
# define decoder, level 1
d = Dense(n_inputs)(bottleneck)
d = BatchNormalization()(d)
d = LeakyReLU()(d)
# decoder level 2
d = Dense(n_inputs*2)(d)
d = BatchNormalization()(d)
d = LeakyReLU()(d)
# output layer
output = Dense(n_inputs, activation='linear')(d)
# define autoencoder model
model = Model(inputs=visible, outputs=output)
# compile autoencoder model
model.compile(optimizer='adam', loss='mse')


In [None]:
# fit the autoencoder model to reconstruct input
history = model.fit(X_train, X_train, epochs=200, batch_size=16, verbose=2, validation_data=(X_test,X_test))
# plot loss
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.legend()
plt.show()
# define an encoder model (without the decoder)
encoder = Model(inputs=visible, outputs=bottleneck)
# save the encoder to file
encoder.save('encoder.h5')

Epoch 1/200
81358/81358 - 290s - loss: 0.0014 - val_loss: 4.7078e-04 - 290s/epoch - 4ms/step
Epoch 2/200
81358/81358 - 281s - loss: 6.0098e-04 - val_loss: 3.6398e-04 - 281s/epoch - 3ms/step
Epoch 3/200
81358/81358 - 284s - loss: 5.5453e-04 - val_loss: 5.1082e-04 - 284s/epoch - 3ms/step
Epoch 4/200
81358/81358 - 303s - loss: 5.3019e-04 - val_loss: 3.2289e-04 - 303s/epoch - 4ms/step
Epoch 5/200
81358/81358 - 301s - loss: 5.1431e-04 - val_loss: 3.6299e-04 - 301s/epoch - 4ms/step
Epoch 6/200
81358/81358 - 295s - loss: 5.0279e-04 - val_loss: 4.2418e-04 - 295s/epoch - 4ms/step
Epoch 7/200
81358/81358 - 299s - loss: 4.9579e-04 - val_loss: 4.4954e-04 - 299s/epoch - 4ms/step
Epoch 8/200
81358/81358 - 301s - loss: 4.8882e-04 - val_loss: 3.3924e-04 - 301s/epoch - 4ms/step
Epoch 9/200
81358/81358 - 289s - loss: 4.8438e-04 - val_loss: 2.8801e-04 - 289s/epoch - 4ms/step
Epoch 10/200
81358/81358 - 297s - loss: 4.8026e-04 - val_loss: 2.6319e-04 - 297s/epoch - 4ms/step
Epoch 11/200
81358/81358 - 295s -

Epoch 85/200
81358/81358 - 292s - loss: 4.2529e-04 - val_loss: 2.4866e-04 - 292s/epoch - 4ms/step
Epoch 86/200
81358/81358 - 288s - loss: 4.2507e-04 - val_loss: 2.4390e-04 - 288s/epoch - 4ms/step
Epoch 87/200
81358/81358 - 287s - loss: 4.2484e-04 - val_loss: 3.9043e-04 - 287s/epoch - 4ms/step
Epoch 88/200
81358/81358 - 295s - loss: 4.2507e-04 - val_loss: 2.0806e-04 - 295s/epoch - 4ms/step
Epoch 89/200
81358/81358 - 287s - loss: 4.2533e-04 - val_loss: 2.7640e-04 - 287s/epoch - 4ms/step
Epoch 90/200
81358/81358 - 288s - loss: 4.2503e-04 - val_loss: 2.7355e-04 - 288s/epoch - 4ms/step
Epoch 91/200
81358/81358 - 292s - loss: 4.2458e-04 - val_loss: 2.4100e-04 - 292s/epoch - 4ms/step
Epoch 92/200
81358/81358 - 287s - loss: 4.2388e-04 - val_loss: 2.3539e-04 - 287s/epoch - 4ms/step
Epoch 93/200
81358/81358 - 287s - loss: 4.2423e-04 - val_loss: 3.3672e-04 - 287s/epoch - 4ms/step
Epoch 94/200
81358/81358 - 292s - loss: 4.2400e-04 - val_loss: 2.5193e-04 - 292s/epoch - 4ms/step
Epoch 95/200
81358/8

In [None]:
from sklearn.datasets import make_classification
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [52]:
y = dataClean[['activityID']]

y = np.ravel(y)
print(y.shape)
X = dataClean.drop(['activityID'], axis =1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=1)


(1942872,)


In [54]:
# scale data
t = MinMaxScaler()
t.fit(X_train)
X_train = t.transform(X_train)
X_test = t.transform(X_test)
# define model
model = LogisticRegression()
# fit model on training set
model.fit(X_train, y_train)
# make prediction on test set
yhat = model.predict(X_test)
# calculate accuracy
acc = accuracy_score(y_test, yhat)
print(acc)

MemoryError: Unable to allocate 59.6 MiB for an array with shape (1301724, 12) and data type int32