In [3]:
"""
CNN for Smartphone-Based Recognition of Human Activities and Postural Transitions Data Set 
Link: http://archive.ics.uci.edu/ml/datasets/Smartphone-Based+Recognition+of+Human+Activities+and+Postural+Transitions

Using Raw Data folder and splitting into train and testing with k-fold validation.

***********
Overall data formatting:
         accX  accY  accZ  gyroX  gyroY  gyroZ
Samples
1
2
3
...
n
************

1. Look into 1D convolutions
2. Figure out loading in data efficiently, possibly use some library
3. Samples have differing row lengths (due to time); truncating them to the same
    length may not be necessary.
    SOLVED: Each action/label in the labels .txt file covers one block of time
    within the full sampled data of an experiment.
    The full experiment sample data must therefore be divided into blocks of time
    that correspond to the action/label ranges from the labels .txt file.
"""

"\nCNN for Smartphone-Based Recognition of Human Activities and Postural Transitions Data Set \nLink: http://archive.ics.uci.edu/ml/datasets/Smartphone-Based+Recognition+of+Human+Activities+and+Postural+Transitions\n\nUsing Raw Data folder and splitting into train and testing with k-fold validation.\n\n***********\nOverall data formatting:\n         accX  accY  accZ  gyroX  gyroY  gyroZ\nSamples\n1\n2\n3\n...\nn\n************\n\n1. Look into 1D convolutions\n2. Figure out loading in data efficiently, possibly use some library\n3. Samples have differing row lengths (due to time), truncate to same length\n    don't know if its necessary. \n    SOLVED: The action/label lengths for each action are chopped up blocks\n    of time of entire experiment sampled data length.\n    Must divide up and chop full experiment sample data into blocks of time\n    that correspond to action/label length from labels .txt file\n"

In [84]:
import numpy as np
import glob

# Activity names used by the data set.
# NOTE(review): presumably indexed by (activity id - 1) from labels.txt — confirm
# against the data set's activity_labels.txt.
classes = ('WALKING', 'WALKING_UPSTAIRS', 'WALKING_DOWNSTAIRS', 'SITTING', 'STANDING', 'LAYING', 'STAND_TO_SIT', 'SIT_TO_STAND', 'SIT_TO_LIE', 'LIE_TO_SIT', 'STAND_TO_LIE', 'LIE_TO_STAND')

# Label table loaded as a float array, one row per labelled action.
labels = np.loadtxt('HAPT_Data_Set/RawData/labels.txt')

# Collect the per-experiment sample files.
# glob.glob() returns paths in arbitrary (OS-dependent) order, but the cells below
# pair ACC[i], GYRO[i] and the i-th experiment's labels purely by position, so the
# lists are sorted to make that pairing deterministic.
# NOTE(review): assumes the file names sort in experiment order (e.g. zero-padded
# experiment numbers) — confirm against the files on disk.
ACCtxt = sorted(glob.glob("HAPT_Data_Set/RawData/ACC/*.txt"))
GYROtxt = sorted(glob.glob("HAPT_Data_Set/RawData/Gyro/*.txt"))

# One array of samples per experiment file, in the same order as the path lists.
ACCexpData = [np.loadtxt(accPath) for accPath in ACCtxt]
GYROexpData = [np.loadtxt(gyroPath) for gyroPath in GYROtxt]

In [96]:
def _as_object_array(items):
    """Pack a list of (possibly differently shaped) arrays into a 1-D object array.

    np.asarray() on a ragged list raises ValueError on NumPy >= 1.24, and on
    equally shaped inputs it would silently stack them into one big numeric
    array. Filling element by element always yields shape (len(items),).
    """
    packed = np.empty(len(items), dtype=object)
    for position, item in enumerate(items):
        packed[position] = item
    return packed


# Convert to np arrays for ease of manipulation (one entry per experiment).
ACCexp = _as_object_array(ACCexpData)
GYROexp = _as_object_array(GYROexpData)

# Go through the labels array and collect consecutive rows that share the same
# experiment id (column 0) into one array per experiment.
labelArray = []
expArray = []
expcount = None  # experiment id of the rows currently collected in expArray
for row in np.atleast_2d(labels):
    exp = int(row[0])
    # A change of id closes the current group. Tracking the id itself (instead of
    # counting up from 1) keeps the grouping correct when ids have gaps or do not
    # start at 1.
    if expcount is not None and exp != expcount:
        labelArray.append(np.asarray(expArray))
        expArray = []
    expArray.append(row)
    expcount = exp

# The last experiment is never closed inside the loop, so flush it here.
if expArray:
    expArray = np.asarray(expArray)
    labelArray.append(expArray)

# Now, we have label array according to experiment number and comprised of time slices of each
# action with its respective label.
npLabels = _as_object_array(labelArray)
print(npLabels.shape)
print(npLabels[0].shape)
print(npLabels[60])
print(ACCexp[60].shape)
print(GYROexp[60].shape)

(61,)
(22, 5)
[[6.1000e+01 3.0000e+01 5.0000e+00 5.7400e+02 1.6220e+03]
 [6.1000e+01 3.0000e+01 7.0000e+00 1.6230e+03 1.8380e+03]
 [6.1000e+01 3.0000e+01 4.0000e+00 1.8390e+03 2.7720e+03]
 [6.1000e+01 3.0000e+01 8.0000e+00 2.7730e+03 2.9000e+03]
 [6.1000e+01 3.0000e+01 5.0000e+00 2.9010e+03 4.1970e+03]
 [6.1000e+01 3.0000e+01 1.1000e+01 4.1980e+03 4.4460e+03]
 [6.1000e+01 3.0000e+01 6.0000e+00 4.4470e+03 5.6570e+03]
 [6.1000e+01 3.0000e+01 1.0000e+01 5.6580e+03 5.8960e+03]
 [6.1000e+01 3.0000e+01 4.0000e+00 5.8970e+03 7.1650e+03]
 [6.1000e+01 3.0000e+01 9.0000e+00 7.1660e+03 7.4570e+03]
 [6.1000e+01 3.0000e+01 6.0000e+00 7.4580e+03 8.5240e+03]
 [6.1000e+01 3.0000e+01 1.2000e+01 8.5250e+03 8.6710e+03]
 [6.1000e+01 3.0000e+01 1.0000e+00 9.6760e+03 1.0779e+04]
 [6.1000e+01 3.0000e+01 1.0000e+00 1.1106e+04 1.2197e+04]
 [6.1000e+01 3.0000e+01 3.0000e+00 1.2956e+04 1.3637e+04]
 [6.1000e+01 3.0000e+01 2.0000e+00 1.3842e+04 1.4574e+04]
 [6.1000e+01 3.0000e+01 3.0000e+00 1.4751e+04 1.5427e+04]


In [99]:
# Now, we must split the ACC and GYRO experiment data into a multidimensional array
# that corresponds to labelled action for particular time splices.

numexp = npLabels.shape[0]

# Each entry of `action` is one labelled block of samples: the first 3 columns are
# ACC data and the next 3 columns are GYRO data.
# `actionLabels` holds the label value (column 2 of the label rows) of each block,
# index-aligned with `action`.
action = []
actionLabels = []

for experiment in range(numexp):
    ACCexpsamples = ACCexp[experiment]
    GYROexpsamples = GYROexp[experiment]
    expRows = npLabels[experiment]
    expLabels = expRows[:, 2]
    actionStart = expRows[:, 3]
    actionEnd = expRows[:, 4]
    # GYRO and ACC sample counts can differ; only indices available in both are usable.
    sampleLimit = min(ACCexpsamples.shape[0], GYROexpsamples.shape[0])

    for actions in range(expLabels.shape[0]):
        print("experiment: {}, action: {}".format(experiment, actions))
        start = int(actionStart[actions])
        end = int(actionEnd[actions])
        # NOTE(review): start/end are used directly as a half-open slice, so the
        # sample at `end` is excluded — confirm whether labels.txt is 1-based/inclusive.
        if start <= sampleLimit and end <= sampleLimit:
            segment = np.hstack((ACCexpsamples[start:end], GYROexpsamples[start:end]))
            action.append(segment)
            actionLabels.append(int(expLabels[actions]))
        else:
            # The action reaches past the recorded samples. Processing of this
            # experiment stops here, so this label row and every later one have
            # no sample block. np.delete() returns a copy (the original call
            # discarded it), so the rows are dropped by storing the truncated
            # array back instead.
            npLabels[experiment] = expRows[:actions]
            break
       

experiment: 0, action: 0
experiment: 0, action: 1
experiment: 0, action: 2
experiment: 0, action: 3
experiment: 0, action: 4
experiment: 0, action: 5
experiment: 0, action: 6
experiment: 0, action: 7
experiment: 0, action: 8
experiment: 0, action: 9
experiment: 0, action: 10
experiment: 0, action: 11
experiment: 0, action: 12
experiment: 0, action: 13
experiment: 0, action: 14
experiment: 0, action: 15
experiment: 0, action: 16
experiment: 0, action: 17
experiment: 0, action: 18
experiment: 0, action: 19
experiment: 0, action: 20
experiment: 0, action: 21
experiment: 1, action: 0
experiment: 1, action: 1
experiment: 1, action: 2
experiment: 1, action: 3
experiment: 1, action: 4
experiment: 1, action: 5
experiment: 1, action: 6
experiment: 1, action: 7
experiment: 1, action: 8
experiment: 1, action: 9
experiment: 1, action: 10
experiment: 1, action: 11
experiment: 1, action: 12
experiment: 1, action: 13
experiment: 1, action: 14
experiment: 1, action: 15
experiment: 1, action: 16
experi

In [104]:
# The action blocks have different numbers of rows, so they cannot form a regular
# numeric array; np.asarray() on such a ragged list raises ValueError on
# NumPy >= 1.24. Build a 1-D object array and fill it element by element instead.
npAction = np.empty(len(action), dtype=object)
for idx, block in enumerate(action):
    npAction[idx] = block
print(npAction.shape)
print(npAction[0].shape)
print(npAction[0])

(1115,)
(982, 6)
[[ 1.03750000e+00 -3.00000015e-01 -3.08333342e-01 -2.01585535e-02
   1.95476878e-02 -6.23082556e-02]
 [ 1.09444447e+00 -3.25000020e-01 -3.65277787e-01  1.86313894e-02
  -1.37750119e-01 -8.06342140e-02]
 [ 1.04861114e+00 -3.27777779e-01 -3.37500023e-01  1.36222944e-01
  -2.22049519e-01 -9.16297897e-04]
 ...
 [ 9.76388952e-01 -1.83333340e-01 -2.88888904e-01  1.06901415e-02
  -3.35975876e-03  3.05432623e-04]
 [ 9.73611168e-01 -1.81944448e-01 -2.87500012e-01  9.77384392e-03
  -7.63581553e-03  9.16297827e-03]
 [ 9.76388952e-01 -1.84722231e-01 -2.95833339e-01  1.13010071e-02
  -6.10865233e-03 -9.16297897e-04]]
