First I took out all the _MODEL folders because from what I can tell they look like copies of the data.

In [1]:
import numpy as np
import glob
from scipy.fftpack import fft,fftfreq
import itertools as it
import matplotlib.pyplot as plt
from __future__ import division
%matplotlib osx

In [2]:
def bits2acc(data):
# convert data from bit to acceloration (taken from displayTrial.m)
    return -14.709 + (data/63)*(2*14.709)

fs = 32.0 #Hz (taken from MANUAL.txt)

# load data
all_data = {}
for data_dir in glob.glob('./HMP_Dataset/*/'):
    trial_data = []
    if len(glob.glob(data_dir + '/*.txt')) > 5:
        for data_file in glob.glob(data_dir + '/*.txt'):
            trial_data.append(np.genfromtxt(data_file))

        trial_data = [bits2acc(data) for data in trial_data]

        activity = data_dir[14:-1].replace('_',' ')
        all_data[activity] = trial_data


In [3]:
#How long is each activity?
f,axs = plt.subplots(5,3)
for count,(activity, trials_data) in enumerate(all_data.items()):
    durs = [len(dat)/fs for dat in trials_data]
    axs.ravel()[count].hist(durs)
    axs.ravel()[count].set_title(activity + ': ' + str(len(trials_data)))
f.subplots_adjust(hspace=0.5,bottom=.05)

OK, so the number of examples for each class varies wildly. We'll have to deal with that. Duration is pretty inconsistent too, but the values are reasonable for the tasks

In [6]:
def displayTrial(trial_data):
    f,(ax1, ax2, ax3) = plt.subplots(3, sharex=True, sharey=True)
    y = trial_data[:,0]
    x = np.arange(len(y))/fs
    ax1.plot(x,y)
    ax1.set_ylabel('x')
    ax2.plot(x,trial_data[:,1])
    ax2.set_ylabel('y')
    ax3.plot(x,trial_data[:,2])
    ax3.set_ylabel('z')
    ax3.set_xlabel('time (s)')

for activity, trials_data in all_data.items():
    displayTrial(trials_data[0])
    plt.suptitle(activity + ' acceleration')

From looking that the examples, it appears that an FFT might provide useful features for classification, particularly tasks like brushing teeth and combing hair that might resemble movements of construction workers

In [9]:
all_data_fft = {}

# do fft for each trial.
for activity, trials_data in all_data.items():
    all_data_fft[activity] = [fft.rfft(trial_data,axis=0) for trial_data in trials_data]
    

def displayFFTtrial(trial_data):
    f,(ax1, ax2, ax3) = plt.subplots(3, sharex=True, sharey=True)
    y = trial_data[:,0]
    x = fft.rfftfreq(shape(y)[0], d=1./fs)
    ax1.plot(x,y)
    ax1.set_ylabel('x')
    ax2.plot(x,trial_data[:,1])
    ax2.set_ylabel('y')
    ax3.plot(x,trial_data[:,2])
    ax3.set_ylabel('z')
    ax3.set_xlabel('frequency (s)')

for activity, trials_data in all_data_fft.items():
    displayFFTtrial(trials_data[0])
    plt.suptitle(activity + ' fourier')

AttributeError: 'function' object has no attribute 'rfft'

TODO: Build classifier

In [68]:
from itertools import tee
from future_builtins import zip
def pairwise(iterable):
    "s -> (s0,s1), (s1,s2), (s2, s3), ..."
    a, b = tee(iterable)
    next(b, None)
    return zip(a, b)

def get_fft_feats(trial_data,fs = 32.0, freq_res = 0.2,freq_max = 8.001,combine=False):
    if combine:
        trial_data = np.linalg.norm(trial_data,axis=1)
    fourier = fft(trial_data,axis=0)
    xf = fftfreq(trial_data.shape[0],1/fs)
    xf_bins = np.arange(0,freq_max,freq_res)
    f_bins = []
    for freq_bin_min,freq_bin_max in pairwise(xf_bins):
        these_f = np.where((xf < freq_bin_max) & (xf > freq_bin_min))
        f_bins.append(np.nanmean(np.abs(np.real(fourier[these_f,:])),1))
    f_bins = np.concatenate(f_bins)
    
    xf_bins = xf_bins[:-1] + (xf_bins[1]-xf_bins[0])/2
    return xf_bins,f_bins.ravel()

trial_data = all_data['Climb stairs'][0]
xf_bins,f_bins = get_fft_feats(trial_data)

In [69]:
# rearrange data for classifier
X = []
y = []
labels = []
for count,(activity, trials_data) in enumerate(all_data.items()):
    X.append([get_fft_feats(trial_data)[1] for trial_data in trials_data])
    y.append(count*np.ones(np.shape(trials_data)))
    labels.append(activity)
X = np.concatenate(X)
y = np.concatenate(y)
labels = np.array(labels)

In [73]:
from sklearn import svm
from sklearn import cross_validation
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import Imputer
imp = Imputer(missing_values='NaN', strategy='mean', axis=0)


X_train, X_test, y_train, y_test = cross_validation.train_test_split \
(X, y, test_size=0.3, random_state=0)

imp.fit(X_train)
clf = svm.SVC(kernel='linear', C=1).fit(imp.transform(X_train), y_train)
y_pred = clf.predict(imp.transform(X_test))  

C = confusion_matrix(y_test, y_pred)

In [80]:
import scipy.cluster.hierarchy as sch
Z= sch.linkage(1-C)
Z2 = sch.dendrogram(Z)
C = C[:,Z2['leaves']]
C = C[Z2['leaves'],:]

fig = plt.figure(figsize=(8,8))
ax2 = fig.add_axes([0.3,0.71,0.6,0.2])
sch.dendrogram(Z)
ax2.set_xticks([])
ax2.set_yticks([])
ax2.set_title('score: ' + str(clf.score(imp.transform(X_test),y_test)))


axmatrix = fig.add_axes([0.3,0.1,0.6,0.6])
im = axmatrix.matshow(C, aspect='auto', origin='lower',cmap=plt.cm.YlGnBu)
axmatrix.set_yticks(np.arange(C.shape[0]))
axmatrix.set_yticklabels(labels[Z2['leaves']])
ax2.set_xticks([])

[]

In [None]:
plt.cm.YlGnBu