# Preparing data for input into the `hmm["activity"]` models

In [1]:
from __future__ import division

%matplotlib inline
import pandas as pd
import thinkdsp
import thinkplot
import numpy as np

from magnitude import magnitude
from pipeline import preprocess, extract_features_with_sliding_window, learn

In [2]:
# Load every person's recording of every activity into a nested dict:
#   data_dict[activity][person] -> DataFrame read from data/<person>_<activity>_long.csv
acts = ['walking', 'jogging', 'upstairs', 'downstairs']
names = ['meg', 'ryan', 'dennis']
data_dict = {activity: {} for activity in acts}
for person in names:
    for activity in acts:
        csv_path = 'data/{}_{}_long.csv'.format(person, activity)
        data_dict[activity][person] = pd.read_csv(csv_path)

In [3]:
# Peek at the first rows of one loaded recording to check how the CSV was parsed.
data_dict["walking"]["meg"].head()

Unnamed: 0,x,y,z,time
0,0.493804,2.130241,8.994417,1430067490092
1,0.135272,1.395221,7.765593,1430067490265
2,-2.08535,2.178125,9.363723,1430067490445
3,-2.765303,1.742979,9.216479,1430067490625
4,-1.693299,-0.641047,10.671555,1430067490805


In [4]:
# Build feature_dict[activity][person] by preprocessing each recording and
# extracting features over a sliding window (n_windows=10).
feature_dict = {}
for activity, recordings in data_dict.items():
    print("Activity: {}".format(activity))
    activity_features = feature_dict[activity] = {}
    for person, recording in recordings.items():
        print("Person: {}".format(person))
        print("Person Data: \n {}".format(recording.head()))

        normalized = preprocess(recording)
        activity_features[person] = extract_features_with_sliding_window(normalized, n_windows=10)
    # Blank line between activities.
    print("")

Activity: walking
Person: meg
Person Data: 
           x         y          z           time
0  0.493804  2.130241   8.994417  1430067490092
1  0.135272  1.395221   7.765593  1430067490265
2 -2.085350  2.178125   9.363723  1430067490445
3 -2.765303  1.742979   9.216479  1430067490625
4 -1.693299 -0.641047  10.671555  1430067490805
Person: dennis
Person Data: 
           x         y         z           time
0 -4.590283 -8.205527  1.548450  1430066134259
1 -4.669292 -8.217499  2.177526  1430066134439
2 -4.509479 -8.155848  2.326566  1430066134620
3 -4.317344 -8.166023  2.051831  1430066134800
4 -4.507683 -8.309077  2.343325  1430066134980
Person: ryan
Person Data: 
           x         y         z           time
0 -4.590283 -8.205527  1.548450  1430066134259
1 -4.669292 -8.217499  2.177526  1430066134439
2 -4.509479 -8.155848  2.326566  1430066134620
3 -4.317344 -8.166023  2.051831  1430066134800
4 -4.507683 -8.309077  2.343325  1430066134980

Activity: downstairs
Person: meg
Person Data

Here are the features extracted for one recording: a list of matrices, each representing the features for a sequence of windows.  Let's call one such matrix $X$.

In [5]:
# Display the features stored for one (activity, person) pair.
feature_dict['downstairs']['dennis']

[array([[  0.33333333,  12.87909771],
        [  0.33333333,  36.88440099],
        [  0.33333333,  36.5647144 ],
        [  0.25      ,  21.97315802],
        [  0.2       ,  34.39416802],
        [  0.2       ,  28.47431305],
        [  0.25      ,  24.86148643],
        [  0.14285714,  21.78608675],
        [  0.09090909,  11.73977745],
        [  0.08333333,   3.53140031]]), array([[  0.33333333,  36.88440099],
        [  0.33333333,  36.5647144 ],
        [  0.25      ,  21.97315802],
        [  0.2       ,  34.39416802],
        [  0.2       ,  28.47431305],
        [  0.25      ,  24.86148643],
        [  0.14285714,  21.78608675],
        [  0.09090909,  11.73977745],
        [  0.08333333,   3.53140031],
        [  1.        ,   0.21724057]]), array([[  0.33333333,  36.5647144 ],
        [  0.25      ,  21.97315802],
        [  0.2       ,  34.39416802],
        [  0.2       ,  28.47431305],
        [  0.25      ,  24.86148643],
        [  0.14285714,  21.78608675],
        [ 

Check out the `learn` function in `pipeline.py`: the real magic takes place there.  After training on the features, a dictionary of hidden Markov models is returned (`hidden_markov_models`).  These four models can give us a (log-)likelihood that a new sequence belongs to the activity they model. The model with the maximum likelihood will tell us which activity is happening.

In [6]:
# Train on all extracted features; the result is indexed by activity name
# (hidden_markov_models[activity]) when sequences are scored further down.
hidden_markov_models = learn(feature_dict)

Plug in different strings for `actual_activity` and `actual_person` to score how likely it is that the sequence for that activity and user is represented by each activity model.  Right now, we are training and testing on the same dataset.

In [13]:
# Score one known sequence against every activity model and remember the
# activity whose model gives the highest score.
actual_activity = 'downstairs'
actual_person = 'dennis'

# Start below every possible score so the first model scored always becomes the
# running maximum. Seeding from `predicted_score` (which is only assigned inside
# the loop below) raised NameError on a fresh kernel and, on a re-run, compared
# against a stale score left over from the previous execution.
max_val = float('-inf')
max_act = "none"

# Only the first window sequence ([0]) of the chosen recording is scored.
test_sequence = feature_dict[actual_activity][actual_person][0]

for activity in acts:
    print(activity)
    predicted_score = hidden_markov_models[activity].score(test_sequence)
    print(predicted_score)
    # `>=` means that on an exact tie the activity listed later in `acts` wins.
    if predicted_score >= max_val:
        max_val = predicted_score
        max_act = activity

walking
-31.4626921909
jogging
-36.0912030657
upstairs
-43.5830694044
downstairs
-30.9317716329


In [19]:
# Report the prediction next to the ground truth; the success line is printed
# only when the two agree.
print("The predicted activity is: %s" % max_act)
print("The actual activity is: %s" % actual_activity)

prediction_is_correct = str(max_act) == str(actual_activity)
if prediction_is_correct:
    print("The model predicted the right activity!")

The predicted activity is: downstairs
The actual activity is: downstairs
The model predicted the right activity!


Now that we've trained our model and verified that it works on the same dataset, we'll plug in new, unseen data.

In [39]:
#using the inputting code used earlier to input data from 'someone' and 'jen'
data_dict = {'walking':{},'jogging':{},'upstairs':{},'downstairs':{}}
names = ['someone','jen']
acts = ['walking', 'jogging', 'upstairs', 'downstairs']
for name in names:
    data_file_names = ['data/{}_{}_long.csv'.format(name, activity) for activity in acts]
    for i,file in enumerate(data_file_names):
        df = pd.read_csv(file)
        data_dict[acts[i]][name] = df

In [40]:
# Rebuild feature_dict[activity][person] from whatever data_dict currently
# holds: preprocess each recording, then extract sliding-window features
# (n_windows=10).
feature_dict = {}
for activity, recordings in data_dict.items():
    print("Activity: {}".format(activity))
    activity_features = feature_dict[activity] = {}
    for person, recording in recordings.items():
        print("Person: {}".format(person))
        print("Person Data: \n {}".format(recording.head()))

        normalized = preprocess(recording)
        activity_features[person] = extract_features_with_sliding_window(normalized, n_windows=10)
    # Blank line between activities.
    print("")

Activity: walking
Person: someone
Person Data: 
          x        y        z          time
0 -8.77995 -7.46541  0.63765  1.430320e+12
1 -5.10120 -8.34831 -1.15758  1.430320e+12
2 -3.37464 -9.15273  1.21644  1.430320e+12
3 -2.14839 -9.27045  0.98100  1.430320e+12
4 -5.70942 -5.55246  0.29430  1.430320e+12
Person: jen
Person Data: 
           x         y        z          time
0   3.29616 -1.795230  9.23121  1.430320e+12
1 -10.12392 -7.926480  8.04420  1.430320e+12
2  -3.95343 -8.887859  0.22563  1.430320e+12
3  -4.37526 -7.848000 -0.07848  1.430320e+12
4  -3.79647 -5.130630  2.59965  1.430320e+12

Activity: downstairs
Person: someone
Person Data: 
          x        y        z          time
0 -7.45560 -5.38569  2.05029  1.430320e+12
1 -7.72047 -5.04234  1.54998  1.430320e+12
2 -8.11287 -5.65056  2.12877  1.430320e+12
3 -8.49546 -5.72904  1.93257  1.430320e+12
4 -7.61256 -5.65056  2.43288  1.430320e+12
Person: jen
Person Data: 
          x         y        z          time
0 -4.60089 -8.

Now plug in different strings for `actual_activity` and `actual_person` to score how likely it is that the sequence for a particular unseen activity and user is represented by each activity model.

In [48]:
# Score one sequence against every activity model and remember the activity
# whose model gives the highest score.
actual_activity = 'downstairs'
actual_person = 'jen'

# Start below every possible score so the first model scored always becomes the
# running maximum. Seeding from `predicted_score` (which is only assigned inside
# the loop below) raised NameError on a fresh kernel and, on a re-run, compared
# against a stale score left over from the previous execution.
max_val = float('-inf')
max_act = "none"

# Only the first window sequence ([0]) of the chosen recording is scored.
test_sequence = feature_dict[actual_activity][actual_person][0]

for activity in acts:
    print(activity)
    predicted_score = hidden_markov_models[activity].score(test_sequence)
    print(predicted_score)
    # `>=` means that on an exact tie the activity listed later in `acts` wins.
    if predicted_score >= max_val:
        max_val = predicted_score
        max_act = activity

walking
78.5004674645
jogging
-60.1345661719
upstairs
66.1211556881
downstairs
70.8541275483


In [49]:
# Report the prediction next to the ground truth; the success line is printed
# only when the two agree.
print("The predicted activity is: %s" % max_act)
print("The actual activity is: %s" % actual_activity)

prediction_is_correct = str(max_act) == str(actual_activity)
if prediction_is_correct:
    print("The model predicted the right activity!")

The predicted activity is: walking
The actual activity is: downstairs
