# Preparing data for input into the hmm["activity"] models

In [50]:
from __future__ import division

%matplotlib inline
import pandas as pd
import thinkdsp
import thinkplot
import numpy as np

from magnitude import magnitude
from pipeline import preprocess, extract_features_with_sliding_window, learn

In [51]:
# Load every recording into a nested lookup: data_dict[activity][person] -> DataFrame
data_dict = {'walking':{},'jogging':{},'upstairs':{},'downstairs':{}}
names = ['meg','ryan','dennis']
acts = ['walking', 'jogging', 'upstairs', 'downstairs']
for name in names:
    # One CSV per (person, activity) pair: data/<person>_<activity>_long.csv
    for activity in acts:
        csv_path = 'data/{}_{}_long.csv'.format(name, activity)
        data_dict[activity][name] = pd.read_csv(csv_path)

In [52]:
# Show the first rows of meg's walking recording as a quick check of the loaded data
data_dict["walking"]["meg"].head()

Unnamed: 0,x,y,z,time
0,0.493804,2.130241,8.994417,1430067490092
1,0.135272,1.395221,7.765593,1430067490265
2,-2.08535,2.178125,9.363723,1430067490445
3,-2.765303,1.742979,9.216479,1430067490625
4,-1.693299,-0.641047,10.671555,1430067490805


In [53]:
feature_dict = {}
for activity, activity_data_dict in data_dict.iteritems():
    print "Activity: {}".format(activity)
    feature_dict[activity] = {}
    for person, person_data in activity_data_dict.iteritems():
        print "Person: {}".format(person)
        print "Person Data: \n {}".format(person_data.head())
        
        a_norm = preprocess(person_data)
        obs = extract_features_with_sliding_window(a_norm, n_windows=10)
        feature_dict[activity][person] = obs
    print

Activity: walking
Person: meg
Person Data: 
           x         y          z           time
0  0.493804  2.130241   8.994417  1430067490092
1  0.135272  1.395221   7.765593  1430067490265
2 -2.085350  2.178125   9.363723  1430067490445
3 -2.765303  1.742979   9.216479  1430067490625
4 -1.693299 -0.641047  10.671555  1430067490805
Person: dennis
Person Data: 
           x         y         z           time
0 -4.590283 -8.205527  1.548450  1430066134259
1 -4.669292 -8.217499  2.177526  1430066134439
2 -4.509479 -8.155848  2.326566  1430066134620
3 -4.317344 -8.166023  2.051831  1430066134800
4 -4.507683 -8.309077  2.343325  1430066134980
Person: ryan
Person Data: 
           x         y         z           time
0 -4.590283 -8.205527  1.548450  1430066134259
1 -4.669292 -8.217499  2.177526  1430066134439
2 -4.509479 -8.155848  2.326566  1430066134620
3 -4.317344 -8.166023  2.051831  1430066134800
4 -4.507683 -8.309077  2.343325  1430066134980

Activity: downstairs
Person: meg
Person Data

In [66]:
# Display `obs`, the variable left bound by the feature-extraction loop; its contents
# are whatever (activity, person) pair that loop happened to process last.
obs

[array([[  1.        ,  61.05083768],
        [  1.        ,  46.13096416],
        [  1.        ,  47.11071957],
        [  1.        ,  47.01652024],
        [  1.        ,  47.17293157],
        [  1.        ,  47.32934291],
        [  1.        ,  47.48575424],
        [  1.        ,  47.64216557],
        [  1.        ,  47.61568643],
        [  1.        ,  50.70559203]]), array([[  1.        ,  46.13096416],
        [  1.        ,  47.11071957],
        [  1.        ,  47.01652024],
        [  1.        ,  47.17293157],
        [  1.        ,  47.32934291],
        [  1.        ,  47.48575424],
        [  1.        ,  47.64216557],
        [  1.        ,  47.61568643],
        [  1.        ,  50.70559203],
        [  1.        ,  44.92325057]]), array([[  1.        ,  47.11071957],
        [  1.        ,  47.01652024],
        [  1.        ,  47.17293157],
        [  1.        ,  47.32934291],
        [  1.        ,  47.48575424],
        [  1.        ,  47.64216557],
        [ 

Here is the matrix that represents the features extracted for a sequence of windows.  Let's call this matrix $X$

In [54]:
# Display the features extracted for meg's downstairs recording
feature_dict['downstairs']['meg']

[array([[   0.33333333,  112.70093003],
        [   1.        ,   68.1630001 ],
        [   1.        ,   53.40477261],
        [   1.        ,   90.05741769],
        [   1.        ,   77.3861749 ],
        [   1.        ,   77.91277932],
        [   1.        ,   36.48374081],
        [   1.        ,   44.91747423],
        [   1.        ,   45.01799548],
        [   1.        ,   45.11851673]]), array([[  1.        ,  68.1630001 ],
        [  1.        ,  53.40477261],
        [  1.        ,  90.05741769],
        [  1.        ,  77.3861749 ],
        [  1.        ,  77.91277932],
        [  1.        ,  36.48374081],
        [  1.        ,  44.91747423],
        [  1.        ,  45.01799548],
        [  1.        ,  45.11851673],
        [  1.        ,  45.21903799]]), array([[  1.        ,  53.40477261],
        [  1.        ,  90.05741769],
        [  1.        ,  77.3861749 ],
        [  1.        ,  77.91277932],
        [  1.        ,  36.48374081],
        [  1.        ,  44.9

Check out the ```learn``` function in ```pipeline.py```: the real magic takes place there.  After training on the features, it returns a dictionary of hidden Markov models (```hidden_markov_models```), one per activity.  Each of these four models can give us a (log-)likelihood that a new sequence belongs to the activity it models.  The model with the maximum likelihood tells us which activity is happening.

In [55]:
# Train on the extracted features; the result is indexed by activity name in the
# scoring cells below (hidden_markov_models[activity].score(...)).
hidden_markov_models = learn(feature_dict)

Plug different activity and person strings into the feature dict to score how likely it is that a sequence, for a particular activity and user, is represented by each activity model.  Right now, we are training and testing on the same dataset.

In [65]:
actual_activity = 'upstairs'
actual_person = 'ryan'

correct = 0
max_win_set = len(feature_dict[actual_activity][actual_person])

# Score the models for all sets of windows
for i in range(max_win_set):
    
    max_val = -np.inf
    max_act = "none"

    # For each activity
    for activity in acts:
        #print activity
    
        predicted_score = hidden_markov_models[activity].score(feature_dict[actual_activity][actual_person][i])
        if predicted_score >= max_val:
            max_val = predicted_score
            max_act = activity
        #print("The predicted activity is: %s"%max_act)
        #print("The actual activity is: %s"%actual_activity)

    if str(max_act) == str(actual_activity): 
        correct += 1
        print i
        print "The model predicted the right activity!"

print "The model predicted %d out of %d activities correctly" %(correct,max_win_set)

29
The model predicted the right activity!
30
The model predicted the right activity!
31
The model predicted the right activity!
32
The model predicted the right activity!
33
The model predicted the right activity!
34
The model predicted the right activity!
35
The model predicted the right activity!
36
The model predicted the right activity!
37
The model predicted the right activity!
38
The model predicted the right activity!
39
The model predicted the right activity!
40
The model predicted the right activity!
41
The model predicted the right activity!
42
The model predicted the right activity!
43
The model predicted the right activity!
44
The model predicted the right activity!
45
The model predicted the right activity!
46
The model predicted the right activity!
47
The model predicted the right activity!
48
The model predicted the right activity!
49
The model predicted the right activity!
50
The model predicted the right activity!
51
The model predicted the right activity!
52
The mode

Now that we've trained our model and verified that it works on the same dataset, we'll plug in new, unseen data.

In [2]:
# Reload recordings using the same nested layout: data_dict[activity][person] -> DataFrame
# NOTE(review): only 'dennis' is loaded here, and 'dennis' is also one of the names used
# to build the training features earlier in this notebook -- confirm which people were
# meant to supply the unseen data.
data_dict = {'walking':{},'jogging':{},'upstairs':{},'downstairs':{}}
names = ['dennis']
acts = ['walking', 'jogging', 'upstairs', 'downstairs']
for name in names:
    for activity in acts:
        data_dict[activity][name] = pd.read_csv('data/{}_{}_long.csv'.format(name, activity))

NameError: name 'pd' is not defined

In [3]:
feature_dict = {}
for activity, activity_data_dict in data_dict.iteritems():
    print "Activity: {}".format(activity)
    feature_dict[activity] = {}
    for person, person_data in activity_data_dict.iteritems():
        print "Person: {}".format(person)
        print "Person Data: \n {}".format(person_data.head())
        
        a_norm = preprocess(person_data)
        obs = extract_features_with_sliding_window(a_norm, n_windows=10)
        feature_dict[activity][person] = obs
    print

Activity: walking

Activity: downstairs

Activity: jogging

Activity: upstairs



Now, plug different activity and person strings into the feature dict to score how likely it is that a sequence, for a particular unseen activity and user, is represented by each activity model.

In [1]:
actual_activity = 'jogging'
actual_person = 'dennis'

max_val = predicted_score
max_act = "none"

for activity in acts:
    print activity
    predicted_score = hidden_markov_models[activity].score(feature_dict[actual_activity][actual_person][3])
    print predicted_score
    if predicted_score >= max_val:
        max_val = predicted_score
        max_act = activity

NameError: name 'predicted_score' is not defined

In [None]:
print("The predicted activity is: %s"%max_act)
print("The actual activity is: %s"%actual_activity)

if str(max_act) == str(actual_activity): 
    print "The model predicted the right activity!"