In [1]:
def get_samples(data_points, points_per_sample):
    """Split ``data_points`` into consecutive, non-overlapping slices.

    Args:
        data_points: Any object supporting ``len()`` and slice indexing
            (a list, a string, a pandas DataFrame, ...).
        points_per_sample: Step between slice starts and maximum length
            of each slice.

    Returns:
        A list of slices in their original order. The last slice is
        shorter when ``len(data_points)`` is not a multiple of
        ``points_per_sample``; an empty input gives an empty list.
    """
    samples = []
    total_points = len(data_points)
    for start in range(0, total_points, points_per_sample):
        stop = start + points_per_sample
        samples.append(data_points[start:stop])
    return samples

In [2]:
import csv
import math
import pandas as pd
import matplotlib.pyplot as plt

# Number of consecutive CSV rows grouped into one sample.
POINTS_PER_SAMPLE = 1000

# Each activity name is also the stem of its data file ("<activity>.csv").
activities = ["sitting", "walking", "walking_holding_phone", "stairs", "car"]

activity_samples = {}    # Mapping from activity to list of 10 second data frames
for activity in activities:
    activity_samples[activity] = get_samples(pd.read_csv(activity + '.csv'), POINTS_PER_SAMPLE)
    # A single pre-formatted argument prints the same text under Python 2
    # and Python 3 (a bare `print a, b` statement is a Python 3 syntax error).
    print("Found %d samples for %s" % (len(activity_samples[activity]), activity))

Found 30 samples for sitting
Found 30 samples for walking
Found 30 samples for walking_holding_phone
Found 30 samples for stairs
Found 30 samples for car


In [3]:
# Sanity check: per-column means of the first 'walking' sample.
# Call form of print works under both Python 2 and Python 3.
print(activity_samples['walking'][0].mean())

timestamp          331235.095000
attitude_roll           1.228910
attitude_pitch         -1.301880
attitude_yaw            0.128596
rotation_rate_x        -0.002250
rotation_rate_y        -0.124872
rotation_rate_z        -0.115039
gravity_x               0.118031
gravity_y               0.952927
gravity_z               0.006355
user_acc_x              0.023014
user_acc_y              0.024710
user_acc_z              0.083059
dtype: float64


In [4]:
import numpy.fft as fft
import numpy as np

ACCELERATION_FIELDS = ['user_acc_x', 'user_acc_y', 'user_acc_z']
ATTITUDE_FIELDS = ['attitude_roll', 'attitude_pitch', 'attitude_yaw']
ROTATION_RATE_FIELDS = ['rotation_rate_x', 'rotation_rate_y', 'rotation_rate_z']
GRAVITY_FIELDS = ['gravity_x', 'gravity_y', 'gravity_z']

# Columns that contribute features, in the order they appear in each feature row.
FIELDS = ACCELERATION_FIELDS + ATTITUDE_FIELDS + ROTATION_RATE_FIELDS + GRAVITY_FIELDS

# The first TRAIN_SAMPLES_PER_ACTIVITY samples of every activity go to the
# training split; whatever remains goes to the test split.
TRAIN_SAMPLES_PER_ACTIVITY = 26

activity_features = {}    # Mapping from activity to list of samples' features
for activity in activities:
    activity_features[activity] = []
    for sample in activity_samples[activity]:
        # Compute the per-column statistics once per sample instead of once
        # per field; the values looked up below are unchanged.
        sample_means = sample.mean()
        sample_variances = sample.var()

        # Feature row layout: mean of every field, then variance of every field.
        sample_features = [sample_means[field] for field in FIELDS]
        sample_features += [sample_variances[field] for field in FIELDS]
        # Frequency-domain features, currently disabled:
        #sample_features += [np.argmax(np.abs(fft.fft(sample[field]))) for field in FIELDS]
        #sample_features += [max(np.abs(fft.fft(sample[field]))) for field in FIELDS]
        activity_features[activity].append(sample_features)

# Build features and labels together so every row of X / X_train / X_test is
# guaranteed a matching entry in y / y_train / y_test, regardless of how many
# samples each activity produced. A label is the activity's index in
# `activities`.
X, y = [], []
X_train, y_train = [], []
X_test, y_test = [], []
for label, activity in enumerate(activities):
    all_rows = activity_features[activity]
    train_rows = all_rows[:TRAIN_SAMPLES_PER_ACTIVITY]
    test_rows = all_rows[TRAIN_SAMPLES_PER_ACTIVITY:]

    X += all_rows
    y += [label] * len(all_rows)

    X_train += train_rows
    y_train += [label] * len(train_rows)

    X_test += test_rows
    y_test += [label] * len(test_rows)

# Cheap invariants: a mismatch here would silently corrupt model evaluation.
assert len(X) == len(y)
assert len(X_train) == len(y_train)
assert len(X_test) == len(y_test)


In [5]:
# `sklearn.cross_validation` was deprecated in scikit-learn 0.18 and removed
# in 0.20; `sklearn.model_selection` provides `cross_val_score` under the same
# name. Binding it to `cross_validation` keeps existing references working.
try:
    from sklearn import model_selection as cross_validation
except ImportError:  # scikit-learn < 0.18
    from sklearn import cross_validation

# Each classifier is scored with cross_val_score's default cross-validation
# on the full data set (X, y); the printed value is mean accuracy +/- two
# standard deviations across folds.

# SVM
from sklearn import svm
svmClassifier = svm.SVC()
svmScores = cross_validation.cross_val_score(svmClassifier, X, y)
print("SVM: %0.2f (+/- %0.2f)" % (svmScores.mean(), svmScores.std() * 2))

# Decision Tree
from sklearn.tree import DecisionTreeClassifier
dtClassifier = DecisionTreeClassifier()
dtScores = cross_validation.cross_val_score(dtClassifier, X, y)
print("Decision Tree: %0.2f (+/- %0.2f)" % (dtScores.mean(), dtScores.std() * 2))

# Logistic Regression
from sklearn.linear_model import LogisticRegression
lrClassifier = LogisticRegression()
lrScores = cross_validation.cross_val_score(lrClassifier, X, y)
# Label fixed: this line previously reported the logistic regression score
# as "Decision Tree".
print("Logistic Regression: %0.2f (+/- %0.2f)" % (lrScores.mean(), lrScores.std() * 2))

SVM: 0.75 (+/- 0.07)
Decision Tree: 0.99 (+/- 0.02)
Decision Tree: 0.81 (+/- 0.08)
