# Activity Classification

In [1]:
import numpy as np
import pandas as pd
import sklearn as skl
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

## 1. Build classifier
- **Load data samples**

In [2]:
data = pd.read_csv('../server/data/sampleXYZ.csv')

print("Shape of Data: " + str(data.shape))
data[:5]

Shape of Data: (4500, 21)


Unnamed: 0,Time stamp,X,Y,Z,Activity,X.1,Y.1,Z.1,Activity .1,X.2,...,Z.2,Activity .2,X.3,Y.3,Z.3,Activity .3,X.4,Y.4,Z.4,Activity .4
0,0.0,-1.44,6.97,6.67,1,-5.46,7.24,3.51,2,4.88,...,1.77,3,8.64,-12.19,4.34,4,14.13,-1.27,7.43,5
1,0.02,-1.5,6.9,6.62,1,-5.52,7.34,3.48,2,5.12,...,1.83,3,11.32,-12.78,7.61,4,19.61,1.19,7.88,5
2,0.04,-1.5,6.9,6.61,1,-5.42,7.3,3.73,2,5.34,...,2.15,3,9.85,-11.01,9.9,4,18.61,-2.27,9.45,5
3,0.06,-1.5,6.77,6.84,1,-5.43,7.4,3.4,2,5.95,...,2.32,3,7.23,-11.42,8.59,4,12.25,-3.78,8.55,5
4,0.08,-1.47,7.11,6.91,1,-5.43,7.36,3.39,2,6.52,...,2.6,3,5.44,-10.62,4.74,4,8.67,-2.3,5.67,5


- **Reshape data formats**

In [3]:
act1 = data[:][['X', 'Y', 'Z', 'Activity ']].rename(columns = { 'X':'X', 'Y':'Y', 'Z':'Z', 'Activity ':'Activity'})
act2 = data[:][['X.1', 'Y.1', 'Z.1', 'Activity .1']].rename(columns = { 'X.1':'X', 'Y.1':'Y', 'Z.1':'Z', 'Activity .1':'Activity'})
act3 = data[:][['X.2', 'Y.2', 'Z.2', 'Activity .2']].rename(columns = { 'X.2':'X', 'Y.2':'Y', 'Z.2':'Z', 'Activity .2':'Activity'})
act4 = data[:][['X.3', 'Y.3', 'Z.3', 'Activity .3']].rename(columns = { 'X.3':'X', 'Y.3':'Y', 'Z.3':'Z', 'Activity .3':'Activity'})
act5 = data[:][['X.4', 'Y.4', 'Z.4', 'Activity .4']].rename(columns = { 'X.4':'X', 'Y.4':'Y', 'Z.4':'Z', 'Activity .4':'Activity'})

reshaped = pd.concat([act1, act2, act3, act4, act5], axis = 0)

print("Reshaped data size: " + str(reshaped.shape))
reshaped[:5]

Reshaped data size: (22500, 4)


Unnamed: 0,X,Y,Z,Activity
0,-1.44,6.97,6.67,1
1,-1.5,6.9,6.62,1
2,-1.5,6.9,6.61,1
3,-1.5,6.77,6.84,1
4,-1.47,7.11,6.91,1


- **Normalize data**

In [4]:
X = reshaped[['X', 'Y', 'Z']].values
Y = reshaped[['Activity']].values
mean = np.mean(X, axis=0).reshape(1,3)
std = np.std(X, axis=0).reshape(1,3)
X = (X - mean)/std

print(X.shape, Y.shape)

(22500, 3) (22500, 1)


- **Split data set into training set and test set**

In [5]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.20)

- **Train SVM classifier**

In [6]:
clf = svm.SVC(kernel = 'rbf', gamma = 0.7)
clf.fit(X_train, Y_train)

  y = column_or_1d(y, warn=True)


SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma=0.7, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

- **Test model performance**

In [7]:
Y_train_pred = clf.predict(X_train)
Y_test_pred = clf.predict(X_test)
train_error = 1 - accuracy_score(Y_train, Y_train_pred)
test_error = 1 - accuracy_score(Y_test, Y_test_pred)

print("Training Set Error Rate: " + str(train_error))
print("Test Set Error Rate: " + str(test_error))

Training Set Error Rate: 0.0724444444444
Test Set Error Rate: 0.0693333333333


## 2. Predict Experiment Data
- **Load experiment data**

In [8]:
samples = pd.read_csv('../server/data/features_walking.csv')

print("Shape of Data: " + str(samples.shape))
samples[:5]

Shape of Data: (14000, 8)


Unnamed: 0,IR,RED,heart_rate,respiration_rate,SPO2,X,Y,Z
0,118885,100363,92.307692,17.647059,80.151701,-0.088072,0.007006,0.956777
1,118913,100314,92.307692,17.647059,80.151701,-0.045037,0.039032,0.933759
2,118642,100268,92.307692,17.647059,80.151701,-0.032026,0.096078,0.831676
3,117757,100050,92.307692,17.647059,80.151701,0.005004,0.08607,0.82467
4,117224,99614,92.307692,17.647059,80.151701,0.006005,0.106086,0.837681


- **Reshape and Normalize data**

In [9]:
X_exp = samples[:][['X','Y','Z']].values
mean = np.mean(X_exp, axis=0).reshape(1,3)
std = np.std(X_exp, axis=0).reshape(1,3)
X_exp = (X_exp - mean)/std
Y_exp = np.ones((X_exp.shape[0], 1)) * 3

print("Shape of input: " + str(X_exp.shape))
print(X_exp)
print("Shape of output: " + str(Y_exp.shape))
print(Y_exp)

Shape of input: (14000, 3)
[[-0.45777321  0.01377021 -0.37108304]
 [-0.03798944  0.30395092 -0.48184501]
 [ 0.08892602  0.82083247 -0.97306553]
 ..., 
 [-0.13561221  0.78456215  0.5391207 ]
 [-0.58467891  1.04753332  0.50540802]
 [-0.49682037  1.21076337  1.32892409]]
Shape of output: (14000, 1)
[[ 3.]
 [ 3.]
 [ 3.]
 ..., 
 [ 3.]
 [ 3.]
 [ 3.]]


- **Calculate experiment error rate**

In [10]:
Y_exp_pred = clf.predict(X_exp)
exp_error = 1 - accuracy_score(Y_exp, Y_exp_pred)

print("Experiment Error Rate: " + str(exp_error))

Experiment Error Rate: 0.960571428571


- **Output Predictions**

In [11]:
np.savetxt("../server/data/predictions.csv", Y_exp_pred, delimiter=",")