## Actitracker labeled dataset NN model
This notebook uses Keras to predict activity from sessionized accelerometer data taken from the Actitracker (WISDM) dataset (http://www.cis.fordham.edu/wisdm/dataset.php). A logistic-regression baseline is trained first for comparison.

### Data Processing

In [1]:
import pandas as pd
import numpy as np
import scipy as sp
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.cross_validation import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import log_loss

#### Data load

In [3]:
DATA_FOLDER = 'Data/WISDM_ar_v1.1/'
DATA_FILE = 'raw_data_fixed.txt'

# Column layout assigned to the parsed file. The trailing 'NA' column is
# discarded immediately after loading.
RAW_COLUMNS = ['user', 'activity', 'timestamp', 'x-accel', 'y-accel', 'z-accel', 'NA']

# Records are ';'-terminated, fields are ','-separated, and there is no header row.
actitracker = pd.read_csv(DATA_FOLDER + DATA_FILE, sep=',', lineterminator=';', header=None)
actitracker.columns = RAW_COLUMNS
actitracker = actitracker.drop('NA', axis=1)

# The three accelerometer axes used as model inputs.
accel_cols = ['x-accel', 'y-accel', 'z-accel']

#### Sessionize 

In [4]:
# re-calculate time in seconds
# NOTE(review): assumes `timestamp` is in nanoseconds (per the WISDM readme) — confirm.
# The factor must be 1e-9; the literal 10e-9 is 1e-8 and is off by a factor of 10.
actitracker['time_seconds'] = actitracker['timestamp'] * 1e-9

# sort by user and time
actitracker = actitracker.sort_values(by=['user', 'time_seconds'])

# running row counter over the sorted frame
actitracker['seq'] = np.arange(actitracker.shape[0])

# A new session starts every SESSION_LENGTH rows of the sorted frame, so session
# ids are 1, 2, 3, ... and every session except possibly the last has exactly
# SESSION_LENGTH rows. Sessions are NOT reset when the user or activity changes,
# so a window can straddle such a boundary; downstream cells split those windows
# again by grouping on ['user', 'session'].
SESSION_LENGTH = 500
actitracker['session'] = (actitracker['seq'] % SESSION_LENGTH == 0).cumsum()

#### Gather labels

In [5]:
# Encoders: activity name -> integer code -> one-hot row
le = LabelEncoder()
ohe = OneHotEncoder(sparse=False)

def _session_label(activities):
    ''' label of one session: the largest
        activity value found in it
    '''
    return max(activities)

# one label per (user, session) group
labels = actitracker.groupby(['user', 'session'])['activity'].apply(_session_label)

# integer-encode, then one-hot encode; rows keep the (user, session) index
activity_codes = le.fit_transform(labels)
ohe_labels = ohe.fit_transform(activity_codes.reshape(-1, 1))
Y = pd.DataFrame(ohe_labels, index=labels.index)

#### Calculate features 

In [6]:
# Keep only the accelerometer axes plus the grouping keys,
# then group the rows per (user, session)
accel_cols = ['x-accel', 'y-accel', 'z-accel']
group_keys = ['user', 'session']
g = actitracker[accel_cols + group_keys].groupby(group_keys)

# Interquartile range helper
def iqr(x):
    ''' Interquartile range of an array:
        75th percentile minus 25th percentile
    '''
    lower, upper = np.percentile(x, [25, 75])
    return upper - lower

# calculate model cols
# A bare `import scipy` does not guarantee that the `stats` submodule is loaded,
# so import it explicitly instead of relying on `sp.stats`.
from scipy import stats

def _per_axis(func):
    ''' Apply func to each accelerometer axis of every session.
        Returns a list with one Series per axis, in accel_cols order.
    '''
    return [g[col].apply(func) for col in accel_cols]

def _percentile(q):
    ''' Build a function returning the q-th percentile of an array.
        A factory is used so each function keeps its own q.
    '''
    return lambda x: np.percentile(x, q)

means = g[accel_cols].apply(lambda x: np.mean(x))
sds = g[accel_cols].apply(lambda x: np.std(x))
medians = _per_axis(lambda x: np.median(x))
iqrs = _per_axis(lambda x: iqr(x))
mins = g[accel_cols].apply(lambda x: np.min(x))
maxs = g[accel_cols].apply(lambda x: np.max(x))
kurtoses = _per_axis(lambda x: stats.kurtosis(x))
skews = _per_axis(lambda x: stats.skew(x))

# 10th..90th percentiles, ordered by percentile first and axis second.
# NOTE(review): the 50th percentile repeats the median columns above; it is
# kept so the feature matrix layout stays the same.
percentiles = []
for q in range(10, 100, 10):
    percentiles.extend(_per_axis(_percentile(q)))

# concat columns (one row per (user, session) group)
X = pd.concat(
    [means, sds]
    + medians
    + iqrs
    + [mins, maxs]
    + kurtoses
    + skews
    + percentiles,
    axis=1)

# Standardise every feature column
# NOTE(review): the scaler is fitted on all rows before the train/test split
# that follows, so test rows influence the scaling statistics — confirm this
# is acceptable.
ss = StandardScaler()
X = ss.fit(X).transform(X)

#### Split test and train

In [7]:
# Hold out 33% of the sessions for testing; fixed seed for reproducibility.
# `.values` replaces the deprecated `DataFrame.as_matrix()` and yields the same ndarray.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y.values, test_size=0.33, random_state=22)

### Baseline LR Model

In [16]:
from sklearn.linear_model import LogisticRegression

def train_models(model_params):
    ''' Fit one binary logistic regression
        per column of Y_train

        model_params: keyword arguments passed to LogisticRegression
        returns: list of fitted models, in column order
    '''
    def fit_one(target):
        # fresh estimator per class, fitted on the shared training features
        clf = LogisticRegression(**model_params)
        clf.fit(X_train, target)
        return clf

    return [fit_one(Y_train[:, col]) for col in range(Y_train.shape[1])]

def make_predictions(models):
    ''' Score X_test with every per-class model

        models: fitted binary classifiers, one per column of Y_test
        returns: array shaped like Y_test whose column i holds the
                 positive-class probability from models[i]
    '''
    scores = np.zeros(Y_test.shape)
    for col, clf in enumerate(models):
        # predict_proba returns one column per class; column 1 is the positive class
        scores[:, col] = clf.predict_proba(X_test)[:, 1]
    return scores

#### Evaluate model params

In [20]:
# Regularisation strengths to sweep
c_values = [0.1, 1.0, 5.0, 10.0, 15.0, 20.0, 50.0, 100.0, 200.0, 500.0, 1000.0]
accuracies = []
log_losses = []

# true class index of each test row; does not depend on C
y_true = np.argmax(Y_test, axis=1)

for c in c_values:
    params = dict(C=c, max_iter=1000, tol=1e-8)
    models = train_models(params)
    predictions = make_predictions(models)

    # predicted class = column with the highest per-class probability
    accuracy = accuracy_score(y_true, np.argmax(predictions, axis=1))
    ll = log_loss(Y_test, predictions)

    accuracies.append(accuracy)
    log_losses.append(ll)

In [21]:
# One row per C value with its test accuracy and log-loss
pd.DataFrame(
    {'C': c_values, 'accuracy': accuracies, 'log_loss': log_losses},
    columns=['C', 'accuracy', 'log_loss'],
)

Unnamed: 0,C,accuracy,log_loss
0,0.1,0.765265,0.734195
1,1.0,0.797829,0.631007
2,5.0,0.814111,0.640054
3,10.0,0.815468,0.645282
4,15.0,0.815468,0.650433
5,20.0,0.812754,0.654876
6,50.0,0.818182,0.672025
7,100.0,0.820896,0.686932
8,200.0,0.819539,0.702896
9,500.0,0.816825,0.718873


#### Train final model

In [22]:
# Refit the per-class models at the selected regularisation strength.
# NOTE(review): in the sweep table C=15.0 is neither the best-accuracy row
# (C=100.0) nor the best log-loss row (C=1.0) — confirm the selection rationale.
params = dict(C=15.0, max_iter=1000, tol=1e-8)
models = train_models(params)

#### Test prediction accuracy

In [23]:
# Per-class positive probabilities on the test split from the final models
predictions = make_predictions(models)

In [24]:
# Report test metrics for the baseline. The parenthesised single-argument form
# prints the same text under Python 2 and also runs under Python 3.
print('Accuracy: {}'.format(accuracy_score(np.argmax(Y_test, axis=1), np.argmax(predictions, axis=1))))
print('Log-loss: {}'.format(log_loss(Y_test, predictions)))

Accuracy: 0.815468113976
Log-loss: 0.650432628271


### MLP Neural Network Model

In [3]:
from keras.models import Sequential
from keras.layers import Dense, Dropout

#### Compile model

In [33]:
# Layer sizes are taken from the arrays that are actually passed to fit(), so
# this cell does not depend on the current binding of `X` (the RNN section
# rebinds `X` to a 3-D tensor) or on a hard-coded class count.
indim = X_train.shape[1]
n_classes = Y_train.shape[1]

# Three wide ReLU layers, each followed by 50% dropout, then a softmax output
model = Sequential()
model.add(Dense(512*4, input_dim=indim, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(512*12, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(512*4, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(n_classes, activation='softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

#### Execute model

In [34]:
# Train the MLP on the training split: 20 passes over the data, mini-batches of 20 rows
model.fit(X_train, Y_train,
          nb_epoch=20,
          batch_size=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x11cfddd90>

In [35]:
# Class probabilities for the test split from the MLP
predictions = model.predict(X_test)

# The parenthesised single-argument form prints the same text under Python 2
# and also runs under Python 3.
print('Accuracy: {}'.format(accuracy_score(np.argmax(Y_test, axis=1), np.argmax(predictions, axis=1))))
print('Log-loss: {}'.format(log_loss(Y_test, predictions)))

Accuracy: 0.84803256445
Log-loss: 0.534963186299


### RNN

In [8]:
from keras.layers.core import Flatten
from keras.layers.recurrent import GRU

Using Theano backend.


In [18]:
# Number of complete 500-row windows, and the row count they cover
nnum = actitracker.shape[0] // 500
lim = nnum * 500
windowed = actitracker.iloc[:lim, :]

# Raw signal as (windows, timesteps, axes). The (lim, 3) matrix is row-major
# by time, so a plain C-order reshape puts 500 consecutive rows into each
# window. It must NOT be transposed first: reshaping the (3, lim) transpose
# would fill each window with 1500 consecutive values of a single axis.
X = windowed[['x-accel', 'y-accel', 'z-accel']].values.reshape((nnum, 500, 3))

# One label per session: the largest activity value inside it, one-hot encoded
Y = windowed.groupby(['session'])['activity'].apply(lambda x: max(x))
Y = ohe.fit_transform(le.fit_transform(Y).reshape(-1, 1))

# The windows in X and the session labels in Y must line up one-to-one
assert Y.shape[0] == nnum, 'expected one label per 500-row window'

In [None]:
# Three stacked GRU layers over the 500-step, 3-axis signal; the first two
# return full sequences so the next GRU receives one vector per timestep.
model = Sequential()
model.add(GRU(32, return_sequences=True, input_dim=3, input_length=500, dropout_W=.5, dropout_U=.5))
model.add(GRU(32, return_sequences=True, dropout_W=.5, dropout_U=.5))
model.add(GRU(32, dropout_W=.5, dropout_U=.5))
model.add(Dense(64, activation='relu'))
# categorical_crossentropy expects a probability distribution over the classes,
# so the output layer needs a softmax (as in the MLP above); a linear output
# gives a meaningless loss.
model.add(Dense(6, activation='softmax'))
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [None]:
# Shuffle the window indices with a fixed seed so the split is reproducible
# (22 matches the random_state used for the feature-based split).
rng = np.random.RandomState(22)
indices = rng.permutation(nnum)

# first two thirds for training, the remainder for testing
n_train = (nnum // 3) * 2
train, test = indices[:n_train], indices[n_train:]

In [None]:
# Train the GRU network on the training windows: 20 passes, mini-batches of 20 windows
model.fit(X[train], Y[train],
          nb_epoch=20,
          batch_size=20)

In [None]:
# Score the network on the same index split that was used for fitting.
# The arrays are selected with the `train` / `test` index vectors defined
# earlier; no separate training_data / test_data variables exist in this notebook.
training_scores = model.evaluate(X[train], Y[train])
test_scores = model.evaluate(X[test], Y[test])

# evaluate() returns [loss, accuracy]; report accuracy as a percentage
print("training: %s: %.2f%%" % (model.metrics_names[1], training_scores[1]*100))
print("test: %s: %.2f%%" % (model.metrics_names[1], test_scores[1]*100))