In [18]:
import mlflow
import pandas as pd
import numpy as np

import argparse
import tensorflow as tf
from tensorflow.keras.layers import Dense, Flatten, Conv2D
from tensorflow.keras import Model
from tensorflow.keras.datasets.mnist import load_data
from ray import tune
import os

# Shorthand for pandas MultiIndex slicing (not referenced in the cells shown here).
idx = pd.IndexSlice



# Handling Data

In [19]:
# INPUTS
# Root folder of the raw WISDM files. Set the WISDM_DATA_PATH environment
# variable to point at a local copy; the previous hard-coded location is kept
# as the fallback so existing setups keep working unchanged.
DATA_PATH = os.environ.get(
    'WISDM_DATA_PATH',
    '/Users/camilovelasquez/Desktop/Documents/Datasets/WISDM-Smartphones/wisdm-dataset/raw',
)
ids = np.arange(1600, 1650)  # ids whose files are loaded (1600..1649)
devices = ['phone']
sensors = ['accel']
activities = ['A', 'B']      # activity labels kept by the dataset filter
time_taken = 3000            # rows kept per (ID, Activity Label) group
time_split = 100             # rows per window fed to the model

TRAIN_BATCH_SIZE = 8
EVAL_BATCH_SIZE = 16
EPOCHS = 10

# Number of windows assigned to each split.
train_size = 2500
valid_size = 220
test_size = 220

In [20]:
def read_WISDM_data(DATA_PATH, ids=np.arange(1600, 1650),
                    devices=['phone'], sensors=['accel']):
    """Read raw WISDM text files under DATA_PATH into one pandas table.

    One file is read for every (id, device, sensor) combination, located at
    ``<DATA_PATH>/<device>/<sensor>/data_<id>_<sensor>_<device>.txt``.

    Args:
        DATA_PATH: root folder that contains the per-device sub-folders.
        ids: iterable of ids used in the file names.
        devices: iterable of device folder names.
        sensors: iterable of sensor folder names.

    Returns:
        DataFrame with columns ``ID``, ``Activity Label``, ``Timestamp``,
        ``x``, ``y``, ``z`` where ``z`` is float32. Each file keeps the row
        index it was read with, so index labels repeat across files.

    Raises:
        ValueError: if the id/device/sensor combination selects no file.
    """
    column_names = ['ID', 'Activity Label', 'Timestamp', 'x', 'y', 'z']
    frames = []
    for current_id in ids:
        for current_device in devices:
            for current_sensor in sensors:
                file_path = os.path.join(
                    DATA_PATH, current_device, current_sensor,
                    'data_{}_{}_{}.txt'.format(current_id, current_sensor, current_device))
                frames.append(pd.read_csv(file_path, delimiter=',',
                                          names=column_names,
                                          lineterminator='\n'))
    if not frames:
        raise ValueError('No files selected: ids, devices and sensors must all be non-empty')

    # Concatenate once at the end; concatenating inside the loop copies the
    # accumulated table on every iteration.
    table = pd.concat(frames, axis=0)

    # Strip the ';' from the last field before converting it to a number.
    # `astype(str)` keeps this working when the column was already parsed as
    # numeric. Plain column assignment replaces the column so the float32 dtype
    # sticks (an in-place `.loc[:, 'z'] = ...` may keep the old object dtype).
    z_clean = table['z'].astype(str).str.replace(';', '', regex=False)
    table['z'] = z_clean.astype(np.float32).to_numpy()
    return table

def transform_data(table, time_taken, time_split):
    """Cut the raw table into fixed-length windows of (features, labels).

    Rows are grouped by (``ID``, ``Activity Label``). From each group at most
    the first ``time_taken`` rows are used, rounded down to a whole number of
    ``time_split``-row windows, so a window never mixes rows from two groups.
    Groups are emitted in order of first appearance and rows keep their
    original order inside a group. ``table`` itself is not modified.

    Args:
        table: DataFrame with ``ID`` and ``Activity Label`` columns; every
            other column except ``Timestamp`` is used as a feature.
        time_taken: maximum number of rows used per group.
        time_split: number of rows per window; must be positive.

    Returns:
        Tuple ``(features, labels)`` where ``features`` has shape
        (samples, time_split, n_feature_columns) and ``labels`` has shape
        (samples,) holding the ``Activity Label`` of each window.

    Raises:
        ValueError: if ``time_split`` is not positive.
    """
    if time_split <= 0:
        raise ValueError('time_split must be a positive number of rows')

    keys = ['ID', 'Activity Label']
    feature_columns = [name for name in table.columns
                       if name not in keys and name != 'Timestamp']

    grouped = table.groupby(keys, sort=False)
    group_id = grouped.ngroup().to_numpy()    # group number, first-seen order
    position = grouped.cumcount().to_numpy()  # row position inside its group

    # Rows whose key is missing get group number -1 and are left out.
    valid = group_id >= 0
    group_sizes = np.bincount(group_id[valid])
    # Rows usable per group: capped at time_taken, rounded down to whole windows.
    usable = np.minimum(group_sizes, time_taken) // time_split * time_split

    keep = valid.copy()
    keep[valid] = position[valid] < usable[group_id[valid]]

    # Stable sort makes every group contiguous without reordering its rows.
    rows = np.flatnonzero(keep)
    rows = rows[np.argsort(group_id[rows], kind='stable')]

    n_windows = len(rows) // time_split
    values = table[feature_columns].to_numpy()[rows]
    features = values.reshape((n_windows, time_split, len(feature_columns)))
    # Every row of a window shares one label, so take the first row of each.
    labels = table['Activity Label'].to_numpy()[rows][::time_split]
    return features, labels

def preprocessing_data(table, time_taken=3000, time_split=100, activities=['A', 'B']):
    """Window the raw table and wrap it as a tf.data.Dataset of selected activities.

    The table is windowed with ``transform_data``; each element of the result
    is a pair ``({'feature': window}, {'label': target})``. Only windows whose
    label equals one of ``activities`` are kept, and the string label is then
    converted to an integer target by ``label2prob``.
    """
    windows, window_labels = transform_data(table, time_taken=time_taken, time_split=time_split)
    all_windows = tf.data.Dataset.from_tensor_slices(
        ({'feature': windows}, {'label': window_labels}))

    def is_selected(_, target):
        # True when the window's label matches any entry of `activities`.
        return tf.reduce_any(tf.equal(target['label'], activities))

    return all_windows.filter(is_selected).map(label2prob)

def label2prob(feature, label):
    """Replace the string activity label with an integer target: 1 for 'A', else 0.

    The ``label`` dict is updated in place and returned together with the
    untouched ``feature``.
    """
    is_positive = tf.equal(label['label'], 'A')
    label['label'] = tf.where(is_positive, 1, 0)
    return feature, label

In [21]:
# Load the raw files, then window them and keep only the selected activities.
table = read_WISDM_data(
    DATA_PATH,
    ids=ids,
    devices=devices,
    sensors=sensors,
)
ds = preprocessing_data(
    table,
    time_taken=time_taken,
    time_split=time_split,
    activities=activities,
)

In [22]:
# Count the elements of `ds` by folding a running counter over it.
ds.reduce(0, lambda count, _: count + 1)

<tf.Tensor: id=12259, shape=(), dtype=int32, numpy=2940>

# Creating Model

In [23]:
def build_model(time_split, n_features=3, kernel_sizes=(15, 31, 63), filters=32):
    """Build the multi-scale 1-D convolutional binary classifier.

    The input is batch-normalised over the feature axis and fed to one
    Conv1D + global-max-pool branch per kernel size. The branch outputs are
    concatenated and passed through a 16-unit ReLU layer and a single sigmoid
    unit.

    Args:
        time_split: number of time steps per input window.
        n_features: number of channels per time step (default 3).
        kernel_sizes: Conv1D kernel size of each branch (default 15, 31, 63).
        filters: number of filters in every Conv1D branch (default 32).

    Returns:
        An uncompiled ``tf.keras.Model`` whose input is named ``'feature'``
        and whose output layer is named ``'label'``.

    Raises:
        ValueError: if ``kernel_sizes`` is empty or any kernel is longer than
            ``time_split`` (the convolution would produce no output steps).
    """
    kernel_sizes = tuple(kernel_sizes)
    if not kernel_sizes:
        raise ValueError('kernel_sizes must contain at least one kernel size')
    too_long = [size for size in kernel_sizes if size > time_split]
    if too_long:
        raise ValueError(
            'kernel sizes {} exceed time_split={}'.format(too_long, time_split))

    inputs = tf.keras.Input(shape=(time_split, n_features), name='feature')
    x = tf.keras.layers.BatchNormalization(axis=2)(inputs)

    branches = []
    for kernel_size in kernel_sizes:
        branch = tf.keras.layers.Conv1D(filters=filters, kernel_size=kernel_size)(x)
        branches.append(tf.keras.layers.GlobalMaxPool1D()(branch))

    # Concatenate needs at least two inputs; a single branch is used directly.
    x = branches[0] if len(branches) == 1 else tf.keras.layers.Concatenate()(branches)
    x = tf.keras.layers.Dense(16, activation='relu')(x)
    output = tf.keras.layers.Dense(1, activation='sigmoid', name='label')(x)
    model = tf.keras.Model(inputs=[inputs], outputs=output)
    return model

def build_optimizer():
    """Return the identifier of the optimizer used for training."""
    return 'adam'

def build_loss():
    """Return the identifier of the loss used for training."""
    return 'binary_crossentropy'

def build_metrics():
    """Return a fresh list with the metric identifiers tracked during training."""
    return ['accuracy']

def compile_model(model, optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']):
    """Compile ``model`` with the given optimizer, loss and metrics; return the same object."""
    compile_settings = {
        'optimizer': optimizer,
        'loss': loss,
        'metrics': metrics,
    }
    model.compile(**compile_settings)
    return model

def train_model(model, train_ds=None, valid_ds=None, **fit_kwargs):
    """Fit ``model`` on ``train_ds`` and return whatever ``model.fit`` returns.

    Args:
        model: compiled model exposing a Keras-style ``fit`` method.
        train_ds: training data passed as the first argument of ``fit``.
        valid_ds: optional validation data, passed as ``validation_data``.
        **fit_kwargs: extra keyword arguments forwarded to ``fit``
            (for example ``epochs`` or ``callbacks``).

    Returns:
        The return value of ``model.fit``.

    Raises:
        ValueError: if ``train_ds`` is not provided.
    """
    if train_ds is None:
        raise ValueError('train_ds is required to train the model')
    return model.fit(train_ds, validation_data=valid_ds, **fit_kwargs)
    

In [24]:
# Gather the training configuration, then build and compile the network.
optimizer = build_optimizer()
loss = build_loss()
metrics = build_metrics()
model = compile_model(
    model=build_model(time_split=time_split),
    optimizer=optimizer,
    loss=loss,
    metrics=metrics,
)

In [25]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
feature (InputLayer)            [(None, 100, 3)]     0                                            
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 100, 3)       12          feature[0][0]                    
__________________________________________________________________________________________________
conv1d_3 (Conv1D)               (None, 86, 32)       1472        batch_normalization_1[0][0]      
__________________________________________________________________________________________________
conv1d_4 (Conv1D)               (None, 70, 32)       3008        batch_normalization_1[0][0]      
____________________________________________________________________________________________

In [26]:
# Training split: the first `train_size` windows.
ds_train = ds.take(train_size)
# Windows arrive in file order, so neighbouring windows mostly share a label.
# The shuffle buffer has to span the whole split; a buffer the size of one
# batch only mixes immediate neighbours.
ds_train = ds_train.shuffle(buffer_size=train_size)
ds_train = ds_train.repeat(count=EPOCHS)
ds_train = ds_train.batch(TRAIN_BATCH_SIZE)
ds_train = ds_train.prefetch(1)

# Validation split: the next `valid_size` windows, batched with the
# evaluation batch size (batch size does not change the reported metrics).
ds_valid = ds.skip(train_size).take(valid_size)
ds_valid = ds_valid.batch(EVAL_BATCH_SIZE)
ds_valid = ds_valid.prefetch(1)

# Test split: everything after the training and validation windows.
ds_test = ds.skip(train_size + valid_size)
ds_test = ds_test.batch(EVAL_BATCH_SIZE)

In [27]:
# Stop when validation accuracy has not improved for EPOCHS // 2 epochs and
# roll back to the best weights. The validation metric is monitored because
# training accuracy keeps rising while the model overfits, so it is not a
# useful stopping signal.
es_callback = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy',
                                               patience=EPOCHS//2,
                                               mode='max',
                                               restore_best_weights=True)

In [28]:
# Number of full training batches in one pass over the training split.
train_size//TRAIN_BATCH_SIZE

312

In [29]:
# `ds_train` already repeats for EPOCHS passes, so `steps_per_epoch` marks
# where one epoch ends and the next begins.
callbacks = [es_callback]
history = model.fit(
    ds_train,
    epochs=EPOCHS,
    steps_per_epoch=train_size // TRAIN_BATCH_SIZE,
    validation_data=ds_valid,
    callbacks=callbacks,
    shuffle=False,
    verbose=1,
)

Train for 312 steps
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [31]:
# Evaluate the trained model on the batched test split.
test_result = model.evaluate(ds_test)

