In [1]:
from __future__ import print_function
from matplotlib import pyplot as plt
%matplotlib inline
import numpy as np
import tensorflow
import glob
import os
import pandas as pd
import seaborn as sns
from scipy import stats
from IPython.display import display, HTML

from sklearn import metrics
from sklearn.metrics import classification_report
from sklearn import preprocessing

import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Reshape
from keras.layers import Conv2D, MaxPooling2D
from keras.utils import np_utils

Using TensorFlow backend.


In [2]:
# Set some standard parameters upfront
pd.options.display.float_format = '{:.1f}'.format
sns.set() # Default seaborn look and feel
plt.style.use('ggplot')
print('keras version ', keras.__version__)
# Same labels will be reused throughout the program
LABELS = ['[1]Sitting-on-Bed',
          '[2]Sitting-on-Chair',
          '[3]Lying-Down',
          '[4]Ambulating']
# The number of steps within one time segment
TIME_PERIODS = 
# The steps to take from one segment to the next; if this value is equal to
# TIME_PERIODS, then there is no overlap between the segments
STEP_DISTANCE = 1

SyntaxError: invalid syntax (<ipython-input-2-f0ba86add063>, line 12)

In [None]:
def read_data(file_path):
    
    pd.options.display.float_format = '{:,.4f}'.format

    column_names = ['experiment-id',
                    'time',
                    'frontal-acceleration',
                    'vertical-acceleration',
                    'lateral-acceleration',
                    'antenna-id',
                    'RSSI',
                    'phase',
                    'frequency',
                    'label']
    
    df = pd.read_csv(file_path,
                     header=None,
                     names=column_names)

    # This is very important otherwise the model will not fit and loss
    # will show up as NAN
    df.dropna(axis=0, how='any', inplace=True)

    return df

def convert_to_float(x):

    try:
        return np.float(x)
    except:
        return np.nan
 
def show_basic_dataframe_info(dataframe):

    # Shape and how many rows and columns
    print('Number of columns in the dataframe: %i' % (dataframe.shape[1]))
    print('Number of rows in the dataframe: %i\n' % (dataframe.shape[0]))

In [None]:
# Load data set containing all the data

df = read_data('/Users/Mariana/Desktop/test_file.txt')

#Reading multiple files not working **--**
#file_path = '/Users/Mariana/Desktop/S1_Dataset/'
#read_files = glob.glob(os.path.join(file_path, '*.txt'))
#np_array_values = []

#for files in read_files:

#    df = pd.read_csv(files, header=None, names=column_names)
    
#    df.dropna(axis=0, how='any', inplace=True)

#    np_array_values.append(df)


In [None]:
# Describe the data
show_basic_dataframe_info(df)
df.head(20)


In [None]:
df['label'].value_counts().plot(kind='bar',
                                   title='Training Examples by Activity Type')

plt.show()

for i in LABELS:
    print(i)

# Split Data into Training and Test Set


In [None]:
# Differentiate between test set and training set
df_test = df[df['experiment-id'] > 50]
df_train = df[df['experiment-id'] <= 50]

## Normalize Training Data

In [None]:
# Normalize features for training data set (values between 0 and 1)
# Surpress warning for next 6 operation
pd.options.mode.chained_assignment = None  # default='warn'
df_train['frontal-acceleration'] = df_train['frontal-acceleration'] / df_train['frontal-acceleration'].max()
df_train['vertical-acceleration'] = df_train['vertical-acceleration'] / df_train['vertical-acceleration'].max()
df_train['lateral-acceleration'] = df_train['lateral-acceleration'] / df_train['lateral-acceleration'].max()
df_train['lateral-acceleration'] = df_train['lateral-acceleration'] / df_train['lateral-acceleration'].max()
df_train['RSSI'] = df_train['RSSI'] / df_train['RSSI'].max()
df_train['phase'] = df_train['phase'] / df_train['phase'].max()
df_train['frequency'] = df_train['frequency'] / df_train['frequency'].max()

# Round numbers
df_train = df_train.round({'frontal-acceleration': 4,
                           'vertical-acceleration': 4,
                           'lateral-acceleration': 4,
                           'RSSI': 4,
                           'phase': 4,
                           'frequency': 4})

## Reshape Data into Segments and Prepare for Keras


In [None]:
def create_segments_and_labels(df, time_steps, step, label_name):

    # x, y, z acceleration as features
    N_FEATURES = 6
    # Number of steps to advance in each iteration (for me, it should always
    # be equal to the time_steps in order to have no overlap between segments)
    # step = time_steps
    segments = []
    labels = []
    for i in range(0, len(df) - time_steps, step):
        fa = df['frontal-acceleration'].values[i: i + time_steps]
        va = df['vertical-acceleration'].values[i: i + time_steps]
        la = df['lateral-acceleration'].values[i: i + time_steps]
        rs = df['RSSI'].values[i: i + time_steps]
        ph = df['phase'].values[i: i + time_steps]
        fr = df['frequency'].values[i: i + time_steps]

        # Retrieve the most often used label in this segment
        label = stats.mode(df[label_name][i: i + time_steps])[0][0]
        segments.append([fa, va, la, rs, ph, fr])
        labels.append(label)

    # Bring the segments into a better shape
    reshaped_segments = np.asarray(segments, dtype= np.float32).reshape(-1, time_steps, N_FEATURES)
    labels = np.asarray(labels)

    return reshaped_segments, labels

LABEL = 'label'
x_train, y_train = create_segments_and_labels(df_train,
                                              TIME_PERIODS,
                                              STEP_DISTANCE,
                                              LABEL)

In [None]:
print('x_train shape: ', x_train.shape)
print(x_train.shape[0], 'training samples')
print('y_train shape: ', y_train.shape)

In [None]:
# Set input & output dimensions
num_time_periods = x_train.shape[1]
num_sensors = x_train.shape[2]
num_classes = 5

In [None]:
input_shape = (num_time_periods*num_sensors)
x_train = x_train.reshape(x_train.shape[0], input_shape)
print('x_train shape:', x_train.shape)
print('input_shape:', input_shape)

In [None]:
x_train = x_train.astype('float32')
y_train = y_train.astype('float32')

In [None]:
y_train_hot = np_utils.to_categorical(y_train, num_classes)
print('New y_train shape: ', y_train_hot.shape)

## Create Deep Neural Network Model in Keras

In [None]:
model_m = Sequential()
# Remark: since coreml cannot accept vector shapes of complex shape like
# [80,3] this workaround is used in order to reshape the vector internally
# prior feeding it into the network

model_m.add(Reshape((TIME_PERIODS, 3), 
                    input_shape=(input_shape,)))
model_m.add(Dense(100, activation='relu'))
model_m.add(Dense(100, activation='relu'))
model_m.add(Dense(100, activation='relu'))
model_m.add(Flatten())
model_m.add(Dense(num_classes, activation='softmax'))
print(model_m.summary())