<a href="https://colab.research.google.com/github/hoodedapollo/AaRP_assignement/blob/master/fromSensorsDataframeToModelEvaluation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Obtain Sensor Dataframe

## Read Sensor Dataframe form File

In [43]:
!pip install -U -q PyDrive

from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# 1. Authenticate and create the PyDrive client.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

!mkdir /content/dataframes

download = drive.CreateFile({'id': '1_ioXezgNztjZyGzLownSh4ATQh6YCMW4'}) # https://drive.google.com/file/d/1_ioXezgNztjZyGzLownSh4ATQh6YCMW4/view?usp=sharing
download.GetContentFile('/content/dataframes/IMUSsensors.csv')

import pandas as pd
import numpy as np

IMUsensorsDataFrame = pd.read_csv('/content/dataframes/IMUSsensors.csv', header = [0,1], index_col = [0,1,2])

mkdir: cannot create directory ‘/content/dataframes’: File exists


## Fill NaNs with zeros

In [0]:
IMUsensorsDataFrame = IMUsensorsDataFrame.fillna(0)

## Build one sensorDataframe for each sensor (IMU)

* 6 columns for data (accX, accY, accZ, gyroX, gyroY, gyroZ) 
* 4 columns for labels (locomotion, low level left arm, low level right arm, medium level both arms = gestures)

In [0]:
def singleSensorDataframe(multipleSensorsDataframe, sensorName):
  return IMUsensorsDataFrame[[sensorName,'labels']]

backImuDataframe =  singleSensorDataframe(IMUsensorsDataFrame, 'backImu')

# Useful Classes

##Find single label sequences


In [0]:
class MultiLabelSequence(object):
  def __init__(self,sequence):
    self.sequence = sequence
    self.indecesWhereSequenceChanges = None
    self.rangesWhereSequencesChanges = None
    self.indeces = None
    self.labels = None
    self.slices = None
    
    self.findIndecesWhereSequenceChanges()
    self.findRangesWhereSequencesChanges()

  
  def findIndecesWhereSequenceChanges(self):
    sequenceChange = np.array(self.sequence[:-1]) != np.array(self.sequence[1:])
    sequenceChange = np.append(sequenceChange, True)
    self.indecesWhereSequenceChanges = np.array(range(len(sequenceChange)))[sequenceChange]

  
  def findRangesWhereSequencesChanges(self):
    self.rangesWhereSequencesChanges = [0] + list(self.indecesWhereSequenceChanges + 1)

  
  def  getLabelsAndRangesLists(self):
    numberOfDifferentSequences = len(self.rangesWhereSequencesChanges) - 1
    
    self.ranges = []
    self.labels = [] 
    for i in range(numberOfDifferentSequences):
      if self.sequence[self.rangesWhereSequencesChanges[i]] != 0:
        self.ranges.append([self.rangesWhereSequencesChanges[i], self.rangesWhereSequencesChanges[i+1]])
        self.labels.append(self.sequence[self.rangesWhereSequencesChanges[i]])
    
    return self.ranges,  self.labels
    
  
  def getRangesWithLabel(self, label):
    numberOfDifferentSequences = len(self.rangesWhereSequencesChanges) - 1
    
    ranges = []
    for i in range(numberOfDifferentSequences):
      if self.sequence[self.rangesWhereSequencesChanges[i]] == label:
        ranges.append([self.rangesWhereSequencesChanges[i], self.rangesWhereSequencesChanges[i+1]])
        
    return ranges  
    
  
  def getLabelsAndSlicesLists(self):
    numberOfDifferentSequences = len(self.rangesWhereSequencesChanges) - 1
    
    self.slices = []
    self.labels = [] 
    for i in range(numberOfDifferentSequences):
      if self.sequence[self.rangesWhereSequencesChanges[i]] != 0:
        self.slices.append(slice(self.rangesWhereSequencesChanges[i], self.rangesWhereSequencesChanges[i+1]))
        self.labels.append(self.sequence[self.rangesWhereSequencesChanges[i]])
    
    return self.slices,  self.labels
    
  
  def getSlicesWithLabel(self, label):
    numberOfDifferentSequences = len(self.rangesWhereSequencesChanges) - 1
    
    slices = []
    for i in range(numberOfDifferentSequences):
      if self.sequence[self.rangesWhereSequencesChanges[i]] == label:
        slices.append(slice(self.rangesWhereSequencesChanges[i], self.rangesWhereSequencesChanges[i+1]))
        
    return slices
  

## Timeseries Class

In [0]:
import matplotlib.pyplot as plt

class Timeseries(object):
  def __init__(self, dataframe, tsSlices):
    self.items = []
    for tsSlice in tsSlices:
      self.items.append(dataframe.iloc[tsSlice,:6])
  
  def getConcatenatedDf(self):
    return pd.concat(self.items)
  
  def getTimeseries(self):
    return self.items
  
  def plotWithIndexAndChannels(self, tsIndex, channelColumns = None):
    sensorValues = self.items[tsIndex].values
    time = range(len(sensorValues))
    
    if not channelColumns: 
      channelColumns = range(sensorValues.shape[-1])
       
    for i in channelColumns:
      plt.plot(time,  sensorValues[:,i], label='sensor column: {0}'.format(i))
    
    plt.title('Timeseries')
    plt.xlabel('Sample index')
    plt.ylabel('Sensordata values')
    plt.legend()

    plt.show()
    

## From TimeSeries Instance To Train Tensor

In [0]:
class TensorNNready(object):
  def __init__(self, timeseries, lookback = 40, delay = 1):
    self.timeseries = timeseries
    self.lookback = lookback
    self.delay = delay
  
  def setTimeseries(self, timeseries):
    self.timeseries = timeseries
  
  def setLookback(self, lookback):
    self.lookback = lookback
  
  def setDelay(self, delay):
    self.delay = delay

  def samplesTargets(self, multivariateTimeseries, min_index=0, step=1):
      max_index = len(multivariateTimeseries) - self.delay - 1 # -1 is because indexes start at 0
      currentTimestep = min_index + self.lookback

      rows = np.arange(currentTimestep, max_index)

      samples = np.zeros((len(rows), self.lookback // step, multivariateTimeseries.shape[-1]))   # (samples, timesteps, features)
      targets = np.zeros((len(rows), multivariateTimeseries.shape[-1]))

      for j, row in enumerate(rows):
        indices = range(rows[j] - self.lookback, rows[j], step)   # rows starts at i (see line 12), which is at minimum i = min_index + lookback (see line 5) 
                                                               # and at each loop it is increased by the length of the batches or the remaining 
                                                               # length to the max_index (see line13)  
        samples[j] = multivariateTimeseries[indices]                           # data[indices] is all the timesteps starting lookback number of steps back from current timestep j to current time step j (sampled at step frquency)
        targets[j] = multivariateTimeseries[rows[j] + self.delay]  

      return samples, targets 


  def allSamplesTargets(self, min_index=0, step=1):
      for i, self.ts in enumerate(self.timeseries.items):
        if i == 0:
          samples, targets = self.samplesTargets(self.ts.values, min_index, step)
        else:
          partial_samples, partial_targets = self.samplesTargets(self.ts.values, min_index, step)
          samples = np.concatenate((samples, partial_samples), axis = 0)  
          targets = np.concatenate((targets, partial_targets), axis = 0)  

      return samples, targets 

## define and train the NN model

In [0]:
import keras

from keras.models import Model
from keras import layers
from keras import Input

from abc import ABC, abstractmethod

# abstarct class
class NNModel(ABC):
  def __init__(self, trainSamples = None, trainTargets = None):
    self.trainSamples = trainSamples
    self.trainTargets = trainTargets
  
  def defineModel(self): 
      pass

  def setFilepath(self, modelFilepath):
    self.callbacks_list = [
        keras.callbacks.EarlyStopping(
          monitor='val_loss',
          patience=3,
        ),
        keras.callbacks.ModelCheckpoint(
          filepath=modelFilepath,
          monitor='val_loss',
          save_best_only=True,
         )
     ]

  def compileModel(self, modelOptimizer = 'adam', modelLoss = 'mse'):
    self.model.compile(optimizer=modelOptimizer, loss=modelLoss)

  def fitModel(self, modelEpochs = 30, modelBatchSize = 128 ):
    self.history = self.model.fit( self.trainSamples,   self.trainTargets,
                                shuffle=True,
                                epochs=modelEpochs,
                                batch_size=modelBatchSize,
                                callbacks=self.callbacks_list,
                                validation_split=0.2)
  
  def loadFromFilepath(self, modelFilepath):
    self.model = keras.models.load_model(modelFilepath)

    
    
class baseLSTMModel(NNModel): 
  
  # overriding abstract method 
  def defineModel(self, modelRecurrentDropout = 0.5): 
      input_tensor = Input(shape=(None, self.trainSamples.shape[-1]))
      x = layers.LSTM(32, recurrent_dropout=modelRecurrentDropout)(input_tensor)
      output_tensor = layers.Dense(self.trainSamples.shape[-1])(x)

      self.model = Model(input_tensor, output_tensor)
      self.model.summary()

## generate samples and targets given sensor dataframe, activity category and name

In [0]:
import pandas as pd
import time

class SamplesAndTargetsGenerator(object):
  def __init__(self, sensorDf):
    self.sensorDf = sensorDf
   
    self.activityDict = {  
      'locomotion' : {
          'nullActivity' : 0,
          'stand'        : 1,
          'walk'         : 2,
          'sit'          : 3,
          'lie'          : 4,
      },
    
      'llRightArm' : {
          'nullActivity' : 0,
          'unlock'       : 401,
          'stir'         : 402,
          'lock'         : 403,
          'close'        : 404,
          'reach'        : 405,
          'open'         : 406,
          'sip'          : 407,
          'clean'        : 408,
          'bite'         : 409,
          'cut'          : 410,
          'spread'       : 411,
          'release'      : 412,
          'move'         : 413,
      },
    }
    
    self.lookback = 40
    self.delay = 1
  
    self.trainDf = None
    self.testDf = None
    
    self.normTrainDf = None
    self.normTestDf = None
    
    self.trainMultiLabelSequence = None
    
  def trainTestSplitDataframe(self):
    # The testing dataset is composed of data from subjects 2 and 3 (ADL4, ADL5).
    idx = pd.IndexSlice

    trainDf1 = self.sensorDf.loc[idx['S1':'S4', ('ADL1','ADL2','ADL3','Drill')], :]
    trainDf2 = self.sensorDf.loc[idx[('S1','S4'), ('ADL4','ADL5')], :]
    self.trainDf = pd.concat([trainDf1,trainDf2])
    
    self.testDf = self.sensorDf.loc[idx[('S2','S3'), ('ADL4','ADL5')], :]
  
  def normalizeTrainDataframe(self):
    normDf = self.trainDf.iloc[:,:6]   # select only the sensor columns
    normDf = (normDf-normDf.min())/(normDf.max()-normDf.min())
    self.normTrainDf = pd.concat([normDf, self.trainDf.iloc[:,-4:]], axis = 1)   # concatenate the label columns


  def normalizeTestDataframe(self):
    teDf = self.testDf.iloc[:,:6]   # select only the sensor columns
    trDf = self.trainDf.iloc[:,:6]
    normDf = (teDf-trDf.min())/(trDf.max()-trDf.min())   
    self.normTestDf = pd.concat([normDf, self.testDf.iloc[:,-4:]], axis = 1)   # concatenate the label columns

  def getTrainSamplesAndTargets(self, activityCategory, activityName):
    #0. set activity catgeory category 
    trainMultiLabelSequence = MultiLabelSequence(self.normTrainDf['labels',activityCategory].values)
    
    #1.find single label sequence
    activityId = self.activityDict[activityCategory][activityName]
    trainSlices = trainMultiLabelSequence.getSlicesWithLabel(activityId)
  
    #2.create time series
    trainTimeseries = Timeseries(self.normTrainDf, trainSlices)
    
    #3.from timeseries instance to train tensor
    trainTensorNNready = TensorNNready(trainTimeseries, lookback = self.lookback, delay = self.delay)    
    start = time.time()
    trainSamples, trainTargets = trainTensorNNready.allSamplesTargets()
    end = time.time()
    print("Time required to compute samples and target tensors from timeseries: {0}".format(end - start))
    
    return trainSamples, trainTargets
  
  def getTestSamplesAndTargets(self, activityCategory, activityName):
    #0. set activity catgeory category 
    testMultiLabelSequence = MultiLabelSequence(self.normTestDf['labels',activityCategory].values)
    
    #1.find single label sequence
    activityId = self.activityDict[activityCategory][activityName]
    testSlices = testMultiLabelSequence.getSlicesWithLabel(activityId)
    
    #2.create time series
    testTimeseries = Timeseries(self.normTestDf, testSlices)
    
    #3.from timeseries instance to train tensor
    testTensorNNready = TensorNNready(testTimeseries, lookback = self.lookback, delay = self.delay)    
    start = time.time()
    testSamples, testTargets = testTensorNNready.allSamplesTargets()
    end = time.time()
    print("Time required to compute samples and target tensors from timeseries: {0}".format(end - start))
    
    return testSamples, testTargets

# Main

## generate samples and targets

In [52]:
backImuGenerator = SamplesAndTargetsGenerator(backImuDataframe)
backImuGenerator.trainTestSplitDataframe()

backImuGenerator.normalizeTrainDataframe()
trainWalkBackImuSamples, trainWalkBackImuTargets = backImuGenerator.getTrainSamplesAndTargets('locomotion', 'walk')

backImuGenerator.normalizeTestDataframe()
testWalkBackImuSamples, testWalkBackImuTargets = backImuGenerator.getTestSamplesAndTargets('locomotion', 'walk')

Time required to compute samples and target tensors from timeseries: 56.65997815132141
Time required to compute samples and target tensors from timeseries: 1.3107879161834717


## Train Specific Model Using Generated Samples and Targets

In [53]:
trainSamples = trainWalkBackImuSamples
trainTargets = trainWalkBackImuTargets
modelFilepath = 'my_model_walk.h5'
modelRecurrentDropout = 0.5

# create and train the model
walkModel = baseLSTMModel(trainSamples = trainSamples,  trainTargets = trainTargets)
walkModel.defineModel()
walkModel.setFilepath(modelFilepath)
walkModel.compileModel()
walkModel.fitModel(modelEpochs = 2)

# load and continue training the model
walkModel.loadFromFilepath(modelFilepath)
walkModel.fitModel(modelEpochs = 2)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, None, 6)           0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 32)                4992      
_________________________________________________________________
dense_2 (Dense)              (None, 6)                 198       
Total params: 5,190
Trainable params: 5,190
Non-trainable params: 0
_________________________________________________________________
Train on 87212 samples, validate on 21804 samples
Epoch 1/2
Epoch 2/2
Train on 87212 samples, validate on 21804 samples
Epoch 1/2
Epoch 2/2


## Evaluate Model

In [54]:
walkModel.model.evaluate(testWalkBackImuSamples, testWalkBackImuTargets)



0.0010348577193664988