# Look at accelerometer data 

Finding Zero velocity times by rail axis acceleration noise levels, making summary statistics for the noise levels across the whole day files.  Spot check graphs to see what works

In [2]:
#Standard Header used on the projects

#first the major packages used for math and graphing
import numpy as np
import matplotlib.pyplot as plt
from cycler import cycler
import scipy.special as sp

#Custome graph format style sheet
#plt.style.use('Prospectus.mplstyle')

#If being run by a seperate file, use the seperate file's graph format and saving paramaeters
#otherwise set what is needed
if not 'Saving' in locals():
    Saving = False
if not 'Titles' in locals():
    Titles = True
if not 'Ledgends' in locals():
    Ledgends = True
if not 'FFormat' in locals():
    FFormat = '.png'

#Standard cycle to make black and white images and dashed and line styles
default_cycler = (cycler('color', ['0.00', '0.40', '0.60', '0.70']) + cycler(linestyle=['-', '-', '-', '-']))
plt.rc('axes', prop_cycle=default_cycler)
my_cmap = plt.get_cmap('gray')

In [3]:
#Extra Headers:
import os as os
import pywt as py
import statistics as st
import os as os
import random
import multiprocessing
from joblib import Parallel, delayed
import platform

from time import time as ti

In [4]:
import CoreFunctions as cf
from skimage.restoration import denoise_wavelet

## Choosing Platform
Working is beinging conducted on several computers, and author needs to be able to run code on all without rewriting..  This segment of determines which computer is being used, and sets the directories accordingly.

In [5]:
HostName = platform.node()

if HostName == "Server":
    Computer = "Desktop"   
elif HostName[-6:] == 'wm.edu':
    Computer = "SciClone"
elif HostName == "SchoolLaptop":
    Computer = "LinLap"
elif HostName == "WTC-TAB-512":
    Computer = "PortLap"
else:
    Computer = "WinLap"

In [6]:
if Computer == "SciClone":
    location = '/sciclone/home20/dchendrickson01/image/'
elif Computer == "WinLap":
    location = 'C:\\Data\\'
elif Computer == "Desktop":
    location = "E:\\Backups\\Dan\\CraneData\\"
elif Computer == "LinLap":
    location = '/home/dan/Output/'
elif Computer == 'PortLap':
    location = 'C:\\users\\dhendrickson\\Desktop\\AccelData\\'

In [7]:
if Computer ==  "SciClone":
    rootfolder = '/sciclone/home20/dchendrickson01/'
    folder = '/sciclone/scr10/dchendrickson01/Recordings2/'
    imageFolder = '/sciclone/scr10/dchendrickson01/Move3Dprint/'
elif Computer == "Desktop":
    rootfolder = location
    folder = rootfolder + "Recordings2\\"
elif Computer =="WinLap":
    rootfolder = location
    folder = rootfolder + "Recordings2\\"   
elif Computer == "LinLap":
    rootfolder = '/home/dan/Data/'
    folder = rootfolder + 'Recordings2/'
elif Computer =='PortLap':
    rootfolder = location 
    folder = rootfolder + 'Recordings2\\'

## Global Variables

In [8]:
Saving = False
location = folder
Titles = True
Ledgends = True

f = 0


In [9]:
files = ['230418 recording1.csv','230419 recording1.csv','230420 recording1.csv','230421 recording1.csv',
         '230418 recording2.csv','230419 recording2.csv','230420 recording2.csv','230421 recording2.csv']

In [10]:
BeforeTamping = ['221206 recording1.csv','221207 recording1.csv','221208 recording1.csv','221209 recording1.csv',
         '221206 recording2.csv','221207 recording2.csv','221208 recording2.csv','221209 recording2.csv']


## Project Specific Functions

In [11]:
def RollingStdDev(RawData, SmoothData, RollSize = 25):
    StdDevs = []
    for i in range(RollSize):
        Diffs = RawData[0:i+1]-SmoothData[0:i+1]
        Sqs = Diffs * Diffs
        Var = sum(Sqs) / (i+1)
        StdDev = np.sqrt(Var)
        StdDevs.append(StdDev)
    for i in range(len(RawData)-RollSize-1):
        j = i + RollSize
        Diffs = RawData[i:j]-SmoothData[i:j]
        Sqs = Diffs * Diffs
        Var = sum(Sqs) / RollSize
        StdDev = np.sqrt(Var)
        StdDevs.append(StdDev)  
    
    return StdDevs

def RollingSum(Data, Length = 100):
    RollSumStdDev = []
    for i in range(Length):
        RollSumStdDev.append(sum(Data[0:i+1]))
    for i in range(len(Data) - Length):
        RollSumStdDev.append(sum(Data[i:i+Length]))
    return RollSumStdDev

def SquelchPattern(DataSet, StallRange = 5000, SquelchLevel = 0.02):
    SquelchSignal = np.ones(len(DataSet))

    for i in range(len(DataSet)-2*StallRange):
        if np.average(DataSet[i:i+StallRange]) < SquelchLevel:
            SquelchSignal[i+StallRange]=0

    return SquelchSignal

def getVelocity(Acceleration, Timestamps = 0.003, Squelch = [], corrected = 0):
    velocity = np.zeros(len(Acceleration))
    
    Acceleration -= np.average(Acceleration)
    
    if len(Timestamps) == 1:
        dTime = np.ones(len(Acceleration),dtype=float) * Timestamps
    elif len(Timestamps) == len(Acceleration):
        dTime = np.zeros(len(Timestamps), dtype=float)
        dTime[0]=1
        for i in range(len(Timestamps)-1):
            j = i+1
            if Timestamps[j] > Timestamps[i]:
                dTime[j]=Timestamps[j]-Timestamps[i]
            else:
                dTime[j]=Timestamps[j]-Timestamps[i]+10000.0
        dTime /= 10000.0

    velocity[0] = Acceleration[0] * (dTime[0])

    for i in range(len(Acceleration)-1):
        j = i + 1
        if corrected ==2:
            if Squelch[j]==0:
                velocity[j]=0
            else:
                velocity[j] = velocity[i] + Acceleration[j] * dTime[j]                
        else:
            velocity[j] = velocity[i] + Acceleration[j] * dTime[j]

    if corrected == 1:
        PointVairance = velocity[-1:] / len(velocity)
        for i in range(len(velocity)):
            velocity[i] -=  PointVairance * i
    
    velocity *= 9.81

    return velocity

def MakeDTs(Seconds, Miliseconds):
    dts = np.zeros(len(Miliseconds), dtype=float)
    dts[0]=1
    for i in range(len(MiliSeconds)-1):
        j = i+1
        if Seconds[j]==Seconds[i]:
            dts[j]=Miliseconds[j]-Miliseconds[i]
        else:
            dts[j]=Miliseconds[j]-Miliseconds[i]+1000
    dts /= 10000
    return dts



In [12]:
def DeviationVelocity(file):
    if file[-3:] =='csv':
        ODataSet = np.genfromtxt(open(folder+file,'r'), delimiter=',',skip_header=0,missing_values=0,invalid_raise=False)
        SmoothX = denoise_wavelet(ODataSet[:,3], method='VisuShrink', mode='soft', wavelet_levels=3, wavelet='sym2', rescale_sigma='True')
        SmoothY = denoise_wavelet(ODataSet[:,4], method='VisuShrink', mode='soft', wavelet_levels=3, wavelet='sym2', rescale_sigma='True')
        SmoothZ = denoise_wavelet(ODataSet[:,5], method='VisuShrink', mode='soft', wavelet_levels=3, wavelet='sym2', rescale_sigma='True')
        SmoothX -= np.average(SmoothX)
        SmoothY -= np.average(SmoothY)
        SmoothZ -= np.average(SmoothZ)
        StdDevsX = RollingStdDev(ODataSet[:,3],SmoothX)
        StdDevsX.append(0)
        StdDevsX = np.asarray(StdDevsX)
        SmoothDevX = denoise_wavelet(StdDevsX, method='VisuShrink', mode='soft', wavelet_levels=3, wavelet='sym2', rescale_sigma='True')
        SquelchSignal = SquelchPattern(SmoothDevX, 2000, 0.03)
        #Velocity = getVelocity(ODataSet[:,3], ODataSet[:,2],SquelchSignal, 2)
        #Velocity = np.asarray(Velocity)
        MoveMatrix = np.matrix([ODataSet[:,3], ODataSet[:,4], ODataSet[:,5]])
        return [SquelchSignal,MoveMatrix,file[:-4],SmoothDevX]
    else:
        pass


In [13]:
def SepreateMovements(SquelchSignal, RawData, FileName):
    Moves= []
    MoveNames = []
    Move = np.zeros((1,3), dtype=float)
    i = 0
    for j in range(len(SquelchSignal)-1):
        if SquelchSignal[j] == 1:
            #try:
            Move = np.concatenate((Move, RawData[j,:]), axis=0)
            #except:
            #    print(j)
            if SquelchSignal[j+1] == 0:
                #Move = np.matrix(Move)
                Moves.append(Move)
                MoveNames.append(FileName + str(i).zfill(3))
                i+=1
                Move = np.zeros((1,3), dtype=float)
                #Move[0,2]=0
    Moves.append(Move)
    MoveNames.append(FileName + str(i).zfill(3))
    return Moves, MoveNames
    


In [14]:
def splitLong(Moves, maxLength = 4000, minLength = 1000, MoveNames = []):
    if len(MoveNames) <=1:
        MoveNames = ['null'  for x in range(len(Moves))]
    Xmoves = []
    Xnames = []
    for i in range(len(Moves)):
        if np.shape(move)[0] > maxLength: 
            Xmoves.append(Moves[i][:int(len(Moves[i])/2),:])
            Xnames.append(MoveNames[i] + 'a')
            Xmoves.append(Moves[i][int(len(Moves[i])/2):,:])
            Xnames.append(MoveNames[i] + 'b')
        else:
            if np.shape(Moves[i])[0] < minLength:
                pass
            else:
                Xmoves.append(Moves[i])
                Xnames.append(MoveNames[i])
    return Xmoves, Xnames

def findMaxLength(Moves):
    maxLength = 0
    LongMove = 0
    for i in range(len(Moves)):
        if np.shape(Moves[i])[0] > maxLength: 
            maxLength =  np.shape(Moves[i])[0]
            LongMove = i
    return maxLength, LongMove

def findMinLength(Moves):
    minLength = 9999999
    SmallMove = 0
    for i in range(len(Moves)):
        if np.shape(Moves[i])[0] < minLength: 
            minLength =  np.shape(Moves[i])[0]
            SmallMove = i
    return minLength, SmallMove



## Process Files

In [15]:
LoopFiles = 2
loops = int(len(files) / LoopFiles) 
if len(files)%LoopFiles != 0:
    loops += 1


In [16]:
SquelchSignal = []
RawData=[]
OrderedFileNames=[]


In [17]:

st = ti()

for k in range(loops):
    if k == loops -1:
        tfiles = files[k*LoopFiles:]
    else:
        tfiles = files[k*LoopFiles:(k+1)*LoopFiles]
    Results = Parallel(n_jobs=LoopFiles)(delayed(DeviationVelocity)(file) for file in tfiles)
    #Results =[]
    #for file in tfiles:
    #    Results.append(DeviationVelocity(file))
    #    print(file, (ti()-st)/60.0)
        
    for i in range(len(Results)):       
        SquelchSignal.append(Results[i][0])
        RawData.append(Results[i][1])
        OrderedFileNames.append(Results[i][2])
    print(k, np.shape(Results), (ti()-st)/60.0)
    



  result = asarray(a).shape


0 (2, 4) 5.155741878350576
1 (2, 4) 13.703637421131134
2 (2, 4) 16.9488076488177
3 (2, 4) 20.16348130305608


In [18]:
np.shape(SquelchSignal[0])

(18659125,)

In [19]:
#MoveData = Parallel(n_jobs=13)(delayed(SepreateMovements)(SquelchSignal[i], RawData[i].T, OrderedFileNames[i]) for i in range(len(OrderedFileNames)))

MoveData = []


In [20]:

for i in range(len(RawData)):
    MoveData.append(SepreateMovements(SquelchSignal[i], RawData[i].T, OrderedFileNames[i]))

In [21]:
Movements = []
GroupNames = []
for move in MoveData:
    Movements.append(move[0])
    GroupNames.append(move[1])



In [22]:
Moves=[]
for Groups in Movements:
    for Move in Groups:
        Moves.append(np.asarray(Move).astype('float32'))
#Moves = np.asarray(Moves)

MoveNames = []
for Groups in GroupNames:
    for name in Groups:
        MoveNames.append(name)

In [23]:

del SquelchSignal
del RawData
del Movements
del GroupNames
del MoveData
del OrderedFileNames


In [24]:
longMove, MoveNumb = findMaxLength(Moves)


In [25]:
minLength = 750

In [26]:
Moves, MoveNames = splitLong(Moves, longMove+1, minLength, MoveNames)


In [27]:
np.shape(Moves[0])

(2001, 3)

In [28]:
#padding moves.  Not needed, need sequences, not moves

#Moves2 = []
#for move in Moves:
#    if np.shape(move)[0] < longMove:
#        padding = np.zeros((longMove-np.shape(move)[0], 3))
#        tempMove = np.concatenate((move, padding), axis=0)
#        Moves2.append(tempMove)
#    else:
#        Moves2.append(move)
#Moves = Moves2

#del Moves2

## Try LSTM Stuff

https://machinelearningmastery.com/how-to-develop-lstm-models-for-time-series-forecasting/

In [29]:
TimeSteps = 100
Features = np.shape(Moves[0])[1]

In [30]:
# split a multivariate sequence into samples
def split_sequences(sequences, n_steps):
    X, y = list(), list()
    for i in range(len(sequences)):
        # find the end of this pattern
        end_ix = i + n_steps
        # check if we are beyond the dataset
        if end_ix > len(sequences)-1:
            break
        # gather input and output parts of the pattern
        seq_x, seq_y = sequences[i:end_ix, :], sequences[end_ix, :]
        X.append(seq_x)
        y.append(seq_y)
    return np.array(X), np.array(y)

In [31]:
Sequences = []
Outputs = []
for move in Moves:
    Seq, Out = split_sequences(move,TimeSteps)
    Sequences.append(Seq)
    Outputs.append(Out)
    

In [32]:
MoveSegments = []
for seq in Sequences:
    for mv in seq:
        MoveSegments.append(mv)
NextDataPoint = []
for out in Outputs:
    for pt in out:
        NextDataPoint.append(np.reshape(pt,(1,3)))

# now the ML bit

In [33]:
from keras import Input
from keras.models import load_model, Model
from sklearn.model_selection import train_test_split

In [34]:
from keras.layers import LSTM, Dense, RepeatVector, TimeDistributed, Masking
from keras.models import Sequential
from attention import Attention
import tensorflow as tf


In [35]:
BatchSize = 32
BatchesPerLoop = 500
TestHoldOut = 0.05
MinorEpochs = 2
MajorEpochs = 5

In [36]:
seq_train, seq_test, out_train, out_test = train_test_split(MoveSegments, NextDataPoint, test_size=TestHoldOut, shuffle=True, random_state=0)

In [37]:
'''
model_input = Input(shape=(TimeSteps, Features))
x = LSTM(64,return_sequences=True)(model_input)
x = Attention(units=32)(x)
x = RepeatVector(TimeSteps)(x)

x = LSTM(TimeSteps, activation='relu', return_sequences=True)(x)
x = Attention(units=32)(x)
#x = LSTM(64, activation='relu', return_sequences=True)(x)


x = TimeDistributed(Dense(Features))(x)
'''

"\nmodel_input = Input(shape=(TimeSteps, Features))\nx = LSTM(64,return_sequences=True)(model_input)\nx = Attention(units=32)(x)\nx = RepeatVector(TimeSteps)(x)\n\nx = LSTM(TimeSteps, activation='relu', return_sequences=True)(x)\nx = Attention(units=32)(x)\n#x = LSTM(64, activation='relu', return_sequences=True)(x)\n\n\nx = TimeDistributed(Dense(Features))(x)\n"

In [38]:
model_input = Input(shape=(TimeSteps, Features))
x = LSTM(32,return_sequences=True, activation = 'relu')(model_input)
x = LSTM(6, activation='relu')(x)

x =RepeatVector(TimeSteps)(x)

x = LSTM(TimeSteps, activation='relu', return_sequences=True)(x)
x = LSTM(32, activation='relu', return_sequences=True)(x)

x = TimeDistributed(Dense(Features))(x)

In [39]:
model = Model(model_input, x)
model.compile(loss='mse', optimizer = 'adam')
model.summary

<bound method Model.summary of <keras.engine.functional.Functional object at 0x00000205D8697F70>>

In [40]:
InputsPerBatch = BatchSize * BatchesPerLoop 
LoopsNeeded = int(len(seq_train)/InputsPerBatch)

if len(seq_train) % InputsPerBatch != 0:
    LoopsNeeded += 1

In [49]:
for j in range(MajorEpochs):
    for i in range(LoopsNeeded):
        start = i * InputsPerBatch
        data = seq_train[start:start+InputsPerBatch]
        model.fit(seq_train[start:start+InputsPerBatch], seq_train[start:start+InputsPerBatch], epochs=MinorEpochs, batch_size=BatchSize)

Epoch 1/2


TypeError: 'NoneType' object is not callable

In [None]:
model.save("LSTM_batchedLearning")

In [None]:
output_X = model.predict(seq_test)
reconstruction = np.squeeze(output_X)
scores = np.linalg.norm(seq_test - reconstruction, axis=-1)

In [None]:
threshold=0.95

sorted_scores = sorted(scores)
threshold_score = sorted_scores[round(len(scores) * threshold)]

plt.title("Reconstruction Error")
plt.plot(scores)
plt.plot([threshold_score]*len(scores), c='r')
plt.show()

anomalous = np.where(scores > threshold_score)
normal = np.where(scores <= threshold_score)

plt.title("Anomalies")
plt.scatter(normal, seq_test[normal][:,-1], s=3)
plt.scatter(anomalous, seq_test[anomalous][:,-1], s=5, c='r')
plt.show()