# Look at accelerometer data 

Find zero-velocity periods from the noise level of the rail-axis acceleration, compute summary statistics of those noise levels across the whole-day files, and spot-check graphs to see what works.

In [1]:
#Standard Header used on the projects

#first the major packages used for math and graphing
import numpy as np
import matplotlib.pyplot as plt
from cycler import cycler
import scipy.special as sp

#Custome graph format style sheet
#plt.style.use('Prospectus.mplstyle')

#If this notebook is being run by a separate file, keep that file's graph format and saving parameters;
#otherwise set the defaults that are needed
if 'Saving' not in locals():
    Saving = False
if 'Titles' not in locals():
    Titles = True
if 'Ledgends' not in locals():
    Ledgends = True
if 'FFormat' not in locals():
    FFormat = '.png'

#Standard cycle for black and white images: four gray levels, every line style solid
default_cycler = (
    cycler(color=['0.00', '0.40', '0.60', '0.70'])
    + cycler(linestyle=['-', '-', '-', '-'])
)
plt.rc('axes', prop_cycle=default_cycler)
my_cmap = plt.get_cmap('gray')

In [2]:
#Extra Headers:
import os as os
import pywt as py
import statistics as st
import os as os
import random
import multiprocessing
from joblib import Parallel, delayed
import platform

from time import time as ti

In [3]:
import CoreFunctions as cf
from skimage.restoration import denoise_wavelet

## Choosing Platform
Work is being conducted on several computers, and the author needs to be able to run the code on all of them without rewriting it. This segment determines which computer is being used and sets the directories accordingly.

In [4]:
# Translate this machine's network name into the short label used to choose directories below.
HostName = platform.node()

if HostName.endswith('wm.edu'):
    # any host whose name ends in wm.edu is treated as the SciClone cluster
    Computer = "SciClone"
else:
    # exact host names; anything unrecognised falls back to "WinLap"
    Computer = {
        "Server": "Desktop",
        "SchoolLaptop": "LinLap",
        "WTC-TAB-512": "PortLap",
    }.get(HostName, "WinLap")

In [5]:
# Base data/output directory for each known machine label; `location` is left
# untouched when Computer holds any other value.
if Computer in ("SciClone", "WinLap", "Desktop", "LinLap", "PortLap"):
    location = {
        "SciClone": '/sciclone/home20/dchendrickson01/image/',
        "WinLap": 'C:\\Data\\',
        "Desktop": "E:\\Backups\\Dan\\CraneData\\",
        "LinLap": '/home/dan/Output/',
        "PortLap": 'C:\\users\\dhendrickson\\Desktop\\AccelData\\',
    }[Computer]

In [6]:
# rootfolder: project root on this machine; folder: where the recording CSVs are read from.
if Computer == "SciClone":
    rootfolder = '/sciclone/home/dchendrickson01/'
    folder = '/sciclone/scr10/dchendrickson01/Recordings2/'
    # imageFolder is only assigned on SciClone
    imageFolder = '/sciclone/scr10/dchendrickson01/Move3Dprint/'
elif Computer in ("Desktop", "WinLap", "PortLap"):
    # the Windows machines keep the recordings directly under `location`
    rootfolder = location
    folder = rootfolder + "Recordings2\\"
elif Computer == "LinLap":
    rootfolder = '/home/dan/Data/'
    folder = rootfolder + 'Recordings2/'

## Global Variables

In [8]:
# Notebook-wide switches; these assignments replace any value set by the header cell.
Saving, Titles, Ledgends = False, True, True
# from here on `location` points at the recordings folder
location = folder



In [None]:
#small set of files.  Files are 4 days of before and after tamping on 2 cranes
#Order: first block of days for recording1 then recording2, then the second block likewise
files = [
    day + ' recording' + str(recorder) + '.csv'
    for days in (('221206', '221207', '221208', '221209'),
                 ('230418', '230419', '230420', '230421'))
    for recorder in (1, 2)
    for day in days
]

## Make Moves

Functions needed to make this work

In [None]:
def RollingStdDev(RawData, SmoothData, RollSize = 25):
    """Rolling root-mean-square of the residual ``RawData - SmoothData``.

    RawData and SmoothData must support slicing and element-wise subtraction
    (e.g. numpy arrays of equal length).

    The first ``RollSize`` values use a growing window that starts at sample 0
    (1, 2, ... RollSize samples). They are followed by
    ``len(RawData) - RollSize - 1`` values from full windows of ``RollSize``
    samples starting at 0, 1, 2, ...  For inputs longer than ``RollSize`` the
    returned list therefore holds ``len(RawData) - 1`` values.
    """
    def _window_rms(start, stop, count):
        # sqrt of the mean squared residual over samples [start, stop)
        residual = RawData[start:stop] - SmoothData[start:stop]
        return np.sqrt(sum(residual * residual) / count)

    # warm-up: window grows from 1 sample to RollSize samples
    warm_up = [_window_rms(0, stop, stop) for stop in range(1, RollSize + 1)]
    # steady state: fixed-width windows sliding one sample at a time
    sliding = [_window_rms(start, start + RollSize, RollSize)
               for start in range(len(RawData) - RollSize - 1)]
    return warm_up + sliding

def RollingSum(Data, Length = 100):
    """Rolling sum of Data.

    The first ``Length`` entries are cumulative sums over a window growing from
    sample 0; the remaining ``len(Data) - Length`` entries are sums over
    ``Length`` consecutive samples starting at 0, 1, 2, ...
    """
    growing = [sum(Data[:stop]) for stop in range(1, Length + 1)]
    sliding = [sum(Data[start:start + Length]) for start in range(len(Data) - Length)]
    return growing + sliding

def SquelchPattern(DataSet, StallRange = 5000, SquelchLevel = 0.0086):
    """Build a 0/1 signal marking where DataSet is, on average, below SquelchLevel.

    The result starts with ``StallRange`` ones and ends with ``StallRange + 1``
    ones. In between there is one entry per window of ``2 * StallRange``
    consecutive samples: 0 when the window's average is below SquelchLevel,
    otherwise 1.  For inputs of at least ``2 * StallRange`` samples the result
    is one element longer than DataSet.
    """
    span = 2 * StallRange
    interior = [
        0 if np.average(DataSet[begin:begin + span]) < SquelchLevel else 1
        for begin in range(len(DataSet) - span)
    ]
    return [1] * StallRange + interior + [1] * (StallRange + 1)

def getVelocity(Acceleration, Timestamps = 0.003, Squelch = [], corrected = 0):
    """Integrate an acceleration trace into a velocity trace (running sum times the time step).

    Parameters
    ----------
    Acceleration : sequence of float
        Acceleration samples. The integrated result is multiplied by 9.81 before
        it is returned (NOTE(review): presumably input in g, output in m/s -- confirm).
    Timestamps : float or sequence of float
        Either one value (a scalar or a one-element sequence) used as the step, or
        one timestamp per sample, in which case the step is the summed increments
        divided by the number of timestamps. A non-increasing pair of timestamps is
        treated as a counter wrap and 10000 is added to that increment. In every case
        the step is then divided by 10000.0.
        NOTE(review): the scalar default 0.003 is divided by 10000.0 as well -- confirm
        that this is the intended unit.
    Squelch : sequence of 0/1, optional
        Only used when ``corrected == 2``; 0 marks samples where velocity is forced to
        zero. Empty (the default, never mutated here) means all ones.
    corrected : int
        0: plain integration. 1: subtract a linear ramp with slope
        ``velocity[-1] / len(velocity)`` from the whole trace. 2: reset to zero wherever
        Squelch is 0 and linearly detrend each move that is followed by a 0.

    Returns
    -------
    numpy.ndarray with one velocity value per acceleration sample.

    Raises
    ------
    ValueError
        If Timestamps is neither a single value nor one value per sample.
    """
    accel = np.asarray(Acceleration, dtype=float)
    n_samples = len(accel)
    velocity = np.zeros(n_samples)
    if n_samples == 0:
        return velocity

    # len() works for both lists and arrays; `Squelch == []` is an element-wise
    # comparison (not a truth value) when an ndarray is passed
    if Squelch is None or len(Squelch) == 0:
        Squelch = np.ones(n_samples)

    stamps = np.atleast_1d(np.asarray(Timestamps, dtype=float))
    if stamps.size == 1:
        dTime = stamps[0]
    elif stamps.size == n_samples:
        steps = np.diff(stamps)
        # a timestamp that fails to increase is treated as a wrap of the counter
        steps[steps <= 0] += 10000
        dTime = steps.sum() / stamps.size
    else:
        raise ValueError(
            'Timestamps must be a single value or have one entry per acceleration sample '
            '(got %d timestamps for %d samples)' % (stamps.size, n_samples))

    dTime = dTime / 10000.0

    if corrected == 2:
        velocity[0] = accel[0] * dTime
        n_squelch = len(Squelch)
        MoveStart = 0
        for j in range(1, n_samples):
            if Squelch[j] == 0:
                # stationary: pin the velocity and remember where the next move starts
                velocity[j] = 0
                MoveStart = j
                continue
            velocity[j] = velocity[j-1] + accel[j] * dTime
            # last moving sample before a stop: remove the accumulated drift linearly
            if j + 1 < n_squelch and Squelch[j+1] == 0:
                PointVairance = velocity[j] / (j - MoveStart)
                # NOTE(review): as in the original loop, the ramp covers samples
                # MoveStart .. j-1 only, so velocity[j] itself keeps its uncorrected value
                velocity[MoveStart:j] -= PointVairance * np.arange(j - MoveStart)
    else:
        # sequential running sum, same accumulation order as a sample-by-sample loop
        velocity = np.cumsum(accel * dTime)

    if corrected == 1:
        # scalar slope (velocity[-1], not the one-element slice velocity[-1:])
        PointVairance = velocity[-1] / n_samples
        velocity -= PointVairance * np.arange(n_samples)

    velocity *= 9.81

    return velocity



In [None]:
#Smooth = cf.Smoothing(ODataSet[:,3],2) #,50)
def DeviationVelocity(file):
    """Load one recording CSV and derive noise levels, a squelch signal and a velocity trace.

    NOTE: a second ``DeviationVelocity`` is defined in a later cell of this notebook and
    replaces this one when the cells are run top to bottom.

    Columns 3, 4 and 5 of the file are wavelet-denoised (called X, Y and Z here).
    The rolling deviation between raw and denoised data is computed per axis
    and denoised again. The squelch level is the upper edge of the
    most populated 0.01-wide histogram bin of the smoothed X deviation, and the X
    velocity is integrated from the mean-removed smooth X signal using column 2 as
    timestamps.

    Parameters
    ----------
    file : str
        File name relative to the global ``folder``; only names ending in 'csv' are read.

    Returns
    -------
    list or None
        ``[Velocity, [SmoothDevX, SmoothDevY, SmoothDevZ], [StdDevsX, StdDevsY, StdDevsZ],
        SquelchSignal, [SmoothX, SmoothY, SmoothZ], raw column 3, raw column 1]``,
        or None when ``file`` does not end in 'csv'.
    """
    if file[-3:] != 'csv':
        return None

    def _denoise(signal):
        # one shared wavelet setting for every denoising step; rescale_sigma is a real bool
        return denoise_wavelet(signal, method='VisuShrink', mode='soft', wavelet_levels=3,
                               wavelet='sym2', rescale_sigma=True)

    def _rolling_dev(raw, smooth):
        # pad with a trailing 0: RollingStdDev returns one value fewer than it is given
        # for inputs longer than its window
        devs = RollingStdDev(raw, smooth)
        devs.append(0)
        return np.asarray(devs)

    # context manager so the file handle is closed as soon as the data is parsed
    with open(folder+file,'r') as handle:
        ODataSet = np.genfromtxt(handle, delimiter=',',skip_header=0,missing_values=0,invalid_raise=False)

    SmoothX = _denoise(ODataSet[:,3])
    SmoothY = _denoise(ODataSet[:,4])
    SmoothZ = _denoise(ODataSet[:,5])

    StdDevsX = _rolling_dev(ODataSet[:,3], SmoothX)
    StdDevsY = _rolling_dev(ODataSet[:,4], SmoothY)
    StdDevsZ = _rolling_dev(ODataSet[:,5], SmoothZ)

    SmoothDevX = _denoise(StdDevsX)
    SmoothDevY = _denoise(StdDevsY)
    SmoothDevZ = _denoise(StdDevsZ)

    # squelch level = upper edge of the modal 0.01-wide bin of the smoothed X deviation
    Max = np.max(SmoothDevX)
    buckets = int(Max / 0.01) + 1
    bins = np.linspace(0,buckets*0.01,buckets+1)
    counts, bins = np.histogram(SmoothDevX,bins=bins)
    SquelchLevel = bins[np.argmax(counts)+1]
    SquelchSignal = SquelchPattern(SmoothDevX, 5000, SquelchLevel)

    # remove the mean of X before integrating (Y and Z are returned without this step)
    SmoothX -= np.average(SmoothX)
    Velocity = getVelocity(SmoothX, ODataSet[:,2],SquelchSignal, 2)

    return [Velocity, [SmoothDevX, SmoothDevY, SmoothDevZ], [StdDevsX,StdDevsY,StdDevsZ], SquelchSignal,[SmoothX, SmoothY, SmoothZ],ODataSet[:,3],ODataSet[:,1]]

In [None]:
def DeviationVelocity(file):
    """Load one recording CSV and return its squelch signal and mean-removed smooth data.

    Columns 3, 4 and 5 of the file are wavelet-denoised (called X, Y and Z here) and
    each has its mean subtracted. The rolling deviation of raw X against that smooth X
    is denoised and turned into a 0/1 squelch signal with a half-window of 2000 samples
    and a fixed level of 0.03.
    NOTE(review): the deviation is taken against the mean-removed smooth X while the raw
    column keeps its offset -- confirm this is intended.

    Parameters
    ----------
    file : str
        File name relative to the global ``folder``; only names ending in 'csv' are read.

    Returns
    -------
    list or None
        ``[SquelchSignal, MoveMatrix, SmoothDevX, file[:-3]]`` where MoveMatrix is a
        3-row ``np.matrix`` (X, Y, Z) and ``file[:-3]`` is the file name without its
        last three characters (the trailing '.' is kept). None when ``file`` does not
        end in 'csv'.
    """
    if file[-3:] != 'csv':
        return None

    def _denoise(signal):
        # one shared wavelet setting for every denoising step; rescale_sigma is a real bool
        return denoise_wavelet(signal, method='VisuShrink', mode='soft', wavelet_levels=3,
                               wavelet='sym2', rescale_sigma=True)

    # context manager so the file handle is closed as soon as the data is parsed
    with open(folder+file,'r') as handle:
        ODataSet = np.genfromtxt(handle, delimiter=',',skip_header=0,missing_values=0,invalid_raise=False)

    SmoothX = _denoise(ODataSet[:,3])
    SmoothY = _denoise(ODataSet[:,4])
    SmoothZ = _denoise(ODataSet[:,5])
    SmoothX -= np.average(SmoothX)
    SmoothY -= np.average(SmoothY)
    SmoothZ -= np.average(SmoothZ)

    # pad with a trailing 0: RollingStdDev returns one value fewer than it is given
    # for inputs longer than its window
    StdDevsX = RollingStdDev(ODataSet[:,3],SmoothX)
    StdDevsX.append(0)
    StdDevsX = np.asarray(StdDevsX)
    SmoothDevX = _denoise(StdDevsX)
    SquelchSignal = SquelchPattern(SmoothDevX, 2000, 0.03)

    # kept as np.matrix: the later cells transpose it and slice single rows as 1x3 matrices
    MoveMatrix = np.matrix([SmoothX, SmoothY, SmoothZ])
    return [SquelchSignal,MoveMatrix,SmoothDevX,file[:-3]]

In [None]:
def SepreateMovements(SquelchSignal, RawData, FileName):
    """Cut RawData into separate moves wherever SquelchSignal drops from 1 to 0.

    Every sample index ``j`` (up to ``len(SquelchSignal) - 2``) whose squelch value is 1
    contributes row ``j`` of RawData to the current move; the move is closed when the
    next squelch value is 0. Whatever has been collected at the end is appended as a
    final move, even if it is empty.

    Each move starts with one all-zero row (kept from the original implementation, so
    row counts are unchanged) followed by the collected rows. Rows are gathered with a
    single indexing operation per move instead of one ``np.concatenate`` per sample,
    which was quadratic in the move length.

    Parameters
    ----------
    SquelchSignal : sequence of 0/1
    RawData : 2-D array or np.matrix, one row per sample, three columns
    FileName : str
        Prefix for the move names; a 3-digit running number is appended.

    Returns
    -------
    (Moves, MoveNames) : two lists of equal length.
    """
    data = RawData if isinstance(RawData, np.ndarray) else np.asarray(RawData, dtype=float)
    n_rows = np.shape(data)[0]

    def _assemble(row_ids):
        # leading zero row + the selected sample rows (just the zero row if none)
        lead = np.zeros((1,3), dtype=float)
        if not row_ids:
            return lead
        return np.concatenate((lead, data[np.asarray(row_ids, dtype=int), :]), axis=0)

    Moves = []
    MoveNames = []
    row_ids = []
    for j in range(len(SquelchSignal)-1):
        if SquelchSignal[j] != 1:
            continue
        if j < n_rows:
            row_ids.append(j)
        else:
            # best effort, as before: report samples that have no data row and carry on
            print(j)
        if SquelchSignal[j+1] == 0:
            MoveNames.append(FileName + str(len(Moves)).zfill(3))
            Moves.append(_assemble(row_ids))
            row_ids = []
    MoveNames.append(FileName + str(len(Moves)).zfill(3))
    Moves.append(_assemble(row_ids))
    return Moves, MoveNames

In [None]:
def splitLong(Moves, maxLength = 4000, minLength = 1000, MoveNames = []):
    """Halve over-long moves and drop too-short ones.

    A move with more than ``maxLength`` rows is cut once into a first and second half
    (named with suffix 'a' and 'b'); the halves are not checked again. A move
    with fewer than ``minLength`` rows is discarded. Everything else is kept unchanged.

    Parameters
    ----------
    Moves : list of 2-D arrays/matrices (rows are samples)
    maxLength, minLength : int
        Row-count limits.
    MoveNames : list of str, optional
        One name per move. When fewer names than moves are supplied (including the
        empty default, which is never mutated here) every move is named 'null'.

    Returns
    -------
    (Xmoves, Xnames) : the resulting moves and their names, in order.
    """
    if MoveNames is None or len(MoveNames) < len(Moves):
        MoveNames = ['null'  for x in range(len(Moves))]
    Xmoves = []
    Xnames = []
    for Move, Name in zip(Moves, MoveNames):
        # length of THIS move (the original read an unrelated global named `move`)
        rows = np.shape(Move)[0]
        if rows > maxLength:
            half = int(len(Move)/2)
            Xmoves.append(Move[:half,:])
            Xnames.append(Name + 'a')
            Xmoves.append(Move[half:,:])
            Xnames.append(Name + 'b')
        elif rows >= minLength:
            Xmoves.append(Move)
            Xnames.append(Name)
    return Xmoves, Xnames

def findMaxLength(Moves):
    """Return ``(row count, index)`` of the longest move; the first one wins a tie.

    Returns ``(0, 0)`` when Moves is empty or no move has any rows.
    """
    maxLength, LongMove = 0, 0
    for position, segment in enumerate(Moves):
        rows = np.shape(segment)[0]
        if rows > maxLength:
            maxLength, LongMove = rows, position
    return maxLength, LongMove

def findMinLength(Moves):
    """Return ``(row count, index)`` of the shortest move; the first one wins a tie.

    The search starts from a sentinel of 9999999 rows, so ``(9999999, 0)`` is returned
    when Moves is empty or every move is at least that long.
    """
    minLength, SmallMove = 9999999, 0
    for position, segment in enumerate(Moves):
        rows = np.shape(segment)[0]
        if rows < minLength:
            minLength, SmallMove = rows, position
    return minLength, SmallMove

Building up the list of movements with their names kept handy

In [None]:
# Files are processed LoopFiles at a time; loops is the number of batches (rounded up).
LoopFiles = 3
loops = len(files) // LoopFiles + (1 if len(files) % LoopFiles != 0 else 0)


In [None]:
# Per-file accumulators filled by the batch loop: squelch signals, data matrices, file-name stems
SquelchSignal, RawData, OrderedFileNames = [], [], []

In [None]:
# Run DeviationVelocity over the files in batches of LoopFiles, one worker per file in a batch.
# The timer is called startTime so that the `statistics as st` import is not overwritten.
startTime = ti()

for k in range(loops):
    # slicing past the end of the list is safe, so the last (short) batch needs no special case
    tfiles = files[k*LoopFiles:(k+1)*LoopFiles]
    Results = Parallel(n_jobs=LoopFiles)(delayed(DeviationVelocity)(file) for file in tfiles)

    for Result in Results:
        if Result is None:
            # DeviationVelocity returns None for names that do not end in 'csv'
            continue
        SquelchSignal.append(Result[0])
        RawData.append(np.matrix(Result[1]).T)    # transpose: one row per sample, one column per axis
        OrderedFileNames.append(Result[3])
    # batch number, results in this batch, elapsed minutes
    # (len() rather than np.shape(): the results are ragged nested lists)
    print(k, len(Results), (ti()-startTime)/60.0)
    

In [None]:
# Split every file's data into individual moves in parallel; each entry of MoveData is the
# (Moves, MoveNames) pair returned by SepreateMovements for one file.
# NOTE(review): n_jobs=31 is hard-coded -- presumably sized for one particular machine; confirm.
MoveData = Parallel(n_jobs=31)(delayed(SepreateMovements)(SquelchSignal[i], RawData[i], OrderedFileNames[i])
                                       for i in range(len(RawData)))

In [None]:
# Unzip MoveData: Movements collects the first element of each entry (the moves of one file),
# GroupNames the second (their names). The loop variable `move` remains defined afterwards.
Movements = []
GroupNames = []
for move in MoveData:
    Movements.append(move[0])
    GroupNames.append(move[1])


In [None]:
# Flatten the per-file groups into one list of moves and one parallel list of names
Moves = []
for Groups in Movements:
    Moves.extend(Groups)

MoveNames = []
for Groups in GroupNames:
    MoveNames.extend(Groups)

In [None]:

# Release the intermediates; only Moves and MoveNames are used from here on
del SquelchSignal, RawData, Movements, GroupNames, MoveData, OrderedFileNames

In [None]:
# Row count of the longest move and its position in Moves
longMove, MoveNumb = findMaxLength(Moves)

In [None]:
# minLength (shortest move, in samples, that is kept) is not assigned by any visible cell;
# only set it here when nothing earlier has defined it. 1000 is splitLong's own default.
if 'minLength' not in locals():
    minLength = 1000
# maxLength is longMove+1, so no move exceeds it and none is halved; this call only drops short moves
Moves, MoveNames = splitLong(Moves, longMove+1, minLength, MoveNames)

## LSTM Self Supervised
#https://medium.com/@jetnew/anomaly-detection-of-time-series-data-e0cb6b382e33

In [None]:
# train_test_split is not imported anywhere else in this notebook
from sklearn.model_selection import train_test_split

# Hold out 10% of the moves (with their names) for testing; fixed seed for a repeatable split
Train_data, Test_data, Train_names, Test_Names = train_test_split(Moves, MoveNames, test_size=0.10, shuffle=True, random_state=0)

In [None]:
from keras.layers import LSTM, Dense, RepeatVector, TimeDistributed
from keras.models import Sequential

class LSTM_Autoencoder:
  """LSTM sequence autoencoder that scores each sample by its reconstruction error.

  Input may be 2-D ``(samples, timesteps)`` for a single channel or 3-D
  ``(samples, timesteps, features)`` for several channels. The original code always
  appended a feature axis, which contradicted the fixed ``n_features = 3``: 2-D data
  ended up with 1 feature and 3-D data became 4-D.
  """

  def __init__(self, optimizer='adam', loss='mse', n_features=3):
    self.optimizer = optimizer
    self.loss = loss
    # default channel count; fit() replaces it with the channel count of the training data
    self.n_features = n_features

  @staticmethod
  def _as_sequences(X):
    """Return X as a float array shaped (samples, timesteps, features)."""
    data = np.asarray(X, dtype=float)
    if data.ndim == 2:
      # single-channel input: add the feature axis
      data = np.expand_dims(data, axis=2)
    if data.ndim != 3:
      raise ValueError('expected data shaped (samples, timesteps) or (samples, timesteps, features), '
                       'got %d dimension(s)' % data.ndim)
    return data

  def build_model(self):
    """Build and compile the encoder/decoder; needs self.timesteps (set by fit)."""
    timesteps = self.timesteps
    n_features = self.n_features
    model = Sequential()

    # Encoder
    model.add(LSTM(timesteps, activation='relu', input_shape=(None, n_features), return_sequences=True))
    model.add(LSTM(16, activation='relu', return_sequences=True))
    model.add(LSTM(1, activation='relu'))
    model.add(RepeatVector(timesteps))

    # Decoder
    model.add(LSTM(timesteps, activation='relu', return_sequences=True))
    model.add(LSTM(16, activation='relu', return_sequences=True))
    model.add(TimeDistributed(Dense(n_features)))

    model.compile(optimizer=self.optimizer, loss=self.loss)
    model.summary()
    self.model = model

  def fit(self, X, epochs=3, batch_size=32):
    """Build the model for the shape of X and train it to reproduce X."""
    input_X = self._as_sequences(X)
    self.timesteps = input_X.shape[1]
    self.n_features = input_X.shape[2]
    self.build_model()

    self.model.fit(input_X, input_X, epochs=epochs, batch_size=batch_size)

  def predict(self, X):
    """Return one reconstruction-error score per sample.

    The score is the Euclidean norm of the difference between the sample and its
    reconstruction over all timesteps and features.
    """
    input_X = self._as_sequences(X)
    output_X = np.asarray(self.model.predict(input_X))
    residual = (input_X - output_X).reshape(len(input_X), -1)
    return np.linalg.norm(residual, axis=-1)

  def plot(self, scores, timeseries, threshold=0.95):
    """Plot the scores with a quantile threshold, then mark the samples above it.

    threshold is the fraction of sorted scores at which the cut-off score is read.
    The second plot shows, per sample, the last value of the flattened series
    (for 2-D input this is the value at the last timestep).
    """
    scores = np.asarray(scores, dtype=float)
    if scores.size == 0:
      return
    sorted_scores = np.sort(scores)
    # clamp: round(len * threshold) can equal len (e.g. 10 scores at 0.95)
    cut = min(round(len(scores) * threshold), len(scores) - 1)
    threshold_score = sorted_scores[cut]

    plt.title("Reconstruction Error")
    plt.plot(scores)
    plt.plot([threshold_score]*len(scores), c='r')
    plt.show()

    anomalous = np.where(scores > threshold_score)[0]
    normal = np.where(scores <= threshold_score)[0]

    series = np.asarray(timeseries, dtype=float)
    last_values = series.reshape(series.shape[0], -1)[:, -1]

    plt.title("Anomalies")
    plt.scatter(normal, last_values[normal], s=3)
    plt.scatter(anomalous, last_values[anomalous], s=5, c='r')
    plt.show()


In [None]:
# Create the autoencoder and train it to reconstruct the training moves (3 epochs, batches of 32).
# NOTE(review): fit() reads the number of timesteps from the second dimension of the data, so the
# moves presumably need a common length before this cell -- confirm.
lstm_autoencoder = LSTM_Autoencoder(optimizer='adam', loss='mse')
lstm_autoencoder.fit(Train_data, epochs=3, batch_size=32)

In [None]:
# Score the held-out moves by reconstruction error, then plot the scores against the
# cut-off read at the 0.95 position of the sorted scores
scores = lstm_autoencoder.predict(Test_data)
lstm_autoencoder.plot(scores, Test_data, threshold=0.95)

In [None]:

# Save the trained Keras model under the name "LSTM_FP" (presumably relative to the working directory)
lstm_autoencoder.model.save("LSTM_FP")

In [None]:
#Deliberately stop a "Run All" here: the cells below are exploratory
raise RuntimeError("Intentional stop: the cells below are exploratory and are not part of the run-all pipeline")

# Try Others

## iForest 
Requires data in Pandas data frames

https://towardsdatascience.com/unsupervised-anomaly-detection-in-python-f2e61be17c2b

In [None]:
from pycaret.anomaly import *


In [None]:
# pandas is otherwise only imported in a later cell of this notebook
import pandas as pd

# One row per move, keyed by move name. `Xmoves` only exists inside splitLong, so the
# dictionary is built from the notebook-level MoveNames / Moves instead.
# NOTE(review): each move is flattened to 1-D so that it fits in one row (moves of different
# length leave NaN in the trailing columns) -- confirm this is the intended layout.
Move_dict = {name: np.asarray(rows).ravel() for name, rows in zip(MoveNames, Moves)}
df_Move = pd.DataFrame.from_dict(Move_dict, orient='index')

In [None]:
# Keep the last `Split` rows aside as unseen data and train on the rest.
# NOTE(review): `Split` is not assigned in any visible cell -- it must be defined before this runs.
df_train = df_Move[:-Split]
df_unseen = df_Move[-Split:]

In [None]:
# Initialise the PyCaret anomaly experiment on the training frame (`setup` comes from the
# `pycaret.anomaly` star import).
# NOTE(review): the `silent` argument may not be accepted by every PyCaret release -- confirm
# against the installed version.
anom = setup(data = df_train, silent = True)


In [None]:
# Create the 'iforest' model; fraction = 0.05 is presumably the expected share of
# outliers -- confirm in the PyCaret documentation
anom_model = create_model(model = 'iforest', fraction = 0.05)


In [None]:
# Presumably returns the training data with the model's anomaly label/score attached -- confirm
results = assign_model(anom_model)

In [None]:
# Visualise the model with PyCaret's 'tsne' plot
plot_model(anom_model, plot = 'tsne')

In [None]:
# Visualise the model with PyCaret's 'umap' plot
plot_model(anom_model, plot = 'umap')

In [None]:
# Predictions for the held-out rows, called directly on the model object returned by create_model
anom_model.predict(df_unseen)

In [None]:
# Outlier probabilities for the held-out rows (presumably one column per class -- confirm)
anom_model.predict_proba(df_unseen)


In [None]:
# Raw anomaly scores for the held-out rows
anom_model.decision_function(df_unseen)


## Hierarchical clustering
https://medium.com/@jetnew/anomaly-detection-of-time-series-data-e0cb6b382e33

In [None]:
# First attempt: scikit-learn agglomerative clustering into 3 clusters.
# NOTE(review): Train_data is a list of per-move matrices here; these estimators presumably
# expect one flat feature row per sample -- confirm the data is reshaped first.
from sklearn.cluster import AgglomerativeClustering

clusters = 3
y_pred = AgglomerativeClustering(n_clusters=clusters).fit_predict(Train_data)


# Second attempt: SciPy Ward linkage cut into at most 5 clusters; this overwrites
# `clusters` and `y_pred` from the first attempt.
from scipy.cluster.hierarchy import linkage, fcluster, dendrogram

clusters=5
cls = linkage(Train_data, method='ward')
y_pred = fcluster(cls, t=clusters, criterion='maxclust')

# Draw the linkage tree
dendrogram(cls)
plt.show()

## Multiple Techniques
https://www.kaggle.com/code/victorambonati/unsupervised-anomaly-detection

In [None]:
# libraries
#%matplotlib notebook

import pandas as pd
import numpy as np

import matplotlib
import seaborn
import matplotlib.dates as md
from matplotlib import pyplot as plt

from sklearn import preprocessing
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.covariance import EllipticEnvelope
#from pyemma import msm # not available on Kaggle Kernel
from sklearn.ensemble import IsolationForest
from sklearn.svm import OneClassSVM

In [None]:
# some function for later

# return a Series with the distance between each point and the centroid of its assigned cluster
def getDistanceByPoint(data, model):
    """Distance from every row of ``data`` to the centroid of the cluster it is assigned to.

    Parameters
    ----------
    data : DataFrame or 2-D array, one row per point, in the same order the model was fitted on.
    model : fitted clustering model exposing ``cluster_centers_`` and ``labels_``.

    Returns
    -------
    pandas.Series of float, indexed 0..n-1.
    """
    points = np.asarray(data, dtype=float)
    if len(points) == 0:
        return pd.Series(dtype=float)
    # labels_ already index cluster_centers_ directly; the original `labels_[i]-1` selected
    # the neighbouring centroid (and the last one for label 0)
    labels = np.asarray(model.labels_)[:len(points)]
    centroids = np.asarray(model.cluster_centers_)[labels]
    # built in one step: Series.set_value no longer exists in current pandas
    return pd.Series(np.linalg.norm(points - centroids, axis=1))

# train a Markov model on the sequence and return its transition matrix
# NOTE(review): `msm` is not imported in the visible cells (the `from pyemma import msm` line
# is commented out), so calling this raises NameError until that import is restored.
def getTransitionMatrix (df):
    # second argument 1 is presumably the lag in steps -- confirm in the pyemma docs
    df = np.array(df)
    model = msm.estimate_markov_model(df, 1)
    return model.transition_matrix

def markovAnomaly(df, windows_size, threshold):
    """Flag each position of ``df`` by scoring the ``windows_size`` values that precede it.

    The first ``windows_size`` positions get 0. Every later position j gets the result of
    ``anomalyElement`` applied to ``df[j-windows_size:j]`` (index reset), the threshold
    raised to the power ``windows_size``, and the transition matrix fitted on all of df.
    """
    transition_matrix = getTransitionMatrix(df)
    real_threshold = threshold**windows_size

    def _flag(end):
        # not enough history yet for a full window
        if end < windows_size:
            return 0
        window = df[end-windows_size:end].reset_index(drop=True)
        return anomalyElement(window, real_threshold, transition_matrix)

    return [_flag(j) for j in range(0, len(df))]

In [None]:
#In 13
# fit k-means for 1..19 centroids and plot the score of each fit (elbow method)
n_cluster = range(1, 20)
kmeans = [KMeans(n_clusters=k).fit(Train_data) for k in n_cluster]
scores = [fitted.score(Train_data) for fitted in kmeans]
fig, ax = plt.subplots()
ax.plot(n_cluster, scores)
plt.show()

In [None]:
# The elbow is not clear, so 15 centroids are chosen arbitrarily (kmeans[14] is the 15-cluster
# fit) and the cluster labels are added to the central dataframe.
# NOTE(review): `df` is not created in any visible cell, and Train_data[0] / Train_data[1]
# are the first two training samples rather than feature columns -- confirm before use.
df['cluster'] = kmeans[14].predict(Train_data)
df['principal_feature1'] = Train_data[0]
df['principal_feature2'] = Train_data[1]
df['cluster'].value_counts()