/
extract.py
526 lines (373 loc) · 16.5 KB
/
extract.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
'''
Author: John Dogan
Extracts features from a given Android sensor file whose lines have the format (time, sensor type, x, y, z).
Computes per-window features such as peak counts and half-cycle height, width, and distance.
Uses windows with 50% overlap.
Returns the features for the X, Y, and Z axes.
'''
from collections import defaultdict
import numpy
'''
Opens a sensor file, separates its data, and extracts per-window features for each axis.
'''
class extract():
    """Extract per-window features for the X, Y and Z axes of a sensor file.

    Each non-blank line of the file is split on whitespace and read as
    ``time <ignored field> x y z``.  The samples of every axis are grouped
    into 50%-overlapping windows and one feature row (a list of strings) is
    produced per window.

    Row layout (always present, in this order):
        up-peak count, down-peak count, half-cycle count,
        mean, standard deviation, RMS, max-min of the axis values.
    Appended only when the window contains at least one half cycle:
        height feature, width feature, mean height, mean width,
        mean distance, SD of height, SD of width, SD of distance.
    """

    # Class-level defaults kept for backward compatibility with code that
    # reads these names; __init__ shadows them with per-instance results.
    allWindowFeaturesX = []
    allWindowFeaturesY = []
    allWindowFeaturesZ = []

    def __init__(self, filename):
        """Read ``filename`` and compute the feature rows of all three axes.

        @param filename: path of the sensor file to read
        """
        # The context manager closes the file even if parsing fails later.
        with open(filename, 'r') as sensorFile:
            lines = sensorFile.readlines()

        time, x, y, z = self.seperateData(lines)

        self.allWindowsFeaturesX = self.writeFeatures(time, x, "X_" + filename)
        self.allWindowsFeaturesY = self.writeFeatures(time, y, "Y_" + filename)
        self.allWindowsFeaturesZ = self.writeFeatures(time, z, "Z_" + filename)

        # The class declares "allWindowFeatures*" while the rest of the code
        # uses "allWindowsFeatures*"; expose the results under both spellings.
        self.allWindowFeaturesX = self.allWindowsFeaturesX
        self.allWindowFeaturesY = self.allWindowsFeaturesY
        self.allWindowFeaturesZ = self.allWindowsFeaturesZ

    def getAll(self):
        """Return the feature rows of all axes.

        @return: tuple (X rows, Y rows, Z rows); each element is a list with
                 one list of strings per window
        """
        return (self.allWindowsFeaturesX,
                self.allWindowsFeaturesY,
                self.allWindowsFeaturesZ)

    def seperateData(self, lines):
        """Split raw sensor lines into parallel time/x/y/z lists.

        Field 0 is parsed as ``int`` (time), fields 2, 3 and 4 as ``float``
        (x, y, z); field 1 is ignored.  Blank lines are skipped so that a
        trailing newline in the file does not raise ``IndexError``.

        @param lines: iterable of text lines
        @return: tuple (time, x, y, z) of lists
        @raise ValueError/IndexError: on a non-blank line that is malformed
        """
        time = []
        x = []
        y = []
        z = []
        for line in lines:
            fields = line.split()
            if not fields:
                continue
            time.append(int(fields[0]))
            x.append(float(fields[2]))
            y.append(float(fields[3]))
            z.append(float(fields[4]))
        return time, x, y, z

    def writeFeatures(self, time, axis, fileName, windowSize=1500):
        """Compute one feature row per 50%-overlapping window of ``axis``.

        Despite the name nothing is written to disk: the file output of the
        original implementation was disabled, so ``fileName`` is accepted
        only to keep the signature stable.

        @param time: sample timestamps (same length as ``axis``)
        @param axis: sample values of one axis
        @param fileName: unused, kept for backward compatibility
        @param windowSize: window length in timestamp units; the default of
               1500 was annotated "1.5 Seconds" in the original code, so
               timestamps are presumably milliseconds (confirm with caller)
        @return: list of feature rows (lists of strings); empty for no samples
        """
        return [self._windowFeatures(windowTimes, windowAxis)
                for windowTimes, windowAxis
                in self._splitIntoWindows(time, axis, windowSize)]

    @staticmethod
    def _splitIntoWindows(time, axis, windowSize):
        """Group samples into windows that each start half a window later."""
        windows = []
        if not time:
            return windows

        halfWindow = windowSize / 2
        windowStart = time[0]
        windowTimes, windowAxis = [], []
        # Index of the first sample in the second half of the current window,
        # i.e. the first sample that also belongs to the next window.
        nextStart = None
        i = 0
        while i < len(time):
            if time[i] > windowStart + windowSize:
                # Current window is complete; windows without samples (the
                # start is still catching up after a gap) are not emitted.
                if windowTimes:
                    windows.append((windowTimes, windowAxis))
                    windowTimes, windowAxis = [], []
                # Rewind to the overlap only if this window had one.  Falling
                # back to an older position would re-read samples that lie
                # before the new window start.
                if nextStart is not None:
                    i = nextStart
                    nextStart = None
                windowStart += halfWindow
                continue
            if nextStart is None and time[i] > windowStart + halfWindow:
                nextStart = i
            windowTimes.append(time[i])
            windowAxis.append(axis[i])
            i += 1

        if windowTimes:
            windows.append((windowTimes, windowAxis))
        return windows

    @staticmethod
    def _findHalfCycles(windowTimes, windowAxis):
        """Locate peaks and half cycles inside one window.

        A half cycle starts (one sample back) when the signal moves in the
        same direction twice in a row while no cycle is open, and ends at the
        sample where the direction reverses.  Flat steps are ignored.

        @return: (upPeak times, downPeak times, starts, ends) where starts
                 and ends are equally long lists of (time, value) pairs
        """
        upPeak, downPeak = [], []
        starts, ends = [], []
        inCycle = False
        lastSign = ''
        # The last sample has no successor to compare with.
        for w in range(len(windowAxis) - 1):
            if windowAxis[w] > windowAxis[w + 1]:
                sign, opposite, peaks = "DOWN", "UP", upPeak
            elif windowAxis[w] < windowAxis[w + 1]:
                sign, opposite, peaks = "UP", "DOWN", downPeak
            else:
                continue
            if lastSign == sign and not inCycle:
                inCycle = True
                peaks.append(windowTimes[w - 1])
                starts.append((windowTimes[w - 1], windowAxis[w - 1]))
            elif lastSign == opposite and inCycle:
                inCycle = False
                peaks.append(windowTimes[w])
                ends.append((windowTimes[w], windowAxis[w]))
            lastSign = sign

        # A cycle still open when the window ends is discarded.
        if inCycle:
            starts.pop()
        return upPeak, downPeak, starts, ends

    @staticmethod
    def _appendMaxFirst(values, value):
        """Add ``value`` keeping the greatest element at index 0.

        Ties are appended; the original if/elif chain dropped them.
        """
        if values and value > values[0]:
            values.insert(0, value)
        else:
            values.append(value)

    def _windowFeatures(self, windowTimes, windowAxis):
        """Build the feature row (list of strings) of a single window."""
        upPeak, downPeak, starts, ends = self._findHalfCycles(windowTimes,
                                                             windowAxis)

        # The same peak is recorded as end of one half cycle and start of the
        # next, so count distinct peak times only.
        features = [str(len(set(upPeak))),
                    str(len(set(downPeak))),
                    str(len(ends))]

        # Statistics over every sample of the window.
        features.append(str(numpy.mean(windowAxis)))
        features.append(str(numpy.std(windowAxis)))
        features.append(str(numpy.sqrt(numpy.mean(numpy.square(windowAxis)))))
        features.append(str(numpy.max(windowAxis) - numpy.min(windowAxis)))

        if not ends:
            # No half cycle: the row keeps its short 7-value form.
            return features

        listHeight, listWidth, listDistance = [], [], []
        for (startTime, startAxis), (endTime, endAxis) in zip(starts, ends):
            height = abs(startAxis - endAxis)
            width = endTime - startTime
            self._appendMaxFirst(listHeight, height)
            self._appendMaxFirst(listWidth, width)
            listDistance.append(((height ** 2) + (width ** 2)) ** .5)

        if len(listHeight) > 1:
            # Greatest height minus the mean of the remaining heights.
            featureHeight = (listHeight[0]
                             - sum(listHeight[1:]) / (len(listHeight) - 1))
            # Mean of the remaining widths minus the least width.  The list
            # is only max-first, so the minimum must be looked up explicitly.
            leastWidth = min(listWidth)
            featureWidth = ((sum(listWidth) - leastWidth)
                            / (len(listWidth) - 1) - leastWidth)
        else:
            featureHeight = 0
            featureWidth = 0

        features.append(str(featureHeight))
        features.append(str(featureWidth))
        features.append(str(sum(listHeight) / len(listHeight)))
        features.append(str(sum(listWidth) / len(listWidth)))
        features.append(str(sum(listDistance) / len(listDistance)))
        features.append(str(numpy.std(listHeight)))
        features.append(str(numpy.std(listWidth)))
        features.append(str(numpy.std(listDistance)))
        return features