-
Notifications
You must be signed in to change notification settings - Fork 8
/
automarkerlabel.py
1579 lines (1371 loc) · 65.9 KB
/
automarkerlabel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# -*- coding: utf-8 -*-
"""
Created on Wed Nov 18 13:49:57 2020
Algorithm for automatic labelling of motion capture markers.
Includes functions for importing motion capture data, generating simulated data,
training the algorithm, and automatic labelling.
@author: aclouthi
"""
import xml.dom.minidom
import numpy as np
import torch
import torch.nn as nn
import pandas as pd
from scipy import signal
from scipy import stats
from scipy.optimize import linear_sum_assignment
from scipy.interpolate import CubicSpline
from sklearn.utils.extmath import weighted_mode
from ezc3d import c3d
import h5py
import random
import copy
import pickle
import warnings
import glob
import time
from datetime import date
import os
# Import parameters
filtfreq = 6 # Cut off frequency for low-pass filter for marker trajectories
# Neural network architecture parameters
batch_size = 100
nLSTMcells = 256
nLSTMlayers = 3
LSTMdropout = .17
FCnodes = 128
# Learning parameters
lr = 0.078
momentum = 0.65
# --------------------------------------------------------------------------- #
# ---------------------------- MAIN FUNCTIONS ------------------------------- #
# --------------------------------------------------------------------------- #
def generateSimTrajectories(bodykinpath,markersetpath,outputfile,alignMkR,alignMkL,
                            fs,num_participants=100,max_len=240):
    '''
    Generate simulated marker trajectories to use for training the machine learning-
    based marker labelling algorithm. Trajectories are generated based on the defined
    OpenSim (https://simtk.org/projects/opensim) marker set using body kinematics
    for up to 100 participants performing a series of athletic movements.

    Parameters
    ----------
    bodykinpath : string
        Path to .hdf5 file containing body kinematics of training data
    markersetpath : string
        Path to .xml file of OpenSim marker set
    outputfile : string
        Path to save .pickle file of training data
    alignMkR : string
        Markers to use to align person such that they face +x. This is for the right side.
        Suggest acromions or pelvis markers.
    alignMkL : string
        Markers to use to align person such that they face +x. This is for the left side.
        Suggest acromions or pelvis markers.
    fs : int
        Sampling frequency of data to be labelled.
    num_participants : int, optional
        Number of participants to include in training data, must be <=100.
        The default is 100.
    max_len : int, optional
        Max length of data segments. The default is 240.

    Returns
    -------
    data : list of numpy arrays
        num_frames x num_markers x 3 matrices of marker trajectories of simulated
        training data.
    '''
    # Read marker set
    markers, segment, uniqueSegs, segID, mkcoordL, num_mks = import_markerSet(markersetpath)
    hf = h5py.File(bodykinpath,'r')
    # Get body segment scaling and centre of mass for every participant
    com = {}
    scale = {}
    for seg in uniqueSegs:
        com[seg] = np.array(hf.get('/com/'+seg))
        scale[seg] = np.array(hf.get('/scale/'+seg))
    sids = list(hf['kin'].keys())  # participant IDs in the kinematics file
    # Generate simulated trajectories
    data = []
    R = np.array([[1, 0, 0],[0,0,-1],[0,1,0]]) # Converts from y=up to z=up
    for s in range(num_participants):
        for t in hf['kin'][sids[s]].keys():
            # one trial: num_frames x num_markers x 3 array of global marker positions
            pts = np.zeros((hf['kin'][sids[s]][t]['torso'].shape[2],len(markers),3))
            for m in range(len(markers)):
                # T is the 4x4xnum_frames homogeneous transform series for this marker's segment
                T = np.array(hf['kin'][sids[s]][t][segment[m]])
                for i in range(T.shape[2]):
                    # local marker position (scaled, relative to segment COM, m -> mm),
                    # expressed as a homogeneous coordinate and mapped to global frame
                    p = np.ones((4,1))
                    p[:3,0] = np.transpose((np.multiply(mkcoordL[m],scale[segment[m]][s,:]) -
                                            com[segment[m]][s,:]) * 1000)
                    p = np.matmul(T[:,:,i],p)
                    pts[i,m,:] = np.transpose(np.matmul(R,p[:3,0]))
            # resample to the target sampling frequency via cubic spline
            # (step 120/fs assumes source kinematics are at 120 Hz -- TODO confirm)
            cs = CubicSpline(np.arange(0,pts.shape[0],1),pts,axis=0)
            pts = cs(np.arange(0,pts.shape[0],120/fs)) # match sampling frequency of data to label
            pts = align(pts,markers.index(alignMkR),markers.index(alignMkL))
            # split over-long trials in half so every segment is <= ~max_len frames
            if pts.shape[0] > max_len:
                data.append(torch.from_numpy(pts[:round(pts.shape[0]/2),:,:]))
                data.append(torch.from_numpy(pts[round(pts.shape[0]/2):,:,:]))
            else:
                data.append(torch.from_numpy(pts))
        if (s+1) % 10 == 0:
            print('%d/%d complete' % (s+1,num_participants))
    hf.close()
    with open(outputfile,'wb') as f:
        pickle.dump(data,f)
    print('Training data saved to ' + outputfile)
    return data
def trainAlgorithm(savepath,datapath,markersetpath,fs,num_epochs=10,prevModel=None,windowSize=120,
                   alignMkR=None,alignMkL=None,tempCkpt=None,contFromTemp=False):
    '''
    Use this function to train the marker labelling algorithm on existing labelled
    c3d files or simulated marker trajectories created using
    generateSimTrajectories()

    Parameters
    ----------
    savepath : string
        Folder where trained model should be saved.
    datapath : string
        Full path to .pickle file containing simulated trajectory training data
        or folder containing labelled .c3d files to use as training data.
    markersetpath : string
        Path to .xml file of OpenSim marker set.
    fs : int
        Sampling frequency of training data.
    num_epochs : int, optional
        Number of epochs to train for. The default is 10.
    prevModel : string, optional
        Path to a .ckpt file of a previously trained neural network if using
        transfer learning. Set to None if not using a previous model.
        The default is None.
    windowSize : int, optional
        Desired size of data windows. Not required if using simulated trajectories
        to train. The default is 120.
    alignMkR : string
        Markers to use to align person such that they face +x. This is for the right side.
        Suggest acromions or pelvis markers. Not required if using simulated trajectories
        to train. The default is None.
    alignMkL : string
        Markers to use to align person such that they face +x. This is for the left side.
        Suggest acromions or pelvis markers. Not required if using simulated trajectories
        to train. The default is None.
    tempCkpt : string, optional
        Path to save a temporary .ckpt file of training progress after each epoch.
        Set to None to only save model when training completes
    contFromTemp : boolean, optional
        Set to True to continue progress from partially completed training .ckpt file
        at tempCkpt

    Returns
    -------
    None.
    '''
    t0 = time.time()
    # Read marker set
    markers, segment, uniqueSegs, segID, _, num_mks = import_markerSet(markersetpath)
    if '.pickle' in datapath:
        # Import simulated trajectory data
        with open(datapath,'rb') as f:
            data_segs = pickle.load(f)
        # Filter trajectories with a 2nd order low-pass Butterworth at filtfreq Hz.
        # (Was hard-coded to 6 Hz; now uses the module constant for consistency
        # with import_labelled_c3ds.) NaN gaps are interpolated before filtering
        # and restored afterwards so filtfilt never sees NaNs but occlusions remain.
        b, a = signal.butter(2,filtfreq,btype='low',fs=fs)
        for i in range(len(data_segs)):
            for k in range(3):
                inan = torch.isnan(data_segs[i][:,:,k])
                df = pd.DataFrame(data_segs[i][:,:,k].numpy())
                df = df.interpolate(axis=0,limit_direction='both')
                dummy = torch.from_numpy(signal.filtfilt(b,a,df.to_numpy(),axis=0).copy())
                dummy[inan] = np.nan
                data_segs[i][:,:,k] = dummy
        # Set windows (simulated data is already windowed: one window per marker
        # spanning the whole segment)
        windowIdx = []
        for i in range(len(data_segs)):
            for m in range(num_mks):
                windowIdx.append([i,m,0,data_segs[i].shape[0]])
        print('Loaded simulated trajectory training data')
    else:
        # Load labelled c3d files for training
        filelist = glob.glob(os.path.join(datapath,'*.c3d'))
        data_segs, windowIdx = import_labelled_c3ds(filelist,markers,
                                                    alignMkR,alignMkL,windowSize)
        print('Loaded c3ds files for training data')
    # Calculate values to use to scale neural network inputs and distances between
    # markers on same body segment to use for label verification
    scaleVals, segdists = get_trainingVals(data_segs,uniqueSegs,segID)
    # Padded sequence length for the network = longest data segment.
    # (Per-branch max_len computations above were dead code -- this single
    # computation was always the effective value -- so they were removed.)
    max_len = max([len(x) for x in data_segs])
    training_vals = {'segdists' : segdists, 'scaleVals' : scaleVals,'max_len' : max_len}
    with open(os.path.join(savepath,'trainingvals_' + date.today().strftime("%Y-%m-%d") + '.pickle'),'wb') as f:
        pickle.dump(training_vals,f)
    net, running_loss = train_nn(data_segs,num_mks,max_len,windowIdx,
                                 scaleVals,num_epochs,prevModel,tempCkpt,contFromTemp)
    with open(os.path.join(savepath,'training_stats_' + date.today().strftime("%Y-%m-%d") + '.pickle'),'wb') as f:
        pickle.dump(running_loss,f)
    torch.save(net.state_dict(),os.path.join(savepath,'model_'+ date.today().strftime("%Y-%m-%d") + '.ckpt'))
    print('Model saved to %s' % os.path.realpath(savepath))
    print('Algorithm trained in %s' % (time.time() - t0))
def transferLearning(savepath,datapath,modelpath,trainvalpath,markersetpath,
                     num_epochs=10,windowSize=120,alignMkR=None,alignMkL=None,
                     tempCkpt=None,contFromTemp=False):
    '''
    Use this function to perform transfer learning. Requires a previously trained
    model and labelled c3d files to add to training set.

    Parameters
    ----------
    savepath : string
        Path for save location of trained model.
    datapath : string
        Path to folder containing labelled .c3d files to add to training set.
    modelpath : string
        Path to a .ckpt file of a previously trained neural network to use as
        base for transfer learning.
    trainvalpath : string
        Path to training values from previously trained algorithm.
        Should match the model used in modelpath.
    markersetpath : string
        Path to .xml file of OpenSim marker set.
    num_epochs : int, optional
        Number of epochs to train for. The default is 10.
    windowSize : int, optional
        Size of windows used to segment data for algorithm. The default is 120.
    alignMkR : string, optional
        Markers to use to align person such that they face +x. This is for the right side.
        Suggest acromions or pelvis markers. The default is None (ie. no alignment).
    alignMkL : string, optional
        Markers to use to align person such that they face +x. This is for the left side.
        Suggest acromions or pelvis markers. The default is None (ie. no alignment).
    tempCkpt : string, optional
        Path to save a temporary .ckpt file of training progress after each epoch.
        Set to None to only save model when training completes
    contFromTemp : boolean, optional
        Set to True to continue progress from partially completed training .ckpt file
        at tempCkpt

    Returns
    -------
    None.
    '''
    t0 = time.time()
    # Read marker set
    markers, segment, uniqueSegs, segID, _, num_mks = import_markerSet(markersetpath)
    # Load c3d files
    filelist = glob.glob(os.path.join(datapath,'*.c3d'))
    data_segs, windowIdx = import_labelled_c3ds(filelist,markers,alignMkR,alignMkL,windowSize)
    # Load scale values and intra-segment distances from the previous training run
    with open(trainvalpath,'rb') as f:
        trainingvals = pickle.load(f)
    segdists = trainingvals['segdists']
    scaleVals = trainingvals['scaleVals']
    max_len = trainingvals['max_len']
    # Perform transfer learning starting from the weights at modelpath
    net, running_loss = train_nn(data_segs,num_mks,max_len,windowIdx,scaleVals,
                                 num_epochs,modelpath,tempCkpt,contFromTemp)
    with open(os.path.join(savepath,'training_stats_plus' + str(len(filelist)) + 'trials_' +
                           date.today().strftime("%Y-%m-%d") + '.pickle'),'wb') as f:
        pickle.dump(running_loss,f)
    torch.save(net.state_dict(),os.path.join(savepath,'model_plus' + str(len(filelist)) + 'trials_' +
                                             date.today().strftime("%Y-%m-%d") + '.ckpt'))
    # Update intra-segment distances by adding in new training data
    nframes = 0
    for i in range(len(data_segs)):
        nframes = nframes + data_segs[i].shape[0]
    for bs in range(len(uniqueSegs)):
        I = np.where(segID == bs)[0]  # indices of the markers on this body segment
        # pairwise marker distances over every frame of the new data (upper triangle)
        dists = np.zeros((I.shape[0],I.shape[0],nframes))
        k = 0
        for i in range(len(data_segs)):
            pts = data_segs[i]
            for m1 in range(len(I)):
                for m2 in range(m1+1,len(I)):
                    dists[m1,m2,k:k+data_segs[i].shape[0]] = (pts[:,I[m1],:] - pts[:,I[m2],:]).norm(dim=1).numpy()
            k = k+data_segs[i].shape[0]
        # update mean and std based on new data: pooled mean of the old statistics
        # (weighted by stored frame count) and the new frames, then a pooled
        # standard deviation via the sum-of-squared-differences identity
        mn = (segdists['mean'][bs]*segdists['nframes'] + np.nanmean(dists,axis=2)*nframes) / (segdists['nframes'] + nframes) # new mean
        sumdiff = np.nansum((dists - np.repeat(np.expand_dims(segdists['mean'][bs],axis=2),nframes,axis=2))**2,axis=2)
        segdists['std'][bs] = np.sqrt(((segdists['std'][bs]**2)*(segdists['nframes']-1) + sumdiff - \
            (segdists['nframes']+nframes)*(mn-segdists['mean'][bs])**2)/(segdists['nframes']+nframes-1))
        segdists['mean'][bs] = mn.copy()
        # mirror the upper triangle into the lower triangle so the matrices are symmetric
        for i in range(1,segdists['mean'][bs].shape[0]):
            for j in range(0,i):
                segdists['mean'][bs][i,j] = segdists['mean'][bs][j,i]
                segdists['std'][bs][i,j] = segdists['std'][bs][j,i]
    # frame count is shared across all segments, so update it once after the loop
    segdists['nframes'] = segdists['nframes']+nframes
    training_vals = {'segdists' : segdists, 'scaleVals' : scaleVals,'max_len' : max_len}
    with open(os.path.join(savepath,'trainingvals_plus' + str(len(filelist)) + 'trials_' +
                           date.today().strftime("%Y-%m-%d") + '.pickle'),'wb') as f:
        pickle.dump(training_vals,f)
    print('Added %d trials in %f s' % (len(filelist),time.time() - t0))
# --------------------------------------------------------------------------- #
# --------------------------- IMPORT FUNCTIONS ------------------------------ #
# --------------------------------------------------------------------------- #
def import_markerSet(markersetpath):
    '''
    Read marker set from OpenSim marker set .xml file

    Parameters
    ----------
    markersetpath : string
        path to .xml file

    Returns
    -------
    markers : list of strings
        marker names
    segment : list of strings
        body segment each marker belongs to
    uniqueSegs : list of strings
        body segments, sorted alphabetically
    segID : numpy array of ints
        index into uniqueSegs of the body segment each marker belongs to
    mkcoordL : list of numpy arrays
        position of each marker relative to the local coordinate system of its
        body segment
    num_mks : int
        number of markers
    '''
    markersetxml = xml.dom.minidom.parse(markersetpath)
    mkxml = markersetxml.getElementsByTagName('Marker')
    markers = []
    segment = []
    mkcoordL = []
    for mk in mkxml:
        markers.append(mk.attributes['name'].value)
        # childNodes[3] is the parent-frame element and childNodes[7] the
        # <location> element: odd indices are element nodes, even indices are
        # the whitespace text nodes between them (assumes the standard OpenSim
        # marker set layout -- TODO confirm against other exporters)
        segment.append(mk.childNodes[3].childNodes[0].data)
        # np.fromstring(text, sep=' ') is deprecated; split and convert instead
        mkcoordL.append(np.array(mk.childNodes[7].childNodes[0].data.split(), dtype=np.float64))
    # keep only the segment name, dropping any OpenSim path prefix (e.g. '/bodyset/torso')
    segment = [x.split('/')[-1] for x in segment]
    uniqueSegs = sorted(set(segment))
    segID = np.array([uniqueSegs.index(s) for s in segment], dtype=np.int64)
    num_mks = len(markers)
    return markers, segment, uniqueSegs, segID, mkcoordL, num_mks
def align(data,m1,m2):
    '''
    Rotate points about the vertical (z) axis so the participant faces the +x
    direction. The rotation angle is computed from the first frame in which
    both alignment markers are visible; m1 and m2 should index a marker on the
    right and left side of the torso or head, respectively. If either marker
    is missing for the entire trial, the data are returned unrotated.

    Parameters
    ----------
    data : numpy array
        num_frames x num_markers x 3 matrix of marker trajectories
    m1 : int
        index of the right side marker
    m2 : int
        index of the left side marker

    Returns
    -------
    data : numpy array
        Rotated marker trajectories (or the input unchanged if alignment is
        impossible)
    '''
    nframes = data.shape[0]
    # if alignment markers are missing for entire trial, can't align
    if np.isnan(data[:,m1,0]).sum() == nframes or np.isnan(data[:,m2,0]).sum() == nframes:
        return data
    # first frame where both alignment markers are visible
    frame = 0
    while np.isnan(data[frame,m1,0]) or np.isnan(data[frame,m2,0]):
        frame += 1
    lr = data[frame,m2,:] - data[frame,m1,:]  # vector from right marker to left marker
    theta = np.arctan2(lr[0],lr[1])
    c, s = np.cos(theta), np.sin(theta)
    Rz = np.array([[c, -s, 0.0],
                   [s,  c, 0.0],
                   [0.0, 0.0, 1.0]])
    # rotating every frame's points by Rz: (Rz @ pts.T).T per frame == data @ Rz.T
    return data @ Rz.T
def window_data(data_segs,windowSize,num_mks):
    '''
    Determine how to window data.

    For each marker in each trial, the visible portions of its trajectory
    (runs of frames between NaN gaps) are chopped into windows of windowSize
    frames; the final window of each run is the variable-length remainder.

    Parameters
    ----------
    data_segs : list of numpy arrays
        num_frames x num_markers x 3 arrays of marker trajectories imported from .c3d files
    windowSize : int
        desired size of windows
    num_mks : int
        number of markers of interest
        windows will be created for the first num_mks trajectories

    Returns
    -------
    windowIdx : list of lists
        [trial index, marker index, start frame, stop frame] for each window;
        required input to training function
    '''
    windowIdx = []
    for t in range(len(data_segs)):
        pts = data_segs[t]
        if torch.is_tensor(pts):
            pts = pts.numpy()
        for m in range(num_mks):
            # only include if it's not all nans
            if len(np.where(~np.isnan(pts[:,m,0]))[0]) > 0:
                i1 = np.where(~np.isnan(pts[:,m,0]))[0][0] # first visible frame
                while i1 < pts.shape[0]:
                    # i2 = end of the current visible run (next NaN, or end of trial)
                    if (np.isnan(pts[i1:,m,0])).sum() > 0: # if any more nans
                        i2 = np.where(np.isnan(pts[i1:,m,0]))[0][0] + i1
                    else:
                        i2 = pts.shape[0]
                    # slice [i1,i2) into windows of windowSize frames
                    while i1 <= i2:
                        # terminal case: a full window doesn't fit, or taking one
                        # would leave fewer than 12 frames -- emit [i1,i2) as the
                        # last (variable-length) window of this run
                        if (i2 - (i1+windowSize) < 12) or (i1 + windowSize > i2):
                            if i2 - i1 > 0:
                                # confirm that there are other markers visible in this window
                                if (~np.isnan(np.concatenate((pts[i1:i2,0:m,:],pts[i1:i2,m+1:,:]),1))).sum() > 0:
                                    windowIdx.append([t,m,i1,i2])
                            # jump i1 to the next visible frame after the gap,
                            # or past the end to terminate both loops
                            if (~np.isnan(pts[i2:,m,0])).sum() > 1: # any more visible markers?
                                i1 = i2 + np.where(~np.isnan(pts[i2:,m,0]))[0][0]
                            else:
                                i1 = pts.shape[0] + 1
                        else:
                            # full-size window; skip it if no other marker is visible
                            if (~np.isnan(np.concatenate((pts[i1:i2,0:m,:],pts[i1:i2,m+1:,:]),1))).sum() > 0:
                                windowIdx.append([t,m,i1,i1+windowSize])
                            i1 = i1 + windowSize
    return windowIdx
def import_labelled_c3ds(filelist,markers,alignMkR,alignMkL,windowSize):
    '''
    Import c3d files for training, sort markers to match marker set order,
    filter data, rotate data such that person faces +x, and determine window
    indices.

    Parameters
    ----------
    filelist : list of strings
        list of filepaths to .c3d files to import
    markers : list of strings
        list of marker names
    alignMkR : string
        name of marker to use to rotate data on RIGHT side of body,
        set to None if rotation is not needed
    alignMkL : string
        name of marker to use to rotate data on LEFT side of body,
        set to None if rotation is not needed
    windowSize : int
        desired size of windows

    Returns
    -------
    data_segs : list of torch tensors
        num_frames x num_markers x 3 tensors of marker trajectories imported from .c3d files
    windowIdx : list of lists
        indices to use to window data, required input to training function
    '''
    num_mks = len(markers)
    data_segs = []
    for trial in filelist:
        # Import c3d and reorder points according to marker set order
        c3ddat = c3d(trial)
        alllabels = c3ddat['parameters']['POINT']['LABELS']['value']
        fs = c3ddat['parameters']['POINT']['RATE']['value'][0]
        pts = np.nan * np.ones((c3ddat['data']['points'].shape[2],num_mks,3))
        for i in range(c3ddat['data']['points'].shape[1]):
            # find the marker-set entry whose name appears in this c3d label
            j = [ii for ii,x in enumerate(markers) if x in alllabels[i]]
            if len(j) == 0:
                # if this is an extraneous marker (not part of the defined marker set),
                # add to the end of the array
                dummy = np.empty((pts.shape[0],1,3))
                for k in range(3):
                    dummy[:,0,k] = c3ddat['data']['points'][k,i,:]
                pts = np.append(pts,dummy,axis=1)
            elif len(j) == 1:
                # If this is part of the marker set
                for k in range(3):
                    pts[:,j[0],k] = c3ddat['data']['points'][k,i,:]
        # delete any empty frames at the end; the shape guard prevents an
        # IndexError if a trial is entirely empty
        while pts.shape[0] > 0 and (~np.isnan(pts[-1,:,0])).sum() == 0:
            pts = pts[:-1,:,:]
        # rotate so that the person faces +x
        # BUG FIX: the original condition tested (alignMkL != '') twice and never
        # checked alignMkR != '', so an empty-string alignMkR slipped through and
        # raised ValueError in markers.index() instead of skipping alignment
        if (alignMkR is not None) and (alignMkR != '') and (alignMkL is not None) and (alignMkL != ''):
            pts = align(pts,markers.index(alignMkR),markers.index(alignMkL))
        # Filter with 2nd order, low-pass Butterworth at filtfreq Hz.
        # NaN gaps are interpolated first (filtfilt cannot handle NaNs) and
        # restored afterwards so occlusions are preserved.
        b, a = signal.butter(2,filtfreq,btype='low',fs=fs)
        for k in range(3):
            inan = np.isnan(pts[:,:,k])
            df = pd.DataFrame(pts[:,:,k])
            df = df.interpolate(axis=0,limit_direction='both')
            dummy = signal.filtfilt(b,a,df.to_numpy(),axis=0).copy()
            dummy[inan] = np.nan
            pts[:,:,k] = dummy
        data_segs.append(torch.from_numpy(pts))
    windowIdx = window_data(data_segs,windowSize,num_mks)
    return data_segs, windowIdx
def import_raw_c3d(file,rotang):
    '''
    Import a c3d file for labelling. Trajectories are split at gaps where the
    distance the marker travels during the occlusion is greater than the distance to
    the closest marker in the next frame. Marker data is rotated 'rotang' degrees
    about the z-axis (vertical).

    Parameters
    ----------
    file : string
        path to the c3d file to be imported
    rotang : float
        Angle to rotate the marker data about z-axis in DEGREES.

    Returns
    -------
    pts : numpy array
        num_frames x num_markers x 3 array of marker trajectories imported from .c3d files
    fs : float
        sampling frequency used in c3d file
    labels : list of strings
        original c3d label of each (possibly split) trajectory
    '''
    c3ddat = c3d(file) # read in c3d file
    rawpts = c3ddat['data']['points'][0:3,:,:].transpose((2,1,0)) # Get points from c3d file
    fs = c3ddat['parameters']['POINT']['RATE']['value'] # sampling frequency
    rawlabels = c3ddat['parameters']['POINT']['LABELS']['value']
    # NOTE(review): the block below is an experimental marker-swap fix that was
    # disabled; kept for reference.
    # # Try to find and fix places where the markers swap indices
    # thresh = 20
    # for m in range(rawpts.shape[1]):
    #     kf = np.where(np.isnan(rawpts[1:,m,0]) != np.isnan(rawpts[0:-1,m,0]))[0]
    #     if ~np.isnan(rawpts[0,m,0]):
    #         kf = np.insert(kf,0,-1,axis=0)
    #     if ~np.isnan(rawpts[-1,m,0]):
    #         kf = np.concatenate((kf,[rawpts.shape[0]-1]))
    #     kf = np.reshape(kf,(-1,2))
    #     k = 0
    #     while k < kf.shape[0]-1:
    #         d = np.linalg.norm(rawpts[kf[k+1,0]+1,m,:] - rawpts[kf[k,1],m,:])
    #         all_d = np.linalg.norm(rawpts[kf[k,1]+1,:,:] - rawpts[kf[k,1],m,:],axis=1)
    #         all_d[m] = np.nan
    #         if (~np.isnan(all_d)).sum() > 0:
    #             if d > np.nanmin(all_d) and np.nanmin(all_d) < thresh and \
    #                     np.isnan(rawpts[kf[k,1],np.nanargmin(all_d),0]):
    #                 dummy = rawpts[kf[k,1]+1:,m,:].copy()
    #                 rawpts[kf[k,1]+1:,m,:] = rawpts[kf[k,1]+1:,np.nanargmin(all_d),:]
    #                 rawpts[kf[k,1]+1:,np.nanargmin(all_d),:] = dummy.copy()
    #                 kf = np.where(np.isnan(rawpts[1:,m,0]) != np.isnan(rawpts[0:-1,m,0]))[0]
    #                 if ~np.isnan(rawpts[0,m,0]):
    #                     kf = np.insert(kf,0,0,axis=0)
    #                 if ~np.isnan(rawpts[-1,m,0]):
    #                     kf = np.concatenate((kf,[rawpts.shape[0]-1]))
    #                 kf = np.reshape(kf,(-1,2))
    #         k = k+1
    # Wherever there is a gap, check if the marker jumps further than the distance to the
    # next closest marker. If so, split it into a new trajectory.
    pts = np.empty((rawpts.shape[0],0,3))
    labels = []
    for m in range(rawpts.shape[1]):
        # key frames where the marker appears or disappears: indices where the
        # NaN-ness changes between consecutive frames, padded so kf reshapes to
        # (num_visible_runs, 2) rows of [frame before run starts, last frame of run]
        kf = np.where(np.isnan(rawpts[1:,m,0]) != np.isnan(rawpts[0:-1,m,0]))[0]
        if ~np.isnan(rawpts[0,m,0]):
            kf = np.insert(kf,0,-1,axis=0)
        if ~np.isnan(rawpts[-1,m,0]):
            kf = np.concatenate((kf,[rawpts.shape[0]-1]))
        kf = np.reshape(kf,(-1,2))
        k = 0
        while k < kf.shape[0]:
            i1 = kf[k,0]  # frame before the first run of this output trajectory
            d = 0
            gapsize = 0
            min_d = 1000
            # merge consecutive visible runs across gaps as long as the jump over
            # the gap (d) is smaller than the distance to the nearest other marker
            # (min_d) and the gap is shorter than 60 frames
            while d < min_d and gapsize < 60:
                if k < kf.shape[0]-1:
                    d = np.linalg.norm(rawpts[kf[k+1,0]+1,m,:] - rawpts[kf[k,1],m,:])
                    all_d = np.linalg.norm(rawpts[kf[k,1]+1,:,:] - rawpts[kf[k,1],m,:],axis=1)
                    all_d[m] = np.nan  # exclude the marker itself from the nearest-marker search
                    if (~np.isnan(all_d)).sum() > 0:
                        min_d = np.nanmin(all_d)
                    else:
                        min_d = 1000
                    gapsize = kf[k+1,0] - kf[k,1]
                else:
                    gapsize = 61  # last run: force the merge loop to stop
                k=k+1
            # keep the merged trajectory only if it is longer than 2 frames
            if kf[k-1,1] - i1 > 2:
                traj = np.nan * np.ones((rawpts.shape[0],1,3))
                traj[i1+1:kf[k-1,1]+1,0,:] = rawpts[i1+1:kf[k-1,1]+1,m,:]
                pts = np.append(pts,traj,axis=1)
                labels.append(rawlabels[m])
    # Angle to rotate points about z-axis (degrees -> radians)
    rotang = float(rotang) * np.pi/180
    Ralign = np.array([[np.cos(rotang),-np.sin(rotang),0],
                       [np.sin(rotang),np.cos(rotang),0],
                       [0,0,1]])
    for i in range(pts.shape[1]):
        pts[:,i,:] = np.matmul(Ralign,pts[:,i,:].transpose()).transpose()
    return pts, fs, labels
# --------------------------------------------------------------------------- #
# ------------------------- NEURAL NET FUNCTIONS ---------------------------- #
# --------------------------------------------------------------------------- #
def get_trainingVals(data_segs,uniqueSegs,segID):
    '''
    Calculates the values that will be used to scale the input matrix to the neural
    network. These are the mean observed relative distances, velocities, and
    accelerations from 2000 trials from the training set.
    Calculates the mean and standard deviation of distances among markers belonging
    to each body segment. These are used to validate and correct the labels
    predicted by the neural network.

    Parameters
    ----------
    data_segs : list of torch tensors
        num_frames x num_markers x 3 tensors of marker trajectories in training set
    uniqueSegs : list of strings
        body segment names
    segID : list of ints
        index of body segments each marker belongs to

    Returns
    -------
    scaleVals : list of floats
        mean relative distance, velocity, and acceleration in training set and
        number of data frames used to calculate these
    segdists : dictionary
        ['mean'] : numpy arrays for each body segment containing mean distances
                   among associated markers in training set
        ['std'] : numpy arrays for each body segment containing standard deviation
                  of distances among associated markers in training set
        ['nframes'] : number of frames used to calculate these values
    '''
    # Get scale values: NaN-aware running sums and counts of relative
    # positions, frame-to-frame velocities, and accelerations
    sumDist = 0.0
    sumVel = 0.0
    sumAccn = 0.0
    nDist = 0.0
    nVel = 0.0
    nAccn = 0.0
    # Only use 2000 segments to save computing time
    if len(data_segs) > 2000:
        I = random.sample(range(len(data_segs)),2000)
    else:
        I = range(len(data_segs))
    for i in I:
        for m in range(int(data_segs[i].shape[1])):
            # marker distances relative to marker m
            xyz = data_segs[i] - data_segs[i][:,m,:].unsqueeze(1).repeat(1,data_segs[i].shape[1],1)
            xyz_v = xyz[1:,:,:] - xyz[0:xyz.shape[0]-1,:,:]
            xyz_v_norm = xyz_v.norm(dim=2)
            xyz_a = xyz_v[1:,:,:] - xyz_v[0:xyz_v.shape[0]-1,:,:]
            xyz_a_norm = xyz_a.norm(dim=2)
            # counts exclude marker m itself (its relative values are identically zero)
            sumDist = sumDist + np.nansum(abs(xyz))
            nDist = nDist + (~torch.isnan(xyz[:,0:m,:])).sum() + (~torch.isnan(xyz[:,m+1:,:])).sum() #xyz.shape[0]*(xyz.shape[1]-1)*xyz.shape[2]
            sumVel = sumVel + np.nansum(xyz_v_norm)
            nVel = nVel + (~torch.isnan(xyz_v_norm[:,0:m])).sum() + (~torch.isnan(xyz_v_norm[:,m+1:])).sum() #xyz_v_norm.shape[0] * (xyz_v_norm.shape[1]-1)
            sumAccn = sumAccn + np.nansum(xyz_a_norm)
            nAccn = nAccn + (~torch.isnan(xyz_a_norm[:,0:m])).sum() + (~torch.isnan(xyz_a_norm[:,m+1:])).sum() # xyz_a_norm.shape[0] * (xyz_a_norm.shape[1]-1)
    scaleVals = [sumDist/nDist, sumVel/nVel, sumAccn/nAccn,nDist,nVel,nAccn]
    # Calculate distances between markers on same body segments
    dists_mean = []
    dists_std = []
    nframes = 0
    for i in range(len(data_segs)):
        nframes = nframes + data_segs[i].shape[0]
    for bs in range(len(uniqueSegs)):
        I = np.where(segID == bs)[0] # indices of markers on this body segment
        # pairwise distances over all frames; only the upper triangle is filled here
        dists = np.zeros((I.shape[0],I.shape[0],nframes))
        dists_mean.append(np.zeros((I.shape[0],I.shape[0])))
        dists_std.append(np.zeros((I.shape[0],I.shape[0])))
        k = 0
        for i in range(len(data_segs)):
            pts = data_segs[i]
            for m1 in range(len(I)):
                for m2 in range(m1+1,len(I)):
                    dists[m1,m2,k:k+data_segs[i].shape[0]] = \
                        (pts[:,I[m1],:] - pts[:,I[m2],:]).norm(dim=1).numpy()
            k = k+data_segs[i].shape[0]
        # NOTE(review): plain mean/std (not nanmean/nanstd) is used here, so any
        # NaN in the training trajectories would propagate -- presumably the
        # training data is gap-free (simulated) at this point; confirm for c3d input
        dists_mean[bs] = dists.mean(axis=2)
        dists_std[bs] = dists.std(axis=2)
        # mirror the upper triangle so the matrices are symmetric
        for i in range(1,dists_mean[bs].shape[0]):
            for j in range(0,i):
                dists_mean[bs][i,j] = dists_mean[bs][j,i]
                dists_std[bs][i,j] = dists_std[bs][j,i]
    segdists = {'mean' : dists_mean,'std' : dists_std,'nframes' : nframes}
    return scaleVals, segdists
# Generates the input data for the neural network.
class markerdata(torch.utils.data.Dataset):
    '''
    Dataset yielding one training example per data window: a
    (window_len) x ((num_mks-1)*5) input matrix of relative positions,
    velocity norms, and acceleration norms, plus the target marker index.
    '''
    def __init__(self,marker_data,num_mks,windowIdx,scaleVals):
        # deep copy so per-item NaN-filling never mutates the caller's data
        self.marker_data = copy.deepcopy(marker_data)
        self.num_mks = num_mks # Number of marker labels
        self.windowIdx = windowIdx
        self.scaleVals = scaleVals
    def __len__(self): # Should be the number of items in this dataset
        return len(self.windowIdx)
    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist() # index will be row major
        num_mks = self.num_mks
        # unpack the window: trial t, target marker m, frame range [i1,i2)
        t = self.windowIdx[idx][0]
        m = self.windowIdx[idx][1]
        i1 = self.windowIdx[idx][2]
        i2 = self.windowIdx[idx][3]
        xyz_raw = copy.deepcopy(self.marker_data[t][i1:i2,:,:])
        xyz_m = xyz_raw[:,m,:] # current marker
        # drop the target marker's own column from the candidate trajectories
        xyz_raw = torch.cat((xyz_raw[:,0:m,:],xyz_raw[:,m+1:,:]),dim=1)
        # check what is visible at this time and take the markers that are
        # visible for the greatest number of frames
        mksvis = (~torch.isnan(xyz_raw[:,:,0])).sum(dim=0)
        if (mksvis == xyz_raw.shape[0]).sum() > num_mks:
            # more fully-visible markers than needed: then also sort by distance
            xyz_raw = xyz_raw[:,mksvis==xyz_raw.shape[0],:]
            d = (xyz_raw - xyz_m.unsqueeze(1).repeat(1,xyz_raw.shape[1],1)).norm(dim=2)
            _,I = (d.mean(0)).sort()
            xyz_raw = xyz_raw[:,I[0:num_mks-1],:]
        else:
            _,I = mksvis.sort(descending=True)
            xyz_raw = xyz_raw[:,I[0:num_mks-1],:]
        # Fill in any missing markers with the mean of all of the other visible markers
        if torch.isnan(xyz_raw[:,:,0]).sum() > 0:
            inan = torch.where(torch.isnan(xyz_raw))
            # nanmean over markers gives a (frames, 3) array; the flat index
            # inan[0]*3+inan[2] picks the (frame, coordinate) entry for each NaN
            xyz_raw[inan] = torch.take(torch.from_numpy(np.nanmean(xyz_raw,1)),
                                       inan[0]*3+inan[2])
        if torch.isnan(xyz_raw[:,:,0]).any(1).sum() > 0:
            # if there somehow ended up to be empty frames, delete them
            xyz_m = xyz_m[~torch.isnan(xyz_raw[:,:,0]).any(1),:]
            xyz_raw = xyz_raw[~torch.isnan(xyz_raw[:,:,0]).any(1),:,:]
        # express all candidates relative to the target marker
        xyz_raw = xyz_raw - xyz_m.unsqueeze(1).repeat(1,xyz_raw.shape[1],1)
        d = (xyz_raw.mean(dim=0)).norm(dim=1)
        _, I = d.sort() # Sort trajectories by distance relative to marker m
        xyz = xyz_raw[:,I,:]
        # Add in velocity and accn (first-row copies avoid a spurious zero at frame 0)
        xyz_v = torch.zeros(xyz.shape,dtype=xyz.dtype)
        xyz_v[1:,:,:] = xyz[1:,:,:] - xyz[0:xyz.shape[0]-1,:,:]
        xyz_v_norm = xyz_v.norm(dim=2)
        if xyz_v_norm.shape[0] > 1:
            xyz_v_norm[0,:] = xyz_v_norm[1,:]
        xyz_a = torch.zeros(xyz.shape,dtype=xyz.dtype)
        xyz_a[1:,:,:] = xyz_v[1:xyz_v.shape[0],:,:] - xyz_v[0:xyz_v.shape[0]-1,:,:]
        xyz_a_norm = xyz_a.norm(dim=2)
        if xyz_a_norm.shape[0] > 2:
            xyz_a_norm[1,:] = xyz_a_norm[2,:]
            xyz_a_norm[0,:] = xyz_a_norm[2,:]
        # Scale input data by the training-set mean distance/velocity/acceleration
        xyz = xyz / self.scaleVals[0]
        xyz_v_norm = xyz_v_norm / self.scaleVals[1]
        xyz_a_norm = xyz_a_norm / self.scaleVals[2]
        out = torch.cat((xyz,xyz_v_norm.unsqueeze(2),xyz_a_norm.unsqueeze(2)),2)
        out = out.reshape(-1,(num_mks-1)*5)
        return out, m, idx
# Collate function for the DataLoader: drops items whose input tensor is None
# and zero-pads the remaining variable-length sequences to a common length.
def pad_collate(batch):
    kept = [item for item in batch if item[0] is not None]
    seqs, labels, win_ids = zip(*kept)
    # record the true (unpadded) length of every sequence for pack_padded_sequence
    lengths = [seq.shape[0] for seq in seqs]
    padded = nn.utils.rnn.pad_sequence(seqs, batch_first=True, padding_value=0)
    return padded, list(labels), list(win_ids), lengths
# Define network architecture
class Net(nn.Module):
    '''
    LSTM + fully-connected network for marker label classification.

    Each input window of shape (batch, max_len, (num_mks-1)*5) is run
    through an LSTM; the padded LSTM outputs are flattened and fed to a
    two-layer fully-connected head that produces one score per marker
    label. Relies on the module-level hyperparameters nLSTMcells,
    nLSTMlayers, LSTMdropout and FCnodes.
    '''
    def __init__(self, max_len, num_mks):
        super().__init__()
        self.max_len = max_len
        self.lstm = nn.LSTM((num_mks - 1) * 5, nLSTMcells,
                            num_layers=nLSTMlayers, dropout=LSTMdropout)
        self.fc = nn.Sequential(
            nn.Linear(max_len * nLSTMcells, FCnodes),
            nn.BatchNorm1d(FCnodes),
            nn.ReLU(),
            nn.Linear(FCnodes, num_mks))
    def forward(self, x, x_lens):
        # Pack the padded batch so the LSTM skips the zero padding
        packed = torch.nn.utils.rnn.pack_padded_sequence(
            x.float(), x_lens, batch_first=True, enforce_sorted=False)
        lstm_out, _ = self.lstm(packed)
        # Re-pad to the fixed max_len expected by the FC head
        unpacked, _ = torch.nn.utils.rnn.pad_packed_sequence(
            lstm_out, batch_first=True, total_length=self.max_len)
        flat = unpacked.reshape(unpacked.shape[0], -1)
        return self.fc(flat)
def train_nn(data_segs,num_mks,max_len,windowIdx,scaleVals,num_epochs,prevModel,
             tempCkpt=None,contFromTemp=False):
    '''
    Train the neural network.
    Will use GPU if available.
    Parameters
    ----------
    data_segs : list of torch tensors
        num_frames x num_markers x 3 tensors of marker trajectories in training set
    num_mks : int
        number of markers in marker set
    max_len : int
        maximum window length
    windowIdx : list of lists
        indices to use to window data, required input to training function
    scaleVals : list of floats
        mean relative distance, velocity, and acceleration in training set and
        number of data frames used to calculate these. Used to scale variables
        before inputting to neural network.
    num_epochs : int
        number of epoch to train for
    prevModel : string
        path to the .ckpt file for a previously trained model if using transfer
        learning
        set to None if not using transfer learning
    tempCkpt : string
        path to save model training progress after each epoch .ckpt
        Set to None to only save after training completes
    contFromTemp : boolean
        set to True to continue a partially completed training from the tempCkpt file

    Returns
    -------
    net : torch.nn.Module
        trained neural network
    running_loss: list of floats
        running loss for network training

    Raises
    ------
    ValueError
        if contFromTemp is True but no tempCkpt path was provided
    '''
    # Use GPU if available
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # Create dataset and torch data loader
    traindata = markerdata(data_segs,num_mks,windowIdx,scaleVals)
    trainloader = torch.utils.data.DataLoader(traindata,batch_size=batch_size,
                                              shuffle=True,collate_fn=pad_collate)

    # Create neural net
    net = Net(max_len,num_mks).to(device)

    # Load previous model if transfer learning
    if (prevModel is not None) and (prevModel != ''):
        net.load_state_dict(torch.load(prevModel,map_location=device))

    # Define loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(net.parameters(), lr=lr, momentum=momentum)

    # Load a partially trained model to complete training
    if contFromTemp:
        if tempCkpt is None:
            # Fail early with a clear message instead of crashing in torch.load
            raise ValueError('contFromTemp is True but no tempCkpt path was given')
        checkpoint = torch.load(tempCkpt)
        net.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        epoch0 = checkpoint['epoch']
        running_loss = checkpoint['running_loss']
        loss = checkpoint['loss']  # restored so the per-epoch save below stays valid
        torch.set_rng_state(checkpoint['rng_state'])
    else:
        epoch0 = 0
        running_loss = []

    # Train Network
    total_step = len(trainloader)
    for epoch in range(epoch0,num_epochs):
        for i, (data, labels, trials, data_lens) in enumerate(trainloader):
            data = data.to(device)
            labels = torch.LongTensor(labels)
            labels = labels.to(device)

            # Forward pass
            outputs = net(data,data_lens)
            loss = criterion(outputs,labels)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss.append(loss.item())

            # Print stats every 10 steps
            if (i+1) % 10 == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch+1,num_epochs,i+1,
                      total_step,loss.item()))
        # Save a resumable checkpoint after each epoch if requested
        if tempCkpt is not None:
            torch.save({'epoch': epoch+1,'model_state_dict': net.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict(),'running_loss': running_loss,'loss' : loss,
                        'rng_state': torch.get_rng_state()},tempCkpt)
    return net, running_loss
def predict_nn(modelpath,pts,windowIdx,scaleVals,num_mks,max_len):
'''
Run the neural network to get label probabilities
Parameters
----------
modelpath : string
path to .ckpt file of trained neural network weights
pts : numpy array
num_frames x num_markers x 3 array of marker trajectories to be labelled
windowIdx : list of lists
indices to use to window data, required input to training function
scaleVals : list of floats
mean relative distance, velocity, and acceleration in training set and
number of data frames used to calculate these. Used to scale variables
before inputting to neural network.
num_mks : int
number of markers in marker set
max_len : int
max length of data windows
Returns
-------
probWindow : torch tensor
num_windows x num_mks tensor of label probabilities for each window