 The goal is to provide a database of false examples for training the algorithm on a chosen exercise, waterbag hard.
 All the data provided is put together in a single table and repetitions of each exercise are separated into unique rows of data.
 The main difficulty is to resize the range of movements so they all fit into the database. Within one exercise the length of movements is rather similar so deciding on a standard time does not impact the result; but using the same time for all exercises is misleading. The chosen solution is to remove or add (using forward-fill) lines of data at regular intervals until all data fit into the fixed 137 (i.e. 1.37 seconds) length, which is appropriate for the waterbag exercise.

In [1]:
import pandas as pd
trackers=pd.read_csv('data1.csv')
trackers.head()

Unnamed: 0,gyroX,gyroY,gyroZ,lowAccelX,lowAccelY,lowAccelZ,highAccelX,highAccelY,highAccelZ,exercise
0,617.0,342.0,-1120.0,1338.0,2865.0,2215.0,-267.0,686.0,621.0,(jab_)cross_shadowboxing_medium_20_T7R_1531551...
1,477.0,-49.0,-1214.0,1516.0,3041.0,2332.0,-249.0,704.0,633.0,(jab_)cross_shadowboxing_medium_20_T7R_1531551...
2,228.0,-596.0,-1256.0,1602.0,3406.0,1796.0,-240.0,740.0,579.0,(jab_)cross_shadowboxing_medium_20_T7R_1531551...
3,24.0,-1098.0,-1276.0,1264.0,3484.0,1194.0,-274.0,748.0,519.0,(jab_)cross_shadowboxing_medium_20_T7R_1531551...
4,-164.0,-1552.0,-1236.0,1016.0,3876.0,576.0,-299.0,787.0,457.0,(jab_)cross_shadowboxing_medium_20_T7R_1531551...


In [2]:
#Using a margin of 68 before and after the maximum as a standard based on waterbag hard (68*2+1=137)
sensors_columns=list(trackers.columns)
sensors_columns.remove('exercise')
col_names=[sensors_columns[i]+str(n) for n in range(0,137) for i in range(0,len(sensors_columns)) ]
flat_trackers = pd.DataFrame(columns=col_names)

In [3]:
#Find max of each row
trackers['max_value']=trackers.max(axis=1)
trackers.shape

(361386, 11)

In [4]:
trackers.dropna(inplace=True)

In [5]:
trackers.shape

(361310, 11)

In [6]:
#Count movements in each series from the series title (when the count is indicated)
trackers['repetitions']=trackers['exercise'].str.extract('(\d+)_T7', expand=True)
trackers.repetitions = pd.to_numeric(trackers.repetitions,errors='coerce')
#for series of noise, the range is a factor of the length of time / 137 (our chosen standard length based on waterbag_hard)
trackers['length'] = trackers.groupby(['exercise']).transform('count')['gyroX']
fill_value=trackers['length']/137
trackers['repetitions'].fillna(round(fill_value,0),inplace=True)
trackers.repetitions.unique()

array([ 20.,  10.,   8.,   9., 177., 178.,  40.,  44.,  32., 125., 121.,
       120.,  19.])

In [7]:
#Create a list of exercise names (there is a problem with the end parenthesis of the files starting with '(jab_)')
list_names=[s.replace('(jab_)','').replace('jab(_cross)','') for s in trackers['exercise'].unique()]

The below solution doesn't work very well. A demonstration of RapidMiner has shown that instead of trying to identify the local maximum as both a clue on the number of repetitions and the median of one movement, it would be more efficient to train the model to recognize the beginning of a pattern by using the first movement as a model. We propose to try to imitate this RapidMiner approach in another program in the future.

In [8]:
#Find local maximums & extract margin
def find_max(df):
    reps=int(df.repetitions.unique())
    maximums=df.max_value.sort_values(ascending=False)[0:reps] #imperfect solution when the maximum is reached slowly
    maximums.sort_index(inplace=True)
    #Select only the maximums corresponding to the number of repetitions
    margin=int((maximums.index[reps-1]-maximums.index[0])/(reps-1)/2)
    start=df.index[0]
    return maximums.index.values,margin,start

In [9]:
#Function to filter a list randomly by a certain ratio.
#Will be used to create a new index starting at the start of the df and jumping at random intervals for df longer than 137
import random
def strip_index(l,n):
    return sorted(random.sample(l,int(len(l)*(1-n))))
print(strip_index(list(range(1,11)),0.25))

[1, 3, 4, 5, 7, 8, 9]


In [10]:
#function to randomly inflate a list with zeros. Will be used to reindex df of length shorter than 137
def inflate_index(lst, n):
    insert_locations = random.sample(range(1,len(lst) + n), n)
    inserts = dict(zip(insert_locations, [0]*n))
    input = iter(lst)
    lst=[inserts[pos] if pos in inserts else next(input) for pos in list(range(len(lst)+n))]
    return lst
inflate_index(range(100,110),4)

[100, 0, 101, 102, 103, 0, 104, 105, 106, 107, 108, 0, 0, 109]

In [11]:
import numpy as np

for i in range(0,len(list_names)):
    #Take each exercise one by one
    exercise_indices = trackers[trackers.exercise.str.contains(list_names[i])].index
    exercise=trackers.loc[exercise_indices, :]
    print(list_names[i])
    loc_maxs,margin,start=find_max(exercise)
    #Separate by individual movements 
    for j in loc_maxs:     
        #cut into distinct movements
        exercise1=exercise.drop(['exercise','max_value','repetitions','length'],axis=1)
        one_mvt=exercise1[j-margin-start:j+1+margin-start]
          
        #Standardize the length of movements to 1.37 seconds (137 rows) (2)
        if margin*2+1>137:
            new_index=strip_index(list(range(j-margin,j+margin+1)),1-137/(margin*2+1))
        elif margin*2+1<=137:
            new_index=inflate_index(list(range(j-margin,j+margin+1)),137-(margin*2+1))
        one_mvt=one_mvt.reindex(new_index)
        #one_mvt.fillna(method='ffill',inplace=True)
        one_mvt.interpolate(inplace=True)
            
        #Flatten the data
        one_mvt.reset_index(inplace=True)
        one_mvt=one_mvt.drop('index',axis=1)
        one_mvt=one_mvt.reindex(range(0,137))
        a=pd.Series(one_mvt.values.flatten())
        a=a.rename(index=lambda x:col_names[x]).T
        
        #Add back name columns
        a['exercise']=list_names[i]
        
        #Fit into one dataframe
        flat_trackers=flat_trackers.append(a,ignore_index=True)

cross_shadowboxing_medium_20_T7R_1531551242371.txt
cross_shadowboxing_slow_20_T7R_1531551153638.txt
cross_waterbag_hard_20_T7R_1531551996366.txt
cross_waterbag_light_20_T7R_1531551877937.txt
cross_waterbag_mediumpower_20_T7R_1531551924743.txt
biceps_curls_10_T7L_1531913275450.txt
biceps_curls_10_T7L_1531913463782.txt
biceps_curls_10_T7L_1531913706454.txt
biceps_curls_10_T7R_1531913275450.txt
biceps_curls_10_T7R_1531913463782.txt
biceps_curls_10_T7R_1531913706454.txt
deadlifts_heavy_8_T7L_1531912543772.txt
deadlifts_heavy_8_T7L_1531912657776.txt
deadlifts_heavy_8_T7L_1531912925288.txt
deadlifts_heavy_8_T7R_1531912543772.txt
deadlifts_heavy_8_T7R_1531912657776.txt
deadlifts_heavy_8_T7R_1531912925288.txt
deadlifts_heavy_9_T7L_1531912395989.txt
deadlifts_heavy_9_T7R_1531912395989.txt
freestyle_punches_waterbag_T11L_1531742535492.txt
freestyle_punches_waterbag_T11R_1531742535492.txt
hooks_waterbag_hard_20_T7L_1531552258694.txt
hooks_waterbag_hard_20_T7R_1531552258694.txt
hooks_waterbag_hard

In [12]:
flat_trackers

Unnamed: 0,gyroX0,gyroY0,gyroZ0,lowAccelX0,lowAccelY0,lowAccelZ0,highAccelX0,highAccelY0,highAccelZ0,gyroX1,...,gyroX136,gyroY136,gyroZ136,lowAccelX136,lowAccelY136,lowAccelZ136,highAccelX136,highAccelY136,highAccelZ136,exercise
0,-693.0,-531.0,2708.0,1812.0,-1997.0,1564.0,-219.0,201.0,556.0,-670.285714,...,-480.0,1850.0,1470.0,-1070.0,-5290.0,2420.0,-507.0,-129.0,642.0,cross_shadowboxing_medium_20_T7R_1531551242371...
1,-1119.0,-703.0,1079.0,-36.0,-4948.0,1912.0,-403.0,-94.0,591.0,-1065.000000,...,-1156.0,912.0,1422.0,-756.0,-5540.0,1520.0,-475.0,-154.0,552.0,cross_shadowboxing_medium_20_T7R_1531551242371...
2,-1011.0,292.0,1235.0,-378.0,-5184.0,1796.0,-437.0,-118.0,579.0,-902.000000,...,-726.0,618.0,1356.0,-1152.0,-4842.0,1716.0,-515.0,-84.0,571.0,cross_shadowboxing_medium_20_T7R_1531551242371...
3,-1156.0,912.0,1422.0,-756.0,-5540.0,1520.0,-475.0,-154.0,552.0,-726.000000,...,-790.0,958.0,1332.0,-312.0,-4842.0,1466.0,-431.0,-84.0,546.0,cross_shadowboxing_medium_20_T7R_1531551242371...
4,-790.0,958.0,1332.0,-312.0,-4842.0,1466.0,-431.0,-84.0,546.0,-767.666667,...,-1432.0,706.0,1232.0,-442.0,-5674.0,798.0,-444.0,-167.0,479.0,cross_shadowboxing_medium_20_T7R_1531551242371...
5,-822.0,459.0,1203.0,-648.0,-5266.0,1608.0,-464.0,-126.0,560.0,-670.333333,...,-770.0,1465.0,1470.0,-612.0,-4630.0,1780.0,-461.0,-63.0,578.0,cross_shadowboxing_medium_20_T7R_1531551242371...
6,-770.0,1465.0,1470.0,-612.0,-4630.0,1780.0,-461.0,-63.0,578.0,-720.000000,...,-1330.0,1084.0,1128.0,-384.0,-4900.0,1170.0,-438.0,-90.0,517.0,cross_shadowboxing_medium_20_T7R_1531551242371...
7,-1036.0,744.0,1074.0,-580.0,-4920.0,1714.0,-458.0,-92.0,571.0,-1014.333333,...,-1090.0,554.0,1254.0,-100.0,-4426.0,1442.0,-410.0,-42.0,544.0,cross_shadowboxing_medium_20_T7R_1531551242371...
8,-906.0,190.0,1240.0,-382.0,-4100.0,1715.0,-438.0,-10.0,571.0,-735.000000,...,-878.0,1436.0,1221.0,774.0,-4427.0,1165.0,-323.0,-42.0,516.0,cross_shadowboxing_medium_20_T7R_1531551242371...
9,-666.0,477.0,1308.0,-68.0,-3620.0,2013.0,-406.0,38.0,601.0,-659.000000,...,-1060.0,214.0,1336.0,935.0,-4219.0,2141.0,-307.0,-21.0,614.0,cross_shadowboxing_medium_20_T7R_1531551242371...


In [13]:
flat_trackers.to_csv('flat_data.csv',index=False)

In [16]:
#Function to add descriptive statistics
def descr_stats(df,mvt_df):
    statistics=['mean','std','min','max']
    describe_df=mvt_df.describe()
    for k in sensors_columns:
        for l in statistics:
            stat_name=str(k)+'_'+str(l)
            df[stat_name]=describe_df[k].loc[l]
    return df[stat_name]

In [17]:
flat_statistics=pd.DataFrame()
for i in range(0,len(list_names)):
    #Take each exercise one by one
    exercise_indices = trackers[trackers.exercise.str.contains(list_names[i])].index
    exercise=trackers.loc[exercise_indices, :]
    print(list_names[i])
    loc_maxs,margin,start=find_max(exercise)
    #Separate by individual movements 
    for j in loc_maxs:     
        #cut into distinct movements
        exercise1=exercise.drop(['exercise','max_value','repetitions','length'],axis=1)
        one_mvt=exercise1[j-margin-start:j+1+margin-start]     
        b=pd.Series()
        b['exercise']=list_names[i]
        b['length_mvt']=margin*2+1
        b['length_exercise']=exercise.describe().loc['count'][0]
        descr_stats(b,one_mvt)
        flat_statistics=flat_statistics.append(b,ignore_index=True)

cross_shadowboxing_medium_20_T7R_1531551242371.txt
cross_shadowboxing_slow_20_T7R_1531551153638.txt
cross_waterbag_hard_20_T7R_1531551996366.txt
cross_waterbag_light_20_T7R_1531551877937.txt
cross_waterbag_mediumpower_20_T7R_1531551924743.txt
biceps_curls_10_T7L_1531913275450.txt
biceps_curls_10_T7L_1531913463782.txt
biceps_curls_10_T7L_1531913706454.txt
biceps_curls_10_T7R_1531913275450.txt
biceps_curls_10_T7R_1531913463782.txt
biceps_curls_10_T7R_1531913706454.txt
deadlifts_heavy_8_T7L_1531912543772.txt
deadlifts_heavy_8_T7L_1531912657776.txt
deadlifts_heavy_8_T7L_1531912925288.txt
deadlifts_heavy_8_T7R_1531912543772.txt
deadlifts_heavy_8_T7R_1531912657776.txt
deadlifts_heavy_8_T7R_1531912925288.txt
deadlifts_heavy_9_T7L_1531912395989.txt
deadlifts_heavy_9_T7R_1531912395989.txt
freestyle_punches_waterbag_T11L_1531742535492.txt
freestyle_punches_waterbag_T11R_1531742535492.txt
hooks_waterbag_hard_20_T7L_1531552258694.txt
hooks_waterbag_hard_20_T7R_1531552258694.txt
hooks_waterbag_hard

In [18]:
flat_statistics.head()

Unnamed: 0,exercise,gyroX_max,gyroX_mean,gyroX_min,gyroX_std,gyroY_max,gyroY_mean,gyroY_min,gyroY_std,gyroZ_max,...,lowAccelX_min,lowAccelX_std,lowAccelY_max,lowAccelY_mean,lowAccelY_min,lowAccelY_std,lowAccelZ_max,lowAccelZ_mean,lowAccelZ_min,lowAccelZ_std
0,cross_shadowboxing_medium_20_T7R_1531551242371...,6469.0,9.685714,-6130.0,2897.223547,7562.0,174.685714,-9640.0,3514.842418,4645.0,...,-10490.0,6751.292721,28628.0,-106.228571,-5600.0,7524.463429,5274.0,777.4,-6740.0,2470.094321
1,cross_shadowboxing_medium_20_T7R_1531551242371...,7605.0,-199.457143,-5688.0,2901.334869,8195.0,-248.4,-10366.0,3587.225507,5278.0,...,-8604.0,6000.038164,29190.0,-575.6,-6826.0,8328.615661,7354.0,499.6,-6514.0,2595.539491
2,cross_shadowboxing_medium_20_T7R_1531551242371...,7605.0,-188.228571,-5688.0,2898.430469,8195.0,-210.657143,-10366.0,3589.250626,5278.0,...,-8604.0,6001.783173,29190.0,-572.571429,-6826.0,8326.998058,7354.0,494.0,-6514.0,2592.612332
3,cross_shadowboxing_medium_20_T7R_1531551242371...,7079.0,41.2,-5586.0,3064.371693,8057.0,-59.285714,-7266.0,3002.697058,4432.0,...,-6058.0,5327.892535,28882.0,-567.371429,-5860.0,7678.061628,5468.0,265.914286,-5104.0,1887.553402
4,cross_shadowboxing_medium_20_T7R_1531551242371...,6440.0,-68.542857,-5383.0,3140.043811,9255.0,-195.942857,-11190.0,3878.607777,4528.0,...,-6542.0,5415.247724,27330.0,-568.228571,-6300.0,7050.464653,5572.0,560.714286,-5386.0,2042.10541


In [19]:
flat_statistics.shape

(2885, 39)

In [20]:
flat_statistics.to_csv('flat_statistics.csv',index=False)