 The goal is to provide a database of false examples for training the algorithm on a chosen exercise, waterbag hard.
 All the data provided is put together in a single table and repetitions of each exercise are separated into unique rows of data.
 The main difficulty is to resize the range of movements so they all fit into the database. Within one exercise the length of movements is rather similar so deciding on a standard time does not impact the result; but using the same time for all exercises is misleading. The chosen solution is to remove or add (using forward-fill) lines of data at regular intervals until all data fit into the fixed 137 (i.e. 1.37 seconds) length, which is appropriate for the waterbag exercise.

In [1]:
import pandas as pd
trackers=pd.read_csv('data2.csv')
trackers.head()

Unnamed: 0,gyroX,gyroY,gyroZ,lowAccelX,lowAccelY,lowAccelZ,highAccelX,highAccelY,highAccelZ,exercise
0,0.0,0.0,0.0,-120.0,8340.0,12970.0,-412.0,1234.0,1697.0,burpees_normal_speed_10_TM01L_1534060644190.txt
1,0.0,0.0,0.0,-75.0,8600.0,13105.0,-407.0,1260.0,1710.0,burpees_normal_speed_10_TM01L_1534060644190.txt
2,0.0,0.0,0.0,-30.0,8860.0,13240.0,-403.0,1286.0,1724.0,burpees_normal_speed_10_TM01L_1534060644190.txt
3,0.0,0.0,0.0,-74.0,9040.0,13310.0,-407.0,1304.0,1731.0,burpees_normal_speed_10_TM01L_1534060644190.txt
4,0.0,0.0,0.0,-140.0,9310.0,13380.0,-414.0,1331.0,1738.0,burpees_normal_speed_10_TM01L_1534060644190.txt


In [2]:
#Using a margin of 68 before and after the maximum as a standard based on waterbag hard (68*2+1=137)
sensors_columns=list(trackers.columns)
sensors_columns.remove('exercise')
col_names=[sensors_columns[i]+str(n) for n in range(0,137) for i in range(0,len(sensors_columns)) ]
flat_trackers = pd.DataFrame(columns=col_names)

In [3]:
#Find max of each row
trackers['max_value']=trackers.max(axis=1)
trackers.shape

(121700, 11)

In [4]:
trackers.dropna(inplace=True)

In [5]:
trackers.shape

(121650, 11)

In [6]:
#Count movements in each series from the series title (when the count is indicated)
trackers['repetitions']=trackers['exercise'].str.extract('(\d+)_T7', expand=True)
trackers.repetitions = pd.to_numeric(trackers.repetitions,errors='coerce')
#for series of noise, the range is a factor of the length of time / 137 (our chosen standard length based on waterbag_hard)
trackers['length'] = trackers.groupby(['exercise']).transform('count')['gyroX']
fill_value=trackers['length']/137
trackers['repetitions'].fillna(round(fill_value,0),inplace=True)
trackers.repetitions.unique()

array([26., 11., 13., 22., 16.])

In [7]:
#Create a list of exercise names (there is a problem with the end parenthesis of the files starting with '(jab_)')
list_names=[s.replace('(jab_)','').replace('jab(_cross)','') for s in trackers['exercise'].unique()]

The below solution doesn't work very well. A demonstration of RapidMiner has shown that instead of trying to identify the local maximum as both a clue on the number of repetitions and the median of one movement, it would be more efficient to train the model to recognize the beginning of a pattern by using the first movement as a model. We propose to try to imitate this RapidMiner approach in another program in the future.

In [8]:
#Find local maximums & extract margin
def find_max(df):
    reps=int(df.repetitions.unique())
    maximums=df.max_value.sort_values(ascending=False)[0:reps] #imperfect solution when the maximum is reached slowly
    maximums.sort_index(inplace=True)
    #Select only the maximums corresponding to the number of repetitions
    margin=int((maximums.index[reps-1]-maximums.index[0])/(reps-1)/2)
    start=df.index[0]
    return maximums.index.values,margin,start

In [9]:
#Function to filter a list randomly by a certain ratio.
#Will be used to create a new index starting at the start of the df and jumping at random intervals for df longer than 137
import random
def strip_index(l,n):
    return sorted(random.sample(l,int(len(l)*(1-n))))
print(strip_index(list(range(1,11)),0.25))

[1, 3, 5, 6, 8, 9, 10]


In [10]:
#function to randomly inflate a list with zeros. Will be used to reindex df of length shorter than 137
def inflate_index(lst, n):
    insert_locations = random.sample(range(1,len(lst) + n), n)
    inserts = dict(zip(insert_locations, [0]*n))
    input = iter(lst)
    lst=[inserts[pos] if pos in inserts else next(input) for pos in list(range(len(lst)+n))]
    return lst
inflate_index(range(100,110),4)

[100, 101, 102, 103, 104, 0, 0, 105, 106, 107, 0, 0, 108, 109]

In [11]:
import numpy as np

for i in range(0,len(list_names)):
    #Take each exercise one by one
    exercise_indices = trackers[trackers.exercise.str.contains(list_names[i])].index
    exercise=trackers.loc[exercise_indices, :]
    print(list_names[i])
    loc_maxs,margin,start=find_max(exercise)
    #Separate by individual movements 
    for j in loc_maxs:     
        #cut into distinct movements
        exercise1=exercise.drop(['exercise','max_value','repetitions','length'],axis=1)
        one_mvt=exercise1[j-margin-start:j+1+margin-start]
          
        #Standardize the length of movements to 1.37 seconds (137 rows) (2)
        if margin*2+1>137:
            new_index=strip_index(list(range(j-margin,j+margin+1)),1-137/(margin*2+1))
        elif margin*2+1<=137:
            new_index=inflate_index(list(range(j-margin,j+margin+1)),137-(margin*2+1))
        one_mvt=one_mvt.reindex(new_index)
        #one_mvt.fillna(method='ffill',inplace=True)
        one_mvt.interpolate(inplace=True)
            
        #Flatten the data
        one_mvt.reset_index(inplace=True)
        one_mvt=one_mvt.drop('index',axis=1)
        one_mvt=one_mvt.reindex(range(0,137))
        a=pd.Series(one_mvt.values.flatten())
        a=a.rename(index=lambda x:col_names[x]).T
        
        #Add back name columns
        a['exercise']=list_names[i]
        
        #Fit into one dataframe
        flat_trackers=flat_trackers.append(a,ignore_index=True)

burpees_normal_speed_10_TM01L_1534060644190.txt
burpees_normal_speed_10_TM01R_1534060644190.txt
burpees_normal_speed_10_TM02L_1534060644190.txt
burpees_normal_speed_10_TM02R_1534060644190.txt
burpees_normal_speed_10_TM05L_1534060644190.txt
burpees_normal_speed_10_TM05R_1534060644190.txt
burpees_normal_speed_10_TM06L_1534060644190.txt
burpees_normal_speed_10_TM06R_1534060644190.txt
burpees_normal_speed_10_TM07L_1534060644190.txt
burpees_normal_speed_10_TM07R_1534060644190.txt
burpees_normal_speed_10_TM08L_1534060644190.txt
burpees_normal_speed_10_TM08R_1534060644190.txt
jabcross_shadowboxing_normal_speed_10_TM01L_1534060055768.txt
jabcross_shadowboxing_normal_speed_10_TM01L_1534060385488.txt
jabcross_shadowboxing_normal_speed_10_TM01R_1534060055768.txt
jabcross_shadowboxing_normal_speed_10_TM01R_1534060385487.txt
jabcross_shadowboxing_normal_speed_10_TM02L_1534060385488.txt
jabcross_shadowboxing_normal_speed_10_TM02R_1534060385488.txt
jabcross_shadowboxing_normal_speed_10_TM05L_15340600

In [12]:
flat_trackers

Unnamed: 0,gyroX0,gyroY0,gyroZ0,lowAccelX0,lowAccelY0,lowAccelZ0,highAccelX0,highAccelY0,highAccelZ0,gyroX1,...,gyroX136,gyroY136,gyroZ136,lowAccelX136,lowAccelY136,lowAccelZ136,highAccelX136,highAccelY136,highAccelZ136,exercise
0,0.0,0.0,0.0,-1490.0,9020.0,12560.0,-549.0,1302.0,1656.0,0.000000,...,0.0,0.0,0.0,-1979.0,6320.0,12526.0,-597.0,1032.0,1652.0,burpees_normal_speed_10_TM01L_1534060644190.txt
1,0.0,0.0,0.0,-1450.0,8745.0,12635.0,-545.0,1274.0,1663.0,0.000000,...,0.0,0.0,0.0,-1340.0,6320.0,12750.0,-534.0,1032.0,1675.0,burpees_normal_speed_10_TM01L_1534060644190.txt
2,0.0,0.0,0.0,-1410.0,8470.0,12710.0,-541.0,1247.0,1671.0,0.000000,...,0.0,0.0,0.0,-1340.0,6320.0,12734.0,-534.0,1032.0,1673.0,burpees_normal_speed_10_TM01L_1534060644190.txt
3,0.0,0.0,0.0,-1840.0,6540.0,12740.0,-584.0,1054.0,1674.0,0.000000,...,0.0,0.0,0.0,-1940.0,8990.0,14160.0,-594.0,1299.0,1816.0,burpees_normal_speed_10_TM01L_1534060644190.txt
4,0.0,0.0,0.0,-112.0,6045.0,8170.0,-411.0,1004.0,1217.0,0.000000,...,0.0,0.0,0.0,-2100.0,7730.0,14370.0,-610.0,1173.0,1837.0,burpees_normal_speed_10_TM01L_1534060644190.txt
5,0.0,0.0,0.0,350.0,7270.0,7680.0,-365.0,1127.0,1168.0,0.000000,...,0.0,0.0,0.0,-2150.0,7615.0,14410.0,-615.0,1161.0,1841.0,burpees_normal_speed_10_TM01L_1534060644190.txt
6,0.0,0.0,0.0,-60.0,6110.0,7620.0,-406.0,1011.0,1162.0,0.000000,...,0.0,0.0,0.0,5438.0,6088.0,10843.0,143.0,1008.0,1484.0,burpees_normal_speed_10_TM01L_1534060644190.txt
7,0.0,0.0,0.0,-60.0,6110.0,7620.0,-406.0,1011.0,1162.0,0.000000,...,0.0,0.0,0.0,-5790.0,5570.0,10710.0,-979.0,957.0,1471.0,burpees_normal_speed_10_TM01L_1534060644190.txt
8,0.0,0.0,0.0,174.0,6569.0,7602.0,-383.0,1056.0,1160.0,0.000000,...,0.0,0.0,0.0,-4970.0,5532.0,11056.0,-897.0,953.0,1505.0,burpees_normal_speed_10_TM01L_1534060644190.txt
9,0.0,0.0,0.0,145.0,6551.0,7547.0,-386.0,1055.0,1154.0,0.000000,...,0.0,0.0,0.0,-1690.0,5380.0,12440.0,-569.0,938.0,1644.0,burpees_normal_speed_10_TM01L_1534060644190.txt


In [13]:
flat_trackers.to_csv('flat_dataAug13.csv',index=False)

In [14]:
#Function to add descriptive statistics
def descr_stats(df,mvt_df):
    statistics=['mean','std','min','max']
    describe_df=mvt_df.describe()
    for k in sensors_columns:
        for l in statistics:
            stat_name=str(k)+'_'+str(l)
            df[stat_name]=describe_df[k].loc[l]
    return df[stat_name]

In [15]:
flat_statistics=pd.DataFrame()
for i in range(0,len(list_names)):
    #Take each exercise one by one
    exercise_indices = trackers[trackers.exercise.str.contains(list_names[i])].index
    exercise=trackers.loc[exercise_indices, :]
    print(list_names[i])
    loc_maxs,margin,start=find_max(exercise)
    #Separate by individual movements 
    for j in loc_maxs:     
        #cut into distinct movements
        exercise1=exercise.drop(['exercise','max_value','repetitions','length'],axis=1)
        one_mvt=exercise1[j-margin-start:j+1+margin-start]     
        b=pd.Series()
        b['exercise']=list_names[i]
        b['length_mvt']=margin*2+1
        b['length_exercise']=exercise.describe().loc['count'][0]
        descr_stats(b,one_mvt)
        flat_statistics=flat_statistics.append(b,ignore_index=True)

burpees_normal_speed_10_TM01L_1534060644190.txt
burpees_normal_speed_10_TM01R_1534060644190.txt
burpees_normal_speed_10_TM02L_1534060644190.txt
burpees_normal_speed_10_TM02R_1534060644190.txt
burpees_normal_speed_10_TM05L_1534060644190.txt
burpees_normal_speed_10_TM05R_1534060644190.txt
burpees_normal_speed_10_TM06L_1534060644190.txt
burpees_normal_speed_10_TM06R_1534060644190.txt
burpees_normal_speed_10_TM07L_1534060644190.txt
burpees_normal_speed_10_TM07R_1534060644190.txt
burpees_normal_speed_10_TM08L_1534060644190.txt
burpees_normal_speed_10_TM08R_1534060644190.txt
jabcross_shadowboxing_normal_speed_10_TM01L_1534060055768.txt
jabcross_shadowboxing_normal_speed_10_TM01L_1534060385488.txt
jabcross_shadowboxing_normal_speed_10_TM01R_1534060055768.txt
jabcross_shadowboxing_normal_speed_10_TM01R_1534060385487.txt
jabcross_shadowboxing_normal_speed_10_TM02L_1534060385488.txt
jabcross_shadowboxing_normal_speed_10_TM02R_1534060385488.txt
jabcross_shadowboxing_normal_speed_10_TM05L_15340600

In [16]:
flat_statistics.head()

Unnamed: 0,exercise,gyroX_max,gyroX_mean,gyroX_min,gyroX_std,gyroY_max,gyroY_mean,gyroY_min,gyroY_std,gyroZ_max,...,lowAccelX_min,lowAccelX_std,lowAccelY_max,lowAccelY_mean,lowAccelY_min,lowAccelY_std,lowAccelZ_max,lowAccelZ_mean,lowAccelZ_min,lowAccelZ_std
0,burpees_normal_speed_10_TM01L_1534060644190.txt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-11130.0,1707.074825,9020.0,5973.142857,1110.0,1576.924363,23330.0,12301.444444,2730.0,2676.835667
1,burpees_normal_speed_10_TM01L_1534060644190.txt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-11130.0,1707.647254,8745.0,5930.285714,1110.0,1528.735852,23330.0,12304.460317,2730.0,2677.23867
2,burpees_normal_speed_10_TM01L_1534060644190.txt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-11130.0,1708.085544,8470.0,5891.793651,1110.0,1486.671875,23330.0,12306.031746,2730.0,2677.464858
3,burpees_normal_speed_10_TM01L_1534060644190.txt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-3930.0,2082.758759,12270.0,6296.126984,2050.0,2790.428667,17400.0,11313.68254,5050.0,2253.13076
4,burpees_normal_speed_10_TM01L_1534060644190.txt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-3330.0,1149.953152,12280.0,7864.888889,3674.0,1991.755225,17090.0,13072.349206,7680.0,2158.426751


In [17]:
flat_statistics.shape

(891, 39)

In [18]:
flat_statistics.to_csv('flat_statisticsAug13.csv',index=False)