In [65]:
from datetime import datetime
from os import listdir, makedirs
from os.path import isfile, join

import numpy as np
import pandas as pd

In [66]:
# Directory holding the raw sensor CSV exports.
data_path = 'data/'
# os.listdir returns entries in arbitrary order; sort them so that the
# per-file `set` numbers assigned while loading are reproducible across
# runs and machines.
files = sorted(f for f in listdir(data_path) if isfile(join(data_path, f)))

In [67]:
def data_from_files():
    """Load all accelerometer and gyroscope CSV files listed in ``files``.

    Metadata is taken from the dash-separated file name: the first field is
    stored as ``participant``, the second as ``label`` and the third (cut at
    the first underscore, trailing ``1``/``2``/``3`` characters removed) as
    ``category``. Every file of a sensor type also gets a running ``set``
    number, counted separately for accelerometer and gyroscope files in the
    order they appear in ``files``.

    Files whose name contains neither ``Accelerometer`` nor ``Gyroscope``
    are skipped.

    Returns
    -------
    tuple of (DataFrame, DataFrame)
        ``(acc, gyr)``, each indexed by the timestamp derived from its
        ``epoch (ms)`` column.
    """

    def read_file(file_name):
        # Parse the name before reading so a malformed name fails fast.
        parts = file_name.split('-')
        name = parts[0]
        label = parts[1]
        category = parts[2].split('_')[0].rstrip('123')

        data = pd.read_csv(join(data_path, file_name))
        data['label'] = label
        data['category'] = category
        data['participant'] = name
        return data

    # Collect the per-file frames and concatenate once at the end:
    # DataFrame.append was removed in pandas 2.0 and re-copied the
    # accumulated data on every iteration.
    acc_frames = []
    gyr_frames = []

    for f in files:
        if 'Accelerometer' in f:
            data = read_file(f)
            data['set'] = len(acc_frames) + 1
            acc_frames.append(data)

        if 'Gyroscope' in f:
            data = read_file(f)
            data['set'] = len(gyr_frames) + 1
            gyr_frames.append(data)

    acc = pd.concat(acc_frames) if acc_frames else pd.DataFrame()
    gyr = pd.concat(gyr_frames) if gyr_frames else pd.DataFrame()

    # NOTE(review): datetime.fromtimestamp yields naive timestamps in the
    # machine's local timezone, so the index depends on where the notebook
    # runs. Kept as-is so existing outputs stay comparable.
    acc.index = acc['epoch (ms)'].map(lambda x: datetime.fromtimestamp(x*1e-3))
    gyr.index = gyr['epoch (ms)'].map(lambda x: datetime.fromtimestamp(x*1e-3))

    acc.name = 'acc'
    gyr.name = 'gyr'

    return acc, gyr

In [68]:
acc, gyr = data_from_files()

In [69]:
def clean_data(df):
    """Remove the raw timing columns from ``df`` and name its index ``time``.

    The frame is modified in place and nothing is returned. A ``KeyError``
    is raised if one of the timing columns is missing, for example when the
    function is applied to the same frame a second time.
    """
    for timing_column in ('epoch (ms)', 'time (01:00)', 'elapsed (s)'):
        del df[timing_column]
    df.index.names = ['time']

In [70]:
# Strip the raw timing columns from both sensor frames (they are modified in place).
for sensor_frame in (acc, gyr):
    clean_data(sensor_frame)

In [71]:
# Keep only the first three accelerometer columns; the metadata columns
# are taken from the gyroscope frame when the two are joined on the index.
acc_axes = acc[acc.columns[:3]]
data_merged = pd.concat([acc_axes, gyr], axis=1)

In [72]:
# Short, uniform names for the merged columns, in positional order.
merged_column_names = [
    'acc_x', 'acc_y', 'acc_z',
    'gyr_x', 'gyr_y', 'gyr_z',
    'label', 'category', 'participant', 'set',
]
data_merged.columns = merged_column_names

In [73]:
# Sanity check: (rows, columns) of the merged frame.
data_merged.shape

(69677, 10)

In [74]:
def mode_(s):
    """Return the most frequent value of ``s``, or NaN when there is none.

    ``Series.mode`` returns an empty Series when ``s`` is empty or holds
    only missing values, which is the case for resampling bins without
    data. The first mode is taken positionally: a label lookup (``[0]``)
    on an empty result raises ``KeyError`` in current pandas, which the
    previous ``except IndexError`` did not catch.
    """
    modes = s.mode()
    if modes.empty:
        return np.nan
    return modes.iloc[0]

In [75]:
# Aggregation applied per resampling bin: sensor readings are averaged,
# metadata columns take their most frequent value.
mean_columns = ['acc_x', 'acc_y', 'acc_z', 'gyr_x', 'gyr_y', 'gyr_z']
mode_columns = ['label', 'category', 'participant', 'set']
sampling = {
    **{column: 'mean' for column in mean_columns},
    **{column: mode_ for column in mode_columns},
}

In [76]:
# Aggregate into 200 ms bins. The `how=` keyword of resample() was
# deprecated and later removed from pandas; the aggregation mapping is
# passed to .agg() instead. '200ms' replaces the deprecated 'L' alias.
data_resampled = data_merged.resample('200ms').agg(sampling)

the new syntax is .resample(...)..apply(<func>)
  """Entry point for launching an IPython kernel.


In [77]:
# Put the columns in a fixed order: sensor axes first, then metadata.
ordered_columns = [
    'acc_x', 'acc_y', 'acc_z',
    'gyr_x', 'gyr_y', 'gyr_z',
    'label', 'category', 'participant', 'set',
]
data_resampled = data_resampled[ordered_columns]

In [78]:
# Discard every bin that has a missing value in any column.
data_resampled = data_resampled.dropna()

In [79]:
# One 0/1 indicator column per exercise label, replacing the text column.
label_indicator_columns = {
    'labelBenchPress': 'bench',
    'labelDeadlift': 'dead',
    'labelOHP': 'ohp',
    'labelRow': 'row',
    'labelSquat': 'squat',
    'labelRest': 'rest',
}
for indicator_name, label_value in label_indicator_columns.items():
    data_resampled[indicator_name] = [
        1 if observed == label_value else 0
        for observed in data_resampled['label']
    ]

data_resampled['set'] = data_resampled['set'].astype(int)
del data_resampled['label']

In [80]:
# Preview the first rows of the resampled, one-hot-encoded frame.
data_resampled.head()

Unnamed: 0_level_0,acc_x,acc_y,acc_z,gyr_x,gyr_y,gyr_z,category,participant,set,labelBenchPress,labelDeadlift,labelOHP,labelRow,labelSquat,labelRest
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2019-01-11 16:08:05.200,0.0135,0.977,-0.071,-1.8904,2.4392,0.9388,heavy,B,30,1,0,0,0,0,0
2019-01-11 16:08:05.400,-0.0015,0.9705,-0.0795,-1.6826,-0.8904,2.1708,heavy,B,30,1,0,0,0,0,0
2019-01-11 16:08:05.600,0.001333,0.971667,-0.064333,2.5608,-0.256,-1.4146,heavy,B,30,1,0,0,0,0,0
2019-01-11 16:08:05.800,-0.024,0.957,-0.0735,8.061,-4.5244,-2.073,heavy,B,30,1,0,0,0,0,0
2019-01-11 16:08:06.000,-0.028,0.957667,-0.115,2.439,-1.5486,-3.6098,heavy,B,30,1,0,0,0,0,0


In [81]:
# Display the resampled frame as the cell's output.
data_resampled

Unnamed: 0_level_0,acc_x,acc_y,acc_z,gyr_x,gyr_y,gyr_z,category,participant,set,labelBenchPress,labelDeadlift,labelOHP,labelRow,labelSquat,labelRest
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2019-01-11 16:08:05.200,0.013500,0.977000,-0.071000,-1.8904,2.4392,0.9388,heavy,B,30,1,0,0,0,0,0
2019-01-11 16:08:05.400,-0.001500,0.970500,-0.079500,-1.6826,-0.8904,2.1708,heavy,B,30,1,0,0,0,0,0
2019-01-11 16:08:05.600,0.001333,0.971667,-0.064333,2.5608,-0.2560,-1.4146,heavy,B,30,1,0,0,0,0,0
2019-01-11 16:08:05.800,-0.024000,0.957000,-0.073500,8.0610,-4.5244,-2.0730,heavy,B,30,1,0,0,0,0,0
2019-01-11 16:08:06.000,-0.028000,0.957667,-0.115000,2.4390,-1.5486,-3.6098,heavy,B,30,1,0,0,0,0,0
2019-01-11 16:08:06.200,-0.026000,0.965000,-0.118000,0.4634,5.2194,-6.4636,heavy,B,30,1,0,0,0,0,0
2019-01-11 16:08:06.400,-0.048667,0.790000,-0.145333,21.6950,8.1708,-28.2196,heavy,B,30,1,0,0,0,0,0
2019-01-11 16:08:06.600,-0.170000,0.899500,-0.250000,17.5246,1.5976,-17.5854,heavy,B,30,1,0,0,0,0,0
2019-01-11 16:08:06.800,-0.222667,0.907000,-0.204333,-7.2318,-1.3536,-0.4026,heavy,B,30,1,0,0,0,0,0
2019-01-11 16:08:07.000,-0.204500,0.930000,-0.149000,-28.6830,-10.2076,20.5732,heavy,B,30,1,0,0,0,0,0


In [82]:
# Write the processed data for the next pipeline stage. The target
# directory is created first so the export also works on a fresh
# checkout where it does not exist yet.
output_dir = 'intermediate_datafiles/'
filename = 'all_data_converted'
makedirs(output_dir, exist_ok=True)
data_resampled.to_csv(join(output_dir, filename + '.csv'))