In [5]:
from datetime import datetime
from os import listdir
from os import makedirs
from os.path import isfile, join

import numpy as np
import pandas as pd

In [6]:
# Directory holding the raw sensor CSV exports. Keep the trailing slash:
# the loader builds file paths from this prefix.
data_path = 'data/'
# os.listdir returns entries in arbitrary order; sort them so the per-sensor
# set numbers assigned while loading are the same on every run and machine.
files = sorted(f for f in listdir(data_path) if isfile(join(data_path, f)))

In [7]:
def _load_recording(directory, file_name, set_id):
    """Read one recording CSV and tag each row with metadata from its file name.

    The file name is split on ``-``: the first part is the participant, the
    second the exercise label, and the third (up to the first ``_``, with
    trailing digits 1-3 stripped) the category.
    """
    parts = file_name.split('-')
    recording = pd.read_csv(join(directory, file_name))
    recording['label'] = parts[1]
    recording['category'] = parts[2].split('_')[0].rstrip('123')
    recording['participant'] = parts[0]
    recording['set'] = set_id
    return recording


def _combine_recordings(recordings, name):
    """Stack recordings into one frame indexed by the ``epoch (ms)`` timestamps."""
    if recordings:
        # One concat at the end instead of DataFrame.append inside the loop:
        # append was removed in pandas 2.0 and copied the frame on every call.
        combined = pd.concat(recordings)
        # fromtimestamp() without a tz converts to the machine's local time.
        combined.index = combined['epoch (ms)'].map(
            lambda ms: datetime.fromtimestamp(ms * 1e-3))
    else:
        combined = pd.DataFrame()
    combined.name = name
    return combined


def data_from_files(file_names=None, directory=None):
    """Load all accelerometer and gyroscope recordings into two DataFrames.

    Parameters
    ----------
    file_names : iterable of str, optional
        File names to consider. Defaults to the module-level ``files`` list.
    directory : str, optional
        Directory containing the files. Defaults to the module-level
        ``data_path``.

    Returns
    -------
    (acc, gyr) : tuple of pandas.DataFrame
        One frame per sensor. Each holds the CSV columns plus ``label``,
        ``category``, ``participant`` and a 1-based ``set`` number counted
        separately per sensor in the order the files are given. The index is
        the ``epoch (ms)`` column converted to local datetimes. A sensor with
        no matching files yields an empty frame. Each frame carries a
        ``name`` attribute (``'acc'`` / ``'gyr'``).

    Files whose name contains neither ``Accelerometer`` nor ``Gyroscope`` are
    skipped without being parsed, so unrelated files in the directory do not
    break the load.
    """
    if file_names is None:
        file_names = files
    if directory is None:
        directory = data_path

    acc_recordings = []
    gyr_recordings = []

    for file_name in file_names:
        if 'Accelerometer' in file_name:
            acc_recordings.append(
                _load_recording(directory, file_name, len(acc_recordings) + 1))
        if 'Gyroscope' in file_name:
            gyr_recordings.append(
                _load_recording(directory, file_name, len(gyr_recordings) + 1))

    acc = _combine_recordings(acc_recordings, 'acc')
    gyr = _combine_recordings(gyr_recordings, 'gyr')

    return acc, gyr

In [8]:
# Load every accelerometer and gyroscope file into one frame per sensor.
acc, gyr = data_from_files()

In [9]:
def clean_data(df):
    """Drop the raw timestamp columns from ``df`` in place and name its index ``time``.

    Removes ``epoch (ms)``, ``time (01:00)`` and ``elapsed (s)`` when present.
    Columns that are already gone are ignored, so re-running the cell on an
    already cleaned frame is a no-op rather than a ``KeyError``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to clean. It is modified in place; nothing is returned.
    """
    df.drop(columns=['epoch (ms)', 'time (01:00)', 'elapsed (s)'],
            errors='ignore', inplace=True)
    df.index.names = ['time']

In [10]:
# Remove the raw timestamp columns from both sensor frames (in place);
# the datetime index, renamed to 'time', is what remains for alignment.
clean_data(acc)
clean_data(gyr)

In [11]:
# Take only the first three accelerometer columns (the axis readings) and put
# them next to the full gyroscope frame, aligning rows on the time index.
# The metadata columns therefore come from the gyroscope frame alone.
acc_axes = acc.iloc[:, :3]
data_merged = pd.concat([acc_axes, gyr], axis=1)

In [12]:
# Preview the first rows of the merged frame.
data_merged.head()

Unnamed: 0_level_0,x-axis (g),y-axis (g),z-axis (g),x-axis (deg/s),y-axis (deg/s),z-axis (deg/s),label,category,participant,set
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2019-01-18 18:27:01.146,,,,-0.671,-6.402,2.744,bench,high,A,2.0
2019-01-18 18:27:01.186,,,,-0.793,-5.732,1.159,bench,high,A,2.0
2019-01-18 18:27:01.226,,,,-3.841,-0.793,-2.622,bench,high,A,2.0
2019-01-18 18:27:01.266,,,,-5.427,-0.122,-1.707,bench,high,A,2.0
2019-01-18 18:27:01.306,,,,-3.841,-1.463,1.646,bench,high,A,2.0


In [13]:
# Rename positionally: three accelerometer axes, three gyroscope axes,
# then the metadata columns in their existing order.
axis_columns = [sensor + '_' + axis for sensor in ('acc', 'gyr') for axis in 'xyz']
metadata_columns = ['label', 'category', 'participant', 'set']
data_merged.columns = axis_columns + metadata_columns

In [14]:
def mode_(s):
    """Return the most frequent value of ``s``, or NaN when there is none.

    ``Series.mode`` ignores missing values and returns its results sorted, so
    ties resolve to the smallest value. An empty or all-missing series has no
    mode; NaN is returned in that case.

    Parameters
    ----------
    s : pandas.Series

    Returns
    -------
    The first mode of ``s``, or ``numpy.nan``.
    """
    modes = s.mode()
    # Check emptiness explicitly: label-indexing an empty result with [0]
    # raises KeyError on current pandas, which `except IndexError` misses.
    if modes.empty:
        return np.nan
    return modes.iloc[0]

In [15]:
# Aggregation per column for resampling: sensor axes are averaged, while the
# metadata columns take their most frequent value within each bin.
sampling = dict.fromkeys(
    ['acc_x', 'acc_y', 'acc_z', 'gyr_x', 'gyr_y', 'gyr_z'], 'mean')
sampling.update(dict.fromkeys(
    ['label', 'category', 'participant', 'set'], mode_))

In [16]:
# Downsample to 200 ms bins, aggregating each column as described by
# `sampling`. The `how=` keyword of resample() is deprecated (and removed in
# later pandas releases), so aggregate through the Resampler instead; 'ms' is
# the non-deprecated spelling of the millisecond alias 'L'.
data_resampled = data_merged.resample('200ms').agg(sampling)

the new syntax is .resample(...)..apply(<func>)
  """Entry point for launching an IPython kernel.


In [17]:
# Put the columns back into a fixed order: sensor axes first, metadata last.
column_order = [
    'acc_x', 'acc_y', 'acc_z',
    'gyr_x', 'gyr_y', 'gyr_z',
    'label', 'category', 'participant', 'set',
]
data_resampled = data_resampled[column_order]

In [18]:
# Discard every bin that has a missing value in any column.
data_resampled = data_resampled.dropna()

In [19]:
# One 0/1 indicator column per exercise label, added in this order.
label_indicators = [
    ('labelBenchPress', 'bench'),
    ('labelDeadlift', 'dead'),
    ('labelOHP', 'ohp'),
    ('labelRow', 'row'),
    ('labelSquat', 'squat'),
    ('labelRest', 'rest'),
]
for indicator_column, exercise in label_indicators:
    data_resampled[indicator_column] = [
        1 if value == exercise else 0 for value in data_resampled['label']
    ]

# The set number became float during merging/resampling; store it as an integer.
data_resampled['set'] = data_resampled['set'].astype(int)
# The textual label is now fully encoded by the indicator columns.
del data_resampled['label']

In [20]:
# Preview the first rows of the resampled, one-hot encoded frame.
data_resampled.head()

Unnamed: 0_level_0,acc_x,acc_y,acc_z,gyr_x,gyr_y,gyr_z,category,participant,set,labelBenchPress,labelDeadlift,labelOHP,labelRow,labelSquat,labelRest
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2019-01-18 18:27:01.400,0.0415,0.9715,-0.078,2.4026,-1.7318,-1.2562,high,A,2,1,0,0,0,0,0
2019-01-18 18:27:01.600,0.048,0.97,-0.087,1.9024,-0.7804,-0.5976,high,A,2,1,0,0,0,0,0
2019-01-18 18:27:01.800,0.048,0.967,-0.104333,4.6096,-2.951,0.8658,high,A,2,1,0,0,0,0,0
2019-01-18 18:27:02.000,0.0395,0.9825,-0.1155,0.9878,-0.3292,0.3538,high,A,2,1,0,0,0,0,0
2019-01-18 18:27:02.200,0.049667,0.965667,-0.114667,1.1828,-1.9024,-0.744,high,A,2,1,0,0,0,0,0


In [22]:
filename = 'all_data_converted'
output_dir = 'intermediate_datafiles/'
# to_csv does not create missing directories; make sure the target exists so
# the notebook also runs on a fresh checkout.
makedirs(output_dir, exist_ok=True)
data_resampled.to_csv(join(output_dir, filename + '.csv'))