In [1]:
import pandas as pd
import numpy as np
import os

from datetime import datetime, timedelta

In [2]:
# Gather the path of every file found anywhere below the trace directory.
data_files = [
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('04_exercise-4-traces')
    for name in names
]
data_files

['04_exercise-4-traces/accelerometer_sitting.txt',
 '04_exercise-4-traces/accelerometer_walking.txt',
 '04_exercise-4-traces/accelerometer_standing.txt']

In [3]:
def nanos_to_datetime(nanos):
    """Convert a nanosecond timestamp into a ``datetime``.

    Parameters
    ----------
    nanos : int, float or str
        Timestamp in nanoseconds. Text input (including ``str`` subclasses
        such as ``numpy.str_``) is parsed with ``int`` first, which is what
        happens when this function is used as a ``read_csv`` converter.

    Returns
    -------
    datetime.datetime
        Result of ``datetime.fromtimestamp`` on the value in seconds, i.e. a
        naive datetime expressed in the local time zone of the machine.

    Raises
    ------
    ValueError
        If a text value cannot be parsed as an integer.
    """
    # isinstance (rather than an exact type comparison) also accepts str
    # subclasses, which would otherwise reach the division and fail.
    if isinstance(nanos, str):
        nanos = int(nanos)
    return datetime.fromtimestamp(nanos / 1e9)

In [4]:
def read_trace(file):
    """Load one accelerometer trace and derive its activity label.

    The label is the part of the file *name* between the first underscore
    that follows ``'acce'`` and the first dot, e.g.
    ``'traces/accelerometer_walking.txt'`` -> ``'walking'``. Only the base
    name is inspected, so dots or underscores in the directory part of the
    path (``'./traces/...'``, ``'data.v2/...'``) do not affect the result.

    Parameters
    ----------
    file : str
        Path of the trace file.

    Returns
    -------
    tuple
        ``(label, frame)`` where ``frame`` holds columns 3-7 of the file named
        ``'Timestamp (ns) recorded'``, ``'Accuracy'``, ``'X'``, ``'Y'`` and
        ``'Z'``; column 3 is passed through ``nanos_to_datetime``.
    """
    name = os.path.basename(file)
    start = name.find('_', name.find('acce')) + 1
    dot = name.find('.')
    # Without an extension, keep the whole remainder instead of slicing to -1
    # (which would silently drop the last character).
    label = name[start:dot] if dot != -1 else name[start:]

    # Supplying `names` makes pandas treat the file as having no header row.
    frame = pd.read_csv(file,
                        usecols=[3, 4, 5, 6, 7],
                        names=['Timestamp (ns) recorded',
                               'Accuracy', 'X', 'Y', 'Z'],
                        converters={3: nanos_to_datetime})
    return (label, frame)

In [5]:
# Parse every trace once, then split the (label, frame) pairs into two
# parallel lists.
traces = [read_trace(path) for path in data_files]
labels = [label for label, _ in traces]
frames = [frame for _, frame in traces]

# Stack all traces; each trace's label is used as its key in the outer
# index level of the combined frame.
data = pd.concat(frames, keys=labels)

In [6]:
# Show the rows stored under the 'walking' key (all columns).
data.loc['walking', :]

Unnamed: 0,Timestamp (ns) recorded,Accuracy,X,Y,Z
0,2016-01-27 10:26:37.031901,0,-1.402847,3.176971,9.590897
1,2016-01-27 10:26:37.099101,0,-1.235031,3.223389,9.762283
2,2016-01-27 10:26:37.166331,0,-0.739914,3.341217,9.628983
3,2016-01-27 10:26:37.233469,0,-1.628983,2.902039,9.292160
4,2016-01-27 10:26:37.300607,0,-0.973190,2.771118,9.330246
5,2016-01-27 10:26:37.367990,0,-2.043167,2.252197,9.494492
6,2016-01-27 10:26:37.434946,0,-1.400467,2.448578,9.925339
7,2016-01-27 10:26:37.502054,0,-1.212418,2.902039,9.442123
8,2016-01-27 10:26:37.569772,0,-0.931534,2.979401,10.131241
9,2016-01-27 10:26:37.636331,0,-1.464737,3.683990,9.199326


In [7]:
# Name of the timestamp column, identical to the label read_trace assigns to
# the first column it loads.
col = 'Timestamp (ns) recorded'

In [8]:
def force(x, y, z):
    """Return the Euclidean norm ``sqrt(x**2 + y**2 + z**2)`` of a 3-vector.

    The computation goes through numpy, so scalars as well as array-like
    components (evaluated element by element) are accepted.
    """
    squared = [np.power(component, 2) for component in (x, y, z)]
    return np.sqrt(squared[0] + squared[1] + squared[2])

In [9]:
def prepare_training_data(df, output, time_window_ms=timedelta(milliseconds=500)):
    """Summarise accelerometer samples per time window and write them as CSV.

    The samples are ordered by timestamp and cut into consecutive half-open
    windows ``[start, start + time_window_ms)``, the first one starting at the
    earliest timestamp. Inside a window, each modality (level 0 of the row
    index) is handled on its own: the magnitude ``sqrt(X**2 + Y**2 + Z**2)``
    of every sample is computed, and the absolute differences between
    consecutive magnitudes are reduced to their average, maximum and minimum.
    A modality with fewer than two samples in a window produces no row.

    Parameters
    ----------
    df : pandas.DataFrame
        Samples with the columns ``'Timestamp (ns) recorded'``, ``'X'``,
        ``'Y'`` and ``'Z'``. Level 0 of the index is used as the modality
        label (e.g. the keys given to ``pd.concat``).
    output : str or file-like
        Destination handed to ``DataFrame.to_csv``.
    time_window_ms : datetime.timedelta
        Length of one window. Despite the name this is a duration object, not
        a number of milliseconds; it must be strictly positive.

    Returns
    -------
    None
        The result is written to ``output`` with the columns ``AVG``, ``MAX``,
        ``MIN`` and ``MODALITY`` (plus the default integer index).

    Raises
    ------
    ValueError
        If ``time_window_ms`` is not strictly positive (the window loop would
        never terminate).
    """
    ts_col = 'Timestamp (ns) recorded'
    out_columns = ['AVG', 'MAX', 'MIN', 'MODALITY']

    # `w - w` yields a zero of the same type as the window, so the check does
    # not care which duration type the caller uses.
    if not time_window_ms > time_window_ms - time_window_ms:
        raise ValueError('time_window_ms must be a positive duration')

    # A stable sort keeps the original order of samples sharing a timestamp.
    frame = df.sort_values(by=[ts_col], kind='mergesort')

    csv_data = []

    # An empty input simply produces a header-only CSV.
    if not frame.empty:
        timestamps = frame[ts_col]
        first_time = timestamps.iloc[0]
        last_time = timestamps.iloc[-1]

        modalities = np.asarray(frame.index.get_level_values(0))

        # Magnitude of every sample, computed once for the whole frame.
        magnitudes = np.asarray(
            np.sqrt(np.power(frame['X'], 2)
                    + np.power(frame['Y'], 2)
                    + np.power(frame['Z'], 2))
        )

        window_start = first_time
        while window_start < last_time:
            window_end = window_start + time_window_ms
            in_window = np.asarray(
                (timestamps >= window_start) & (timestamps < window_end)
            )

            # Collect magnitudes per modality, in order of first appearance,
            # so deltas are never taken across two different modalities.
            per_modality = {}
            for modality, magnitude in zip(modalities[in_window],
                                           magnitudes[in_window]):
                per_modality.setdefault(modality, []).append(magnitude)

            for modality, values in per_modality.items():
                if len(values) < 2:
                    # A single sample has no predecessor to compare with.
                    continue
                deltas = np.abs(np.diff(values))
                csv_data.append(
                    (np.average(deltas), deltas.max(), deltas.min(), modality)
                )

            window_start += time_window_ms

    csv_frame = pd.DataFrame(data=csv_data, columns=out_columns)
    csv_frame.to_csv(output)

In [18]:
# Write the windowed features of all traces to 'training.csv', using
# 1100 ms windows.
prepare_training_data(
    df=data,
    output='training.csv',
    time_window_ms=timedelta(milliseconds=1100),
)