### Import libraries

In [None]:
import pandas as pd
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns
import numpy as np
from scipy import stats
from scipy import integrate
from scipy.stats import norm
from scipy.stats import t as the
pd.set_option('display.max_rows', 20)
pd.set_option('display.max_columns', 70)

# Mount Google Drive in the Colab runtime (without forcing a remount).
from google.colab import drive
drive.mount("/content/drive", force_remount=False)

# Change the working directory into the mounted "My Drive" folder.
# NOTE(review): the recorded cell output shows /content/drive/My Drive/sensors2020 —
# confirm that the relative dataset/ paths used when loading resolve from here.
%cd "/content/drive/My Drive/"

Mounted at /content/drive
/content/drive/My Drive/sensors2020


### Load and clean data

In [None]:
# Lookup table: numeric activity id -> human-readable activity name.
activity_id = {
    0: 'transient',
    1: 'lying',
    2: 'sitting',
    3: 'standing',
    4: 'walking',
    5: 'running',
    6: 'cycling',
    7: 'Nordic walking',
    9: 'watching TV',
    10: 'computer work',
    11: 'car driving',
    12: 'ascending stairs',
    13: 'descending stairs',
    16: 'vacuum cleaning',
    17: 'ironing',
    18: 'folding laundry',
    19: 'house cleaning',
    20: 'playing soccer',
    24: 'rope jumping',
}

# Ids of the activities that belong to the protocol.
protocol_acts = [1, 2, 3, 4, 5, 6, 7, 17, 16, 24, 12, 13]

# Ids of the optional activities.
optional_acts = [9, 10, 11, 18, 19, 20]

# Activity ids grouped by intensity level.
light_acts = [1, 2, 3, 17]       # light
mod_acts = [16, 13, 4, 7, 6]     # moderate
vig_acts = [12, 5, 24]           # vigorous


def map_met(act_id):
    """Return the intensity label of an activity id.

    The result is 'light', 'moderate' or 'vigorous' according to which of
    light_acts, mod_acts or vig_acts contains act_id. Ids that appear in
    none of the three lists yield None.
    """
    levels = (
        ('light', light_acts),
        ('moderate', mod_acts),
        ('vigorous', vig_acts),
    )
    for label, members in levels:
        if act_id in members:
            return label
    return None

# Column names assigned to every loaded subject table: three leading columns
# followed by the 17 IMU channels of each sensor location, grouped by
# location in the order hand, chest, ankle (e.g. 'tmp_hand', ..., 'ori_04_ankle').
col_names = ['timestamp', 'activity_id', 'heart_rate']
IMU_locations = ['hand', 'chest', 'ankle']
IMU_data = ['tmp', 'acc_16_01', 'acc_16_02', 'acc_16_03',
            'acc_06_01', 'acc_06_02', 'acc_06_03',
            'gyr_01', 'gyr_02', 'gyr_03',
            'mag_01', 'mag_02', 'mag_03',
            'ori_01', 'ori_02', 'ori_03', 'ori_04']
# Outer loop over locations, inner loop over channels. The previous nested
# flattening bound `subolist` but iterated `sublist`, raising NameError.
col_names = col_names + [dat + '_' + loc for loc in IMU_locations for dat in IMU_data]

In [None]:
# Whitespace-separated recordings of subjects 101-109, relative to the
# current working directory.
files = ['dataset/protocol/subject%d.dat' % number for number in range(101, 110)]

# Read every file into its own frame and concatenate once at the end.
# DataFrame.append was removed in pandas 2.0 and re-copied the accumulated
# frame on every iteration.
frames = []
for file in files:
    # Raw string: '\s' is not a valid escape sequence in a normal string literal.
    sub_data = pd.read_table(file, header=None, sep=r'\s+')
    sub_data.columns = col_names
    # The subject id is the single digit right before the '.dat' extension.
    sub_data['sub_id'] = int(file[-5])
    # Intensity label per row; None for ids outside the three intensity lists.
    sub_data['act_level'] = sub_data['activity_id'].apply(map_met)
    frames.append(sub_data)

data = pd.concat(frames, ignore_index=True)

In [None]:
# Activities to discard for each subject (sub_id -> activity ids).
excluded_acts = {
    1: [10, 20],
    2: [9, 10, 11, 18, 19, 20],
    3: [5, 6, 7, 9, 10, 11, 18, 19, 20, 24],
    4: [5, 9, 10, 11, 18, 19, 20, 24],
    5: [9, 11, 18, 20],
    6: [9, 11, 20],
    7: [9, 10, 11, 18, 19, 20, 24],
    8: [9, 11],
    9: [1, 2, 3, 4, 5, 6, 7, 9, 11, 12, 13, 16, 17],
}

# Start with every row labelled as activity 0 ...
drop_index = list(data.index[data['activity_id'] == 0])

# ... then add the rows of the excluded activities, subject by subject.
for subject, acts in excluded_acts.items():
    is_excluded = (data['sub_id'] == subject) & (data['activity_id'].isin(acts))
    drop_index += list(data.index[is_excluded])

# Remove the collected rows.
data = data.drop(drop_index)

# Fill missing values by interpolation (pandas defaults).
data = data.interpolate()

In [None]:
# Remove transients: drop the first and last 10 * freq rows (10 seconds at
# freq samples per second) of every activity block.
freq = 100
edge_rows = 10 * freq

# A new block starts whenever the activity or the subject differs from the
# previous row; the running sum of those starts numbers the blocks 1, 2, ...
block_start = (data['activity_id'] != data['activity_id'].shift(1)) | (data['sub_id'] != data['sub_id'].shift(1))
data['act_block'] = block_start.astype(int).cumsum()
numblocks = data['act_block'].max()

# Position of each row counted from the start and from the end of its block.
# One grouped pass replaces filtering the whole frame twice per block.
by_block = data.groupby('act_block')
rows_from_start = by_block.cumcount()
rows_from_end = by_block.cumcount(ascending=False)
is_edge = (rows_from_start < edge_rows) | (rows_from_end < edge_rows)

# Blocks shorter than 2 * edge_rows are removed entirely, as before.
drop_index = list(data.index[is_edge])

# drop index
data = data.drop(drop_index)

### Segmentation

In [None]:
# Settings for the segmentation step.
sampling_freq = 100                        # samples per second
window_size = int(sampling_freq * 5.12)    # rows per window
overlap = sampling_freq * 1                # one second worth of rows
feature_size = 18                          # feature channels per window

# Identification columns followed by temperature, 16-variant accelerometer
# and gyroscope columns for each sensor location (hand, chest, ankle).
columns_used = ['sub_id', 'activity_id', 'act_level', 'heart_rate'] + [
    channel + '_' + site
    for site in ('hand', 'chest', 'ankle')
    for channel in ('tmp', 'acc_16_01', 'acc_16_02', 'acc_16_03',
                    'gyr_01', 'gyr_02', 'gyr_03')
]

# slide windows
def windows(data, size, step=None):
    """Yield (start, end) row positions of successive sliding windows.

    Parameters
    ----------
    data : sized sequence (e.g. a pandas Series)
        Only its length is used; a window start is produced for every
        multiple of ``step`` below ``len(data)``.
    size : int
        Window length in rows; ``end`` is always ``start + size`` and may
        exceed ``len(data)`` for trailing windows, so callers must check
        the length of the slice they obtain.
    step : int, optional
        Distance in rows between consecutive window starts. Defaults to
        the module-level ``overlap`` setting, looked up at call time.

    Raises
    ------
    ValueError
        If ``step`` is not positive (the loop would never terminate).
    """
    if step is None:
        step = overlap
    if step <= 0:
        raise ValueError("step must be a positive number of rows")
    # len() counts every row, including rows holding NaN, and is evaluated
    # once instead of on every iteration.
    total = len(data)
    start = 0
    while start < total:
        yield int(start), int(start + size)
        start += step

# segment signal
def segment_signal(data,window_size = window_size):
    """Cut the recording into fixed-length windows with one label each.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain 'timestamp', 'activity_id' and the 16-variant
        accelerometer and gyroscope columns of the hand, chest and ankle
        sensors ('acc_16_01_hand', ..., 'gyr_03_ankle').
    window_size : int
        Number of rows per window.

    Returns
    -------
    segments : numpy.ndarray, float64
        Shape (n_windows, window_size, 18). The last axis holds, for hand,
        chest and ankle in that order, the three accelerometer axes
        followed by the three gyroscope axes.
    labels : numpy.ndarray, float64
        Shape (n_windows,). The most frequent activity_id inside each
        window; ties resolve to the smallest id.

    Windows come from windows() and are positional slices over the row
    order of ``data``; windows shorter than ``window_size`` are skipped.
    NOTE(review): windows are not aligned to subject/activity boundaries,
    so a window may mix rows from two neighbouring blocks.
    """
    feature_cols = [
        channel + '_' + site
        for site in ('hand', 'chest', 'ankle')
        for channel in ('acc_16_01', 'acc_16_02', 'acc_16_03',
                        'gyr_01', 'gyr_02', 'gyr_03')
    ]
    # Materialise the needed columns once; plain arrays make the positional
    # slicing explicit regardless of the frame's index labels.
    features = data[feature_cols].to_numpy(dtype=np.float64)
    activity = data['activity_id'].to_numpy()

    window_blocks = []
    window_labels = []
    for (start, end) in windows(data['timestamp'], window_size):
        block = features[start:end]
        if len(block) != window_size:
            # incomplete window at the end of the recording
            continue
        window_blocks.append(block)
        # Majority label. np.unique returns sorted values and argmax the
        # first maximum, so ties resolve to the smallest id. This avoids
        # stats.mode(...)[0][0], which breaks on SciPy >= 1.11 where the
        # mode of a 1-D input is returned as a scalar.
        values, counts = np.unique(activity[start:end], return_counts=True)
        window_labels.append(values[np.argmax(counts)])

    if not window_blocks:
        return np.empty((0, window_size, len(feature_cols))), np.empty((0))

    # Stack once at the end; growing the array with np.vstack inside the
    # loop copied every previously collected window on each iteration.
    segments = np.stack(window_blocks)
    labels = np.asarray(window_labels, dtype=np.float64)
    return segments, labels

In [None]:
# Build the windowed feature array and the per-window activity labels from
# the cleaned data, using the default window size.
segments, labels = segment_signal(data)