In [52]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix,accuracy_score,log_loss
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV

In [27]:
#Data Pre processing the Idea is to define the activities and adress them to the type of Activity difficulty
activity = {0: 'transient', 1:'lying', 2:'sitting', 3:'standing',4:'walking', 5:'running', 6:'cycling', 7:'Nordic walking',
            9:'watching TV', 10:'computer work', 11:'car driving',12:'ascending stairs', 13:'descending stairs', 16:'vacuum cleaning',
            17:'ironing', 18:'folding laundry', 19:'house cleaning',20:'playing soccer', 24:'rope jumping'}

# The activities for protocol are lie, sit, stand, walk, run, cycle, Nordic walk, iron, vacuum cleaning, 
#rope jump, ascend and descend stairs
protocol = [1,2,3,4,5,6,7,12,13,16,17,24]

# Activities that are optional are watch TV, computer work, drive car, fold laundry, house cleaning, play soccer
optional = [9,10,11,18,19,20]

# Now we can make the classification for each Activity to it respecting level of effort
#lying, sitting, standing and ironing
light = [1,2,3,17]
#vacuum cleaning, descending stairs, walking, Nordic walking and cycling
moderate = [4,6,7,13,16]
#ascending stairs, running and rope jumping
vigorous = [5,12,24]



In [28]:
#Classifying activities
def map_met(act):
    if act in light:
        return 'light'
    if act in moderate:
        return 'moderate'
    if act in vigorous:
        return 'vigorous'
    
#Making list for updating column names in dataframe
col_names=['timestamp', 'activity', 'heart_rate']

Sensor = ['hand', 'chest', 'ankle']
IMU_data = ['tmp', 'acc_16_01', 'acc_16_02', 'acc_16_03',
            'acc_06_01', 'acc_06_02', 'acc_06_03',
            'gyr_01', 'gyr_02', 'gyr_03',
            'mag_01', 'mag_02', 'mag_03',
            'ori_01', 'ori_02', 'ori_03', 'ori_04']

col_names = col_names + [item for sublist in [[dat+'_'+loc for dat in IMU_data] for loc in Sensor] for item in sublist]

In [29]:
files = [
    'C:/Users/Administrator/Downloads/PAMAP2_Dataset/Protocol/subject101.dat',
    'C:/Users/Administrator/Downloads/PAMAP2_Dataset/Protocol/subject102.dat',
    'C:/Users/Administrator/Downloads/PAMAP2_Dataset/Protocol/subject103.dat',
    'C:/Users/Administrator/Downloads/PAMAP2_Dataset/Protocol/subject104.dat',
    'C:/Users/Administrator/Downloads/PAMAP2_Dataset/Protocol/subject105.dat',
    'C:/Users/Administrator/Downloads/PAMAP2_Dataset/Protocol/subject106.dat',
    'C:/Users/Administrator/Downloads/PAMAP2_Dataset/Protocol/subject107.dat',
    'C:/Users/Administrator/Downloads/PAMAP2_Dataset/Protocol/subject108.dat',
    'C:/Users/Administrator/Downloads/PAMAP2_Dataset/Protocol/subject109.dat'
]

In [30]:
pamap2 = pd.DataFrame()

for file in files:
    sub_data = pd.read_table(file, header=None, sep='\s+')
    sub_data.columns = col_names
    sub_data['sub_id'] = int(file[-5])
    sub_data['act_level'] = sub_data['activity'].apply(map_met)
    pamap2 = pamap2.append(sub_data, ignore_index=True)
pamap2

Unnamed: 0,timestamp,activity,heart_rate,tmp_hand,acc_16_01_hand,acc_16_02_hand,acc_16_03_hand,acc_06_01_hand,acc_06_02_hand,acc_06_03_hand,...,gyr_03_ankle,mag_01_ankle,mag_02_ankle,mag_03_ankle,ori_01_ankle,ori_02_ankle,ori_03_ankle,ori_04_ankle,sub_id,act_level
0,8.38,0,104.0,30.0000,2.37223,8.60074,3.51048,2.43954,8.76165,3.35465,...,-0.017580,-61.1888,-38.95990,-58.143800,1.000000,0.000000,0.000000,0.000000,1,
1,8.39,0,,30.0000,2.18837,8.56560,3.66179,2.39494,8.55081,3.64207,...,0.000368,-59.8479,-38.89190,-58.525300,1.000000,0.000000,0.000000,0.000000,1,
2,8.40,0,,30.0000,2.37357,8.60107,3.54898,2.30514,8.53644,3.73280,...,0.022495,-60.7361,-39.41380,-58.399900,1.000000,0.000000,0.000000,0.000000,1,
3,8.41,0,,30.0000,2.07473,8.52853,3.66021,2.33528,8.53622,3.73277,...,0.011275,-60.4091,-38.76350,-58.395600,1.000000,0.000000,0.000000,0.000000,1,
4,8.42,0,,30.0000,2.22936,8.83122,3.70000,2.23055,8.59741,3.76295,...,-0.002823,-61.5199,-39.38790,-58.269400,1.000000,0.000000,0.000000,0.000000,1,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2872528,100.19,0,,25.1875,-4.71493,10.22250,4.66893,-5.04654,9.94944,4.50736,...,-0.127084,-46.5153,3.58240,-0.035995,0.598531,0.033615,0.799791,-0.031075,9,
2872529,100.20,0,,25.1875,-4.95932,10.37130,4.12594,-4.96890,10.29620,4.43102,...,-0.089808,-45.7474,3.54453,0.108583,0.598428,0.033012,0.799933,-0.030018,9,
2872530,100.21,0,,25.1875,-4.93997,9.83615,3.70468,-5.04613,10.35690,4.14405,...,-0.064709,-46.3997,4.22078,0.105504,0.598233,0.033172,0.800095,-0.029416,9,
2872531,100.22,0,,25.1875,-4.64941,9.11129,3.51904,-5.06854,9.75268,3.87359,...,-0.064357,-46.5282,4.48593,0.530240,0.598116,0.033427,0.800180,-0.029207,9,


In [31]:
drop_index = []

#We need to drop 0 activity 
drop_index += list(pamap2.index[pamap2['activity']==0])

#This will drop all of the cctivities not on the Preformance Summary and order by activity 
drop_index += list(pamap2.index[(pamap2['sub_id']==1) & (pamap2['activity'].isin([10,20]))])
drop_index += list(pamap2.index[(pamap2['sub_id']==2) & (pamap2['activity'].isin([9,10,11,18,19,20]))])
drop_index += list(pamap2.index[(pamap2['sub_id']==3) & (pamap2['activity'].isin([5,6,7,9,10,11,18,19,20,24]))])
drop_index += list(pamap2.index[(pamap2['sub_id']==4) & (pamap2['activity'].isin([5,9,10,11,18,19,20,24]))])
drop_index += list(pamap2.index[(pamap2['sub_id']==5) & (pamap2['activity'].isin([9,11,18,20]))])
drop_index += list(pamap2.index[(pamap2['sub_id']==6) & (pamap2['activity'].isin([9,11,20]))])
drop_index += list(pamap2.index[(pamap2['sub_id']==7) & (pamap2['activity'].isin([9,10,11,18,19,20,24]))])
drop_index += list(pamap2.index[(pamap2['sub_id']==8) & (pamap2['activity'].isin([9,11]))])
drop_index += list(pamap2.index[(pamap2['sub_id']==9) & (pamap2['activity'].isin([1,2,3,4,5,6,7,9,11,12,13,16,17]))])

pamap2 = pamap2.drop(drop_index)
    
#Interpolate
pamap2 = pamap2.interpolate()
pamap2

Unnamed: 0,timestamp,activity,heart_rate,tmp_hand,acc_16_01_hand,acc_16_02_hand,acc_16_03_hand,acc_06_01_hand,acc_06_02_hand,acc_06_03_hand,...,gyr_03_ankle,mag_01_ankle,mag_02_ankle,mag_03_ankle,ori_01_ankle,ori_02_ankle,ori_03_ankle,ori_04_ankle,sub_id,act_level
2928,37.66,1,,30.375,2.21530,8.27915,5.58753,2.24689,8.55387,5.77143,...,0.001752,-61.1081,-36.863600,-58.369600,1.000000,0.000000,0.000000,0.000000,1,light
2929,37.67,1,,30.375,2.29196,7.67288,5.74467,2.27373,8.14592,5.78739,...,0.006007,-60.8916,-36.319700,-58.365600,1.000000,0.000000,0.000000,0.000000,1,light
2930,37.68,1,,30.375,2.29090,7.14240,5.82342,2.26966,7.66268,5.78846,...,-0.004882,-60.3407,-35.784200,-58.611900,1.000000,0.000000,0.000000,0.000000,1,light
2931,37.69,1,,30.375,2.21800,7.14365,5.89930,2.22177,7.25535,5.88000,...,0.026950,-60.7646,-37.102800,-57.879900,1.000000,0.000000,0.000000,0.000000,1,light
2932,37.70,1,100.0,30.375,2.30106,7.25857,6.09259,2.20720,7.24042,5.95555,...,-0.006328,-60.2040,-37.122500,-57.884700,1.000000,0.000000,0.000000,0.000000,1,light
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2872015,95.06,24,162.0,25.125,4.99466,6.01881,5.59830,4.90787,6.05780,5.68357,...,0.005878,-45.7855,-0.831734,-0.170139,0.522929,-0.291612,0.705786,-0.378648,9,vigorous
2872016,95.07,24,162.0,25.125,5.02764,5.90369,5.48372,4.89090,5.95209,5.56301,...,-0.004235,-46.0331,-0.817288,0.538134,0.522880,-0.291694,0.705895,-0.378450,9,vigorous
2872017,95.08,24,162.0,25.125,5.06409,5.71370,5.48491,4.97981,5.87584,5.45738,...,-0.002309,-45.5140,-1.229410,0.540438,0.522625,-0.291978,0.706161,-0.378084,9,vigorous
2872018,95.09,24,162.0,25.125,5.13914,5.63724,5.48629,4.97690,5.69448,5.29167,...,-0.007076,-45.9093,-0.565555,0.680109,0.522536,-0.291955,0.706426,-0.377733,9,vigorous


In [32]:
# We then take the 10s off the begining and end of each activity 
freq = 100
pamap2['act_block'] = ((pamap2['activity'].shift(1) != pamap2['activity']) | (pamap2['sub_id'].shift(1) != pamap2['sub_id'])).astype(int).cumsum()
drop_index = []
numblocks = pamap2['act_block'].max()
for block in range(1, numblocks+1):
    drop_index += list(pamap2[pamap2['act_block']==block].head(10 * freq).index)
    drop_index += list(pamap2[pamap2['act_block']==block].tail(10 * freq).index)
    
pamap2 = pamap2.drop(drop_index)
pamap2

Unnamed: 0,timestamp,activity,heart_rate,tmp_hand,acc_16_01_hand,acc_16_02_hand,acc_16_03_hand,acc_06_01_hand,acc_06_02_hand,acc_06_03_hand,...,mag_01_ankle,mag_02_ankle,mag_03_ankle,ori_01_ankle,ori_02_ankle,ori_03_ankle,ori_04_ankle,sub_id,act_level,act_block
3928,47.66,1,103.0,30.500,-7.763190,-6.596560,0.831271,-7.598760,-6.81413,0.952719,...,-74.7494,-45.4312,-41.39170,1.000000,0.000000,0.000000,0.000000,1,light,1
3929,47.67,1,103.0,30.500,-7.962830,-6.787140,0.559511,-7.811010,-6.84291,0.832193,...,-76.0839,-46.1565,-41.38860,1.000000,0.000000,0.000000,0.000000,1,light,1
3930,47.68,1,103.0,30.500,-7.561590,-6.945460,0.181182,-7.783900,-6.97924,0.605953,...,-75.4083,-45.9693,-40.64940,1.000000,0.000000,0.000000,0.000000,1,light,1
3931,47.69,1,103.0,30.500,-7.271300,-7.140180,-0.121712,-7.592270,-7.11690,0.228539,...,-76.1764,-46.5085,-39.53490,1.000000,0.000000,0.000000,0.000000,1,light,1
3932,47.70,1,103.0,30.500,-7.195730,-6.951400,-0.159704,-7.249560,-7.30087,-0.058323,...,-76.4170,-45.6111,-40.01850,1.000000,0.000000,0.000000,0.000000,1,light,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2871015,85.06,24,156.0,25.125,-0.697385,-2.994830,6.698070,-1.328490,-3.32025,6.525930,...,-44.2625,-13.4962,9.49449,0.447386,-0.498313,0.624246,-0.402302,9,vigorous,107
2871016,85.07,24,156.0,25.125,0.359898,-1.980660,6.479560,-0.436278,-2.94914,6.509110,...,-43.6102,-14.1724,9.49757,0.448317,-0.500807,0.622206,-0.401328,9,vigorous,107
2871017,85.08,24,156.0,25.125,1.041960,-1.152250,6.371520,0.398843,-2.07937,6.370440,...,-44.2485,-14.0110,9.77880,0.449788,-0.503479,0.620000,-0.399750,9,vigorous,107
2871018,85.09,24,156.0,25.125,0.609343,-0.840543,6.902810,0.707871,-1.25090,6.443810,...,-42.7001,-14.6020,10.21040,0.450860,-0.507112,0.617611,-0.397643,9,vigorous,107


In [45]:
# To prevent Exceedingly long process times I will sample 10000 records at random
pamap2.reset_index(inplace=True)
pamap2.drop('index', inplace=True, axis=1)
pamap2 = pamap2.sample(n=10000)
pamap2

Unnamed: 0,timestamp,activity,heart_rate,tmp_hand,acc_16_01_hand,acc_16_02_hand,acc_16_03_hand,acc_06_01_hand,acc_06_02_hand,acc_06_03_hand,...,mag_01_ankle,mag_02_ankle,mag_03_ankle,ori_01_ankle,ori_02_ankle,ori_03_ankle,ori_04_ankle,sub_id,act_level,act_block
6511,3571.13,24,179.0,30.1875,-10.231200,15.120300,-4.01291,-9.798230,14.204500,-3.92333,...,-48.858800,-32.17860,39.36540,1.000000,0.000000,0.000000,0.000000,1,vigorous,14
9934,949.35,17,89.0,34.3125,-2.979530,9.223630,4.81376,-2.816020,8.106000,4.55442,...,-36.963200,13.64480,38.14780,0.129713,0.749206,0.030196,0.648809,2,light,18
8455,81.06,24,153.0,25.1250,2.633990,5.082000,-7.82751,2.570420,-0.626399,-5.82128,...,-32.682500,-29.64930,10.40500,0.482270,-0.613851,0.541871,-0.311412,9,vigorous,107
7978,3072.86,6,134.0,28.5625,-9.185880,-2.992810,3.72284,-9.693690,-1.586880,2.90651,...,-32.929400,-25.75030,18.62580,0.006690,0.370996,0.674667,0.638077,4,moderate,51
7116,997.25,17,80.0,33.8125,-5.202680,0.778601,8.88861,-5.471710,1.122270,9.44999,...,-29.345400,1.97153,49.66750,0.022239,0.809473,-0.061424,0.583512,7,light,83
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9725,3295.98,5,171.0,30.5000,-11.893400,35.254500,-2.72018,-11.040300,39.782200,-3.41936,...,-56.417600,-38.98540,6.67702,1.000000,0.000000,0.000000,0.000000,1,vigorous,13
895,2867.92,6,128.0,33.0000,-6.741990,1.333070,10.13380,-6.049580,2.002650,9.75067,...,-33.981800,-38.45570,14.52370,0.578895,-0.613114,0.510011,-0.169883,7,moderate,91
958,133.27,1,87.0,31.1250,0.431327,5.772990,8.03245,0.403508,5.790440,8.01428,...,-12.640800,45.01220,-4.66783,1.000000,0.000000,0.000000,0.000000,1,light,1
5903,1214.23,16,111.0,33.5625,-4.815390,6.931690,5.71768,-5.159090,6.901680,6.74907,...,-0.569572,-11.92750,-11.06770,1.000000,0.000000,0.000000,0.000000,1,moderate,5


In [51]:
ext = pamap2.agg(['sum', 'mean','var', 'std','skew', 'kurt', 'median']).unstack()
npamp2 = pd.concat([pamap2, ext.setaxis([f'{x}{y}'for x, y in ext.index]).to_frame().T], axis=1)

AttributeError: 'Series' object has no attribute 'setaxis'

In [37]:
ext

timestamp  sum       1.69325e+07
           mean          1693.25
           var       1.20589e+06
           std           1098.13
           skew         0.304025
                        ...     
act_block  var           954.649
           std           30.8974
           skew       -0.0166786
           kurt         -1.20375
           median             54
Length: 399, dtype: object

In [None]:
#logistic regression model
log_reg = LogisticRegression()
log_reg.fit(x, y)

#KNN
knn = KNeighborsClassifier('n_neighbors': np.arange(1, 25))
knn.fit(x, y)
knnb = knn.best_estimator_

#RFC
rf = RandomForestClassifier('n_estimators': [50, 100, 200])
rf.fit(x, y)
rfb = rf.best_estimator_

ModEst=[('log_reg', log_reg),('knn', knnb), ('rf', rfb)]
vote = VotingClassifier(ModEst, voting='hard')

#fit model to training data
vote.fit(x, y)
