In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

In [2]:
def load_data(features):
    dfs = [] 
    for i in range(1,16):
    
        filename = 'Activity Recognition from Single Chest-Mounted Accelerometer/'+str(i)+'.csv'
    
        df_temp = pd.read_csv(filename)
        df_temp.columns = [features]
        dfs.append(df_temp)

        print("Time Duration for the {}-th participant is {} seconds".format(i,np.ceil(df_temp.shape[0]/52)))
    return dfs

In [3]:
features = ['Sequential_number', 'x_acceleration', 'y_acceleration', 'z_acceleration', 'label']
dfs = load_data(features)

Time Duration for the 1-th participant is 3125.0 seconds
Time Duration for the 2-th participant is 2654.0 seconds
Time Duration for the 3-th participant is 1969.0 seconds
Time Duration for the 4-th participant is 2350.0 seconds
Time Duration for the 5-th participant is 3077.0 seconds
Time Duration for the 6-th participant is 2710.0 seconds
Time Duration for the 7-th participant is 3135.0 seconds
Time Duration for the 8-th participant is 2654.0 seconds
Time Duration for the 9-th participant is 3207.0 seconds
Time Duration for the 10-th participant is 2439.0 seconds
Time Duration for the 11-th participant is 2009.0 seconds
Time Duration for the 12-th participant is 2206.0 seconds
Time Duration for the 13-th participant is 1301.0 seconds
Time Duration for the 14-th participant is 2233.0 seconds
Time Duration for the 15-th participant is 1991.0 seconds


In [4]:
dfs[0].head(2)

Unnamed: 0,Sequential_number,x_acceleration,y_acceleration,z_acceleration,label
0,1.0,1667,2072,2047,1
1,2.0,1611,1957,1906,1


In [5]:
df = pd.concat(dfs[i] for i in range(0,15))
df.shape

(1926881, 5)

In [6]:
window_size = 520
step_size   = 52*5

In [7]:

def get_features(df):
    
    segments = []
    
    for class_label in range(1,8): 

        df_class = df[df['label'].values == class_label]
        
        assert len(df_class) > window_size

        for i in range(0, len(df_class) - window_size, step_size):

            start = i
            end   = i + window_size

            x_seg = df_class['x_acceleration'].values[start: end]
            y_seg = df_class['y_acceleration'].values[start: end]        
            z_seg = df_class['z_acceleration'].values[start: end]

            segments.append([x_seg.mean(), y_seg.mean(), z_seg.mean(), 
                             x_seg.var(), y_seg.var(), z_seg.var(),
                             x_seg.max(), y_seg.max(), z_seg.max(),
                             x_seg.min(), y_seg.min(), z_seg.min(),
                             pd.DataFrame(x_seg).mad(), pd.DataFrame(y_seg).mad(), 
                             pd.DataFrame(z_seg).mad(),
                             class_label]) 
            
    return segments
        

In [8]:
segments = get_features(df)

In [9]:
segments_array = np.array(segments)

In [10]:
segments_array.shape

(7386, 16)

In [11]:
seg_df = pd.DataFrame(segments_array)

In [12]:
seg_df.columns =['x_mean', 'y_mean', 'z_mean', 
                 'x_var', 'y_var', 'z_var', 
                 'x_max', 'y_max', 'z_max', 
                 'x_min', 'y_min', 'z_min', 
                 'x_mad', 'y_mad', 'z_mad', 
                 'label']
seg_df.head()

Unnamed: 0,x_mean,y_mean,z_mean,x_var,y_var,z_var,x_max,y_max,z_max,x_min,y_min,z_min,x_mad,y_mad,z_mad,label
0,1897.030769,2292.657692,2064.6,28877.845207,20426.744364,15684.470769,2356.0,2552.0,2739.0,1455.0,1697.0,1644.0,133.93,110.050991,92.264615,1.0
1,1959.719231,2376.551923,2110.215385,605.075015,535.420381,1579.407456,2111.0,2552.0,2281.0,1808.0,2197.0,1958.0,13.338743,11.370754,27.438462,1.0
2,1957.184615,2379.226923,2108.601923,122.254379,61.713891,267.093458,1999.0,2409.0,2168.0,1919.0,2340.0,2062.0,8.534497,5.782322,12.795126,1.0
3,1958.707692,2379.638462,2107.288462,124.968402,52.380828,204.243713,1999.0,2409.0,2168.0,1919.0,2340.0,2062.0,8.577811,4.954763,10.555695,1.0
4,1962.921154,2378.396154,2112.584615,35.576476,11.08537,32.750533,1980.0,2390.0,2135.0,1941.0,2369.0,2086.0,4.702019,2.56676,4.372663,1.0


In [13]:
seg_df['label'] = seg_df['label'].map(int)

In [14]:
seg_df.shape

(7386, 16)

## <font color='red'> Model trainning

In [15]:
X = seg_df.iloc[:,:-1].values
y = seg_df['label'].iloc[:].values

In [16]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

In [17]:
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(5908, 15)
(5908,)
(1478, 15)
(1478,)


In [18]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import RobustScaler
from sklearn import metrics

### LogisticRegression

In [19]:
from sklearn.linear_model import LogisticRegression

LR = make_pipeline(RobustScaler(),
                   LogisticRegression()).fit(X=X_train, y=y_train)

predLR = LR.predict(X_test)       
print("Test set Accuracy: ", metrics.accuracy_score(y_test, predLR))     

cmLR = metrics.confusion_matrix(y_test,predLR)
print(cmLR)

Test set Accuracy:  0.5960757780784844
[[377   1   0   4   0   0 114]
 [ 13   1   2   3   0   0  15]
 [ 72   0   2  36   1   0  45]
 [ 13   0   1 243   0   0   6]
 [  9   0   0  15   0   0   6]
 [  9   0   0   7   0   0  19]
 [190   0   2  12   1   1 258]]


### SGDClassifier

In [20]:
from sklearn.linear_model import SGDClassifier

SGD = make_pipeline(RobustScaler(),
                    SGDClassifier()).fit(X=X_train, y=y_train)

predSGD = SGD.predict(X_test)       
print("Test set Accuracy: ", metrics.accuracy_score(y_test, predSGD))     

cmSGD = metrics.confusion_matrix(y_test,predSGD)
print(cmSGD)

Test set Accuracy:  0.5087956698240866
[[254   1 103   7   0   2 129]
 [  7   0   8   3   0   1  15]
 [ 53   0  23  32   0   6  42]
 [  8   0   9 237   0   5   4]
 [  9   0   6  12   0   0   3]
 [  4   0   4   7   0   2  18]
 [115   0  81   8   1  23 236]]




### DecisionTree

In [19]:
from sklearn.tree import DecisionTreeClassifier

tree = make_pipeline(RobustScaler(),
                   DecisionTreeClassifier()).fit(X=X_train, y=y_train)

predTree = tree.predict(X_test)
print("DecisionTrees's Accuracy: ", metrics.accuracy_score(y_test, predTree))

cmDTree = metrics.confusion_matrix(y_test,predTree)
print(cmDTree)

DecisionTrees's Accuracy:  0.8139377537212449
[[449  15   9   1   2   1  19]
 [ 16  13   2   0   0   0   3]
 [  9   3  97  14   4   5  24]
 [  2   1  17 232   6   3   2]
 [  1   3   1   4  19   0   2]
 [  1   0   6   4   0  12  12]
 [ 28   5  37   2   2   9 381]]


In [30]:
from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn.metrics import f1_score
from sklearn.metrics import jaccard_similarity_score

print('Accuracy score in Training set is:{:.3f}'.format(cross_val_score(tree, X_train, y_train, cv=5).mean()))
print('F1 score is:{:.3f}'.format(f1_score(y_test,tree.predict(X_test),average='weighted')))
print('Jaccard score is:{:.3f}'.format(jaccard_similarity_score(y_test,tree.predict(X_test))))

Accuracy score in Training set is:0.790
F1 score is:0.815
Jaccard score is:0.814


In [34]:
predTree2 = cross_val_predict(tree, X_test, y_test, cv=5)
print("DecisionTrees's Accuracy: ", metrics.accuracy_score(y_test, predTree2))

DecisionTrees's Accuracy:  0.7043301759133965


### KNN

In [22]:
from sklearn.neighbors import KNeighborsClassifier

neigh = make_pipeline(RobustScaler(),
                   KNeighborsClassifier()).fit(X=X_train, y=y_train)
predNeigh = neigh.predict(X_test)                                                                            
print("Test set Accuracy: ", metrics.accuracy_score(y_test, predNeigh))     

cmKNN = metrics.confusion_matrix(y_test,predNeigh)
print(cmKNN)

Test set Accuracy:  0.8504736129905277
[[460   5   5   3   2   1  20]
 [ 12   7   6   3   0   0   6]
 [  5   1  93  25   4   4  24]
 [  3   1  13 244   0   1   1]
 [  1   1   6   4  15   0   3]
 [  0   2   2   7   1  14   9]
 [ 11   2  18   7   0   2 424]]


### SVC

In [23]:
from sklearn.svm import SVC

svc = make_pipeline(RobustScaler(),
                    SVC()).fit(X=X_train, y=y_train)

predSVC = svc.predict(X_test)       
print("Test set Accuracy: ", metrics.accuracy_score(y_test, predSVC))     

cmSVC = metrics.confusion_matrix(y_test,predSVC)
print(cmSVC)

Test set Accuracy:  0.7537212449255751
[[417   4   3   1   0   0  71]
 [ 19   2   4   1   0   0   8]
 [ 20   0  67  26   0   0  43]
 [  2   0  15 239   0   0   7]
 [  3   0   8  12   2   0   5]
 [  1   1  11   7   0   0  15]
 [ 48   3  19   7   0   0 387]]


### RandomForest

In [24]:
from sklearn.ensemble import RandomForestClassifier

rnd = make_pipeline(RobustScaler(),
                    RandomForestClassifier()).fit(X=X_train, y=y_train)

predRND = rnd.predict(X_test)       

print("Test set Accuracy: ", metrics.accuracy_score(y_test, predRND))     

cmRND = metrics.confusion_matrix(y_test,predRND)
print(cmRND)

Test set Accuracy:  0.8673883626522327
[[479   1   5   2   0   0   9]
 [ 15   6   3   3   0   0   7]
 [  5   1 103  17   3   3  24]
 [  2   2  15 242   0   0   2]
 [  0   1   8   5  12   0   4]
 [  0   1   3   6   0  16   9]
 [ 19   3  10   4   0   4 424]]


### <font color='blue'> Ensemble model

In [25]:
from sklearn.ensemble import VotingClassifier
tree_clf = DecisionTreeClassifier()
neigh_clf = KNeighborsClassifier()
rnd_clf = RandomForestClassifier()

voting_clf = make_pipeline(RobustScaler(), 
            VotingClassifier(estimators=[('tree',tree_clf),('rf',rnd_clf),('neigh',neigh_clf)],
                             voting='hard')).fit(X_train,y_train)

predVot = voting_clf.predict(X_test)       
print("Test set Accuracy: ", metrics.accuracy_score(y_test, predVot))     

cmVot = metrics.confusion_matrix(y_test,predVot)
print(cmVot)

Test set Accuracy:  0.8687415426251691
[[479   4   2   1   0   0  10]
 [ 15   9   4   2   0   0   4]
 [ 10   0 104  16   1   3  22]
 [  2   2  13 245   0   0   1]
 [  0   3   6   2  16   0   3]
 [  0   1   3   7   0  13  11]
 [ 17   2  21   4   0   2 418]]


  if diff:


```Python
tree_params = {"criterion": ["gini", "entropy"], "max_depth": list(range(2,4,1)), 
              "min_samples_leaf": list(range(5,7,1))}


knears_params = {"n_neighbors": list(range(2,5,1)), 'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute']}


```