In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [3]:
import os

directory = os.listdir('data') # get the list of directories
inpDf = pd.DataFrame() # create an empty dataframe
for _ in directory:
    for j in os.listdir('data/' + _): # get the list of files in the directory
        data = pd.read_csv('data/' + _ + '/' + j) # read the file
        data['type'] = _
        inpDf = pd.concat([inpDf, data]) # concatenate the data to the dataframe

In [4]:
inpDf.reset_index(drop=True, inplace=True) # reset the index
inpDf

Unnamed: 0,accelerometer_X,accelerometer_Y,accelerometer_Z,type
0,1.757344,-5.875371,-3.878607,running
1,2.461239,12.986150,1.762132,running
2,22.682667,22.922087,-1.738190,running
3,11.875240,4.611233,-8.389283,running
4,-5.295974,4.783615,-1.273715,running
...,...,...,...,...
193855,-4.137180,-4.429273,-1.517924,stairs
193856,1.829170,-9.313444,-0.330400,stairs
193857,-7.350199,-12.670115,-1.460463,stairs
193858,3.969586,-6.320692,9.255983,stairs


In [5]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler() # create a standard scaler object
normalised_data = inpDf.copy() # create a copy of the dataframe
scaler.fit(normalised_data[['accelerometer_X', 'accelerometer_Y', 'accelerometer_Z']]) # fit the scaler object
normalised_data[['accelerometer_X', 'accelerometer_Y', 'accelerometer_Z']] = scaler.transform(normalised_data[['accelerometer_X', 'accelerometer_Y', 'accelerometer_Z']]) # transform the data

In [6]:
normalised_data

Unnamed: 0,accelerometer_X,accelerometer_Y,accelerometer_Z,type
0,-0.019775,-0.599143,-0.790301,running
1,0.063974,0.912923,-0.005946,running
2,2.469899,1.709454,-0.492672,running
3,1.184042,0.241533,-1.417518,running
4,-0.858972,0.255353,-0.428086,running
...,...,...,...,...
193855,-0.721099,-0.483214,-0.462044,stairs
193856,-0.011229,-0.874762,-0.296916,stairs
193857,-1.103381,-1.143855,-0.454054,stairs
193858,0.243435,-0.634843,1.036086,stairs


In [7]:
len(normalised_data)

193860

In [8]:
segment_length = 208
step = 104
segments = []
labels = []

for start_pos in range(0, len(normalised_data) - segment_length + 1, step):
    segment = normalised_data.iloc[start_pos:start_pos + segment_length] # get the segment

    features = {
        'mean_X': segment['accelerometer_X'].mean(),
        'std_X': segment['accelerometer_X'].std(),
        'median_X': segment['accelerometer_X'].median(),
        'max_X': segment['accelerometer_X'].max(),
        'min_X': segment['accelerometer_X'].min(),
        'range_X': segment['accelerometer_X'].max() - segment['accelerometer_X'].min(),
        '25%_X': segment['accelerometer_X'].quantile(0.25),
        '75%_X': segment['accelerometer_X'].quantile(0.75),
        'mean_Y': segment['accelerometer_Y'].mean(),
        'std_Y': segment['accelerometer_Y'].std(),
        'median_Y': segment['accelerometer_Y'].median(),
        'max_Y': segment['accelerometer_Y'].max(),
        'min_Y': segment['accelerometer_Y'].min(),
        'range_Y': segment['accelerometer_Y'].max() - segment['accelerometer_Y'].min(),
        '25%_Y': segment['accelerometer_Y'].quantile(0.25),
        '75%_Y': segment['accelerometer_Y'].quantile(0.75),
        'mean_Z': segment['accelerometer_Z'].mean(),
        'std_Z': segment['accelerometer_Z'].std(),
        'median_Z': segment['accelerometer_Z'].median(),
        'max_Z': segment['accelerometer_Z'].max(),
        'min_Z': segment['accelerometer_Z'].min(),
        'range_Z': segment['accelerometer_Z'].max() - segment['accelerometer_Z'].min(),
        '25%_Z': segment['accelerometer_Z'].quantile(0.25),
        '75%_Z': segment['accelerometer_Z'].quantile(0.75)
    }

    segments.append(features) # append the features to the segments list
    labels.append(segment['type'].iloc[0]) # append the label to the labels list

features_df = pd.DataFrame(segments) # create a dataframe from the segments list
features_df['type'] = labels # add the labels to the dataframe

features_df # display the dataframe

Unnamed: 0,mean_X,std_X,median_X,max_X,min_X,range_X,25%_X,75%_X,mean_Y,std_Y,...,75%_Y,mean_Z,std_Z,median_Z,max_Z,min_Z,range_Z,25%_Z,75%_Z,type
0,0.370787,1.224754,0.210392,4.315789,-4.891445,9.207234,-0.313750,0.959430,0.572162,0.976348,...,1.237485,-0.158275,0.977193,-0.337865,4.075625,-3.313817,7.389442,-0.736867,0.317983,running
1,0.096675,1.288898,0.077647,4.315789,-4.891445,9.207234,-0.749158,0.661040,0.535580,1.065825,...,1.300344,-0.118882,0.930535,-0.255635,4.075625,-2.708573,6.784198,-0.673779,0.313655,running
2,0.260226,1.152578,0.207258,3.207685,-3.466008,6.673693,-0.579239,0.834662,0.513519,1.005835,...,1.153418,-0.051479,0.933322,-0.174070,2.721981,-2.708573,5.430554,-0.651141,0.387230,running
3,0.546063,1.022021,0.370483,3.510776,-1.073186,4.583962,-0.184281,1.120376,0.559950,0.862563,...,1.126739,0.057030,0.934260,-0.107819,2.807874,-1.755762,4.563636,-0.573904,0.427180,running
4,0.602542,1.107749,0.370483,4.247423,-1.256636,5.504059,-0.257633,1.126358,0.549307,0.924195,...,1.143053,0.026804,1.045175,-0.136450,4.827353,-1.755762,6.583115,-0.642818,0.523393,running
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1858,-0.412887,0.456655,-0.442222,0.846198,-1.565993,2.412192,-0.688625,-0.065922,-0.970137,0.442793,...,-0.677740,-0.558254,0.669643,-0.481353,1.503503,-3.898420,5.401923,-0.779814,-0.209858,stairs
1859,-0.410805,0.463398,-0.428264,0.846198,-1.565993,2.412192,-0.690904,-0.030600,-0.998431,0.461903,...,-0.680044,-0.546538,0.676565,-0.471365,1.503503,-3.402373,4.905876,-0.729710,-0.177066,stairs
1860,-0.436919,0.463782,-0.428264,0.770426,-1.565993,2.336419,-0.743319,-0.110930,-0.882478,0.392258,...,-0.654036,-0.508934,0.652899,-0.447728,1.553440,-3.898420,5.451861,-0.709901,-0.226837,stairs
1861,-0.324389,0.450124,-0.290107,0.770426,-1.565993,2.336419,-0.586646,0.066253,-0.951139,0.431782,...,-0.682059,-0.559035,0.795416,-0.458049,1.553440,-3.898420,5.451861,-0.778981,-0.182393,stairs


In [9]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(features_df.drop('type', axis=1), features_df['type'], test_size=0.3, random_state=42) # split the data into training and testing data

In [15]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
random_forest = RandomForestClassifier(n_estimators=100, random_state=42) # create a random forest classifier
random_forest.fit(X_train, y_train) # fit the classifier
y_pred = random_forest.predict(X_test) # predict the test data
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

        idle       1.00      1.00      1.00        83
     running       1.00      1.00      1.00       295
      stairs       1.00      1.00      1.00        14
     walking       1.00      1.00      1.00       167

    accuracy                           1.00       559
   macro avg       1.00      1.00      1.00       559
weighted avg       1.00      1.00      1.00       559


In [16]:
print(random_forest.score(X_test, y_test))
print(random_forest.score(X_train, y_train))

1.0
1.0


In [17]:
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report
svc = LinearSVC(random_state=42) # create a linear support vector classifier
svc.fit(X_train, y_train) # fit the classifier
y_pred = svc.predict(X_test) # predict the test data
print(svc.score(X_test, y_test)) # print the accuracy of the classifier
print(svc.score(X_train, y_train)) # print the accuracy of the classifier

1.0
1.0




In [18]:
print(classification_report(y_test, y_pred)) # print the classification report

              precision    recall  f1-score   support

        idle       1.00      1.00      1.00        83
     running       1.00      1.00      1.00       295
      stairs       1.00      1.00      1.00        14
     walking       1.00      1.00      1.00       167

    accuracy                           1.00       559
   macro avg       1.00      1.00      1.00       559
weighted avg       1.00      1.00      1.00       559


In [23]:
normalised_data.isnull().sum() # check for missing values

accelerometer_X    0
accelerometer_Y    0
accelerometer_Z    0
type               0
dtype: int64

In [26]:
X_train, X_test, y_train, y_test = train_test_split(normalised_data.drop('type', axis=1), normalised_data['type'], test_size=0.3, random_state=42) # split the data into training and testing data

In [27]:
svc2 = LinearSVC(random_state=42) # create a linear support vector classifier
svc2.fit(X_train, y_train) # fit the classifier
y_pred = svc2.predict(X_test) # predict the test data
print(svc2.score(X_test, y_test)) # print the accuracy of the classifier
print(svc2.score(X_train, y_train)) # print the accuracy of the classifier



0.8050999002716737
0.8029800592474687


In [28]:
print(classification_report(y_test, y_pred)) # print the classification report

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

        idle       0.80      0.65      0.72      9275
     running       0.82      0.87      0.85     30640
      stairs       0.00      0.00      0.00      1396
     walking       0.78      0.84      0.81     16847

    accuracy                           0.81     58158
   macro avg       0.60      0.59      0.59     58158
weighted avg       0.79      0.81      0.79     58158


  _warn_prf(average, modifier, msg_start, len(result))


In [31]:
random_forest2 = RandomForestClassifier(n_estimators=100, random_state=42) # create a random forest classifier
random_forest2.fit(X_train, y_train) # fit the classifier
y_pred = random_forest2.predict(X_test) # predict the test data
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

        idle       1.00      1.00      1.00      9275
     running       1.00      1.00      1.00     30640
      stairs       1.00      0.99      0.99      1396
     walking       1.00      1.00      1.00     16847

    accuracy                           1.00     58158
   macro avg       1.00      1.00      1.00     58158
weighted avg       1.00      1.00      1.00     58158


In [32]:
print(random_forest.score(X_test, y_test))
print(random_forest.score(X_train, y_train))

0.9996561092197118
1.0
