In [1]:
# Importing the necessary libraries to make the program work
import numpy as np
import pandas as pd
import os
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.ensemble import BaggingClassifier
from sklearn.neighbors import KNeighborsClassifier

In [2]:
def obtain_labels(filename):
    '''
    Function to correctly process the given .txt file into the given labels.
    '''
    file_array = []
    with open(filename, 'r') as fin:
        element = fin.readlines()
    for i in element:
        i = i.strip()
        file_array.append(i)
    activityLabels = {'1': 'Walking','2': 'ClimbingStairs', '3': 'ClimbingStairs','4': 'Sitting',
                      '5': 'Standing', '6': 'Laying'}
    array_labels = []
    for i in file_array:
        label = activityLabels.get(i)
        array_labels.append(label)
    return array_labels

In [3]:
train_labels = obtain_labels('HumanActivity/UCI HAR Dataset/train/y_train.txt')
test_labels = obtain_labels('HumanActivity/UCI HAR Dataset/test/y_test.txt')

In [4]:
def create_dataframe_from_raw_data(filename):
    '''
    Creates a dataframe from a .txt file. Due to sampling rate, 
    every dataframe has 128 columns.
    '''
    sensor_data = []
    with open (filename, 'r') as fin:
        for line in fin:
            # Appending data one by one 
            for data in line.split():
                sensor_data.append(data)
    sensor_data = np.reshape(sensor_data, (-1,128))
    sensor_dataframe = pd.DataFrame(sensor_data).astype(float)
    return sensor_dataframe

In [5]:
# Using the above function to create the dataframes
total_acc_x_train = create_dataframe_from_raw_data('HumanActivity/UCI HAR Dataset/train/Inertial Signals/total_acc_x_train.txt')
total_acc_y_train = create_dataframe_from_raw_data('HumanActivity/UCI HAR Dataset/train/Inertial Signals/total_acc_y_train.txt')
total_acc_z_train = create_dataframe_from_raw_data('HumanActivity/UCI HAR Dataset/train/Inertial Signals/total_acc_z_train.txt')
body_gyro_x_train = create_dataframe_from_raw_data('HumanActivity/UCI HAR Dataset/train/Inertial Signals/body_gyro_x_train.txt')
body_gyro_y_train = create_dataframe_from_raw_data('HumanActivity/UCI HAR Dataset/train/Inertial Signals/body_gyro_y_train.txt')
body_gyro_z_train = create_dataframe_from_raw_data('HumanActivity/UCI HAR Dataset/train/Inertial Signals/body_gyro_z_train.txt')
total_acc_x_test = create_dataframe_from_raw_data('HumanActivity/UCI HAR Dataset/test/Inertial Signals/total_acc_x_test.txt')
total_acc_y_test = create_dataframe_from_raw_data('HumanActivity/UCI HAR Dataset/test/Inertial Signals/total_acc_y_test.txt')
total_acc_z_test = create_dataframe_from_raw_data('HumanActivity/UCI HAR Dataset/test/Inertial Signals/total_acc_z_test.txt')
body_gyro_x_test = create_dataframe_from_raw_data('HumanActivity/UCI HAR Dataset/test/Inertial Signals/body_gyro_x_test.txt')
body_gyro_y_test = create_dataframe_from_raw_data('HumanActivity/UCI HAR Dataset/test/Inertial Signals/body_gyro_y_test.txt')
body_gyro_z_test = create_dataframe_from_raw_data('HumanActivity/UCI HAR Dataset/test/Inertial Signals/body_gyro_z_test.txt')

In [6]:
# Sanity check to confirm the right dataframes are produced
print(total_acc_x_train.shape, total_acc_y_train.shape, total_acc_z_train.shape, body_gyro_x_train.shape, 
      body_gyro_y_train.shape, body_gyro_z_train.shape)
print(total_acc_x_test.shape, total_acc_y_test.shape, total_acc_z_test.shape, body_gyro_x_test.shape, 
      body_gyro_y_test.shape, body_gyro_z_test.shape)

(7352, 128) (7352, 128) (7352, 128) (7352, 128) (7352, 128) (7352, 128)
(2947, 128) (2947, 128) (2947, 128) (2947, 128) (2947, 128) (2947, 128)


In [7]:
# Functions to help us extract features from the data...
def row_wise_mean(dataframe):
    mean = dataframe.mean(axis=1)
    return mean

def row_wise_stddev(dataframe):
    stddev = dataframe.std(axis=1)
    return stddev

def row_wise_pca(dataframe):
    pca = PCA(n_components = 1)
    comps = pca.fit(dataframe.transpose())
    return list(comps.components_[0])

def feature_extraction(dataframe, name):
    mean = row_wise_mean(dataframe)
    stddev = row_wise_stddev(dataframe)
    pca = row_wise_pca(dataframe)
    features = pd.DataFrame()
    features[name + '_Mean'] = mean
    features[name + '_StdDev'] = stddev
    features[name + '_PCA'] = pca
    return features
    

In [15]:
# Calculating the mean, stddev and pca row wise for each set of data...
features_total_acc_x_train = feature_extraction(total_acc_x_train, 'acc_x_train')
features_total_acc_y_train = feature_extraction(total_acc_y_train, 'acc_y_train')
features_total_acc_z_train = feature_extraction(total_acc_z_train, 'acc_z_train')
features_body_gyro_x_train = feature_extraction(body_gyro_x_train, 'gyro_x_train')
features_body_gyro_y_train = feature_extraction(body_gyro_y_train, 'gyro_y_train')
features_body_gyro_z_train = feature_extraction(body_gyro_z_train, 'gyro_z_train')
features_total_acc_x_test = feature_extraction(total_acc_x_test, 'acc_x_train')
features_total_acc_y_test = feature_extraction(total_acc_y_test, 'acc_y_train')
features_total_acc_z_test = feature_extraction(total_acc_z_test, 'acc_z_train')
features_body_gyro_x_test = feature_extraction(body_gyro_x_test, 'gyro_x_train')
features_body_gyro_y_test = feature_extraction(body_gyro_y_test, 'gyro_y_train')
features_body_gyro_z_test = feature_extraction(body_gyro_z_test, 'gyro_z_train')

In [16]:
# Appending all the features together into one dataframe...
features_x_train = pd.concat([features_total_acc_x_train,
                         features_total_acc_y_train,
                         features_total_acc_z_train,
                         features_body_gyro_x_train,
                         features_body_gyro_y_train,
                         features_body_gyro_z_train], axis = 1)
features_x_test = pd.concat([features_total_acc_x_test,
                         features_total_acc_y_test,
                         features_total_acc_z_test,
                         features_body_gyro_x_test,
                         features_body_gyro_y_test,
                         features_body_gyro_z_test], axis = 1)

In [17]:
features_x_train['Activity'] = train_labels
features_x_test['Activity'] = test_labels

In [18]:
features_x_test

Unnamed: 0,acc_x_train_Mean,acc_x_train_StdDev,acc_x_train_PCA,acc_y_train_Mean,acc_y_train_StdDev,acc_y_train_PCA,acc_z_train_Mean,acc_z_train_StdDev,acc_z_train_PCA,gyro_x_train_Mean,gyro_x_train_StdDev,gyro_x_train_PCA,gyro_y_train_Mean,gyro_y_train_StdDev,gyro_y_train_PCA,gyro_z_train_Mean,gyro_z_train_StdDev,gyro_z_train_PCA,Activity
0,0.993803,0.019582,-0.000565,-0.267531,0.009874,-0.000540,0.138710,0.019993,-0.003836,0.152280,0.103506,-0.007468,-0.007880,0.142741,-0.005298,0.045718,0.030818,0.001860,Standing
1,0.993323,0.009382,0.000146,-0.268710,0.006110,-0.000659,0.140849,0.013297,-0.001546,0.026941,0.066516,-0.005794,-0.050997,0.055904,-0.003298,0.041768,0.017890,0.001521,Standing
2,0.992927,0.003178,-0.000034,-0.273769,0.007360,0.000204,0.134786,0.008678,0.000077,-0.021182,0.025221,0.000820,-0.041510,0.018462,-0.001289,0.030026,0.013983,0.000659,Standing
3,0.991127,0.003113,-0.000090,-0.283087,0.008192,-0.000566,0.131889,0.009171,-0.000383,-0.029875,0.029747,-0.002190,-0.023333,0.021047,0.000064,0.014165,0.019995,-0.000710,Standing
4,0.990448,0.003395,-0.000247,-0.290141,0.005998,0.000419,0.123383,0.007722,0.001013,-0.033330,0.023769,0.002230,-0.004688,0.018890,0.001205,-0.003396,0.017867,-0.002217,Standing
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2942,0.972528,0.232039,-0.009273,-0.277329,0.144054,-0.002765,-0.230078,0.140362,-0.019828,-0.118397,0.382469,0.019471,0.045051,0.364212,0.009051,0.051575,0.362070,0.033592,ClimbingStairs
2943,0.983719,0.226491,-0.002850,-0.268153,0.171203,-0.003660,-0.228845,0.144047,-0.013612,0.092597,0.468420,-0.004859,0.014079,0.370876,0.001312,0.012066,0.418783,0.060825,ClimbingStairs
2944,0.980682,0.218500,0.014153,-0.232996,0.161051,0.014731,-0.230343,0.134415,0.017832,-0.099051,0.505045,-0.032823,0.072537,0.341090,0.013750,0.077493,0.390760,0.064541,ClimbingStairs
2945,0.958825,0.220656,0.019143,-0.225295,0.130834,0.005747,-0.232670,0.142838,0.032105,-0.317900,0.290931,0.008035,0.078418,0.342539,0.017360,0.118935,0.320495,0.041965,ClimbingStairs


In [19]:
features_x_train.to_csv('features_x_train.csv', index = False)
features_x_test.to_csv('features_x_test.csv', index = False)

In [20]:
# Decision Tree Classifier
classifier = DecisionTreeClassifier()
classifier.fit(features_x_train.drop('Activity', axis = 1), train_labels)
y_pred = classifier.predict(features_x_test.drop('Activity', axis = 1))
print(confusion_matrix(test_labels, y_pred))
print(classification_report(test_labels, y_pred))

[[745   0   0   0 146]
 [  0 537   0   0   0]
 [  0   0 361 128   2]
 [  0   0  85 447   0]
 [190   0   0   0 306]]
                precision    recall  f1-score   support

ClimbingStairs       0.80      0.84      0.82       891
        Laying       1.00      1.00      1.00       537
       Sitting       0.81      0.74      0.77       491
      Standing       0.78      0.84      0.81       532
       Walking       0.67      0.62      0.64       496

      accuracy                           0.81      2947
     macro avg       0.81      0.81      0.81      2947
  weighted avg       0.81      0.81      0.81      2947



In [21]:
# Bagging Classifier
classifier = BaggingClassifier(KNeighborsClassifier(), max_samples=0.5, max_features=0.5)
classifier.fit(features_x_train.drop('Activity', axis = 1), train_labels)
y_pred = classifier.predict(features_x_test.drop('Activity', axis = 1))
print(confusion_matrix(test_labels, y_pred))
print(classification_report(test_labels, y_pred))

[[687   0   0   0 204]
 [  0 537   0   0   0]
 [  2   1 390  97   1]
 [  3   0  73 455   1]
 [ 96   0   0   0 400]]
                precision    recall  f1-score   support

ClimbingStairs       0.87      0.77      0.82       891
        Laying       1.00      1.00      1.00       537
       Sitting       0.84      0.79      0.82       491
      Standing       0.82      0.86      0.84       532
       Walking       0.66      0.81      0.73       496

      accuracy                           0.84      2947
     macro avg       0.84      0.85      0.84      2947
  weighted avg       0.85      0.84      0.84      2947

