In [21]:
# import packages

import numpy as np 
import pandas as pd 
from scipy import signal
import matplotlib.pyplot as plt 
import math
from sklearn import preprocessing
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import make_scorer, accuracy_score, confusion_matrix
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

---
# Data Loading and Preprocessing

In [22]:
# load the datasets and plot a window of the raw signals for each activity
def visualisation(data_dir='daliac', n_datasets=19, n_activities=13,
                  start=500, stop=1500):
    """Plot a window of the raw sensor signals for every activity of every dataset.

    Each file ``<data_dir>/dataset_<k>.txt`` (k = 1..n_datasets) is read as a
    header-less comma-separated table. Column 24 is used as the activity
    label; columns 0..23 are plotted as the signals.

    Parameters
    ----------
    data_dir : str
        Directory containing the ``dataset_<k>.txt`` files.
    n_datasets : int
        Number of dataset files to read, numbered from 1.
    n_activities : int
        Activity labels 1..n_activities are plotted.
    start, stop : int
        Row range (within the rows of one activity) shown in each figure.

    Returns
    -------
    None
        One figure per (dataset, activity) pair is shown as a side effect.
    """
    label_col = 24  # last column holds the activity label, not a signal

    for dataset_idx in range(1, n_datasets + 1):
        path = '{}/dataset_{}.txt'.format(data_dir, dataset_idx)
        df = pd.read_csv(path, sep=',', header=None)

        # The activity loop must live inside the dataset loop; otherwise only
        # the last file that was read is ever plotted.
        for activity in range(1, n_activities + 1):
            df_exercise = df[df[label_col] == activity].values
            # Drop the label column so it is not drawn as an extra flat line.
            window = df_exercise[start:stop, :label_col]
            if window.shape[0] == 0:
                # Activity missing or shorter than `start` rows: nothing to draw.
                continue

            plt.plot(window)
            plt.title('dataset {} - activity {} (raw)'.format(dataset_idx, activity))
            plt.xlabel('sample index within window')
            plt.ylabel('sensor value')
            plt.show()

In [23]:
# remove noise: low-pass filter the sensor signals and plot the result
def removeNoise(data_dir='daliac', n_datasets=19, n_activities=13,
                start=500, stop=1500):
    """Low-pass filter the sensor signals and plot a window of the result.

    Each file ``<data_dir>/dataset_<k>.txt`` (k = 1..n_datasets) is read as a
    header-less comma-separated table. For every activity label in column 24,
    columns 0..23 are passed through a 4th-order digital Butterworth low-pass
    filter (normalised cutoff 0.04) and rows ``start:stop`` of the filtered
    signals are plotted.

    Parameters
    ----------
    data_dir : str
        Directory containing the ``dataset_<k>.txt`` files.
    n_datasets : int
        Number of dataset files to read, numbered from 1.
    n_activities : int
        Activity labels 1..n_activities are processed.
    start, stop : int
        Row range (within the rows of one activity) shown in each figure.

    Returns
    -------
    None
        The filtered data is only plotted, not returned or stored.
    """
    label_col = 24  # columns 0..23 are signals, column 24 is the activity label

    # The filter design does not depend on the data, so compute it once.
    b, a = signal.butter(4, 0.04, 'lowpass', analog=False)

    for dataset_idx in range(1, n_datasets + 1):
        path = '{}/dataset_{}.txt'.format(data_dir, dataset_idx)
        df = pd.read_csv(path, sep=',', header=None)

        # Filtering and plotting happen per activity of every dataset; with the
        # loops side by side only the last activity of the last file was used.
        for activity in range(1, n_activities + 1):
            df_exercise = df[df[label_col] == activity].values
            if df_exercise.shape[0] == 0:
                continue

            # Filter all signal columns at once along the time axis. Working on
            # a float copy avoids truncation if the source array is integer.
            signals = df_exercise[:, :label_col].astype(float)
            filtered = signal.lfilter(b, a, signals, axis=0)

            window = filtered[start:stop]
            if window.shape[0] == 0:
                # Activity has fewer than `start` rows: nothing to draw.
                continue

            plt.plot(window)
            plt.title('dataset {} - activity {} (low-pass filtered)'.format(dataset_idx, activity))
            plt.xlabel('sample index within window')
            plt.ylabel('sensor value')
            plt.show()

---
# Feature Engineering

In [43]:
def featureEngineering(data_dir='daliac', n_datasets=19, n_activities=13,
                       window_size=1000, train_fraction=0.8,
                       training_path='training_data.csv',
                       testing_path='testing_data.csv'):
    """Build windowed min/max/mean feature tables and write them to CSV.

    For every file ``<data_dir>/dataset_<k>.txt`` (k = 1..n_datasets) and every
    activity label 1..n_activities found in column 24:

    1. columns 0..23 are low-pass filtered (4th-order Butterworth, normalised
       cutoff 0.04);
    2. the rows are split in order: the first ``train_fraction`` go to
       training, the remainder to testing;
    3. each split is cut into consecutive, non-overlapping windows of
       ``window_size`` rows (the last window may be shorter);
    4. each window yields one row: min, max and mean of columns 0, 1 and 2
       (9 features) followed by the activity label.

    Rows from all datasets and activities are accumulated and written,
    without header or index, to ``training_path`` and ``testing_path``.

    Parameters
    ----------
    data_dir : str
        Directory containing the ``dataset_<k>.txt`` files.
    n_datasets : int
        Number of dataset files to read, numbered from 1.
    n_activities : int
        Activity labels 1..n_activities are processed.
    window_size : int
        Number of rows per feature window; must be at least 1.
    train_fraction : float
        Fraction of each activity's rows assigned to training, in [0, 1].
    training_path, testing_path : str
        Output CSV paths.

    Raises
    ------
    ValueError
        If ``window_size`` < 1 or ``train_fraction`` is outside [0, 1].
    """
    if window_size < 1:
        raise ValueError('window_size must be at least 1')
    if not 0 <= train_fraction <= 1:
        raise ValueError('train_fraction must be between 0 and 1')

    label_col = 24          # columns 0..23 are signals, column 24 is the label
    n_feature_channels = 3  # only the first three signal columns are summarised
    n_columns = 3 * n_feature_channels + 1

    # The filter design does not depend on the data, so compute it once.
    b, a = signal.butter(4, 0.04, 'low', analog=False)

    # Accumulate across ALL datasets and activities; collecting plain lists and
    # converting once avoids repeated array re-allocation.
    training_rows = []
    testing_rows = []

    for dataset_idx in range(1, n_datasets + 1):
        path = '{}/dataset_{}.txt'.format(data_dir, dataset_idx)
        df = pd.read_csv(path, sep=',', header=None)
        print('deal with dataset ' + str(dataset_idx))

        for activity in range(1, n_activities + 1):
            activity_data = df[df[label_col] == activity].values.astype(float)
            if len(activity_data) == 0:
                # This dataset has no rows for the activity.
                continue

            activity_data[:, :label_col] = signal.lfilter(
                b, a, activity_data[:, :label_col], axis=0)

            training_len = math.floor(len(activity_data) * train_fraction)
            # Training and testing features must come from their own split;
            # mixing them would leak test data into the training set.
            training_rows.extend(_window_feature_rows(
                activity_data[:training_len], window_size, n_feature_channels))
            testing_rows.extend(_window_feature_rows(
                activity_data[training_len:], window_size, n_feature_channels))

    # reshape keeps a well-formed (0, n_columns) table when nothing was collected.
    training = np.array(training_rows, dtype=float).reshape(-1, n_columns)
    testing = np.array(testing_rows, dtype=float).reshape(-1, n_columns)

    pd.DataFrame(training).to_csv(training_path, index=False, header=False)
    pd.DataFrame(testing).to_csv(testing_path, index=False, header=False)


def _window_feature_rows(segment, window_size, n_feature_channels):
    """Return one ``[min, max, mean] * channels + [label]`` row per window of `segment`."""
    rows = []
    # Stepping by window_size never produces an empty window, so min/max are
    # always defined; the final window simply holds the leftover rows.
    for begin in range(0, len(segment), window_size):
        window = segment[begin:begin + window_size]
        row = []
        for channel in range(n_feature_channels):
            values = window[:, channel]
            row.extend((np.min(values), np.max(values), np.mean(values)))
        # All rows of a window share one activity, so the first label is used.
        row.append(window[0, -1])
        rows.append(row)
    return rows

In [44]:
if __name__ == '__main__':
    # Run the pipeline stages in order: plot the raw signals, plot the
    # filtered signals, then export the feature tables.
    for stage in (visualisation, removeNoise, featureEngineering):
        stage()

deal with dataset 1
deal with dataset 2
deal with dataset 3
deal with dataset 4
deal with dataset 5
deal with dataset 6
deal with dataset 7
deal with dataset 8
deal with dataset 9
deal with dataset 10
deal with dataset 11
deal with dataset 12
deal with dataset 13
deal with dataset 14
deal with dataset 15
deal with dataset 16
deal with dataset 17
deal with dataset 18
deal with dataset 19
