In this document, we check that all the required libraries have been installed correctly.

In [2]:
# -- IMPORTS START --
import matplotlib
import matplotlib.pyplot as plt

import os
import re
import warnings
import datetime
import pathlib
import glob
import numpy as np
import pandas as pd
import joblib
from scipy import signal
from sklearn import metrics

from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn import tree


from scipy.signal import butter, filtfilt, find_peaks
# -- IMPORTS END --

# Render figures inline in the notebook output.
# NOTE(review): the inline backend draws static images, so it does not by
# itself allow zooming; an interactive backend (e.g. `%matplotlib widget`)
# would be needed for that — confirm which behaviour is wanted.
%matplotlib inline
plt.rcParams['figure.figsize'] = [9, 6] # width, height in inches

# Part 1: Define Helper Functions

In [39]:
# These helper functions avoid manually adding a dictionary entry for each feature every time.

def row_features(feature_strs, source):
    """Copy the entries named in *feature_strs* out of *source* into a new dict.

    Args:
        feature_strs: Iterable of keys/labels to look up.
        source: Any object supporting ``source[key]`` (e.g. a dict or a
            pandas Series).

    Returns:
        A dict mapping each name in *feature_strs* to ``source[name]``.
    """
    return {name: source[name] for name in feature_strs}
def row_features_iloc(feature_strs, source):
    """Copy the named features from the first row of *source* into a new dict.

    Args:
        feature_strs: Iterable of column labels to look up.
        source: Object exposing ``.iloc`` (e.g. a pandas DataFrame); only
            its first row (``source.iloc[0]``) is read.

    Returns:
        A dict mapping each name in *feature_strs* to the value of that
        column in the first row.
    """
    # The row lookup stays inside the comprehension so that nothing is
    # touched on *source* when *feature_strs* is empty.
    return {name: source.iloc[0][name] for name in feature_strs}
# Names of every per-window feature column, grouped by the signal they are
# computed from (seven summary statistics per signal).
feature_list = [
    # filtered acceleration magnitude (no suffix)
    'avg', 'max', 'med', 'min', 'q25', 'q75', 'std',
    # roll
    'avg_roll', 'max_roll', 'med_roll', 'min_roll',
    'q25_roll', 'q75_roll', 'std_roll',
    # pitch
    'avg_pitch', 'max_pitch', 'med_pitch', 'min_pitch',
    'q25_pitch', 'q75_pitch', 'std_pitch',
    # yaw
    'avg_yaw', 'max_yaw', 'med_yaw', 'min_yaw',
    'q25_yaw', 'q75_yaw', 'std_yaw',
]

In [40]:
def plot_xyz(data):
    """Plot the 'x', 'y' and 'z' columns of *data* side by side.

    Only the first 1000 rows are drawn, one subplot per axis, against the
    frame's index. X tick labels are hidden on every subplot.

    Args:
        data: pandas DataFrame with columns 'x', 'y' and 'z'.
    """
    axis_names = ['x', 'y', 'z']

    # Fixed (not random) window: the first 10 * 100 rows.
    # NOTE(review): presumably 10 s at a 100 Hz sampling rate — confirm.
    window = data.iloc[0:10 * 100]

    fig, axs = plt.subplots(ncols=len(axis_names), nrows=1)

    for ax, name in zip(axs.flat, axis_names):
        ax.set_xticklabels([])
        ax.plot(window.index, window[name], label=name)
        ax.legend()
        ax.set_title(f"Axis-{name}")

    fig.tight_layout()
    fig.show()

In [41]:
# Helper function to visualize model
def viz_tree(dt_model,features_frames,cnames):
    """Draw a fitted decision tree and save the figure to 'dt.png'.

    Args:
        dt_model: Fitted tree model, passed straight to ``tree.plot_tree``.
        features_frames: DataFrame whose column labels are used as the
            feature names shown in the plot.
        cnames: Class names shown in the plot.
    """
    # plot_tree is given the column labels as a plain list.
    column_labels = features_frames.columns.tolist()

    figure, axes = plt.subplots(figsize=(9, 4))
    plot_options = dict(
        feature_names=column_labels,
        fontsize=7,
        class_names=cnames,
        filled=True,
        ax=axes,
    )
    tree.plot_tree(dt_model, **plot_options)

    plt.title('Decision Tree')
    plt.savefig('dt.png')

In [42]:
def calc_magnitude(data):
    """Add a mean-removed acceleration magnitude column to *data*.

    The magnitude is the Euclidean norm of the 'accelerationX',
    'accelerationY' and 'accelerationZ' columns; its mean over the whole
    frame is then subtracted (the original author's way of "removing
    gravity").

    Args:
        data: pandas DataFrame with the three acceleration columns. It is
            modified in place: column 'accel_mag' is added or overwritten.

    Returns:
        The same *data* object.
    """
    x_sq = data['accelerationX']**2
    y_sq = data['accelerationY']**2
    z_sq = data['accelerationZ']**2

    magnitude = np.sqrt(x_sq + y_sq + z_sq)  # absolute accel magnitude
    data['accel_mag'] = magnitude - magnitude.mean()  # detrend

    return data

In [43]:
def remove_noise(data, sampling_rate, cutoff=5, order=2):
    """Low-pass filter 'accel_mag' into a new 'filtered_accel_mag' column.

    A Butterworth low-pass filter is designed with ``scipy.signal.butter``
    and applied forwards and backwards with ``scipy.signal.filtfilt``.

    Args:
        data: pandas DataFrame with an 'accel_mag' column. It is modified
            in place: column 'filtered_accel_mag' is added or overwritten.
        sampling_rate: Sampling rate of the signal, in the same unit as
            *cutoff* (Hz in this notebook).
        cutoff: Cut-off frequency of the filter. Defaults to 5, the value
            that was previously hard-coded.
        order: Order of the Butterworth filter. Defaults to 2, the value
            that was previously hard-coded.

    Returns:
        The same *data* object.
    """
    # butter() takes the cut-off as a fraction of the Nyquist frequency.
    nyquist = sampling_rate / 2
    b, a = butter(order, cutoff / nyquist, btype='lowpass')
    data['filtered_accel_mag'] = filtfilt(b, a, data['accel_mag'])

    return data

In [53]:
def _summary_stats(series, suffix=''):
    """Return seven summary statistics of *series*, keyed '<stat><suffix>'."""
    return {
        f'avg{suffix}': series.mean(),
        f'max{suffix}': series.quantile(1),
        f'med{suffix}': series.quantile(0.5),
        f'min{suffix}': series.quantile(0),
        f'q25{suffix}': series.quantile(0.25),
        f'q75{suffix}': series.quantile(0.75),
        f'std{suffix}': series.std(),
    }


def add_features(window):
    """Compute the summary-statistic features for one window of samples.

    For each of the columns 'filtered_accel_mag', 'roll', 'pitch' and 'yaw'
    the mean, max, median, min, 25th/75th percentile and standard deviation
    are computed. Magnitude features carry no suffix; the others are
    suffixed '_roll', '_pitch' and '_yaw'.

    Args:
        window: pandas DataFrame containing the four columns above.

    Returns:
        A one-row DataFrame with 28 feature columns, in the order
        magnitude, roll, pitch, yaw.
    """
    signals = (
        ('filtered_accel_mag', ''),
        ('roll', '_roll'),
        ('pitch', '_pitch'),
        ('yaw', '_yaw'),
    )

    features = {}
    for column, suffix in signals:
        features.update(_summary_stats(window[column], suffix))

    # Build the row through the public constructor; the private
    # DataFrame._append is not part of pandas' supported API.
    return pd.DataFrame([features])

In [46]:
def train_decision_tree(frames):
    """Train and evaluate a decision tree on the magnitude features.

    The data is split 70/30 into train and test sets with a fixed
    ``random_state`` of 42. The classification report and the test accuracy
    are printed.

    Args:
        frames: DataFrame with the feature columns 'avg', 'max', 'med',
            'min', 'q25', 'q75', 'std' and the target column 'activity'.

    Returns:
        Tuple ``(model, confusion_matrix, accuracy)`` where the confusion
        matrix and accuracy are computed on the test split.
    """
    # NOTE(review): only the seven unsuffixed features are used here; any
    # roll/pitch/yaw features in *frames* are ignored — confirm intended.
    magnitude_columns = ['avg', 'max', 'med', 'min', 'q25', 'q75', 'std']
    inputs = frames[magnitude_columns]
    labels = frames['activity']

    train_in, test_in, train_labels, test_labels = train_test_split(
        inputs, labels, test_size=0.3, random_state=42
    )

    classifier = DecisionTreeClassifier(criterion='entropy', max_depth=5)
    classifier.fit(train_in, train_labels)
    predictions = classifier.predict(test_in)

    # Evaluate on the held-out split.
    accuracy = classifier.score(test_in, test_labels)
    matrix = confusion_matrix(test_labels, predictions, labels=classifier.classes_)
    print(classification_report(test_labels, predictions))
    print("Accuracy on test set:", accuracy)

    return classifier, matrix, accuracy

In [47]:
# Function to extract windows and features
def extract_features(data, window_sec, sample_rate, activity):
    """Split *data* into fixed-length time windows and featurize each one.

    Args:
        data: DataFrame with a 'time' column (anything ``pd.to_datetime``
            accepts) plus the columns required by ``add_features``. The
            frame is NOT modified; a re-indexed copy is used internally.
        window_sec: Window length in seconds.
        sample_rate: Unused; kept so existing callers keep working.
        activity: Label stored in the 'activity' column of every row.

    Returns:
        DataFrame with one row per window, holding the columns named in
        ``feature_list`` followed by 'activity'.
    """
    # Index a copy by time instead of mutating the caller's frame: the
    # in-place version dropped the 'time' column from *data*, so a second
    # call on the same frame raised KeyError.
    indexed = data.assign(time=pd.to_datetime(data['time'])).set_index('time')

    all_features = []
    # NOTE(review): resample() may also yield windows containing no rows if
    # the recording has gaps; they are passed to add_features unchanged.
    for _, window in indexed.resample(f"{window_sec}s"):
        new_row = row_features_iloc(feature_list, add_features(window))
        new_row['activity'] = activity
        all_features.append(new_row)
    return pd.DataFrame(all_features)

In [None]:
def all_data_to_combined_csv(root, output_filename = 'all_data.csv', sampling_rate=100, window_sec=5):
    """Featurize every recording under *root* and write one combined CSV.

    *root* is expected to contain one sub-folder per activity, each holding
    CSV recordings. Every recording is run through ``calc_magnitude``,
    ``remove_noise`` and ``extract_features``; the folder name is used as
    the activity label.

    Args:
        root: Directory containing the activity sub-folders. The combined
            CSV is written into this directory as well.
        output_filename: Name of the combined CSV file.
        sampling_rate: Sampling rate passed to ``remove_noise`` and
            ``extract_features``. Defaults to 100 (previously hard-coded).
        window_sec: Window length in seconds passed to
            ``extract_features``. Defaults to 5 (previously hard-coded).

    Returns:
        DataFrame with one row per window (feature columns followed by
        'activity'); empty if no recordings were found.
    """
    frames = []

    # Sorted so the row order does not depend on the arbitrary order in
    # which the file system lists entries.
    for folder in sorted(os.listdir(root)):
        folder_path = os.path.join(root, folder)
        # Skip plain files in root, e.g. a combined CSV from an earlier run.
        if not os.path.isdir(folder_path):
            continue

        for file in sorted(glob.glob(os.path.join(folder_path, '*.csv'))):
            df = pd.read_csv(file, parse_dates=['time'])
            df = calc_magnitude(df)
            df = remove_noise(df, sampling_rate)
            df_features = extract_features(df, window_sec, sampling_rate, folder)

            if not df_features.empty:
                frames.append(df_features[[*feature_list, 'activity']])

    # pd.concat refuses an empty list, so fall back to an empty frame.
    all_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    # NOTE(review): the row index is written as an unnamed first column, as
    # before; pass index=False here if downstream readers do not need it.
    all_data.to_csv(os.path.join(root, output_filename))
    return all_data

# Part 2: Collect Training Data Into A Combined CSV

In [54]:
def transform_time_to_datetime(root):
    """Rewrite the 'time' column of every CSV under *root* as datetimes.

    Every ``*.csv`` file found one level below *root* (``root/<entry>/*.csv``)
    is read, its 'time' column is converted to datetimes interpreted as
    nanoseconds since the epoch, and the file is overwritten in place
    without the row index.

    Args:
        root: Directory containing the activity sub-folders.
    """
    for folder in os.listdir(root):
        for filename in glob.glob(f"{root}/{folder}/*.csv"):
            df = pd.read_csv(filename, parse_dates=['time'])
            # Go through to_numeric first so the column is a plain number
            # of nanoseconds whatever read_csv made of it.
            # NOTE(review): presumably this also keeps the function safe to
            # re-run on files that were already converted — confirm.
            df['time'] = pd.to_datetime(pd.to_numeric(df['time']), unit='ns')
            df.to_csv(filename, index=False)
# Silence all warnings from here on (no category or module filter is given).
warnings.filterwarnings('ignore')
# Rewrite the 'time' column of every training CSV in place as datetimes.
transform_time_to_datetime('data/training_data')

# Featurize all recordings and write data/training_data/all_data.csv; the
# returned DataFrame is the cell's displayed output.
all_data_to_combined_csv('data/training_data', 'all_data.csv')

Unnamed: 0,avg,max,med,min,q25,q75,std,avg_roll,max_roll,med_roll,...,q25_pitch,q75_pitch,std_pitch,avg_yaw,max_yaw,med_yaw,min_yaw,q25_yaw,q75_yaw,std_yaw
0,0.060695,0.296322,0.007509,-0.015785,-0.012766,0.104109,0.101829,-0.324749,-0.027619,-0.344395,...,-0.346606,-0.333215,0.013706,-0.161423,-0.005392,-0.170106,-0.184377,-0.175657,-0.166868,0.043619
1,-0.000094,0.004421,-0.000069,-0.009568,-0.001074,0.001064,0.001934,-0.350678,-0.342832,-0.350972,...,-0.356143,-0.345609,0.007442,-0.196670,-0.172231,-0.200167,-0.215398,-0.206408,-0.185721,0.012260
2,-0.000587,0.003371,-0.000638,-0.002130,-0.001012,-0.000196,0.000820,-0.346165,-0.341390,-0.346277,...,-0.355973,-0.349134,0.003702,-0.183935,-0.177852,-0.182364,-0.195394,-0.187658,-0.180011,0.004899
3,-0.001077,0.000170,-0.001133,-0.002136,-0.001573,-0.000537,0.000607,-0.346448,-0.335276,-0.347784,...,-0.359809,-0.347970,0.007834,-0.175259,-0.169176,-0.174989,-0.180920,-0.178224,-0.173073,0.003288
4,-0.001322,0.001519,-0.001510,-0.002631,-0.002042,-0.000963,0.000948,-0.338411,-0.333621,-0.338006,...,-0.375249,-0.366725,0.005378,-0.170279,-0.168955,-0.169994,-0.172674,-0.170589,-0.169759,0.000890
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
477,-0.001011,0.000811,-0.001212,-0.002320,-0.001462,-0.000955,0.000786,-0.287166,-0.285749,-0.287142,...,-0.651416,-0.649648,0.001857,-0.429450,-0.428033,-0.429107,-0.433254,-0.430049,-0.428739,0.001100
478,-0.000111,0.001037,-0.000228,-0.000992,-0.000436,0.000226,0.000511,-0.287421,-0.285807,-0.287398,...,-0.661599,-0.654714,0.004235,-0.426714,-0.423216,-0.426595,-0.430498,-0.428126,-0.425502,0.001768
479,-0.000569,0.001863,-0.000745,-0.001874,-0.001243,-0.000052,0.000890,-0.285867,-0.283670,-0.285813,...,-0.676373,-0.667568,0.005875,-0.419735,-0.409140,-0.420828,-0.424926,-0.421919,-0.418874,0.003464
480,-0.000566,0.003482,-0.000988,-0.001934,-0.001247,-0.000207,0.001233,-0.278023,-0.271389,-0.278536,...,-0.665173,-0.663469,0.002167,-0.384286,-0.353899,-0.384766,-0.407031,-0.391559,-0.382101,0.010124
