In this documents, we'll check if all the library has been correctly installed. 

In [2]:
# -- IMPORTS START --
import matplotlib
import matplotlib.pyplot as plt

import os
import re
import warnings
import datetime
import pathlib
import glob
import numpy as np
import pandas as pd
import joblib
from scipy import signal
from sklearn import metrics

from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn import tree


from scipy.signal import butter, filtfilt, find_peaks
# -- IMPORTS END --

# enable zooming into graphs
%matplotlib inline
plt.rcParams['figure.figsize'] = [9, 6] # width, height in inches

# Part 1: Define Helper Functions

In [3]:
def plot_xyz(data):

    # Set axis
    axis = ['x', 'y', 'z']

    # Generate datetime index
    start = pd.Timestamp('2023-01-01')

    fig, axs = plt.subplots(ncols=3, nrows=1)

    for i in range(len(axis)):

        # Select random window
        start = 0
        end = 10 * 100
        window = data.iloc[start:end]

        ax = axs.flat[i]

        # Plot data
        ax.set_xticklabels([])        
        ax.plot(window.index, window[axis[i]], label=axis[i])

        # Plot peaks
        # peak_mask = window['peaks'] != 0
        # ax.plot(window.index[peak_mask], window['accel_mag'][peak_mask], 'ro', label='Peaks')

        ax.legend()
        ax.set_title(f"Axis-{axis[i]}")

    fig.tight_layout()
    fig.show()

In [4]:
# Helper function to visualize model
def viz_tree(dt_model,features_frames,cnames):
    # Fix feature names as list
    feature_names = features_frames.columns.tolist()

    fig, ax = plt.subplots(figsize=(9,4))
    tree.plot_tree(dt_model,
                   feature_names=feature_names,
                   fontsize=7,
                   class_names=cnames,
                   filled=True,
                   ax=ax)

    plt.title('Decision Tree')
    plt.savefig('dt.png')

In [12]:
def calc_magnitude(data):

    # Calculate magnitude
    data['accel_mag'] = np.sqrt(data['accelerationX']**2 + data['accelerationY']**2 + data['accelerationZ']**2) # absolute accel magnitude
    data['accel_mag'] = data['accel_mag'] - data['accel_mag'].mean() # detrend: "remove gravity"

    return data

In [6]:
def remove_noise(data,sampling_rate):
    from scipy.signal import butter, filtfilt, find_peaks

    # Low pass filter
    cutoff = 5 # Hz
    order = 2
    b, a = butter(order, cutoff/(sampling_rate/2), btype='lowpass')
    data['filtered_accel_mag'] = filtfilt(b, a, data['accel_mag'])

    return data

In [7]:
def add_features(window):
    features = {}
    features['avg'] = window['filtered_accel_mag'].mean()
    features['max'] = window['filtered_accel_mag'].quantile(1)
    features['med'] = window['filtered_accel_mag'].quantile(0.5)
    features['min'] = window['filtered_accel_mag'].quantile(0)
    features['q25'] = window['filtered_accel_mag'].quantile(0.25)
    features['q75'] = window['filtered_accel_mag'].quantile(0.75)
    features['std'] = window['filtered_accel_mag'].std()
    df = pd.DataFrame()
    df = df._append(features,ignore_index=True)
    return df

In [8]:
def train_decision_tree(frames):
    # Extract feature columns
    X = frames[['avg', 'max', 'med', 'min', 'q25', 'q75', 'std']]

    # Extract target column
    y = frames['activity']

    # Split data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    # Create model
    dt_model = DecisionTreeClassifier(criterion='entropy',max_depth=5).fit(X_train, y_train)
    dt_pred = dt_model.predict(X_test)

    # Evaluate on test set
    acc = dt_model.score(X_test, y_test)
    dt_cm = confusion_matrix(y_test, dt_pred, labels=dt_model.classes_)
    print(classification_report(y_test, dt_pred))
    print("Accuracy on test set:", acc)

    return dt_model,dt_cm,acc

In [9]:
# Function to extract windows and features
def extract_features(data, window_sec, sample_rate, activity):
	window_str = f"{window_sec}s"
	data['time'] = pd.to_datetime(data['time'])
	data.set_index('time', inplace=True)
	resampled_data = data.resample(window_str)
	all_features = []
	for time, window in resampled_data:
		features = add_features(window)
		new_row = {
			'avg': features.iloc[0]['avg'],
			'max': features.iloc[0]['max'],
			'med': features.iloc[0]['med'],
			'min': features.iloc[0]['min'],
			'q25': features.iloc[0]['q25'],
			'q75': features.iloc[0]['q75'],
			'std': features.iloc[0]['std'],
			'activity': activity #?
		}
		all_features.append(new_row)
	return pd.DataFrame(all_features)

In [10]:
def all_data_to_combined_csv(root, output_filename = 'all_data.csv'):
    all_data = []
    sampling_rate = 100
    window_sec = 5

    
    activity_folders = os.listdir(root)
    print(activity_folders)
    for folder in activity_folders:
        activity_files = glob.glob(f"{root}/{folder}/*.csv")

        for file in activity_files:
            df = pd.read_csv(file, parse_dates=['time'])
            df = calc_magnitude(df)
            df = remove_noise(df, sampling_rate)
            df_features = extract_features(df, window_sec, sampling_rate, folder)
            
            for index, row in df_features.iterrows():
                new_row = {
                    'avg': row['avg'],
                    'max': row['max'],
                    'med': row['med'],
                    'min': row['min'],
                    'q25': row['q25'],
                    'q75': row['q75'],
                    'std': row['std'],
                    'activity': row['activity']
                }
                all_data.append(new_row)

    print(all_data)
    all_data = pd.DataFrame(all_data)
    all_data.to_csv(f"{root}/{output_filename}")  
    return all_data

# Part 2: Collect Training Data Into A Combined CSV

In [None]:
def transform_time_to_datetime(root):
    
    dateparse = lambda dates: [pd.to_datetime(d, unit='ns') for d in dates]
    # Get list of all activity folders
    activity_folders = os.listdir(root)
    # print(activity_folders)

    for folder in activity_folders:
        # print(folder)
        files = glob.glob(f"{root}/{folder}/*.csv")
        for filename in files:
            # print(filename)
            df = pd.read_csv(filename, parse_dates=['time'])
            df['time'] = pd.to_datetime(pd.to_numeric(df['time']), unit='ns')
            df.to_csv(filename, index=False)
warnings.filterwarnings('ignore')
transform_time_to_datetime('data/training_data')

all_data_to_combined_csv('data/training_data', 'all_data.csv')

['bad_back_tall_arch', 'bad_neck', '.DS_Store', 'good_posture', 'all_data.csv', 'bad_back_slouch']
[{'avg': 0.060694635854036644, 'max': 0.29632236254302624, 'med': 0.007509197578073237, 'min': -0.01578451808509011, 'q25': -0.01276606673400912, 'q75': 0.10410900437548719, 'std': 0.10182852482480866, 'activity': 'bad_back_tall_arch'}, {'avg': -9.423853695612354e-05, 'max': 0.004420639537258741, 'med': -6.941742947167638e-05, 'min': -0.00956818647205697, 'q25': -0.0010741340434913353, 'q75': 0.0010639444755881347, 'std': 0.001934074731684406, 'activity': 'bad_back_tall_arch'}, {'avg': -0.0005868418937759747, 'max': 0.003370686064680674, 'med': -0.0006383083667109351, 'min': -0.002129772438442995, 'q25': -0.0010120917330970398, 'q75': -0.0001959288901705931, 'std': 0.0008201915168886918, 'activity': 'bad_back_tall_arch'}, {'avg': -0.0010765447040029786, 'max': 0.00017006544667855193, 'med': -0.001133442097229491, 'min': -0.0021355998279883642, 'q25': -0.0015729219670016022, 'q75': -0.0005

Unnamed: 0,avg,max,med,min,q25,q75,std,activity
0,0.060695,0.296322,0.007509,-0.015785,-0.012766,0.104109,0.101829,bad_back_tall_arch
1,-0.000094,0.004421,-0.000069,-0.009568,-0.001074,0.001064,0.001934,bad_back_tall_arch
2,-0.000587,0.003371,-0.000638,-0.002130,-0.001012,-0.000196,0.000820,bad_back_tall_arch
3,-0.001077,0.000170,-0.001133,-0.002136,-0.001573,-0.000537,0.000607,bad_back_tall_arch
4,-0.001322,0.001519,-0.001510,-0.002631,-0.002042,-0.000963,0.000948,bad_back_tall_arch
...,...,...,...,...,...,...,...,...
278,-0.001011,0.000811,-0.001212,-0.002320,-0.001462,-0.000955,0.000786,bad_back_slouch
279,-0.000111,0.001037,-0.000228,-0.000992,-0.000436,0.000226,0.000511,bad_back_slouch
280,-0.000569,0.001863,-0.000745,-0.001874,-0.001243,-0.000052,0.000890,bad_back_slouch
281,-0.000566,0.003482,-0.000988,-0.001934,-0.001247,-0.000207,0.001233,bad_back_slouch
