In [22]:
import os

import numpy as np
import pandas as pd
from ahrs.common.orientation import q2euler
from ahrs.filters import Madgwick
from tqdm import tqdm as tqdm

In [16]:
# Directory holding the raw inertial-signal windows of the UCI HAR training split.
train_signals_dir = '../data/HAR/UCI HAR Dataset/train/Inertial Signals'

# Every file is a whitespace-separated table without a header row.
# `sep=r'\s+'` is the documented replacement for `delim_whitespace=True`, which is
# deprecated since pandas 2.2 and emits a FutureWarning.
(
    train_body_acc_x, train_body_acc_y, train_body_acc_z,
    train_gyro_x, train_gyro_y, train_gyro_z,
    train_total_acc_x, train_total_acc_y, train_total_acc_z,
) = (
    pd.read_csv(f'{train_signals_dir}/{signal}_train.txt', sep=r'\s+', header=None)
    for signal in (
        'body_acc_x', 'body_acc_y', 'body_acc_z',
        'body_gyro_x', 'body_gyro_y', 'body_gyro_z',
        'total_acc_x', 'total_acc_y', 'total_acc_z',
    )
)

# Activity id of each training window (single column, one row per window).
y_train = pd.read_csv('../data/HAR/UCI HAR Dataset/train/y_train.txt', sep=r'\s+', header=None)

In [24]:
# Build the 12-channel feature tensor for the training split:
#   [0:3]  attitude (Euler angles from a Madgwick IMU filter)
#   [3:6]  gravity
#   [6:9]  rotation rate
#   [9:12] user acceleration

# Calculate gravity (assuming total_acc - body_acc)
gravity_x = train_total_acc_x - train_body_acc_x
gravity_y = train_total_acc_y - train_body_acc_y
gravity_z = train_total_acc_z - train_body_acc_z

# User acceleration is the same as body acceleration for this purpose
user_acc_x = train_body_acc_x
user_acc_y = train_body_acc_y
user_acc_z = train_body_acc_z

# Stack the per-axis frames into (num_samples, sample_size, 3) NumPy arrays once,
# so the inner loop does cheap array indexing instead of per-element `.iloc` lookups.
train_total_acc = np.stack(
    [train_total_acc_x.to_numpy(), train_total_acc_y.to_numpy(), train_total_acc_z.to_numpy()],
    axis=-1,
)
train_body_acc = np.stack(
    [train_body_acc_x.to_numpy(), train_body_acc_y.to_numpy(), train_body_acc_z.to_numpy()],
    axis=-1,
)
train_gyro = np.stack(
    [train_gyro_x.to_numpy(), train_gyro_y.to_numpy(), train_gyro_z.to_numpy()],
    axis=-1,
)

# Initialize the Madgwick filter.
# The UCI HAR README states the signals are sampled at 50 Hz; the filter's default
# of 100 Hz would halve the integration step.
madgwick = Madgwick(frequency=50.0)

num_samples, sample_size = train_body_acc_x.shape
features = np.zeros((num_samples, sample_size, 12))  # Placeholder for all features

# The non-attitude channels are plain copies/differences of the inputs: fill them vectorised.
features[:, :, 3:6] = train_total_acc - train_body_acc  # Gravity
features[:, :, 6:9] = train_gyro  # Rotation Rate
features[:, :, 9:12] = train_body_acc  # User Acceleration

for i in tqdm(range(num_samples)):
    # Initialize quaternion to [1, 0, 0, 0] for each sample
    q = np.array([1.0, 0.0, 0.0, 0.0])
    for t in range(sample_size):
        # Gyroscope: the UCI HAR README documents these signals in radians/second,
        # which is what the filter expects, so no degree->radian conversion is applied.
        # Accelerometer: the filter uses the measured gravity direction as its
        # reference, so it needs the total acceleration, not the gravity-free body part.
        q = madgwick.updateIMU(q=q, gyr=train_gyro[i, t], acc=train_total_acc[i, t])
        features[i, t, 0:3] = q2euler(q)  # Attitude (roll, pitch, yaw)


100%|██████████| 7352/7352 [05:30<00:00, 22.23it/s]


In [32]:
# Mapping from UCI HAR activity ids to the label ids written to disk.
# NOTE(review): the target ids presumably follow the MotionSense label encoding,
# since the arrays are later concatenated with it - confirm.
label_map = {
    3: 0,
    2: 1,
    1: 2,
    5: 4,
    4: 5,
}

# Save the features

# `features` is also assigned by the test-split cell. Make sure the array currently
# in memory belongs to the training split before filtering it with training labels.
assert len(features) == len(y_train), (
    f"features has {len(features)} rows but y_train has {len(y_train)}; "
    "re-run the training feature cell before saving"
)

# Only keep samples with labels that are 1-5
mask = y_train[0] < 6
features_ = features[mask]
y_train_ = y_train[mask]

# Change the labels based on the label_map
y_train_ = y_train_.replace(label_map)

np.save('../data/HAR/train_data.npy', features_)
# Save a plain (n, 1) ndarray rather than relying on implicit DataFrame conversion.
np.save('../data/HAR/train_labels.npy', y_train_.to_numpy())

print(features_.shape, y_train_.shape)


(5945, 128, 12) (5945, 1)


In [29]:
# Load the test data
# Directory holding the raw inertial-signal windows of the UCI HAR test split.
test_signals_dir = '../data/HAR/UCI HAR Dataset/test/Inertial Signals'

# Every file is a whitespace-separated table without a header row.
# `sep=r'\s+'` is the documented replacement for `delim_whitespace=True`, which is
# deprecated since pandas 2.2 and emits a FutureWarning.
(
    test_body_acc_x, test_body_acc_y, test_body_acc_z,
    test_gyro_x, test_gyro_y, test_gyro_z,
    test_total_acc_x, test_total_acc_y, test_total_acc_z,
) = (
    pd.read_csv(f'{test_signals_dir}/{signal}_test.txt', sep=r'\s+', header=None)
    for signal in (
        'body_acc_x', 'body_acc_y', 'body_acc_z',
        'body_gyro_x', 'body_gyro_y', 'body_gyro_z',
        'total_acc_x', 'total_acc_y', 'total_acc_z',
    )
)

# Activity id of each test window (single column, one row per window).
y_test = pd.read_csv('../data/HAR/UCI HAR Dataset/test/y_test.txt', sep=r'\s+', header=None)

In [30]:
# Build the 12-channel feature tensor for the test split:
#   [0:3]  attitude (Euler angles from a Madgwick IMU filter)
#   [3:6]  gravity
#   [6:9]  rotation rate
#   [9:12] user acceleration

# Calculate gravity (assuming total_acc - body_acc)
gravity_x = test_total_acc_x - test_body_acc_x
gravity_y = test_total_acc_y - test_body_acc_y
gravity_z = test_total_acc_z - test_body_acc_z

# User acceleration is the same as body acceleration for this purpose
user_acc_x = test_body_acc_x
user_acc_y = test_body_acc_y
user_acc_z = test_body_acc_z

# Stack the per-axis frames into (num_samples, sample_size, 3) NumPy arrays once,
# so the inner loop does cheap array indexing instead of per-element `.iloc` lookups.
test_total_acc = np.stack(
    [test_total_acc_x.to_numpy(), test_total_acc_y.to_numpy(), test_total_acc_z.to_numpy()],
    axis=-1,
)
test_body_acc = np.stack(
    [test_body_acc_x.to_numpy(), test_body_acc_y.to_numpy(), test_body_acc_z.to_numpy()],
    axis=-1,
)
test_gyro = np.stack(
    [test_gyro_x.to_numpy(), test_gyro_y.to_numpy(), test_gyro_z.to_numpy()],
    axis=-1,
)

# Initialize the Madgwick filter.
# The UCI HAR README states the signals are sampled at 50 Hz; the filter's default
# of 100 Hz would halve the integration step.
madgwick = Madgwick(frequency=50.0)

num_samples, sample_size = test_body_acc_x.shape
features = np.zeros((num_samples, sample_size, 12))  # Placeholder for all features

# The non-attitude channels are plain copies/differences of the inputs: fill them vectorised.
features[:, :, 3:6] = test_total_acc - test_body_acc  # Gravity
features[:, :, 6:9] = test_gyro  # Rotation Rate
features[:, :, 9:12] = test_body_acc  # User Acceleration

for i in tqdm(range(num_samples)):
    # Initialize quaternion to [1, 0, 0, 0] for each sample
    q = np.array([1.0, 0.0, 0.0, 0.0])
    for t in range(sample_size):
        # Gyroscope: the UCI HAR README documents these signals in radians/second,
        # which is what the filter expects, so no degree->radian conversion is applied.
        # Accelerometer: the filter uses the measured gravity direction as its
        # reference, so it needs the total acceleration, not the gravity-free body part.
        q = madgwick.updateIMU(q=q, gyr=test_gyro[i, t], acc=test_total_acc[i, t])
        features[i, t, 0:3] = q2euler(q)  # Attitude (roll, pitch, yaw)

100%|██████████| 2947/2947 [02:11<00:00, 22.34it/s]


In [33]:
# Save the features

# `features` is also assigned by the training-split cell. Make sure the array currently
# in memory belongs to the test split before filtering it with test labels.
assert len(features) == len(y_test), (
    f"features has {len(features)} rows but y_test has {len(y_test)}; "
    "re-run the test feature cell before saving"
)

# Only keep samples with labels that are 1-5
mask = y_test[0] < 6
features_ = features[mask]
y_test_ = y_test[mask]

# Change the labels based on the label_map (defined in the cell that saves the training split)
y_test_ = y_test_.replace(label_map)


np.save('../data/HAR/test_data.npy', features_)
# Save a plain (n, 1) ndarray rather than relying on implicit DataFrame conversion.
np.save('../data/HAR/test_labels.npy', y_test_.to_numpy())

print(features_.shape, y_test_.shape)


(2410, 128, 12) (2410, 1)


#### Merge datasets

In [38]:
# Merge the preprocessed HAR arrays with the MotionSense arrays and write the
# combined train/test/val splits to ../data/combined.

# har data
har_train = np.load('../data/HAR/train_data.npy')
har_train_labels = np.load('../data/HAR/train_labels.npy')
har_test = np.load('../data/HAR/test_data.npy')
har_test_labels = np.load('../data/HAR/test_labels.npy')

print(har_train.shape, har_train_labels.shape, har_test.shape, har_test_labels.shape)

# motionsense data
motionsense_train = np.load('../data/MotionSense/train_data.npy')
motionsense_train_labels = np.load('../data/MotionSense/train_labels.npy')
motionsense_test = np.load('../data/MotionSense/test_data.npy')
motionsense_test_labels = np.load('../data/MotionSense/test_labels.npy')
motionsense_val = np.load('../data/MotionSense/val_data.npy')
motionsense_val_labels = np.load('../data/MotionSense/val_labels.npy')


print(motionsense_train.shape, motionsense_train_labels.shape, motionsense_test.shape, motionsense_test_labels.shape)

# HAR labels have fewer columns than the MotionSense labels. Zero-pad the HAR labels
# on the right so both label arrays have the same width. The width is derived from
# the loaded arrays instead of being hard-coded.
extra_label_cols = motionsense_train_labels.shape[1] - har_train_labels.shape[1]
har_test_labels = np.pad(har_test_labels, ((0, 0), (0, extra_label_cols)))
har_train_labels = np.pad(har_train_labels, ((0, 0), (0, extra_label_cols)))

print(har_train_labels.shape, har_test_labels.shape)

# combine the data
combined_train = np.concatenate((har_train, motionsense_train), axis=0)
combined_train_labels = np.concatenate((har_train_labels, motionsense_train_labels), axis=0)
combined_test = np.concatenate((har_test, motionsense_test), axis=0)
combined_test_labels = np.concatenate((har_test_labels, motionsense_test_labels), axis=0)

print(combined_train.shape, combined_train_labels.shape, combined_test.shape, combined_test_labels.shape)

os.makedirs('../data/combined', exist_ok=True)

np.save('../data/combined/train_data.npy', combined_train)
np.save('../data/combined/train_labels.npy', combined_train_labels)
np.save('../data/combined/test_data.npy', combined_test)
np.save('../data/combined/test_labels.npy', combined_test_labels)
# HAR has no validation split here, so the validation set is MotionSense only.
np.save('../data/combined/val_data.npy', motionsense_val)
np.save('../data/combined/val_labels.npy', motionsense_val_labels)


(5945, 128, 12) (5945, 1) (2410, 128, 12) (2410, 1)
(29389, 128, 12) (29389, 4) (7227, 128, 12) (7227, 4)
(5945, 4) (2410, 4)
(35334, 128, 12) (35334, 4) (9637, 128, 12) (9637, 4)
