# Human Activity Recognition (HAR) Preprocessing Notebook
This notebook extracts time-domain and frequency-domain features from the raw accelerometer and gyroscope signals in the UCI HAR Dataset.

In [16]:

import numpy as np
from scipy.fft import fft
import os

# Optional (Google Colab): uncomment the two lines below to mount Google Drive
# if the dataset is stored there.
# from google.colab import drive
# drive.mount('/content/drive')

# Path to the unzipped UCI HAR Dataset (edit as needed)
base_path = "UCI HAR Dataset"
# Folder with the raw inertial signal files of the training split;
# load_signal_file() reads from this folder.
inertial_path = os.path.join(base_path, "train", "Inertial Signals")

# Function to load a signal file
def load_signal_file(filename, directory=None):
    """Load one whitespace-delimited signal file into a NumPy array.

    Parameters
    ----------
    filename : str
        Name of the file to read, relative to ``directory``.
    directory : str, optional
        Folder that contains the file. When omitted, the module-level
        ``inertial_path`` is used, so existing single-argument calls behave
        exactly as before. Pass another folder (for example the test split)
        to reuse this loader for other data.

    Returns
    -------
    numpy.ndarray
        The array returned by ``np.loadtxt`` for that file.
    """
    if directory is None:
        # Looked up at call time so later edits to `inertial_path` are honoured.
        directory = inertial_path
    return np.loadtxt(os.path.join(directory, filename))

# Names of the 9 raw training signal files: 3 sensors x 3 axes
signal_files = [
    f"{sensor}_{axis}_train.txt"
    for sensor in ("body_acc", "body_gyro", "total_acc")
    for axis in ("x", "y", "z")
]

# Read each signal file once and keep it under its file name
raw_signals = {}
for fname in signal_files:
    raw_signals[fname] = load_signal_file(fname)

# Number of rows in the first signal file
first_signal = raw_signals[signal_files[0]]
num_samples = first_signal.shape[0]


## Feature Extraction Functions

In [17]:
# These features describe the overall shape, energy, and trends of the signal.
# For example, someone walking will show a different acceleration magnitude than someone sitting.

def extract_time_features(signal):
    """Compute seven time-domain summary statistics of a 1-D signal.

    Parameters
    ----------
    signal : array-like
        Sequence of samples. Lists, tuples and NumPy arrays are accepted;
        the values are converted to a float array first, so plain Python
        sequences work and integer inputs cannot overflow when squared.

    Returns
    -------
    list
        ``[mean, std, min, max, median, energy, sma]`` where ``energy`` is
        the mean of the squared samples and ``sma`` is the mean of the
        absolute samples.
    """
    values = np.asarray(signal, dtype=float)
    n_values = len(values)
    return [
        np.mean(values),
        np.std(values),
        np.min(values),
        np.max(values),
        np.median(values),
        np.sum(values**2) / n_values,  # energy
        np.sum(np.abs(values)) / n_values,  # SMA
    ]

#Human movements have rhythmic components (like walking), which show up as peaks in frequency space.
def extract_freq_features(signal):
    """Compute four summary statistics of the FFT magnitude spectrum.

    The magnitude is taken over the full output of ``fft``, i.e. the
    two-sided spectrum including bin 0, so the reported peak index refers
    to that full-length array.

    Returns
    -------
    list
        ``[mean magnitude, std of magnitude, index of the largest
        magnitude, mean of the squared magnitudes]``.
    """
    # Magnitude of each FFT bin
    magnitude = np.abs(fft(signal))
    n_bins = len(magnitude)

    mean_magnitude = np.mean(magnitude)
    magnitude_spread = np.std(magnitude)
    peak_bin = np.argmax(magnitude)  # first index of the largest magnitude
    spectral_energy = np.sum(magnitude**2) / n_bins

    return [mean_magnitude, magnitude_spread, peak_bin, spectral_energy]


## Extract Features for All Samples

In [18]:

# Build one feature row per sample: for every signal file, the time-domain
# features followed by the frequency-domain features.
all_features = []

for sample_idx in range(num_samples):
    windows = (raw_signals[name][sample_idx] for name in signal_files)
    row = []
    for window in windows:
        row += extract_time_features(window)
        row += extract_freq_features(window)
    all_features.append(row)

X_train_generated = np.array(all_features)
print(f"Generated feature matrix shape: {X_train_generated.shape}")


Generated feature matrix shape: (7352, 99)


## Load Labels (`y_train`, `y_test`) and Define Activity Mapping

In [19]:

# Read the activity labels of both splits and cast them to integers
train_labels_file = os.path.join(base_path, "train", "y_train.txt")
test_labels_file = os.path.join(base_path, "test", "y_test.txt")
y_train = np.loadtxt(train_labels_file).astype(int)
y_test = np.loadtxt(test_labels_file).astype(int)

# Optional: label id -> activity name, with ids starting at 1
activity_names = (
    "WALKING",
    "WALKING_UPSTAIRS",
    "WALKING_DOWNSTAIRS",
    "SITTING",
    "STANDING",
    "LAYING",
)
activity_map = dict(enumerate(activity_names, start=1))
print("Sample y_train:", y_train[:10])


Sample y_train: [5 5 5 5 5 5 5 5 5 5]


## Process `test/Inertial Signals` to Generate `X_test`

In [20]:
# Names of the 9 raw test-split signal files. They are stored under their own
# name so the training list `signal_files` is not overwritten: the training
# feature cell indexes `raw_signals` with `signal_files`, and re-running it
# after this cell would otherwise look up test file names there.
test_signal_files = [
    'body_acc_x_test.txt', 'body_acc_y_test.txt', 'body_acc_z_test.txt',
    'body_gyro_x_test.txt', 'body_gyro_y_test.txt', 'body_gyro_z_test.txt',
    'total_acc_x_test.txt', 'total_acc_y_test.txt', 'total_acc_z_test.txt'
]

# Load test inertial signals, keyed by file name
test_inertial_path = os.path.join(base_path, "test", "Inertial Signals")
test_signals = {
    fname: np.loadtxt(os.path.join(test_inertial_path, fname))
    for fname in test_signal_files
}
num_test_samples = test_signals[test_signal_files[0]].shape[0]

# Extract features for all test samples: per signal file, the time-domain
# features followed by the frequency-domain features (same order as training).
test_features = []
for i in range(num_test_samples):
    features = []
    for fname in test_signal_files:
        signal = test_signals[fname][i]
        features.extend(extract_time_features(signal))
        features.extend(extract_freq_features(signal))
    test_features.append(features)

X_test_generated = np.array(test_features)
print(f"Generated X_test shape: {X_test_generated.shape}")


Generated X_test shape: (2947, 99)


## Save All Outputs

In [21]:

# Feature matrices are floating point, so savetxt's default format is kept.
np.savetxt("X_train_archana.txt", X_train_generated)
np.savetxt("X_test_archana.txt", X_test_generated)
# Labels are integer class ids; "%d" writes them as plain integers instead of
# the default scientific float notation (e.g. 5.000000000000000000e+00).
np.savetxt("y_train_archana.txt", y_train, fmt="%d")
np.savetxt("y_test_archana.txt", y_test, fmt="%d")
