In [1]:
import numpy as np
from datetime import datetime
from datetime import timedelta
import sys
import re
import os
import itertools
import glob
import pandas as pd

In [2]:
# Mount Google Drive at /content/drive; every dataset path used by the later
# cells lives under this mount point. `google.colab` is only importable
# inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# List the contents of one folder of the synchronized dataset (bitf21m541).
# NOTE(review): the loading cell reads from FYP/Sync_502, not from
# FYP/Synchronized -- confirm this listing shows the dataset that is actually loaded.
!ls /content/drive/My\ Drive/FYP/Synchronized/bitf21m541

clean_the_table				fall_left		  slow_walk
downstair				fall_right		  standing
fall_backward				fast_walk		  standing_up_from_laying
fall_backward_while_trying_to_sit_down	jogging			  talk_using_phone
fall_backward_while_trying_to_stand_up	laying			  typing
fall_forward				laying_down_from_sitting  upstair
fall_forward_when_trying_to_sit_down	reading			  walking
fall_forward_while_trying_to_stand_up	sitting


In [4]:
def prep_data(feat_filename, lab_cls, target_length, samples_multiplier=4):
    """Read one sensor CSV and flatten its leading rows into a feature vector.

    The first ``target_length * samples_multiplier`` rows of the X, Y and Z
    channels are taken and concatenated as ``X values + Y values + Z values``.

    Parameters
    ----------
    feat_filename : str
        Path of the CSV file (comma separated, ``.`` as decimal mark). The
        first row is consumed as the header by ``pd.read_csv``.
    lab_cls : int
        Class label attached to the extracted sample.
    target_length : int
        Base number of rows per channel, scaled by ``samples_multiplier``.
    samples_multiplier : int, optional
        Factor applied to ``target_length``. Defaults to 4, the value that
        used to be hard-coded, so existing three-argument calls are unchanged.

    Returns
    -------
    tuple[list, list]
        ``([feature_vector], [lab_cls])`` on success. ``([], [])`` when the
        file cannot be read, has fewer than three columns, has too few rows,
        or when the requested number of rows is not positive. Every failure
        is reported with ``print`` instead of raising.
    """
    required = target_length * samples_multiplier

    # A non-positive count would pass the length check below and make the
    # slice count from the end of the file (or yield an empty vector).
    if required < 1:
        print(f"Invalid sample count {required} requested for {feat_filename}.")
        return [], []

    # Best effort by design: an unreadable file is reported and skipped so
    # that one bad CSV does not abort a whole loading run.
    try:
        frame = pd.read_csv(feat_filename, delimiter=",", decimal='.')
    except Exception as e:
        print(f"Error reading {feat_filename}: {e}")
        return [], []

    n_columns = len(frame.columns)
    if n_columns >= 4:
        # Keep only the first four columns; the first one is treated as the
        # timestamp and is not part of the feature vector.
        frame = frame.iloc[:, :4]
        frame.columns = ['Timestamp', 'X', 'Y', 'Z']
    elif n_columns == 3:
        frame.columns = ['X', 'Y', 'Z']
    else:
        print(f"Unexpected number of columns in {feat_filename}.")
        return [], []

    if len(frame) < required:
        print(f"File {feat_filename} does not have enough samples (needed: {required}).")
        return [], []

    # Channel-major layout: all X samples, then all Y, then all Z.
    window = frame.iloc[:required]
    features = window['X'].tolist() + window['Y'].tolist() + window['Z'].tolist()
    return [features], [lab_cls]

In [5]:
# Ordered (keyword, class id) pairs; the first keyword found in the path wins.
# A keyword that contains another keyword must therefore come first:
#   fall_backward_while_* / fall_forward_wh*   before  fall_backward / fall_forward
#   laying_down_from_sitting                   before  laying and sitting
#   standing_up_from_laying                    before  laying and standing
_ACTIVITY_CLASSES = (
    ('clean_the_table', 0),
    ('fall_left', 1),
    ('fall_right', 2),
    ('fall_backward_while_trying_to_sit_down', 4),
    ('fall_backward_while_trying_to_stand_up', 5),
    ('fall_backward', 3),
    ('fall_forward_when_trying_to_sit_down', 7),
    ('fall_forward_while_trying_to_stand_up', 8),
    ('fall_forward', 6),
    ('downstair', 9),
    ('fast_walk', 10),
    ('jogging', 11),
    ('laying_down_from_sitting', 13),
    # Must precede 'laying': with the old order every standing_up_from_laying
    # path matched 'laying' first and was labelled 12, so class 18 never occurred.
    ('standing_up_from_laying', 18),
    ('laying', 12),
    ('reading', 14),
    ('sitting', 15),
    ('slow_walk', 16),
    ('standing', 17),
    ('talk_using_phone', 19),
    ('typing', 20),
    ('upstair', 21),
    ('walking', 22),
)


def load_data(filename, target_length):
    """Derive the activity class from a file path and load its feature vector.

    The whole path is lower-cased and searched for the activity keywords in
    ``_ACTIVITY_CLASSES``; because the full string is searched, a keyword in
    any directory component counts, not only one in the file name itself.

    Parameters
    ----------
    filename : str
        Path of the sensor CSV file.
    target_length : int
        Forwarded unchanged to ``prep_data``.

    Returns
    -------
    tuple[list, list]
        Whatever ``prep_data(filename, cls, target_length)`` returns for the
        matched class, or ``([], [])`` when no activity keyword is present.
    """
    fname_lower = filename.lower()

    for keyword, cls in _ACTIVITY_CLASSES:
        if keyword in fname_lower:
            return prep_data(filename, cls, target_length)

    # No known activity keyword: skip this file.
    return [], []

In [6]:
import os
import glob
import pandas as pd

# ==============================
# Define training arrays for 9 sensors and labels.
# Index i of every *_train list and of Y_train belongs to the same event.
# ==============================
GlassAccelerometer_train = []
GlassGyroscope_train = []
GlassMagnetometer_train = []
PhoneAccelerometer_train = []
PhoneGyroscope_train = []
PhoneMagnetometer_train = []
WatchAccelerometer_train = []
WatchGyroscope_train = []
WatchMagnetometer_train = []
Y_train = []

# ==============================
# Define target lengths for each sensor type.
# These represent the number of samples (per channel) to extract.
# Adjust these values based on your data.
# ==============================
target_lengths = {
    'glass_accelerometer': 4,
    'glass_gyroscope': 4,
    'glass_magnetometer': 4,
    'phone_accelerometer': 100,
    'phone_gyroscope': 490,
    'phone_magnetometer': 100,
    'watch_accelerometer': 98,
    'watch_gyroscope': 98,
    'watch_magnetometer': 98
}

# ==============================
# Define the fixed sensor order.
# This order is used to align events across sensors.
# ==============================
sensor_order = [
    'glass_accelerometer',
    'glass_gyroscope',
    'glass_magnetometer',
    'phone_accelerometer',
    'phone_gyroscope',
    'phone_magnetometer',
    'watch_accelerometer',
    'watch_gyroscope',
    'watch_magnetometer'
]

# Sensor key -> the training list it feeds. The lists are only ever mutated
# in place, so the module-level names above keep pointing at the same objects.
train_lists = {
    'glass_accelerometer': GlassAccelerometer_train,
    'glass_gyroscope': GlassGyroscope_train,
    'glass_magnetometer': GlassMagnetometer_train,
    'phone_accelerometer': PhoneAccelerometer_train,
    'phone_gyroscope': PhoneGyroscope_train,
    'phone_magnetometer': PhoneMagnetometer_train,
    'watch_accelerometer': WatchAccelerometer_train,
    'watch_gyroscope': WatchGyroscope_train,
    'watch_magnetometer': WatchMagnetometer_train,
}

base_path = r"/content/drive/My Drive/FYP/Sync_502"  # Adjust this path as needed to synchronized and structured base folder

# Get all subject folders under base_path. glob returns entries in an
# arbitrary, filesystem-dependent order, so every listing is sorted to keep
# the sample order reproducible between runs.
subject_folders = sorted(glob.glob(os.path.join(base_path, "*")))

# Process each subject folder.
for subject in subject_folders:
    if not os.path.isdir(subject):
        continue
    print(f"🤵🏻🤵🏻🤵🏻 Processing subject: {subject}")

    # Get all activity folders in this subject folder.
    activity_folders = sorted(glob.glob(os.path.join(subject, "*")))
    for activity_folder in activity_folders:
        if not os.path.isdir(activity_folder):
            continue
        print(f"🚀🚀🚀 Processing activity: {activity_folder}")

        # Group the CSV files of this activity by event number:
        #    event_dict[event_number][sensor] = file_path
        event_dict = {}
        event_files = sorted(glob.glob(os.path.join(activity_folder, "*.csv")))
        for f in event_files:
            filename = os.path.basename(f).lower()
            if "_e" not in filename:
                continue
            try:
                # Text between the first "_e" and the next "." is the event
                # number; int() also drops leading zeros.
                part = filename.split("_e")[1].split(".")[0]
                event_num = int(part)
            except ValueError as e:
                print(f"Error parsing event number in {filename}: {e}")
                continue
            # First sensor key (in sensor_order) contained in the file name.
            sensor_found = next((s for s in sensor_order if s in filename), None)
            if sensor_found is None:
                # File does not match any of the 9 sensors; skip it.
                continue
            event_dict.setdefault(event_num, {})[sensor_found] = f

        # Process events in ascending event-number order.
        for event_num in sorted(event_dict):
            event_files_dict = event_dict[event_num]

            # Stage the event first and commit it only when all nine sensors
            # produced data. This keeps the lists aligned without re-slicing
            # every list to the shortest length after each event.
            staged = {}
            event_label = None
            for sensor in sensor_order:
                if sensor not in event_files_dict:
                    continue
                data_list, labels = load_data(event_files_dict[sensor], target_lengths[sensor])
                if data_list:
                    staged[sensor] = data_list
                    # Assume all files in the same event share the same label.
                    event_label = labels

            missing = [s for s in sensor_order if s not in staged]
            if missing or event_label is None:
                # Incomplete events are dropped; say so instead of doing it silently.
                print(f"Skipping event {event_num} in {activity_folder}: no usable data for {missing}")
                continue

            for sensor in sensor_order:
                train_lists[sensor].extend(staged[sensor])
            Y_train.extend(event_label)

print("Total sample for accelerometer:", len(WatchAccelerometer_train))
print("Total label samples after truncation:", len(Y_train))


🤵🏻🤵🏻🤵🏻 Processing subject: /content/drive/My Drive/FYP/Sync_502/bitf21m502
🚀🚀🚀 Processing activity: /content/drive/My Drive/FYP/Sync_502/bitf21m502/slow_walk
File /content/drive/My Drive/FYP/Sync_502/bitf21m502/slow_walk/phone_gyroscope_e1.csv does not have enough samples (needed: 1960).
Total sample for accelerometer: 6
Total label samples after truncation: 6


In [7]:
# Report how many samples each sensor list holds, and how many labels there
# are; all ten counts should be equal if the events are aligned.
for series_name, series in (
    ('GlassAccelerometer_train', GlassAccelerometer_train),
    ('GlassGyroscope_train', GlassGyroscope_train),
    ('GlassMagnetometer_train', GlassMagnetometer_train),
    ('PhoneAccelerometer_train', PhoneAccelerometer_train),
    ('PhoneGyroscope_train', PhoneGyroscope_train),
    ('PhoneMagnetometer_train', PhoneMagnetometer_train),
    ('WatchAccelerometer_train', WatchAccelerometer_train),
    ('WatchGyroscope_train', WatchGyroscope_train),
    ('WatchMagnetometer_train', WatchMagnetometer_train),
    ('Y_train', Y_train),
):
    print(f'{series_name}:', len(series))

GlassAccelerometer_train: 6
GlassGyroscope_train: 6
GlassMagnetometer_train: 6
PhoneAccelerometer_train: 6
PhoneGyroscope_train: 6
PhoneMagnetometer_train: 6
WatchAccelerometer_train: 6
WatchGyroscope_train: 6
WatchMagnetometer_train: 6
Y_train: 6


In [8]:
# Tally how many samples carry each class label.
unique_labels, counts = np.unique(np.asarray(Y_train), return_counts=True)

In [9]:
# Print the class distribution first, then the raw label sequence.
for report_line in (("Unique labels and counts:", unique_labels, counts), (Y_train,)):
    print(*report_line)

Unique labels and counts: [16] [6]
[16, 16, 16, 16, 16, 16]


In [12]:
# Save the nine per-sensor sample lists plus the labels as one JSON object.
# NOTE(review): the samples are read from .../FYP/Sync_502 but are written to
# test_541.json and reported as "Training data" -- confirm the subject id and
# the train/test naming are what is intended.
import json

output_path = r'/content/drive/My Drive/FYP/test_541.json'

data_dict = dict(
    GlassAccelerometer_train=GlassAccelerometer_train,
    GlassGyroscope_train=GlassGyroscope_train,
    GlassMagnetometer_train=GlassMagnetometer_train,
    PhoneAccelerometer_train=PhoneAccelerometer_train,
    PhoneGyroscope_train=PhoneGyroscope_train,
    PhoneMagnetometer_train=PhoneMagnetometer_train,
    WatchAccelerometer_train=WatchAccelerometer_train,
    WatchGyroscope_train=WatchGyroscope_train,
    WatchMagnetometer_train=WatchMagnetometer_train,
    Y_train=Y_train,
)

# Write the dictionary to Drive in a single dump.
with open(output_path, 'w') as out_handle:
    json.dump(data_dict, out_handle)

print("Training data saved to test_541.json")

Training data saved to test_541.json


In [14]:
# Read the saved JSON back and tabulate it: one column per key of the saved
# dictionary, one row per sample.
json_file_path = r"/content/drive/My Drive/FYP/test_541.json"
with open(json_file_path, 'r') as json_file:
    data = json.loads(json_file.read())
df = pd.DataFrame(data)

In [15]:
# Preview the first five rows of the reloaded table.
df.head(n=5)

Unnamed: 0,GlassAccelerometer_train,GlassGyroscope_train,GlassMagnetometer_train,PhoneAccelerometer_train,PhoneGyroscope_train,PhoneMagnetometer_train,WatchAccelerometer_train,WatchGyroscope_train,WatchMagnetometer_train,Y_train
0,"[4502.0, 3541.0, 3910.0, 3651.0, 5004.0, 4213....","[62238.0, 1444.0, 4948.0, 1994.0, 6036.0, 318....","[65412.0, 65406.0, 65418.0, 65412.0, 65422.0, ...","[0.5650489, 0.2777359, 0.2657645, 0.32562137, ...","[0.3008884, 0.3112731, 0.33326426, 0.3412055, ...","[127.979996, 128.52, 128.64, 129.18, 129.36, 1...","[9.52174, 9.208099, 8.971074, 8.825027, 8.6191...","[1.3329079, 1.1704178, 1.1704178, 1.0384709, 1...","[46.98, 46.8, 47.64, 47.7, 47.52, 47.46, 47.39...",16
1,"[3732.0, 3945.0, 3563.0, 4115.0, 3646.0, 4311....","[1298.0, 4653.0, 10737.0, 11900.0, 6384.0, 203...","[65493.0, 65478.0, 65490.0, 65477.0, 65483.0, ...","[0.9960183, 1.0965779, 1.3431882, 1.4916332, 1...","[-0.4620823, -0.46452576, -0.47002354, -0.4797...","[123.06, 122.939995, 122.7, 122.1, 122.7, 123....","[9.0476885, 8.805874, 8.746018, 8.616732, 8.58...","[0.516792, 0.5717699, 0.5717699, 0.6401868, 0....","[45.36, 44.94, 44.52, 44.04, 44.52, 44.64, 45....",16
2,"[4899.0, 4178.0, 3858.0, 3826.0, 4974.0, 3684....","[64993.0, 1982.0, 884.0, 64744.0, 62820.0, 622...","[65395.0, 65382.0, 65371.0, 65374.0, 65375.0, ...","[-2.6289136, -3.0287576, -3.2394538, -3.174808...","[-0.16398005, -0.16825612, -0.17314303, -0.178...","[105.299995, 105.6, 106.14, 106.259995, 106.14...","[8.015787, 5.056554, 5.6335564, 5.592855, 5.48...","[0.28221974, 0.16737708, 0.16737708, -0.107512...","[-17.76, -17.82, -18.24, -18.3, -18.0, -18.119...",16
3,"[3903.0, 3800.0, 4842.0, 4199.0, 3767.0, 4079....","[1052.0, 64934.0, 64165.0, 63823.0, 63460.0, 6...","[65463.0, 65472.0, 65479.0, 65476.0, 65487.0, ...","[-3.2394538, -2.466103, -1.7190893, -1.1205206...","[0.17138496, 0.12190488, 0.1005246, 0.07853345...","[114.54, 114.72, 114.6, 114.479996, 114.479996...","[8.868123, 9.018958, 8.9998045, 8.985439, 9.08...","[-0.7086037, -0.5510005, -0.5510005, -0.459370...","[47.879997, 47.879997, 47.76, 48.12, 47.76, 47...",16
4,"[3899.0, 3369.0, 3748.0, 3534.0, 5033.0, 3656....","[65133.0, 219.0, 2158.0, 598.0, 65024.0, 62831...","[32.0, 9.0, 15.0, 65533.0, 65526.0, 65522.0, 6...","[1.0534809, 1.2306572, 1.3072741, 1.0534809, 0...","[-0.74552375, -0.7559085, -0.76507145, -0.7681...","[87.42, 87.78, 88.74, 88.56, 88.2, 88.02, 87.2...","[11.008539, 11.262324, 11.224017, 11.056423, 1...","[1.3891076, 1.4404203, 1.4746287, 1.4807373, 1...","[-35.76, -35.579998, -35.7, -35.1, -34.739998,...",16
