In [None]:
import numpy as np
from datetime import datetime
from datetime import timedelta
import sys
import re
import os
import itertools
import glob
import pandas as pd

In [None]:
# Mount Google Drive at /content/drive; the data paths used in later cells live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# List the contents of one subject's folder on Drive (shell escape; the space in "My Drive" is backslash-escaped).
!ls /content/drive/My\ Drive/FYP/Synchronized/bitf21m541

clean_the_table				fall_left		  slow_walk
downstair				fall_right		  standing
fall_backward				fast_walk		  standing_up_from_laying
fall_backward_while_trying_to_sit_down	jogging			  talk_using_phone
fall_backward_while_trying_to_stand_up	laying			  typing
fall_forward				laying_down_from_sitting  upstair
fall_forward_when_trying_to_sit_down	reading			  walking
fall_forward_while_trying_to_stand_up	sitting


In [None]:
def prep_data(feat_filename, lab_cls, target_length):
    """Turn one sensor CSV file into a single flattened X/Y/Z feature vector.

    Args:
        feat_filename: Path of the CSV file to read.
        lab_cls: Class label attached to the extracted sample.
        target_length: Base window size. Four times this many rows are taken
            from the start of each of the X, Y and Z columns.

    Returns:
        A pair ``(features, labels)``. On success ``features`` is a list with
        one element, the X, Y and Z values concatenated in that order, and
        ``labels`` is ``[lab_cls]``. When the file cannot be read, has fewer
        than three columns, or has too few rows, both are empty lists and a
        message is printed.
    """
    # The number of rows required per channel is four times the argument.
    target_length *= 4

    try:
        frame = pd.read_csv(feat_filename, delimiter=",", decimal='.')
    except Exception as e:
        print(f"Error reading {feat_filename}: {e}")
        return [], []

    column_count = frame.shape[1]
    if column_count < 3:
        print(f"Unexpected number of columns in {feat_filename}.")
        return [], []
    if column_count == 3:
        frame.columns = ['X', 'Y', 'Z']
    else:
        # Four or more columns: keep the first four and drop the rest.
        frame = frame.iloc[:, :4]
        frame.columns = ['Timestamp', 'X', 'Y', 'Z']

    if len(frame['X']) < target_length:
        print(f"File {feat_filename} does not have enough samples (needed: {target_length}).")
        return [], []

    # Lay the three channels end to end: all X values, then Y, then Z.
    flattened = []
    for axis in ('X', 'Y', 'Z'):
        flattened += frame[axis][0:target_length].tolist()
    return [flattened], [lab_cls]

In [None]:
def load_data(filename, target_length):
    """Derive the activity class from a file's path and load its features.

    The lower-cased path is searched for an activity folder name enclosed in
    forward slashes. The position of the first matching marker in
    ``activity_markers`` is the class id passed to ``prep_data``.

    Args:
        filename: Path of the sensor CSV file.
        target_length: Forwarded unchanged to ``prep_data``.

    Returns:
        Whatever ``prep_data`` returns for a recognised activity, or
        ``([], [])`` when the path contains none of the markers.
    """
    # Order matters: the index of each marker is its class id (0..22).
    activity_markers = (
        '/clean_the_table/',
        '/fall_left/',
        '/fall_right/',
        '/fall_backward/',
        '/fall_backward_while_trying_to_sit_down/',
        '/fall_backward_while_trying_to_stand_up/',
        '/fall_forward/',
        '/fall_forward_when_trying_to_sit_down/',
        '/fall_forward_while_trying_to_stand_up/',
        '/downstair/',
        '/fast_walk/',
        '/jogging/',
        '/laying/',
        '/laying_down_from_sitting/',
        '/reading/',
        '/sitting/',
        '/slow_walk/',
        '/standing/',
        '/standing_up_from_laying/',
        '/talk_using_phone/',
        '/typing/',
        '/upstair/',
        '/walking/',
    )

    path_lower = filename.lower()
    for cls, marker in enumerate(activity_markers):
        if marker in path_lower:
            return prep_data(filename, cls, target_length)

    # No known activity folder in the path: skip this file.
    return [], []

In [None]:
import glob
import os
import re

import pandas as pd

# ==============================
# Training arrays for the 9 sensors, plus the labels.
# After this cell has run, index i of every array and of Y_train refers to
# the same event.
# ==============================
GlassAccelerometer_train = []
GlassGyroscope_train = []
GlassMagnetometer_train = []
PhoneAccelerometer_train = []
PhoneGyroscope_train = []
PhoneMagnetometer_train = []
WatchAccelerometer_train = []
WatchGyroscope_train = []
WatchMagnetometer_train = []
Y_train = []

# ==============================
# Target lengths for each sensor type.
# These values are passed to load_data as target_length for that sensor.
# Adjust these values based on your data.
# ==============================
target_lengths = {
    'glass_accelerometer': 4,
    'glass_gyroscope': 4,
    'glass_magnetometer': 4,
    'phone_accelerometer': 100,
    'phone_gyroscope': 490,
    'phone_magnetometer': 100,
    'watch_accelerometer': 98,
    'watch_gyroscope': 98,
    'watch_magnetometer': 98
}

# ==============================
# Fixed sensor order.
# This order is used to align events across sensors and to pick the sensor a
# file belongs to (first name found in the file name wins).
# ==============================
sensor_order = [
    'glass_accelerometer',
    'glass_gyroscope',
    'glass_magnetometer',
    'phone_accelerometer',
    'phone_gyroscope',
    'phone_magnetometer',
    'watch_accelerometer',
    'watch_gyroscope',
    'watch_magnetometer'
]

# Sensor name -> the list that collects that sensor's feature vectors.
# The lists are only ever appended to, so the module-level names above keep
# pointing at the same objects.
train_arrays = {
    'glass_accelerometer': GlassAccelerometer_train,
    'glass_gyroscope': GlassGyroscope_train,
    'glass_magnetometer': GlassMagnetometer_train,
    'phone_accelerometer': PhoneAccelerometer_train,
    'phone_gyroscope': PhoneGyroscope_train,
    'phone_magnetometer': PhoneMagnetometer_train,
    'watch_accelerometer': WatchAccelerometer_train,
    'watch_gyroscope': WatchGyroscope_train,
    'watch_magnetometer': WatchMagnetometer_train,
}

base_path = r"/content/drive/My Drive/FYP/Sync_502"  # Adjust this path as needed to synchronized and structured base folder

# "_e" followed by digits and a dot, e.g. "_e07." in "phone_gyroscope_e07.csv".
EVENT_NUMBER_PATTERN = re.compile(r"_e(\d+)\.")


def detect_sensor(filename):
    """Return the first entry of sensor_order contained in filename, or None."""
    for sensor in sensor_order:
        if sensor in filename:
            return sensor
    return None


def parse_event_number(filename):
    """Return the integer after "_e" in filename (leading zeros dropped), or None."""
    match = EVENT_NUMBER_PATTERN.search(filename)
    return int(match.group(1)) if match else None


def group_event_files(activity_folder):
    """Group the CSV files directly inside activity_folder by event number.

    Returns a dict shaped ``{event_number: {sensor: file_path}}``. Files whose
    name matches none of the sensors are ignored; files of a known sensor
    without a parsable event number are reported and skipped.
    """
    event_dict = {}
    # glob returns paths in arbitrary order; sort for reproducible runs.
    for csv_path in sorted(glob.glob(os.path.join(activity_folder, "*.csv"))):
        filename = os.path.basename(csv_path).lower()
        sensor = detect_sensor(filename)
        if sensor is None:
            continue
        event_num = parse_event_number(filename)
        if event_num is None:
            print(f"Could not parse an event number from {filename}; skipping file.")
            continue
        event_dict.setdefault(event_num, {})[sensor] = csv_path
    return event_dict


# Get all subject folders under base_path (sorted so sample order is stable).
subject_folders = sorted(glob.glob(os.path.join(base_path, "*")))

# Process each subject folder.
for subject in subject_folders:
    if not os.path.isdir(subject):
        continue
    print(f"🤵🏻🤵🏻🤵🏻 Processing subject: {subject}")

    # Process each activity folder of this subject.
    for activity_folder in sorted(glob.glob(os.path.join(subject, "*"))):
        if not os.path.isdir(activity_folder):
            continue
        print(f"🚀🚀🚀 Processing activity: {activity_folder}")

        event_dict = group_event_files(activity_folder)

        # Process events in ascending event-number order.
        for event_num in sorted(event_dict):
            event_files_dict = event_dict[event_num]
            event_features = {}
            event_label = None

            # Load every sensor file of the event before touching the
            # training arrays, so a partial event never gets appended.
            for sensor in sensor_order:
                if sensor not in event_files_dict:
                    continue
                data_list, labels = load_data(event_files_dict[sensor], target_lengths[sensor])
                if data_list:
                    event_features[sensor] = data_list
                    # Assume all files in the same event share the same label.
                    event_label = labels

            # An event is usable only if all nine sensors produced data;
            # otherwise the arrays would fall out of step with each other.
            missing = [sensor for sensor in sensor_order if sensor not in event_features]
            if missing:
                print(f"Skipping event {event_num} in {activity_folder}: no usable data for {', '.join(missing)}")
                continue

            for sensor in sensor_order:
                train_arrays[sensor].extend(event_features[sensor])
            Y_train.extend(event_label)

# Every sensor array must line up one-to-one with the labels.
assert all(len(train_arrays[sensor]) == len(Y_train) for sensor in sensor_order)

print("Total sample for accelerometer:", len(WatchAccelerometer_train))
print("Total label samples after truncation:", len(Y_train))


ü§µüèªü§µüèªü§µüèª Processing subject: /content/drive/My Drive/FYP/Sync_502/bitf21m502
üöÄüöÄüöÄ Processing activity: /content/drive/My Drive/FYP/Sync_502/bitf21m502/slow_walk
File /content/drive/My Drive/FYP/Sync_502/bitf21m502/slow_walk/phone_gyroscope_e1.csv does not have enough samples (needed: 1960).
Total sample for accelerometer: 6
Total label samples after truncation: 6


In [None]:
# Report the number of samples held by each training array and by the labels,
# one "name: count" line per array.
print('\n'.join(
    f'{caption} {len(samples)}'
    for caption, samples in (
        ('GlassAccelerometer_train:', GlassAccelerometer_train),
        ('GlassGyroscope_train:', GlassGyroscope_train),
        ('GlassMagnetometer_train:', GlassMagnetometer_train),
        ('PhoneAccelerometer_train:', PhoneAccelerometer_train),
        ('PhoneGyroscope_train:', PhoneGyroscope_train),
        ('PhoneMagnetometer_train:', PhoneMagnetometer_train),
        ('WatchAccelerometer_train:', WatchAccelerometer_train),
        ('WatchGyroscope_train:', WatchGyroscope_train),
        ('WatchMagnetometer_train:', WatchMagnetometer_train),
        ('Y_train:', Y_train),
    )
))

GlassAccelerometer_train: 6
GlassGyroscope_train: 6
GlassMagnetometer_train: 6
PhoneAccelerometer_train: 6
PhoneGyroscope_train: 6
PhoneMagnetometer_train: 6
WatchAccelerometer_train: 6
WatchGyroscope_train: 6
WatchMagnetometer_train: 6
Y_train: 6


In [None]:
# Distinct class labels present in Y_train and how many samples carry each one.
unique_labels, counts = np.unique(Y_train, return_counts=True)

In [None]:
# Show the label distribution, then the full label list.
# NOTE(review): print(Y_train) emits every label; fine for a handful of samples, noisy for a full dataset.
print("Unique labels and counts:", unique_labels, counts)
print(Y_train)

Unique labels and counts: [16] [6]
[16, 16, 16, 16, 16, 16]


In [None]:
# Save the training data in JSON format.
import json

# Defined once so the file that is written and the confirmation message
# below always name the same location.
output_json_path = r'/content/drive/My Drive/FYP/test_502.json'

# One key per sensor array plus the labels; every value is a plain list.
data_dict = {
    'GlassAccelerometer_train': GlassAccelerometer_train,
    'GlassGyroscope_train': GlassGyroscope_train,
    'GlassMagnetometer_train': GlassMagnetometer_train,
    'PhoneAccelerometer_train': PhoneAccelerometer_train,
    'PhoneGyroscope_train': PhoneGyroscope_train,
    'PhoneMagnetometer_train': PhoneMagnetometer_train,
    'WatchAccelerometer_train': WatchAccelerometer_train,
    'WatchGyroscope_train': WatchGyroscope_train,
    'WatchMagnetometer_train': WatchMagnetometer_train,
    'Y_train': Y_train
}

# Save the data dictionary to a JSON file.
with open(output_json_path, 'w') as file:
    json.dump(data_dict, file)

print(f"Training data saved to {output_json_path}")

Training data saved to test_541.json


In [None]:
# Read a saved dataset back from JSON and wrap it in a DataFrame.
# NOTE(review): this opens "test_541.json" relative to the current working directory, while the
# save cell earlier in this notebook writes /content/drive/My Drive/FYP/test_502.json.
# Confirm which file is meant to be loaded here.
json_file_path = "test_541.json"
with open(json_file_path, 'r') as json_file:
    data = json.load(json_file)
df = pd.DataFrame(data)

In [None]:
# Preview the first rows of the DataFrame loaded from JSON.
df.head()

Unnamed: 0,GlassAccelerometer_train,GlassGyroscope_train,GlassMagnetometer_train,PhoneAccelerometer_train,PhoneGyroscope_train,PhoneMagnetometer_train,WatchAccelerometer_train,WatchGyroscope_train,WatchMagnetometer_train,Y_train
0,"[4008.0, 4005.0, 4005.0, 4004.0, 3997.0, 4017....","[65373.0, 65453.0, 65418.0, 65353.0, 65454.0, ...","[65332.0, 65323.0, 65330.0, 65331.0, 65326.0, ...","[-0.4644893, -0.45012367, -0.45970076, -0.4740...","[0.006176458, 0.006176458, 0.006176458, 0.0043...","[112.2, 112.799995, 113.04, 113.28, 113.22, 11...","[9.033323, 9.030929, 9.033323, 8.985439, 9.078...","[-0.03176499, -0.035430185, -0.039095376, -0.0...","[150.36, 150.48, 149.64, 149.87999, 149.94, 14...",4
1,"[4000.0, 4006.0, 4005.0, 4005.0, 3990.0, 4006....","[65477.0, 65456.0, 65461.0, 65390.0, 65449.0, ...","[65318.0, 65323.0, 65338.0, 65333.0, 65332.0, ...","[-0.89067024, -0.8834874, -0.8739103, -0.90743...","[-0.042081896, -0.044525355, -0.04391449, -0.0...","[117.78, 118.079994, 117.84, 118.02, 117.89999...","[9.227253, 9.251195, 9.203311, 9.212888, 9.255...","[-0.04520403, -0.035430185, -0.029321533, -0.0...","[145.5, 146.52, 146.81999, 147.84, 148.31999, ...",4
2,"[4000.0, 4011.0, 4002.0, 4009.0, 3997.0, 3997....","[65437.0, 65394.0, 65456.0, 65437.0, 65412.0, ...","[65318.0, 65326.0, 65318.0, 65329.0, 65324.0, ...","[2.0064023, 2.8683412, 1.0750294, -2.7462332, ...","[0.96218055, 0.810686, 0.43866903, 0.007398189...","[125.46, 125.52, 124.74, 124.619995, 123.36, 1...","[9.528923, 9.504981, 9.677363, 9.9550905, 10.3...","[-0.5131268, -0.7855727, -1.102001, -1.4391985...","[141.0, 141.48, 141.06, 140.94, 141.48, 141.59...",4
3,"[4006.0, 4002.0, 4002.0, 4018.0, 4003.0, 4011....","[65412.0, 65416.0, 65377.0, 65406.0, 65463.0, ...","[65323.0, 65330.0, 65335.0, 65330.0, 65326.0, ...","[1.4150164, 1.297697, 1.4269878, 1.6520497, 2....","[-0.45869198, -0.44952902, -0.44219863, -0.433...","[115.56, 115.079994, 114.899994, 115.02, 114.7...","[9.662998, 9.126697, 8.652645, 9.272743, 9.651...","[3.8166862, 4.053702, 4.2003093, 4.244292, 4.2...","[-29.82, -30.179998, -29.939999, -30.48, -31.4...",4
4,"[4010.0, 4006.0, 4025.0, 4010.0, 4005.0, 3984....","[65364.0, 65412.0, 65411.0, 65456.0, 65431.0, ...","[65331.0, 65324.0, 65324.0, 65325.0, 65329.0, ...","[-0.17957062, -0.12450229, -0.10774237, -0.117...","[0.022058954, 0.025724147, 0.021448089, 0.0238...","[106.619995, 106.32, 106.86, 106.86, 106.74, 1...","[9.330204, 9.330204, 9.248801, 9.299079, 9.165...","[-0.021991149, -0.017104227, -0.007330383, 0.0...","[-29.82, -29.519999, -29.34, -29.4, -29.34, -2...",4
