In [1]:
import os
import math

# Source folder with the raw recordings and destination folder for the
# trimmed copies. NOTE(review): these are machine-specific absolute paths;
# adjust them before running elsewhere.
base_input = r'E:\eep564\proj\TinyML\dataset_OG\SisFall'
base_output = r'E:\eep564\proj\TinyML\dataSet'
subfolders = ['ADL', 'FALL']

# Copy every .txt recording from base_input/<subfolder> to
# base_output/<subfolder>, keeping only the first six comma-separated fields
# of each row.
for subfolder in subfolders:
    input_path = os.path.join(base_input, subfolder)
    output_path = os.path.join(base_output, subfolder)

    # open(..., 'w') does not create missing directories, so make sure the
    # destination exists before the first file is written.
    os.makedirs(output_path, exist_ok=True)

    for filename in os.listdir(input_path):
        if not filename.endswith('.txt'):
            continue

        input_file = os.path.join(input_path, filename)
        output_file = os.path.join(output_path, filename)

        with open(input_file, 'r', encoding='utf-8') as f:
            lines = f.readlines()

        new_lines = []
        for line in lines:
            numbers = line.strip().split(',')
            # Rows with fewer than 9 fields are dropped entirely.
            if len(numbers) >= 9:
                # Keep fields 1-6. The intent is to keep the two 3-axis
                # accelerometers.
                # NOTE(review): the published SisFall layout lists fields 4-6
                # as the gyroscope and 7-9 as the second accelerometer --
                # confirm that [:6] really selects the intended sensors.
                new_lines.append(','.join(numbers[:6]) + '\n')

        with open(output_file, 'w', encoding='utf-8') as f:
            f.writelines(new_lines)

In [2]:
# Select a 3 s window (601 samples at 200 Hz) centred on the sample with the largest SMV

def compute_mod(x, y, z):
    """Return the Euclidean magnitude of the 3-component vector (x, y, z)."""
    sum_of_squares = x**2 + y**2 + z**2
    return math.sqrt(sum_of_squares)

def get_centered_segment(data, center_idx, window_size=601):
    """Return a contiguous window of `data` positioned around `center_idx`.

    The window nominally starts `window_size // 2` rows before `center_idx`.
    When that would run past either end of `data`, the window is shifted
    inwards so it still contains `window_size` rows; the missing rows are
    taken from the opposite side of the centre.

    Args:
        data: Sequence supporting `len()` and slicing (a list of rows here).
        center_idx: Index the window should be positioned around.
        window_size: Number of rows to return (default 601).

    Returns:
        A new sequence (slice of `data`) with exactly `window_size` rows, or
        a copy of all of `data` when it has fewer than `window_size` rows.
        An empty list is returned for a non-positive `window_size`.
    """
    if window_size <= 0:
        return []

    total_len = len(data)

    # Not enough rows to fill the window: return everything that exists.
    # (Trying to pad from the opposite end can never succeed in this case.)
    if total_len <= window_size:
        return data[:]

    # Clamp the nominal start so the whole window lies inside the data.
    last_valid_start = total_len - window_size
    start = center_idx - window_size // 2
    start = max(0, min(start, last_valid_start))

    return data[start:start + window_size]

base_path = base_output

# Rows kept per recording: the peak row plus 300 rows on each side.
WINDOW_SIZE = 601

# For every .txt recording under base_path/<subfolder>, find the row with the
# largest averaged SMV and overwrite the file with the WINDOW_SIZE rows
# positioned around it.
for subfolder in subfolders:
    folder = os.path.join(base_path, subfolder)
    for filename in os.listdir(folder):
        if not filename.endswith('.txt'):
            continue

        filepath = os.path.join(folder, filename)

        # Parse the file into rows of six floats. Rows that contain a
        # non-numeric field, or that do not have exactly six fields, are
        # ignored.
        data = []
        with open(filepath, 'r', encoding='utf-8') as f:
            for line in f:
                try:
                    nums = [float(token) for token in line.strip().split(',')]
                except ValueError:
                    # Only parsing failures are skipped; anything else
                    # (e.g. KeyboardInterrupt) is allowed to propagate.
                    continue
                if len(nums) == 6:
                    data.append(nums)

        if not data:
            print(f"wrong：{filepath}")
            continue

        # A full window cannot be cut from a recording that is shorter than
        # the window, so leave such files untouched and report them.
        if len(data) < WINDOW_SIZE:
            print(f"skipped (only {len(data)} rows, need {WINDOW_SIZE}): {filepath}")
            continue

        # SMV (Signal Magnitude Vector): magnitude of each 3-axis triple,
        # averaged over the two triples in the row.
        # NOTE(review): the average is taken on the values as stored; if the
        # two triples use different scales, the larger one dominates -- confirm.
        avg_mods = [
            (compute_mod(row[0], row[1], row[2]) + compute_mod(row[3], row[4], row[5])) / 2
            for row in data
        ]

        # Index of the first row that reaches the maximum averaged SMV.
        max_idx = avg_mods.index(max(avg_mods))

        selected_data = get_centered_segment(data, max_idx, WINDOW_SIZE)

        # Write the selected rows back over the original file.
        with open(filepath, 'w', encoding='utf-8') as f:
            for nums in selected_data:
                f.write(','.join(map(str, nums)) + '\n')

In [2]:
# Write a .csv copy of every .txt recording into a sibling "<subfolder>_CSV"
# folder; the .txt files are left in place.
for subfolder in subfolders:
    input_folder = os.path.join(base_path, subfolder)
    output_folder = os.path.join(base_path, f"{subfolder}_CSV")

    os.makedirs(output_folder, exist_ok=True)

    for filename in os.listdir(input_folder):
        if not filename.endswith('.txt'):
            continue

        txt_path = os.path.join(input_folder, filename)
        # Swap only the trailing extension; str.replace would also rewrite
        # any '.txt' that happens to occur earlier in the file name.
        csv_filename = os.path.splitext(filename)[0] + '.csv'
        csv_path = os.path.join(output_folder, csv_filename)

        with open(txt_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()

        # Each line is stripped of surrounding whitespace and written back
        # with a single trailing newline.
        with open(csv_path, 'w', encoding='utf-8') as f:
            f.writelines(line.strip() + '\n' for line in lines)