## Trim and synchronize VIDIMU dataset files (e.g., .mot, .csv, .raw) based on estimated ideal synchronization frames specified in `infoToSync.csv`. Processes each file type accordingly and saves the aligned versions to a new output directory.


In [None]:
import os  # For handling file paths and directory structures
import sys  # For modifying the system path to import from custom directories
import pandas as pd  # For loading and manipulating tabular data (like .csv or .tsv files)

# Add the parent directory of the current working directory to the system path
# This allows importing modules (e.g., utils) that are outside the current script's folder
sys.path.append(os.path.dirname(os.getcwd()))

# Import custom utility functions from your project
# These functions are likely used for synchronizing IMU and video data
from utils.syncUtilities import plotFramesShiftToSyncrhonizeAllSubjectsOneActivity,getMainJointFromMotAndMainBonesFromCSV, getSamplesToSynchronize, SynchronizeAndCutSignals

In [2]:
import utils.fileProcessing as fileutil  # Import custom file processing utilities (e.g. reading .mot/.csv, extracting metadata) and alias it as 'fileutil' for easier access

In [3]:
# Root directory of the VIDIMU dataset on this machine. The input, analysis and
# output folders used by this notebook are all built relative to it, so this is
# the one path to edit when running on a different computer.
fulldatasetpath = r"D:/Machine Learning/Video_IMU data/Data"


In [4]:
# Input folder holding the original (unsynchronized) recordings for all subjects
inpath = os.path.join(fulldatasetpath, 'dataset', 'videoandimus')

# Folder where the synchronization analysis results (e.g. infoToSync.csv) are stored
inpathcsv = os.path.join(fulldatasetpath, 'analysis', 'videoandimusync')

# Destination folder for the trimmed, synchronized copies of the recordings
outpath = os.path.join(fulldatasetpath, 'dataset', 'videoandimusync')

# exist_ok=True keeps the cell safe to re-run and avoids the race between
# checking for the directory and creating it
os.makedirs(outpath, exist_ok=True)


In [5]:
# Name of the CSV file that lists, per recording, how many frames to cut ('CutFrames')
csvlog = 'infoToSync.csv'

# Full path to the synchronization CSV file
csvlogfile = os.path.join(inpathcsv, csvlog)

# Load the synchronization metadata into a DataFrame
try:
    dfsync = pd.read_csv(csvlogfile)
except FileNotFoundError:
    print("Can't find ", csvlogfile)
    # Stop here: without the CSV there is nothing to process, and continuing
    # would either fail below with an unrelated NameError or, in a notebook that
    # was run before, silently reuse a stale `dfsync` from a previous session.
    raise

# Display the first few rows of the synchronization DataFrame
dfsync.head()


Unnamed: 0,Subject,Activity,Trial,File,Type,CutFrames,OrigRmse,TheoRmse
0,S41,A01,T02,D:/Machine Learning/Video_IMU data/Data\datase...,mp4,4,12.386718,6.091099
1,S41,A01,T02,D:/Machine Learning/Video_IMU data/Data\datase...,csv,4,12.386718,6.091099
2,S42,A01,T01,D:/Machine Learning/Video_IMU data/Data\datase...,mp4,3,8.815782,2.952356
3,S42,A01,T01,D:/Machine Learning/Video_IMU data/Data\datase...,csv,3,8.815782,2.952356
4,S44,A01,T01,D:/Machine Learning/Video_IMU data/Data\datase...,raw,1,4.613521,3.228912


In [6]:
# Order the rows by subject first, then by activity, so the files are
# processed in a predictable sequence
sort_keys = ['Subject', 'Activity']
dfsync = dfsync.sort_values(by=sort_keys)


### Modify text files (.raw, .mot, .csv) --> folder videoandimusync

In [7]:
# Subjects to process: S40 through S57, leaving out S43 and S45
# (presumably the subjects with usable video + IMU recordings -- confirm against the dataset)
_excluded_ids = (43, 45)
subjects = [f"S{num}" for num in range(40, 58) if num not in _excluded_ids]


In [8]:
# Trimming rules per file type: type -> (row_start, text lines per video frame).
# The lines-per-frame factor converts a cut expressed in video frames (30 Hz)
# into a number of text lines to remove:
#   raw: 5 sensors sampled at 50 Hz -> 5 * 50/30 lines per video frame
#   mot: one row per 50 Hz sample   -> 50/30 lines per video frame
#   csv: one row per video frame    -> 1 line per video frame
# row_start is handed straight to fileutil.remove_insidelines_file; its exact
# meaning (first data row after the header) is defined by that helper.
# Types not listed here (e.g. 'mp4' videos) are not trimmed by this cell.
TRIM_RULES = {
    'raw': (2, 5 * (50 / 30)),
    'mot': (9, 50 / 30),
    'csv': (2, 1),
}

# Walk over every entry of the synchronization table and write a trimmed copy
# of each supported file into the output folder.
for index, row in dfsync.iterrows():
    subject = row['Subject']

    # Only the subjects selected above are processed
    if subject not in subjects:
        print('Skip subject')
        continue

    file = row['File']          # Full path to the file to be trimmed
    filetype = row['Type']      # 'raw', 'mot', 'csv' or 'mp4' (named so the builtin `type` stays usable)
    frames = row['CutFrames']   # Number of video frames to cut

    # Videos and unrecognised types are skipped before anything is created on disk
    if filetype not in TRIM_RULES:
        continue

    # Mirror the input location inside the output folder
    outfile = file.replace(inpath, outpath)

    # If the path does not contain `inpath` (e.g. the CSV was generated on another
    # machine or with different separators) the replace is a no-op and the
    # "output" would be the source file itself -- never trim originals in place.
    if outfile == file:
        print("Skip file outside input folder: ", file)
        continue

    # Create the destination folder, including any missing parent folders
    outdir = os.path.dirname(outfile)
    if not os.path.exists(outdir):
        print("Creating directory: ", outdir)
        os.makedirs(outdir, exist_ok=True)

    row_start, lines_per_frame = TRIM_RULES[filetype]

    # The line count is truncated to an integer.
    # NOTE(review): for 'raw' the truncated count is not always a multiple of the
    # 5 sensors (e.g. 4 frames -> 33 lines); confirm downstream readers tolerate it.
    fileutil.remove_insidelines_file(file, row_start, int(frames * lines_per_frame), outfile)
