# In [None]:
import os
import pandas as pd
import numpy as np
from tqdm import tqdm
import csv
from dtaidistance import dtw
from concurrent.futures import ProcessPoolExecutor, as_completed
import sys

def unsyncFiles(inputFile):
    """Desynchronise the four limb signals of one recording and log the offsets.

    For every limb a random row count is drawn from ``[250, 1000)`` and that
    many rows (at most) are taken from the unlabelled rows of
    ``raw_data/sbj_0.csv`` and prepended to the limb's three accelerometer
    columns. Each limb gets its own offset, so the limb columns end up with
    different lengths; when they are re-assembled side by side, shorter
    columns (and every non-limb column) are padded with NaN at the end.

    Side effects:
        * writes ``unsync/<name>_unsync.csv`` (directory created on demand);
        * appends one row to ``unsync_report.csv`` in the current directory,
          emitting the header only if the file does not exist yet.

    Args:
        inputFile: File name of the recording inside ``raw_data``.

    Returns:
        The file name (without directory) of the CSV written to ``unsync``.
    """
    limb_signals = {
        'right_arm': ['right_arm_acc_x', 'right_arm_acc_y', 'right_arm_acc_z'],
        'left_arm':  ['left_arm_acc_x', 'left_arm_acc_y', 'left_arm_acc_z'],
        'right_leg': ['right_leg_acc_x', 'right_leg_acc_y', 'right_leg_acc_z'],
        'left_leg':  ['left_leg_acc_x', 'left_leg_acc_y', 'left_leg_acc_z'],
    }
    report_columns = [
        'subject',
        'right_arm',
        'left_arm',
        'right_leg',
        'left_leg',
        'unsync_arm',
        'unsync_leg',
    ]

    input_dir = "raw_data"
    output_dir = "unsync"
    os.makedirs(output_dir, exist_ok=True)

    # Rows of subject 0 whose label is missing/blank serve as the padding pool.
    df_0 = pd.read_csv(os.path.join(input_dir, "sbj_0.csv"), low_memory=False)
    rest_rows = df_0[
        df_0['label'].isna() |
        df_0['label'].astype(str).str.strip().isin(['', 'null', 'nan'])
    ]

    df = pd.read_csv(os.path.join(input_dir, inputFile), low_memory=False)

    prepended_columns = {}
    subject_report = {'subject': inputFile}

    for limb, columns in limb_signals.items():
        n_rows = np.random.randint(250, 1000)

        prepend_part = rest_rows[columns].head(n_rows)
        prepend_part = prepend_part.astype(df[columns].dtypes.to_dict(), errors='ignore')

        # Record the number of rows that were really prepended: head() returns
        # fewer than n_rows when the rest pool is smaller than the draw, and
        # the report must describe the true shift applied to the signal.
        subject_report[limb] = len(prepend_part)

        prepended_columns[limb] = pd.concat(
            [prepend_part, df[columns]], ignore_index=True
        )

    subject_report['unsync_arm'] = abs(subject_report['left_arm'] - subject_report['right_arm'])
    subject_report['unsync_leg'] = abs(subject_report['left_leg'] - subject_report['right_leg'])

    all_limb_cols = [col for cols in limb_signals.values() for col in cols]
    other_columns = [col for col in df.columns if col not in all_limb_cols]

    # Single column-wise concat; rows are aligned on the positional index.
    final_df = pd.concat(
        [prepended_columns[limb] for limb in limb_signals] + [df[other_columns]],
        axis=1,
    )
    # Restore the column order of the input file.
    final_df = final_df[df.columns]

    output_filename = os.path.basename(inputFile).replace(".csv", "_unsync.csv")
    final_df.to_csv(os.path.join(output_dir, output_filename), index=False)

    report_df = pd.DataFrame([subject_report])[report_columns]
    report_df.to_csv(
        "unsync_report.csv",
        mode='a',
        header=not os.path.exists("unsync_report.csv"),
        index=False,
    )
    return output_filename

def calculateMagnitude(inputFile):
    """Write the Euclidean magnitude of the first three CSV columns to a new file.

    The first three columns of ``inputFile`` are read as float x/y/z
    components. The result is stored in a one-column CSV (``magnitude``) named
    ``<basename>_magnitude.csv`` in the current working directory.

    Args:
        inputFile: Path of a CSV whose first three columns are the axes.

    Returns:
        The name of the magnitude CSV that was written.
    """
    axes = pd.read_csv(inputFile, low_memory=False)
    x, y, z = (axes.iloc[:, position].astype(float) for position in range(3))

    norm = np.sqrt(x**2 + y**2 + z**2)

    output_filename = os.path.basename(inputFile).replace(".csv", "_magnitude.csv")
    pd.DataFrame({'magnitude': norm}).to_csv(output_filename, index=False)
    return output_filename

def compute_variance(input_csv, window_size=50):
    """Compute the sliding-window variance of a magnitude signal.

    The population variance ``E[x^2] - E[x]^2`` is evaluated for every full
    window of ``window_size`` consecutive samples using cumulative sums. The
    result is written next to the input, with ``_magnitude`` replaced by
    ``_variance`` in the file name, and also returned.

    Args:
        input_csv: Path of the magnitude CSV. The values are flattened before
            processing, so the file is expected to hold a single column.
        window_size: Number of samples per window (positive integer).

    Returns:
        DataFrame with an ``index`` column (window start + ``window_size // 2``)
        and one variance column named after ``input_csv`` with
        ``_magnitude.csv`` replaced by ``_variance``.

    Raises:
        ValueError: If ``window_size`` is not positive or exceeds the signal
            length.
    """
    if window_size < 1:
        raise ValueError("Window size must be a positive integer.")

    df = pd.read_csv(input_csv, low_memory=False)
    n = len(df)

    if n < window_size:
        raise ValueError("Window size must be less than or equal to length of signal.")

    signal = df.to_numpy(dtype=float)

    # A leading zero makes cumsum[k] the sum of the first k samples, so a
    # window sum is the difference of two entries.
    cumsum = np.cumsum(np.insert(signal, 0, 0))
    cumsum_sq = np.cumsum(np.insert(signal**2, 0, 0))

    sum_x = cumsum[window_size:] - cumsum[:-window_size]
    sum_x2 = cumsum_sq[window_size:] - cumsum_sq[:-window_size]

    mean_x = sum_x / window_size
    mean_x2 = sum_x2 / window_size

    variance = mean_x2 - mean_x**2

    # One centre per window. Deriving the centres from the number of windows
    # keeps both columns the same length for odd window sizes as well.
    centers = window_size // 2 + np.arange(len(variance))

    # Plain expression instead of an f-string with nested same-type quotes,
    # which only parses on Python 3.12+.
    column_name = input_csv.replace('_magnitude.csv', '_variance')

    var_df = pd.DataFrame({'index': centers, column_name: variance})
    var_df.to_csv(input_csv.replace('_magnitude', '_variance'), index=False)
    return var_df

def label_rest_motion(magnitudeFile, var_df, threshold=0.002):
    """Label every sample of a magnitude signal as ``'rest'`` or ``'motion'``.

    Each sample takes the variance of the nearest entry in ``var_df['index']``
    (ties go to the lower index) and is labelled ``'motion'`` when that
    variance is strictly greater than ``threshold``.

    Args:
        magnitudeFile: Path of the magnitude CSV; only its row count is used,
            and its name determines the variance column looked up in
            ``var_df``.
        var_df: DataFrame with an ``index`` column and a variance column named
            ``magnitudeFile`` with ``_magnitude.csv`` replaced by ``_variance``.
        threshold: Variance above which a sample counts as motion.

    Returns:
        List of ``'rest'`` / ``'motion'`` strings, one per sample.
    """
    column = magnitudeFile.replace('_magnitude.csv', '_variance')
    centers = var_df['index'].values
    window_var = var_df[column].values

    sample_ids = np.arange(len(pd.read_csv(magnitudeFile)))
    last = len(centers) - 1

    # Candidate neighbours on either side of each sample, clamped to range.
    insert_at = np.searchsorted(centers, sample_ids)
    before = np.clip(insert_at - 1, 0, last)
    after = np.clip(insert_at, 0, last)

    gap_before = np.abs(centers[before] - sample_ids)
    gap_after = np.abs(centers[after] - sample_ids)
    nearest = np.where(gap_before <= gap_after, before, after)

    is_motion = window_var[nearest] > threshold
    return np.where(is_motion, 'motion', 'rest').tolist()

def get_balanced_windows(labels, window_size=200):
    """Find non-overlapping windows with roughly equal rest and motion counts.

    A window of ``window_size`` labels is slid over ``labels`` one sample at a
    time. When the rest and non-rest counts differ by at most 10% of the
    window size, the window is recorded and the search resumes right after it.

    Args:
        labels: List of per-sample labels; everything other than ``'rest'``
            counts as motion.
        window_size: Number of samples per window.

    Returns:
        List of ``(start, end)`` tuples with ``end`` exclusive; empty when the
        signal is shorter than one window.
    """
    total = len(labels)
    selected = []

    if total < window_size:
        return selected

    tolerance = window_size * 0.1
    # Motion count is always window_size - rest count, so one counter suffices.
    rest_in_window = labels[:window_size].count('rest')

    start = 0
    while start + window_size <= total:
        # rest - motion == 2 * rest - window_size
        if abs(2 * rest_in_window - window_size) <= tolerance:
            selected.append((start, start + window_size))
            start += window_size
            if start + window_size <= total:
                rest_in_window = labels[start:start + window_size].count('rest')
            continue

        if start + window_size == total:
            # Last possible window and it is unbalanced: nothing left to try.
            break

        # Slide by one sample: drop the leftmost label, take in the next one.
        if labels[start] == 'rest':
            rest_in_window -= 1
        if labels[start + window_size] == 'rest':
            rest_in_window += 1
        start += 1

    return selected


def process_single_window(args):
    """Locate the best DTW match of one reference window in a target segment.

    Every position of ``target_search_segment`` where a slice of the window's
    length fits is compared with the reference window using
    ``dtw.distance_fast``; the first position with the strictly smallest
    distance wins.

    Args:
        args: 7-tuple ``(file_name, mode, reference_magnitudes_values,
            target_search_segment, (win_start, win_end), idx,
            target_search_start)``. ``idx`` is not used here.

    Returns:
        ``[file_name, mode, win_start, win_end, target_start, target_end,
        min_distance]`` with target positions offset by
        ``target_search_start``, or ``None`` (after a message on stderr) when
        no candidate produced a distance below infinity.
    """
    (
        file_name,
        mode,
        reference_magnitudes_values,
        target_search_segment,
        window_indices,
        _idx,
        target_search_start,
    ) = args
    win_start, win_end = window_indices

    window = reference_magnitudes_values[win_start:win_end]
    window_size = len(window)
    candidate_count = len(target_search_segment) - window_size + 1

    min_distance = float('inf')
    best_offset = None
    for offset in range(candidate_count):
        distance = dtw.distance_fast(
            window, target_search_segment[offset:offset + window_size]
        )
        # Strict comparison keeps the earliest of equally good candidates.
        if distance < min_distance:
            min_distance, best_offset = distance, offset

    if best_offset is None:
        print(f"No DTW match found for window {win_start}-{win_end}", file=sys.stderr)
        return None

    target_start = target_search_start + best_offset
    return [
        file_name,
        mode,
        win_start,
        win_end,
        target_start,
        target_start + window_size,
        min_distance,
    ]

def run_dtw_matching_parallel(reference_magnitudes, target_magnitudes_values, balanced_windows, reference_labels, target_labels, file_name, mode, max_workers=4):
    """Match every balanced reference window against the target in parallel.

    For each window, the target is searched in the range starting 400 samples
    before the window start and ending 600 samples after it (clamped to the
    target signal). Matches are appended to ``dtw_results.csv`` in the current
    directory; the header row is written first if the file does not exist.
    Rows are written in task-completion order.

    Args:
        reference_magnitudes: Path of the reference magnitude CSV.
        target_magnitudes_values: Path of the target magnitude CSV.
        balanced_windows: Iterable of ``(start, end)`` reference windows.
        reference_labels: Not used by this function.
        target_labels: Not used by this function.
        file_name: Subject identifier stored in each result row.
        mode: Mode tag stored in each result row.
        max_workers: Size of the process pool.
    """
    results_csv = "dtw_results.csv"

    if not os.path.exists(results_csv):
        with open(results_csv, mode='w', newline='') as header_file:
            csv.writer(header_file).writerow([
                "Subject",
                "Mode",
                "Ref Start",
                "Ref End",
                "Target Start",
                "Target End",
                "DTW Distance"
            ])

    reference_magnitudes_values = pd.read_csv(reference_magnitudes)['magnitude'].values
    # The parameter holds a path on entry; from here on it is the signal.
    target_magnitudes_values = pd.read_csv(target_magnitudes_values)['magnitude'].values
    target_length = len(target_magnitudes_values)

    args_list = []
    for idx, (win_start, win_end) in enumerate(balanced_windows):
        search_from = max(win_start - 400, 0)
        search_to = min(win_start - 400 + 1000, target_length)
        args_list.append((
            file_name,
            mode,
            reference_magnitudes_values,
            target_magnitudes_values[search_from:search_to],
            (win_start, win_end),
            idx,
            search_from,
        ))

    results = []
    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(process_single_window, task) for task in args_list]
        for finished in tqdm(as_completed(futures), total=len(futures), desc="Processing windows"):
            try:
                outcome = finished.result()
            except Exception as e:
                # A failed window is reported and skipped; the rest continue.
                print(f"Task failed: {e}", file=sys.stderr)
            else:
                if outcome:
                    results.append(outcome)

    with open(results_csv, mode='a', newline='') as results_file:
        csv.writer(results_file).writerows(results)


def clearFiles():
    """Delete every ``.csv`` in the current directory except the result files.

    ``dtw_results.csv`` and ``unsync_report.csv`` are kept. A failure to
    delete a file is reported on stderr and does not stop the clean-up.
    """
    protected = {'dtw_results.csv', 'unsync_report.csv'}
    disposable = [
        entry for entry in os.listdir('.')
        if entry.endswith('.csv') and entry not in protected
    ]
    for entry in disposable:
        try:
            os.remove(entry)
        except Exception as err:
            print(f"Error deleting {entry}: {err}", file=sys.stderr)

def sync(file1, file2, file_name, mode):
    """Run the full matching pipeline for one reference/target pair.

    Steps: magnitude -> sliding variance -> rest/motion labels for both
    signals, balanced windows on the reference, DTW matching of those windows
    against the target, then removal of the intermediate CSV files.

    Args:
        file1: Path of the reference tri-axial CSV.
        file2: Path of the target tri-axial CSV.
        file_name: Subject identifier forwarded to the result rows.
        mode: Mode tag forwarded to the result rows.
    """
    ref_magnitude_csv = calculateMagnitude(file1)
    tgt_magnitude_csv = calculateMagnitude(file2)

    ref_variance = compute_variance(ref_magnitude_csv, window_size=50)
    tgt_variance = compute_variance(tgt_magnitude_csv, window_size=50)

    ref_labels = label_rest_motion(ref_magnitude_csv, ref_variance, threshold=0.002)
    tgt_labels = label_rest_motion(tgt_magnitude_csv, tgt_variance, threshold=0.002)

    # Windows are chosen on the reference signal only.
    windows = get_balanced_windows(ref_labels, window_size=200)

    run_dtw_matching_parallel(
        ref_magnitude_csv,
        tgt_magnitude_csv,
        windows,
        ref_labels,
        tgt_labels,
        file_name,
        mode,
    )

    # Removes every CSV in the working directory except the two result files,
    # including the input files passed to this function if they live there.
    clearFiles()

if __name__ == "__main__":
    # Script entry point: for subjects 1..23, desynchronise the recording and
    # then match right/left arms and right/left legs in both directions.
    axes = ('x', 'y', 'z')

    for sbj_idx in range(1, 24):
        file_name = f"sbj_{sbj_idx}.csv"
        # unsync sbj file
        unsyncFileName = unsyncFiles(file_name)
        unsync_df = pd.read_csv(os.path.join('unsync', unsyncFileName))

        for limb, tag in (('arm', 'a'), ('leg', 'l')):
            right_csv = f'right_{limb}_data.csv'
            left_csv = f'left_{limb}_data.csv'
            right_df = unsync_df[[f'right_{limb}_acc_{axis}' for axis in axes]]
            left_df = unsync_df[[f'left_{limb}_acc_{axis}' for axis in axes]]

            directions = (
                (right_csv, left_csv, f"r2l_{tag}"),  # right limb as reference
                (left_csv, right_csv, f"l2r_{tag}"),  # left limb as reference
            )
            for reference_csv, target_csv, mode in directions:
                # sync() deletes the per-limb CSVs when it finishes, so they
                # are written again before every run.
                right_df.to_csv(right_csv, index=False)
                left_df.to_csv(left_csv, index=False)
                sync(reference_csv, target_csv, file_name, mode)
