In [None]:
import re
import os
from config.config_loader import load_config
import numpy as np
import pandas as pd
from datetime import timedelta
import datetime
from scipy.stats import skew, kurtosis, iqr
import multiprocessing
from joblib import Parallel, delayed
import matplotlib.pyplot as plt

# Feature-engineering configuration, parsed once and shared by the cells below
CONFIG_DATA = load_config(os.path.join("config", "feature_engineering_config.json"))

# Sliding-window geometry and pedal sampling rate, read from the configuration
_sliding_window_settings = CONFIG_DATA["sliding_window_settings"]
WINDOW_SIZE_S = _sliding_window_settings["window_size_s"]
PEDAL_SAMPLE_RATE_HZ = CONFIG_DATA["pedal_hz"]
STEP_SIZE_S = _sliding_window_settings["step_size_s"]

In [None]:
def change_index(input_index, df, start_timestamp):
    """
    Locate the first row of ``df`` whose timestamp lies strictly after an offset.

    The cut-off is ``start_timestamp + input_index``; rows whose ``timestamp``
    equals the cut-off are not considered a match.

    Args:
        input_index (float or int): Offset added to ``start_timestamp``
            (typically in seconds).
        df (pd.DataFrame): Frame with a 'timestamp' column.
        start_timestamp (float): Reference timestamp the offset is relative to.

    Returns:
        int or None: Index label of the first row with timestamp > cut-off,
                     or None when no row qualifies.
    """
    cutoff = start_timestamp + input_index

    # Mask the index directly; only the labels are needed, not the row data
    is_later = (df["timestamp"] > cutoff).to_numpy()
    later_labels = df.index[is_later]

    if len(later_labels) == 0:
        return None
    return later_labels[0]

In [None]:
def _mmss_to_seconds(stamp):
    """Convert an "M:SS" video time string (e.g. "9:41") to whole seconds."""
    minutes, seconds = stamp.split(":")
    return int(minutes) * 60 + int(seconds)


def concat_df(df,person_type,person_num,data_type,ex_name):
    """
    Cut the annotated video intervals out of one participant's recording.

    Each interval is a pair of "M:SS" strings. Both ends are converted to
    seconds, offset by the participant's video start timestamp (read from
    ``CONFIG_DATA``), and mapped to row positions with ``change_index``. The
    selected row ranges are concatenated in table order.

    Args:
        df (pd.DataFrame): Rows of a single participant; must contain a
            'timestamp' column. The caller's frame is not modified.
        person_type (str): "PD" selects the PD tables and config section; any
            other value selects the NC ones.
        person_num (int): Zero-based participant position in the tables
            (31 NC entries, 27 PD entries).
        data_type (str): "straight" or "turn" select the highway straight/turn
            tables; any other value (the notebook passes "whole") selects the
            whole-drive table.
        ex_name (str): Experiment key, e.g. "ex1" / "ex2". For whole-drive
            slicing "ex2" uses the ``*_highway`` table and anything else the
            ``*_city`` table.

    Returns:
        pd.DataFrame: Concatenated slices with a fresh 0..n-1 index. Empty
        (but with ``df``'s columns) when no interval starts inside the data.
    """
    NC_highway_straight = [
                            [["1:38","2:07"],["2:40","2:42"],["2:48","3:14"],["3:35","3:59"],["4:07","5:18"],["6:05","6:20"],["8:50","9:34"],["9:59","10:15"]],  # NC1
                            [["2:56","3:08"],["3:36","3:51"],["4:21","4:30"],["4:51","8:31"],["10:26","10:41"],["11:06","11:30"]],  # NC2
                            [["2:47","2:52"],["2:58","3:27"],["3:53","4:30"],["5:24","6:44"],["6:47","7:33"]],  # NC3
                            [["2:01","2:05"],["2:35","3:04"],["3:37","3:40"],["3:48","5:27"],["5:56","6:39"],["7:19","7:33"],["7:46","8:07"],["8:53","9:13"]],  # NC4
                            [["1:38","2:01"],["2:30","2:57"],["3:20","5:23"],["5:28","5:52"],["6:28","6:48"],["11:00","11:14"],["11:36","12:00"]],  # NC5
                            [["1:13","1:30"],["1:50","2:07"],["2:40","3:33"],["3:40","4:20"],["4:30","4:50"],["5:00","5:50"],["5:59","6:15"],["6:35","6:56"],["9:50","10:20"],["10:56","11:13"]],  # NC6
                            [["1:13","1:39"],["1:53","2:10"],["2:46","3:29"],["3:49","5:56"],["6:28","7:30"],["7:51","8:13"]],  # NC7
                            [["1:40","2:07"],["3:19","4:20"],["4:35","4:48"],["5:10","7:14"],["7:20","7:42"],["8:04","8:39"]],  # NC8
                            [["1:37","2:06"],["3:10","4:42"],["4:49","6:09"],["6:39","6:50"],["7:16","7:26"],["9:31","9:53"],["10:20","10:40"],["11:10","11:35"]],  # NC9
                            [["3:22","3:57"],["4:39","7:27"],["7:58","9:50"]],  # NC10
                            [["2:11","2:16"],["2:23","2:38"],["3:09","5:09"],["6:40","7:06"],["7:34","7:44"],["9:23","9:41"]],  # NC11
                            [["1:10","1:40"],["2:07","5:41"],["6:13","6:32"],["7:59","8:18"],["8:38","9:04"],["9:44","10:19"]],  # NC12
                            [["1:33","1:59"],["2:38","5:38"],["7:19","7:41"],["8:03","8:23"],["10:53","11:09"]],  # NC13
                            [["2:29","2:58"],["3:39","4:36"],["5:00","6:01"],["6:15","7:07"],["7:44","7:57"],["9:38","9:51"],["10:14","10:31"],["11:00","11:30"],["11:36","11:44"],["11:50","13:23"],["14:43","14:55"],["15:00","15:09"]],  # NC14
                            [["2:59","3:14"],["4:12","6:40"],["6:48","8:22"],["9:03","9:14"],["9:41","9:48"],["9:57","10:06"],["10:53","11:11"],["11:43","11:59"],["12:32","15:14"]],  # NC15
                            [["2:38","3:16"],["4:41","7:07"],["7:14","9:18"],["9:58","10:26"],["11:09","12:24"],["14:01","14:22"],["15:11","15:41"]],  # NC16
                            [["1:29","1:57"],["2:42","5:33"],["6:10","6:25"],["6:49","6:58"],["7:36","7:57"],["8:25","8:44"],["9:16","10:16"],["10:22","11:25"],["11:30","11:49"],["11:58","12:35"],["13:12","13:40"],["13:44","14:01"]],  # NC17
                            [["2:02","2:25"],["3:08","5:10"],["5:15","5:36"],["5:41","6:09"],["6:51","7:03"],["7:33","7:44"],["9:55","10:11"],["10:35","11:08"],["11:36","11:54"],["11:58","12:19"],["12:28","14:37"]],  # NC18
                            [["2:40","3:03"],["4:04","5:41"],["6:02","6:15"],["6:20","6:48"],["7:21","7:37"],["8:06","8:14"],["10:51","11:07"],["11:32","12:00"],["12:29","13:18"],["13:23","15:14"],["15:48","16:02"]],  # NC19
                            [["1:32","1:51"],["2:18","2:23"],["2:44","5:27"],["5:54","6:06"],["6:37","6:45"],["9:06","9:21"],["9:36","9:55"],["10:27","13:20"],["14:04","14:52"]],  # NC20
                            [["2:21","3:01"],["3:41","6:57"],["7:33","7:53"],["8:25","8:35"],["11:41","11:57"],["12:21","12:39"],["13:09","15:13"],["15:17","15:55"]],  # NC21
                            [["4:26","4:46"],["5:27","8:00"],["8:35","8:46"],["9:21","9:29"],["11:17","11:31"],["11:47","12:04"],["12:30","15:22"]],  # NC22
                            [["2:47","3:03"],["3:41","3:48"],["5:02","7:42"],["8:07","8:29"],["8:47","8:58"],["9:21","9:38"],["10:38","10:54"],["11:36","14:20"],["15:07","15:19"]],  # NC23
                            [["1:37","1:57"],["2:54","5:26"],["5:31","6:02"],["6:36","6:51"],["7:15","7:33"],["8:49","9:09"],["9:32","9:55"],["10:21","11:15"],["11:20","12:16"],["12:26","13:53"],["14:26","15:27"]],  # NC24
                            [["1:25","1:44"],["2:39","2:48"],["2:52","5:30"],["6:04","6:23"],["6:44","7:00"],["7:20","7:34"],["7:49","8:11"],["8:36","11:26"],["11:53","12:45"]],  # NC25
                            [["1:46","2:20"],["2:58","6:27"],["7:13","7:55"],["8:37","8:46"],["10:02","10:32"],["11:08","11:48"],["12:34","15:12"]],  # NC26
                            # NOTE(review): the last NC27 interval starts at "11:23", which overlaps the
                            # preceding "11:18"-"11:44" interval (rows in the overlap are emitted twice)
                            # and the NC27 turn "11:44"-"12:23"; possibly a typo for "12:23" - confirm.
                            [["2:09","2:41"],["3:19","6:37"],["7:15","7:40"],["8:06","8:29"],["10:22","10:49"],["11:18","11:44"],["11:23","15:45"]],  # NC27
                            [["1:35","2:00"],["2:50","6:39"],["7:14","7:34"],["7:55","8:02"],["10:53","11:11"],["11:27","11:54"],["12:30","15:48"]],  # NC28
                            [["2:11","2:21"],["2:59","5:57"],["6:29","6:42"],["7:06","7:17"],["9:01","9:18"],["9:34","9:52"],["10:17","13:11"],["13:38","14:39"]],  # NC29
                            [["2:21","2:38"],["3:04","6:18"],["6:47","7:06"],["7:29","7:39"],["10:04","10:22"],["10:39","10:59"],["11:23","14:36"],["15:07","15:16"]],  # NC30
                            [["2:32","2:57"],["3:24","4:35"],["4:55","6:07"],["6:09","6:27"],["6:51","7:03"],["8:39","8:51"],["9:06","9:27"],["9:46","11:28"],["11:30","11:34"],["11:55","12:55"],["13:23","14:08"]]  # NC31
                        ]
    NC_highway_turn = [
                        [["2:07","2:40"],["9:34","9:59"],["10:15","10:35"]],  # NC1
                        [["3:51","4:21"],["8:31","8:58"],["10:41","11:06"]],  # NC2
                        [["3:27","3:53"],["4:19","5:09"],["7:33","7:49"],["8:03","8:20"],["9:06","9:25"]],  # NC3
                        [["3:04","3:08"],["3:11","3:37"],["8:07","8:14"],["8:33","8:53"],["9:13","9:45"]],  # NC4
                        [["2:01","2:30"],["5:52","6:28"],["6:48","7:05"],["11:14","11:36"]],  # NC5
                        [["2:07","2:40"],["6:15","6:35"],["7:20","7:49"],["10:31","10:56"],["11:13","11:34"]],  # NC6
                        [["2:10","2:46"],["7:30","7:51"],["8:13","8:33"]],  # NC7
                        [["2:23","2:42"],["2:53","3:19"],["7:42","7:56"],["8:39","9:06"]],  # NC8
                        [["2:06","2:48"],["6:09","6:39"],["7:26","7:44"],["9:53","10:20"],["10:40","11:10"]],  # NC9
                        [["3:57","4:39"],["9:50","10:19"],["10:35","10:56"]],  # NC10
                        [["2:38","3:09"],["7:44","8:16"],["9:41","10:01"]],  # NC11
                        [["1:40","2:07"],["5:41","6:13"],["6:32","6:56"],["8:18","8:38"],["9:04","9:44"]],  # NC12
                        [["1:59","2:38"],["6:02","6:40"],["7:41","8:03"],["11:09","11:29"]],  # NC13
                        [["2:58","3:39"],["7:07","7:12"],["7:18","7:38"],["7:57","8:07"],["8:13","8:17"],["9:51","10:08"],["10:31","11:00"],["14:00","14:43"]],  # NC14
                        [["3:14","3:35"],["3:46","4:12"],["8:22","9:03"],["9:14","9:41"],["11:11","11:43"],["11:59","12:32"]],  # NC15
                        [["3:16","4:41"],["9:18","9:58"],["10:26","11:09"],["14:22","15:11"]],  # NC16
                        [["1:57","2:42"],["5:33","6:10"],["6:30","6:49"],["7:57","8:25"],["8:44","9:13"],["12:35","13:12"]],  # NC17
                        [["2:25","2:57"],["6:09","6:47"],["7:03","7:33"],["10:11","10:35"],["11:08","11:36"],["14:37","15:19"]],  # NC18
                        [["3:03","3:10"],["3:21","4:04"],["6:48","7:21"],["7:37","8:06"],["11:07","11:32"],["12:00","12:29"],["15:14","15:48"]],  # NC19
                        [["1:51","2:18"],["5:27","5:54"],["6:06","6:13"],["6:17","6:37"],["9:21","9:36"],["9:55","10:27"],["13:20","14:04"]],  # NC20
                        [["3:01","3:41"],["7:06","7:33"],["7:53","8:25"],["11:57","12:21"],["12:40","13:09"]],  # NC21
                        [["4:46","5:27"],["8:07","8:35"],["8:46","9:21"],["11:31","11:47"],["12:04","12:30"],["15:22","15:57"]],  # NC22
                        [["3:03","3:14"],["3:19","3:41"],["7:42","8:07"],["8:29","8:47"],["9:38","9:46"],["10:25","10:38"],["10:54","11:36"],["15:19","15:39"]],  # NC23
                        [["1:57","2:22"],["6:02","6:36"],["7:02","7:15"],["9:09","9:32"],["9:55","10:21"],["13:53","14:26"]],  # NC24
                        [["1:44","2:28"],["5:30","6:04"],["6:23","6:44"],["7:34","7:49"],["8:11","8:18"],["8:22","8:36"],["11:26","11:53"]],  # NC25
                        [["2:20","2:58"],["6:27","7:13"],["7:55","8:37"],["10:32","11:08"],["11:48","12:34"]],  # NC26
                        [["2:41","3:19"],["6:37","7:15"],["7:40","8:06"],["10:49","11:18"],["11:44","12:23"]],  # NC27
                        [["2:00","2:50"],["6:39","7:14"],["7:34","7:55"],["11:11","11:27"],["11:54","12:30"],["15:48","16:14"]],  # NC28
                        [["2:21","2:59"],["5:57","6:29"],["6:42","7:06"],["9:18","9:34"],["9:52","10:17"],["13:11","13:38"]],  # NC29
                        [["2:38","3:04"],["6:18","6:47"],["7:15","7:29"],["10:22","10:39"],["10:59","11:23"],["14:26","15:07"]],  # NC30
                        [["2:57","3:24"],["6:27","6:51"],["7:07","7:24"],["8:51","9:06"],["9:27","9:46"],["12:55","13:23"]]  # NC31
                    ]
    PD_highway_straight=[[["1:40","2:18"],["3:19","3:38"],["4:16","4:36"],["4:48","4:56"],["5:33","6:32"],["8:01","8:30"],["9:05","9:41"]],#PD1
                        [["1:25","1:57"],["3:28","3:58"],["4:08","7:08"],["7:16","7:35"],["8:37","8:50"],["9:59","10:18"]],#PD2
                        [["1:16","1:50"],["2:20","4:00"],["7:16","8:12"],["8:56","9:10"],["9:45","10:17"],["10:38","11:01"]],#PD3
                        [["1:18","1:53"],["2:53","5:10"],["5:46","6:09"],["6:32","6:47"],["7:31","7:48"]],#PD4
                        [["1:23","1:50"],["2:20","5:11"],["5:35","5:51"],["6:21","6:45"],["7:22","7:38"],["7:56","8:19"],["8:45","9:23"],["9:46","10:57"]],#PD5
                        [["1:37","2:22"],["2:55","3:35"],["3:54","6:40"],["8:02","8:23"],["8:48","9:10"],["10:46","11:00"]],#PD6
                        [["2:12","2:42"],["3:42","4:35"],["5:11","8:11"],["9:41","10:21"],["10:53","11:00"]],#PD7
                        [["1:23","1:45"],["3:01","4:48"],["5:20","6:42"],["7:35","7:48"],["9:30","9:47"],["10:30","10:50"]],#PD8
                        [["2:43","3:32"],["4:45","5:00"],["5:12","5:35"],["5:46","6:58"],["7:20","7:50"],["8:34","8:53"],["9:24","10:25"]],#PD9
                        [["3:02","3:34"],["4:08","7:26"],["7:55","8:19"],["13:20","13:37"],["13:51","14:15"],["14:39","17:11"],["17:48","18:20"],["18:51","19:03"]],  # PD10
                        [["2:24","2:49"],["3:27","6:37"],["7:06","7:17"],["8:00","8:17"],["9:19","9:32"],["9:50","10:10"],["10:28","13:42"],["14:17","14:57"]],  # PD11
                        [["3:59","4:16"],["4:48","6:48"],["7:12","7:38"],["8:02","8:19"],["8:42","8:54"],["9:17","9:32"],["10:56","11:11"],["11:25","11:45"],["12:07","12:14"],["12:16","15:11"]],  # PD12
                        [["1:53","2:12"],["4:40","6:15"],["6:45","7:05"],["7:10","7:49"],["9:09","9:43"],["10:13","10:36"],["12:11","12:33"],["14:12","14:26"],["15:27","15:57"],["16:32","16:40"]],  # PD13
                        [["2:24","2:43"],["3:18","4:06"],["4:40","5:37"],["5:41","6:25"],["6:49","7:01"],["7:51","8:00"],["10:11","10:23"],["10:40","10:57"],["11:21","12:53"],["13:30","14:16"],["14:39","15:21"]],  # PD14
                        [["2:28","2:42"],["3:08","3:35"],["4:26","5:58"],["6:03","7:04"],["7:12","7:40"],["8:11","8:40"],["9:30","9:43"],["13:00","13:26"],["13:55","14:21"],["14:54","15:38"]],  # PD15
                        [["3:45","4:15"],["5:07","9:13"],["9:37","10:02"],["10:39","10:54"],["12:51","13:09"],["13:33","14:08"],["16:24","16:41"]],  # PD16
                        [["2:59","3:15"],["5:43","5:55"],["6:20","9:20"],["9:29","9:41"],["10:19","10:37"]],  # PD17
                        [["2:44","3:43"],["5:22","8:26"],["9:32","10:33"],["11:26","11:56"],["12:27","12:38"],["14:04","14:24"],["14:50","15:13"]],  # PD18
                        [["3:12","5:58"],["7:00","13:43"],["14:54","15:08"]],  # PD19
                        [["2:30","3:07"],["3:47","4:46"],["5:45","7:51"],["8:22","8:49"],["9:15","9:29"],["10:36","11:01"],["11:21","11:55"],["12:20","16:06"]],  # PD20
                        [["2:56","3:18"],["3:58","7:26"],["8:08","8:30"],["9:04","9:28"],["10:57","11:18"],["11:37","12:09"],["12:39","12:58"],["13:20","15:28"],["15:38","15:56"]],  # PD21
                        [["1:56","2:21"],["2:49","5:52"],["6:19","6:40"],["6:58","7:14"],["8:45","9:00"],["9:15","9:45"],["10:09","13:03"],["13:33","14:22"]],  # PD22
                        [["2:24","2:55"],["3:24","6:43"],["7:15","7:43"],["8:06","8:18"],["9:30","10:15"],["10:35","11:08"],["11:38","15:21"]],  # PD23
                        [["2:30","3:28"],["4:02","7:27"],["8:08","8:27"],["8:45","9:14"],["11:07","11:30"],["12:02","12:29"],["13:37","15:32"]],  # PD24
                        [["1:28","1:46"],["2:16","5:22"],["5:53","6:14"],["6:37","6:57"],["8:31","8:48"],["9:06","9:31"],["10:05","13:10"],["13:39","14:32"]],  # PD25
                        [["2:06","2:28"],["3:06","4:32"],["4:38","6:09"],["6:43","7:06"],["7:28","7:35"],["9:24","9:58"],["10:26","10:50"],["11:23","12:26"],["12:43","14:58"]],  # PD26
                        [["2:14","2:39"],["3:25","3:49"],["4:24","8:46"],["9:49","10:06"],["10:42","11:03"],["14:21","14:41"],["15:02","15:27"]]  # PD27
                        ]
    PD_highway_turn=[[["2:18","2:48"]],#PD1
                    [["1:57","2:22"],["3:08","3:28"],["7:35","7:52"],["8:20","8:37"]],#PD2
                    [["1:50","2:20"],["8:24","8:56"],["9:10","9:45"]],#PD3
                    [["1:53","2:24"],["6:09","6:32"],["6:47","7:06"]],#PD4
                    [["1:50","2:20"],["5:11","5:35"],["5:51","5:59"],["6:10","6:21"],["7:38","7:56"],["8:19","8:45"]],#PD5
                    [["2:22","2:55"],["6:40","7:09"],["8:23","8:48"]],#PD6
                    [["2:42","3:15"],["8:34","9:41"],["10:21","10:53"]],#PD7
                    [["2:16","3:01"],["7:04","7:35"],["7:48","8:18"],["9:47","10:08"]],#PD8
                    [["3:32","3:56"]],#PD9
                    [["3:34","4:08"],["7:26","7:55"],["8:19","8:43"],["13:37","13:51"],["14:15","14:39"],["18:20","18:51"]],  # PD10
                    [["2:49","3:27"],["6:37","7:06"],["7:42","8:00"],["9:32","9:50"],["10:10","10:28"],["13:42","14:08"]],  # PD11
                    [["4:16","4:48"],["8:19","8:42"],["8:54","9:17"],["11:11","11:22"],["11:45","12:07"],["15:11","15:37"]],  # PD12
                    [["3:22","3:50"],["9:49","10:13"],["11:16","11:21"],["11:49","12:11"],["14:26","14:35"],["15:20","15:27"],["15:57","16:32"]],  # PD13
                    [["2:43","3:18"],["6:25","6:49"],["7:27","7:33"],["10:23","10:40"],["10:57","11:12"],["11:15","11:21"],["14:16","14:39"]],  # PD14
                    [["3:35","4:26"],["7:40","8:11"],["8:40","8:50"],["13:26","13:55"],["14:21","14:54"]],  # PD15
                    [["4:15","5:07"],["9:13","9:37"],["10:02","10:39"],["13:09","13:33"],["14:08","14:32"]],  # PD16
                    [["5:01","5:43"],["9:58","10:19"]],  # PD17
                    [["3:43","4:07"],["4:51","5:22"],["10:33","11:26"],["12:07","12:27"],["14:24","14:50"],["15:13","16:03"]],  # PD18
                    [["5:58","7:00"],["13:43","14:54"]],  # PD19
                    [["3:07","3:47"],["7:51","8:22"],["8:49","9:15"],["11:01","11:21"],["11:55","12:20"],["16:06","16:14"]],  # PD20
                    [["3:33","3:58"],["7:26","8:08"],["8:30","9:04"],["11:18","11:37"],["12:09","12:39"]],  # PD21
                    [["2:21","2:49"],["5:52","6:19"],["6:40","6:58"],["9:00","9:15"],["9:45","10:09"],["13:03","13:33"]],  # PD22
                    [["2:55","3:24"],["6:43","7:15"],["7:43","8:06"],["10:15","10:35"],["11:08","11:38"]],  # PD23
                    [["3:28","4:02"],["7:27","8:08"],["8:27","8:45"],["11:30","11:49"],["13:00","13:37"]],  # PD24
                    [["1:46","2:16"],["5:22","5:53"],["6:14","6:37"],["8:48","9:06"],["9:31","10:05"],["13:10","13:39"]],  # PD25
                    [["2:28","3:06"],["6:09","6:43"],["7:06","7:28"],["9:58","10:26"],["10:50","11:23"]],  # PD26
                    [["2:53","3:25"],["8:46","9:25"],["10:06","10:42"],["14:41","15:02"],["15:27","16:01"]]  # PD27
                    ]

    Whole_slice_PD_highway=[[["1:40","9:41"]],[["1:25","10:18"]],[["1:16","11:01"]],
                    [["1:18","7:48"]],[["1:23","10:57"]],[["1:37","11:00"]],
                    [["2:12","11:00"]],[["1:23","10:50"]],[["2:40","10:26"]],
                    [["3:02","19:03"]],[["2:24","14:57"]],[["3:59","15:37"]],
                    [["1:53","16:40"]],[["2:24","15:21"]],[["2:28","15:38"]],
                    [["3:45","16:41"]],[["2:59","10:37"]],[["2:44","16:03"]],
                    [["3:12","15:08"]],[["2:30","16:14"]],[["2:56","15:56"]],
                    [["1:56","14:22"]],[["2:24","15:21"]],[["2:30","15:32"]],
                    [["1:28","14:32"]],[["2:06","14:58"]],[["2:14","16:01"]]
                   ]

    Whole_slice_NC_highway=[[["1:38","10:35"]],[["2:56","11:30"]],[["2:47","9:25"]],
                    [["2:01","9:45"]],[["1:38","12:00"]],[["1:13","11:34"]],
                    [["1:13","8:33"]],[["1:40","9:06"]],[["1:37","11:35"]],
                    [["3:22","10:56"]],[["2:11","10:01"]],[["1:10","10:19"]],
                    [["1:33","11:29"]],[["2:29","15:09"]],[["2:59","15:14"]],
                    [["2:38","15:41"]],[["1:29","14:01"]],[["2:02","15:19"]],
                    [["2:40","16:02"]],[["1:32","14:52"]],[["2:21","15:55"]],
                    [["4:26","15:57"]],[["2:47","15:39"]],[["1:37","15:27"]],
                    [["1:25","2:48"],["2:52","8:18"],["8:22","12:45"]], #NC25
                    [["1:46","15:12"]],[["2:09","15:45"]],[["1:35","16:14"]],
                    [["2:11","14:39"]],[["2:21","15:16"]],[["2:32","14:08"]]
                   ]

    # PD9 below is a zero-length interval ("1:00"-"1:00"), so it selects no rows.
    Whole_slice_PD_city = [[["2:42","11:18"]],[["1:30","11:04"]],[["1:21","11:02"]], #PD3
                    [["1:45","10:58"]],[["1:11","11:00"]],[["1:38","11:02"]],#PD6
                    [["1:49","11:00"]],[["1:34","11:28"]],[["1:00","1:00"]],  #PD9
                    [["3:22","15:03"]],[["2:07","14:46"]],[["1:55","16:27"]],#PD12
                    [["4:57","19:12"]],[["2:55","16:04"]],[["3:50","16:18"]],#PD15
                    [["2:35","14:03"]],[["3:39","11:33"]],[["4:35","15:48"]],#PD18
                    [["2:55","17:02"]],[["5:05","11:55"]],[["4:35","17:13"]],#PD21
                    [["1:56","15:18"]],[["2:30","14:34"]],[["3:47","17:28"]],#PD24
                    [["1:43","14:58"]],[["2:34","15:27"]],[["2:02","15:30"]] #PD27
                   ]

    Whole_slice_NC_city = [[["2:33","10:57"]],[["1:51","11:05"]],[["1:23","10:25"]], #NC3
                    [["3:00","10:22"]],[["1:23","11:29"]],[["1:48","11:36"]],#NC6
                    [["2:12","11:23"]],[["1:41","9:58"]],[["2:23","12:13"]],#NC9
                    [["2:36","11:09"]],[["1:41","12:22"]],[["1:11","10:42"]],#NC12
                    [["1:35","12:22"]],[["2:27","17:00"]],[["1:44","15:13"]],#NC15
                    [["2:26","15:29"]],[["2:52","15:21"]],[["1:46","15:22"]],#NC18
                    [["2:37","17:18"]],[["2:01","16:02"]],[["4:04","15:59"]],#NC21
                    [["1:50","15:22"]],[["1:32","15:52"]],[["3:17","17:23"]],#NC24
                    [["2:20","16:02"]], #NC25
                    [["3:22","17:10"]],[["1:33","17:49"]],[["4:04","15:29"]],#NC28
                    [["1:32","15:26"]],[["3:04","15:34"]],[["2:01","15:34"]]#NC31
                   ]

    # Any person_type other than "PD" is treated as NC.
    is_pd = person_type == "PD"
    config_section = "PD_data_path" if is_pd else "NC_data_path"
    start_timestamp = CONFIG_DATA[config_section]["video_start_timestamp_"+ex_name][person_num]

    # NOTE(review): the straight/turn tables are highway annotations but are
    # chosen without looking at ex_name - confirm callers only pair them with
    # the highway experiment.
    if data_type == "straight":
        time_slice = PD_highway_straight if is_pd else NC_highway_straight
    elif data_type == "turn":
        time_slice = PD_highway_turn if is_pd else NC_highway_turn
    elif ex_name == "ex2":
        time_slice = Whole_slice_PD_highway if is_pd else Whole_slice_NC_highway
    else:
        time_slice = Whole_slice_PD_city if is_pd else Whole_slice_NC_city

    # change_index returns index *labels* while the slicing below is
    # positional; a fresh 0..n-1 index makes the two coincide for any input.
    df = df.reset_index(drop=True)

    pieces = []
    for start_label, end_label in time_slice[person_num]:
        start_index = change_index(_mmss_to_seconds(start_label), df, start_timestamp)
        end_index = change_index(_mmss_to_seconds(end_label), df, start_timestamp)
        print("Trying to slice: ", start_index, end_index)

        # No sample lies after the interval start: the interval is entirely
        # outside the recording. Slicing with None would start at row 0 and
        # pull in unrelated data, so skip it.
        if start_index is None:
            continue

        # end_index may be None when the recording stops before the interval
        # ends; the slice then simply runs to the last row.
        pieces.append(df.iloc[start_index:end_index])

    if not pieces:
        # Keep the column layout so downstream CSVs still get a header row.
        return df.iloc[0:0].copy()

    # Single concat instead of growing a frame inside the loop.
    return pd.concat(pieces, axis=0, ignore_index=True)

In [None]:
import os
import pandas as pd
from config.config_loader import load_config

# Re-read the feature-engineering configuration and pull out the cohort sizes
CONFIG_DATA = load_config(os.path.join("config", "feature_engineering_config.json"))
NC_number = CONFIG_DATA["NC_number"]
PD_number = CONFIG_DATA["PD_number"]

# === PD group: merged datasets, keyed by "<sensor>_<experiment>" ===
PD_data_dict = {
    "pedal_ex1": pd.read_csv("data_pro1/PD_pedal_data_ex1.csv"),
    "speed_ex1": pd.read_csv("data_pro1/PD_speed_data_ex1.csv"),
    "eyemovement_ex1": pd.read_csv("data_pro1/PD_eyemovement_data_ex1.csv"),
    "pedal_ex2": pd.read_csv("data_pro1/PD_pedal_data_ex2.csv"),
    "speed_ex2": pd.read_csv("data_pro1/PD_speed_data_ex2.csv"),
    "eyemovement_ex2": pd.read_csv("data_pro1/PD_eyemovement_data_ex2.csv"),
}

# Per-file names refer to the same frames as the dict entries
PD_pedal_data_ex1 = PD_data_dict["pedal_ex1"]
PD_speed_data_ex1 = PD_data_dict["speed_ex1"]
PD_eyemovement_data_ex1 = PD_data_dict["eyemovement_ex1"]
PD_pedal_data_ex2 = PD_data_dict["pedal_ex2"]
PD_speed_data_ex2 = PD_data_dict["speed_ex2"]
PD_eyemovement_data_ex2 = PD_data_dict["eyemovement_ex2"]

# === NC group: merged datasets, keyed by "<sensor>_<experiment>" ===
NC_data_dict = {
    "pedal_ex1": pd.read_csv("data_pro1/NC_pedal_data_ex1.csv"),
    "speed_ex1": pd.read_csv("data_pro1/NC_speed_data_ex1.csv"),
    "eyemovement_ex1": pd.read_csv("data_pro1/NC_eyemovement_data_ex1.csv"),
    "pedal_ex2": pd.read_csv("data_pro1/NC_pedal_data_ex2.csv"),
    "speed_ex2": pd.read_csv("data_pro1/NC_speed_data_ex2.csv"),
    "eyemovement_ex2": pd.read_csv("data_pro1/NC_eyemovement_data_ex2.csv"),
}

# Per-file names refer to the same frames as the dict entries
NC_pedal_data_ex1 = NC_data_dict["pedal_ex1"]
NC_speed_data_ex1 = NC_data_dict["speed_ex1"]
NC_eyemovement_data_ex1 = NC_data_dict["eyemovement_ex1"]
NC_pedal_data_ex2 = NC_data_dict["pedal_ex2"]
NC_speed_data_ex2 = NC_data_dict["speed_ex2"]
NC_eyemovement_data_ex2 = NC_data_dict["eyemovement_ex2"]


# ===== Slice PD and NC data by participant =====
# Both cohorts go through the same steps, so one loop handles them in turn
# (PD first, then NC) instead of two copy-pasted loops. Each entry is
# (cohort tag, participant count, dict of merged frames).
for group, n_participants, data_dict in (
    ("PD", PD_number, PD_data_dict),
    ("NC", NC_number, NC_data_dict),
):
    for person_num in range(n_participants):
        # person_num is zero-based; person_id values and folder names are one-based
        output_dir = f"./sliced_data/{group}/{group}{person_num + 1}"
        os.makedirs(output_dir, exist_ok=True)

        for sensor in ["pedal", "speed", "eyemovement"]:
            for ex in ["ex1", "ex2"]:

                # Select dataset by key lookup
                df_big = data_dict[f"{sensor}_{ex}"]

                # Extract this participant's rows once per sensor/experiment;
                # the fresh index keeps labels equal to row positions.
                df = (
                    df_big[df_big["person_id"] == person_num + 1]
                    .copy()
                    .reset_index(drop=True)
                )

                for slice_type in ["whole"]:
                    # Cut out the annotated time intervals for this participant
                    df_subset = concat_df(df, group, person_num, slice_type, ex)

                    # Save file
                    out_file = f"{output_dir}/{slice_type}_{sensor}_{ex}.csv"
                    df_subset.to_csv(out_file, index=False)

                    print(f"[INFO] {group}#{person_num + 1} - {sensor}_{ex} saved")
