In [None]:
import os
import glob
import pandas as pd
import numpy as np
from io import StringIO
import matplotlib.pyplot as plt


########################################
# Function: Read sensor data with metadata and update column names with sensor group and mode identifiers
########################################
def read_sensor_data_with_metadata(file_path, debug=False):
    """
    Reads a sensor CSV export and returns its data together with the parsed metadata.

    Expected file layout (0-based line numbers):
      - Line 0: Application
      - Line 1: Date/Time (start of the collection)
      - Line 2: Collection Length (seconds)
      - Line 3: Sensor group identifiers (e.g., "FDS (81770), , ... , FCU (81728), ..., FCR (81745)")
      - Line 4: Sensor mode information for each group
      - Line 5: Column header
      - Line 6: Sample rate row (skipped)
      - Line 7 onward: Sensor data

    If line 3 contains a comma the file is treated as an "updated" multi-sensor
    export: blank entries on lines 3 and 4 inherit the last non-empty value to
    their left, and every header name gets " - <sensor group>" appended.
    Otherwise the file is treated as "legacy" and lines 3/4 are stored as the
    single values 'Sensor' and 'Sensor Mode'. Sensor modes are kept in the
    metadata only; they are never appended to column names.

    Parameters:
      file_path (str): Path to the CSV file.
      debug (bool): If True, prints detailed debug output.

    Returns:
      df (pd.DataFrame): DataFrame with sensor data, updated column names, one column per
                         scalar metadata entry, and a "Timestamp" column.
      metadata (dict): Dictionary of parsed metadata.

    Raises:
      ValueError: If the file has fewer than the six preamble/header lines.
    """
    with open(file_path, 'r') as f:
        all_lines = f.readlines()

    # Lines 0-5 are indexed directly below; fail with a clear message instead of an IndexError.
    required_lines = 6
    if len(all_lines) < required_lines:
        raise ValueError(
            f"'{file_path}' has {len(all_lines)} line(s); expected at least "
            f"{required_lines} metadata/header lines."
        )

    # --- Parse first 3 lines (common for both formats) ---
    metadata = {}
    default_keys = ('Application', 'Date/Time', 'Collection Length (seconds)')
    for raw_line, default_key in zip(all_lines, default_keys):
        key, value = _split_metadata_line(raw_line, default_key)
        metadata[key] = value

    # --- Determine dataset type (updated or legacy) ---
    sensor_group_line = all_lines[3].strip()
    sensor_mode_line = all_lines[4].strip()
    if ',' in sensor_group_line:
        # Updated dataset: one (possibly blank) entry per data column.
        sensor_groups = _forward_fill_tokens(sensor_group_line)
        sensor_modes = _forward_fill_tokens(sensor_mode_line)
        metadata['SensorGroups'] = sensor_groups
        metadata['SensorModes'] = sensor_modes
        if debug:
            print(f"[read_sensor_data_with_metadata] SensorGroups: {sensor_groups}")
            print(f"[read_sensor_data_with_metadata] SensorModes: {sensor_modes}")
    else:
        # Legacy dataset: use line 3 and 4 as single values.
        metadata['Sensor'] = sensor_group_line
        metadata['Sensor Mode'] = sensor_mode_line

    # --- Header row for sensor data is on line 5 in both cases ---
    original_col_names = [col.strip() for col in all_lines[5].strip().split(',')]

    # If updated dataset, update column names by appending only sensor group.
    new_col_names = original_col_names
    if 'SensorGroups' in metadata:
        sensor_groups = metadata['SensorGroups']
        if len(sensor_groups) >= len(original_col_names):
            new_col_names = [
                f"{col} - {group}"
                for col, group in zip(original_col_names, sensor_groups)
            ]
            if debug:
                print("[read_sensor_data_with_metadata] New column names set (updated dataset, sensor group only).")
        elif debug:
            print("[read_sensor_data_with_metadata] Warning: Not enough sensor group entries; using original column names.")

    # Read the sensor data (starting at line 7; line 6 is the sample rate row).
    # low_memory=False parses each column in one pass so it gets a single inferred
    # dtype instead of chunk-by-chunk inference, which can yield mixed-type columns.
    data_str = ''.join(all_lines[7:])
    df = pd.read_csv(StringIO(data_str), header=None, names=new_col_names, low_memory=False)

    # Add metadata columns to the DataFrame (except sensor group and mode lists)
    for key, value in metadata.items():
        if key not in ['SensorGroups', 'SensorModes']:
            df[key] = value

    # Create a running Timestamp column: samples are spread evenly from the start
    # time to start time + collection length (both endpoints included).
    collection_length = float(metadata.get('Collection Length (seconds)', 0))
    start_time = pd.to_datetime(metadata.get('Date/Time', None))
    num_samples = len(df)
    time_offsets = np.linspace(0, collection_length, num_samples)
    df['Timestamp'] = start_time + pd.to_timedelta(time_offsets, unit='s')

    if debug:
        print(f"[read_sensor_data_with_metadata] Final DataFrame shape: {df.shape}")
        print(f"[read_sensor_data_with_metadata] Final column names: {df.columns.tolist()}")
    else:
        print("read_sensor_data_with_metadata completed.")

    return df, metadata


def _split_metadata_line(raw_line, default_key):
    """Split a 'Key:,value' metadata line into (key, value); use default_key when there is no comma."""
    line = raw_line.strip()
    if ',' in line:
        key, value = line.split(',', 1)
        return key.strip().rstrip(':'), value.strip()
    return default_key, line


def _forward_fill_tokens(line):
    """Split a comma-separated line and replace blank entries with the last non-blank entry to their left."""
    filled = []
    last = ""  # leading blanks (before any value is seen) stay empty
    for token in line.split(','):
        token = token.strip()
        if token:
            last = token
        filled.append(last)
    return filled


########################################
# Function: Compute EMG extreme flag using a fixed time window
########################################
def compute_emg_extreme_flag_window(df, window_time=1.3, column='EMG 1 (mV)', 
                                    threshold_high=1.0, threshold_low=-0.5, debug=False):
    """
    Computes a flag for each row indicating whether, within a fixed time window
    around the current row, there is at least one EMG value above 'threshold_high' and
    one below 'threshold_low'.

    The window for a row is [Timestamp - window_time, Timestamp + window_time],
    inclusive on both ends. Rows whose Timestamp is missing (NaT) never fall in
    any window and always get a flag of 0.

    Instead of masking the whole frame once per row (quadratic in the number of
    rows), the rows are sorted by time once, running counts of "high" and "low"
    samples are built, and each window is resolved with two binary searches.

    Parameters:
      df (pd.DataFrame): DataFrame containing a 'Timestamp' column.
      window_time (float): Time window in seconds (applied on each side of the row).
      column (str): Column name with EMG values.
      threshold_high (float): High threshold.
      threshold_low (float): Low threshold.
      debug (bool): If True, prints detailed debug information.

    Returns:
      pd.Series: Series of 0/1 flags, indexed like df.
    """
    if debug:
        # The frame count is informational only, so it is computed just for the debug print.
        time_diffs = df['Timestamp'].diff().dropna().dt.total_seconds()
        median_dt = time_diffs.median() if not time_diffs.empty else 0
        frame_count = int(round(window_time / median_dt)) if median_dt > 0 else 0
        print(f"[compute_emg_extreme_flag_window] Using a time window of {window_time} sec (~{frame_count} frames)")

    timestamps = df['Timestamp'].to_numpy(dtype='datetime64[ns]')
    # Missing values compare as "not beyond the threshold".
    is_high = (df[column] > threshold_high).to_numpy(dtype=bool, na_value=False)
    is_low = (df[column] < threshold_low).to_numpy(dtype=bool, na_value=False)

    flags = np.zeros(len(df), dtype='int64')
    valid = ~np.isnat(timestamps)
    ts_valid = timestamps[valid]
    if ts_valid.size:
        # Sort once so every window maps to a contiguous slice of the sorted rows.
        order = np.argsort(ts_valid, kind='stable')
        sorted_ts = ts_valid[order]
        # Prefix counts: counts[j] - counts[i] = number of hits among sorted rows i..j-1.
        high_counts = np.concatenate(([0], np.cumsum(is_high[valid][order])))
        low_counts = np.concatenate(([0], np.cumsum(is_low[valid][order])))

        half_width = pd.Timedelta(seconds=window_time).to_timedelta64()
        # side='left' / side='right' keep both window edges inclusive.
        left = np.searchsorted(sorted_ts, ts_valid - half_width, side='left')
        right = np.searchsorted(sorted_ts, ts_valid + half_width, side='right')

        has_high = high_counts[right] > high_counts[left]
        has_low = low_counts[right] > low_counts[left]
        flags[valid] = (has_high & has_low).astype('int64')

    flag_series = pd.Series(flags, index=df.index)
    if debug:
        print(f"[compute_emg_extreme_flag_window] Output flags shape: {flag_series.shape}")
    else:
        print("compute_emg_extreme_flag_window completed.")
    return flag_series


########################################
# Function: Compute EMG extreme flag using a dynamic time window
########################################
def compute_emg_extreme_flag_dynamic_window(df, column='EMG 1 (mV)', threshold_high=1.0, 
                                              threshold_low=-0.5, debug=False):
    """
    Computes a dynamic extreme flag for each row by first determining a fixed-window flag,
    then adjusting the time window based on the nearest extreme events.

    Steps:
      1. Rows flagged by compute_emg_extreme_flag_window (window_time=1.3) are the "extreme" rows.
      2. For every row, the half-width of its window is the time distance to the nearest
         extreme row (an extreme row is its own nearest extreme, so its half-width is 0;
         with no extreme rows at all every half-width is 0).
      3. A row is flagged 1 when its window [Timestamp - half-width, Timestamp + half-width]
         (inclusive) contains at least one value above 'threshold_high' and one below
         'threshold_low'.

    Half-widths are kept as exact nanosecond timedeltas rather than being converted to
    float seconds and back, and all windows are resolved with binary searches on the
    time-sorted rows instead of one full-frame mask per row. Rows with a missing (NaT)
    Timestamp get a half-width of 0 and a flag of 0.

    Parameters:
      df (pd.DataFrame): DataFrame containing a 'Timestamp' column.
      column (str): Column name with EMG values.
      threshold_high (float): High threshold.
      threshold_low (float): Low threshold.
      debug (bool): If True, prints detailed debug information.

    Returns:
      pd.Series: Series of 0/1 flags, indexed like df.
    """
    fixed_flags = compute_emg_extreme_flag_window(df, window_time=1.3, column=column, 
                                                   threshold_high=threshold_high, threshold_low=threshold_low, debug=debug)

    timestamps = df['Timestamp'].to_numpy(dtype='datetime64[ns]')
    is_high = (df[column] > threshold_high).to_numpy(dtype=bool, na_value=False)
    is_low = (df[column] < threshold_low).to_numpy(dtype=bool, na_value=False)
    is_extreme = (fixed_flags == 1).to_numpy(dtype=bool, na_value=False)

    n_rows = len(df)
    new_flags = np.zeros(n_rows, dtype='int64')
    half_widths = np.zeros(n_rows, dtype='timedelta64[ns]')  # per-row window half-width

    valid = ~np.isnat(timestamps)
    ts_valid = timestamps[valid]
    extreme_ts = np.sort(timestamps[is_extreme & valid])

    if ts_valid.size and extreme_ts.size:
        # pos = index of the first extreme at or after each row; pos - 1 is the one before it.
        pos = np.searchsorted(extreme_ts, ts_valid, side='left')
        has_prev = pos > 0
        has_next = pos < extreme_ts.size
        # Sentinel larger than any real gap; at least one neighbour always exists here,
        # so the sentinel never survives the minimum below.
        far = np.timedelta64(int(np.iinfo(np.int64).max), 'ns')
        gap_prev = np.full(ts_valid.size, far)
        gap_next = np.full(ts_valid.size, far)
        gap_prev[has_prev] = ts_valid[has_prev] - extreme_ts[pos[has_prev] - 1]
        gap_next[has_next] = extreme_ts[pos[has_next]] - ts_valid[has_next]
        half_widths[valid] = np.minimum(gap_prev, gap_next)

    if ts_valid.size:
        # Sort once so every window maps to a contiguous slice of the sorted rows.
        order = np.argsort(ts_valid, kind='stable')
        sorted_ts = ts_valid[order]
        # Prefix counts: counts[j] - counts[i] = number of hits among sorted rows i..j-1.
        high_counts = np.concatenate(([0], np.cumsum(is_high[valid][order])))
        low_counts = np.concatenate(([0], np.cumsum(is_low[valid][order])))

        widths_valid = half_widths[valid]
        # side='left' / side='right' keep both window edges inclusive.
        left = np.searchsorted(sorted_ts, ts_valid - widths_valid, side='left')
        right = np.searchsorted(sorted_ts, ts_valid + widths_valid, side='right')

        has_high = high_counts[right] > high_counts[left]
        has_low = low_counts[right] > low_counts[left]
        new_flags[valid] = (has_high & has_low).astype('int64')

    new_flags_series = pd.Series(new_flags, index=df.index)
    if debug:
        avg_dynamic_window = half_widths.astype('int64').mean() / 1e9 if n_rows else 0
        print(f"[compute_emg_extreme_flag_dynamic_window] Average dynamic window size: {avg_dynamic_window:.2f} sec")
    else:
        print("compute_emg_extreme_flag_dynamic_window completed.")
    return new_flags_series


########################################
# Function: Build global min/max dictionary
########################################
def build_global_min_max(df, columns_to_analyze, debug=False):
    """
    Collects the overall minimum and maximum of each requested column.

    Parameters:
      df (pd.DataFrame): DataFrame holding the columns.
      columns_to_analyze (list): Names of the columns to summarise.
      debug (bool): If True, prints the resulting dictionary; otherwise prints a
                    short completion message.

    Returns:
      dict: {column name: {'min': <column minimum>, 'max': <column maximum>}},
            in the order the columns were requested.
    """
    extrema = {
        name: {'min': df[name].min(), 'max': df[name].max()}
        for name in columns_to_analyze
    }
    if not debug:
        print("build_global_min_max completed.")
    else:
        print(f"[build_global_min_max] Global min/max for columns: {extrema}")
    return extrema


########################################
# Function: Compute window metrics
########################################
def compute_window_metrics(window_df, columns_to_analyze, global_min_max=None, debug=False):
    """
    Summarises a slice of data column by column.

    For every requested column the result holds 'avg_<col>', 'min_<col>' and
    'max_<col>'. When global_min_max has an entry for the column, two extra
    booleans are added: 'is_global_min_<col>' / 'is_global_max_<col>', telling
    whether the window minimum / maximum equals the global one.

    Parameters:
      window_df (pd.DataFrame): DataFrame slice to summarise.
      columns_to_analyze (list): Names of the columns to summarise.
      global_min_max (dict, optional): {column: {'min': ..., 'max': ...}} used for the
                                       global comparison.
      debug (bool): If True, prints the computed metrics.

    Returns:
      dict: Dictionary of computed metrics.
    """
    metrics = {}
    have_globals = global_min_max is not None
    for name in columns_to_analyze:
        series = window_df[name]
        lowest, highest = series.min(), series.max()
        metrics[f'avg_{name}'] = series.mean()
        metrics[f'min_{name}'] = lowest
        metrics[f'max_{name}'] = highest

        # No global reference for this column: nothing more to record.
        if not (have_globals and name in global_min_max):
            continue

        bounds = global_min_max[name]
        global_low, global_high = bounds['min'], bounds['max']
        metrics[f'is_global_min_{name}'] = (lowest == global_low)
        metrics[f'is_global_max_{name}'] = (highest == global_high)

    if debug:
        print(f"[compute_window_metrics] Computed stats: {metrics}")
    return metrics


########################################
# Function: Analyze spikes in a given column
########################################
def analyze_spikes(df, col, window=50, global_min_max=None, debug=False, columns_to_analyze=None):
    """
    Analyzes spikes in a given column using a specified window.

    A spike is any row where '<col>_spike_flag' equals 1. For each spike, the rows
    from 'window' rows before it to 'window' rows after it (clipped to the frame)
    are summarised with compute_window_metrics.

    Windows are located by row position, so they are correct even when the index
    is not 0..n-1 (for example after rows were filtered out). The reported
    'spike_index', 'window_start' and 'window_end' are index labels; for a default
    0..n-1 index these are the same numbers as the row positions.

    Parameters:
      df (pd.DataFrame): DataFrame containing '<col>_spike_flag', 'col' and the analyzed columns.
      col (str): Column name.
      window (int): Window size (number of rows) on each side of the spike.
      global_min_max (dict, optional): Dictionary for global min/max.
      debug (bool): If True, prints debug info.
      columns_to_analyze (list, optional): Columns to summarise in each window. Defaults to
                                           the single-sensor EMG/ACC/GYRO column names.

    Returns:
      pd.DataFrame: One row per spike with the window metrics plus 'spike_index',
                    'spike_column', 'spike_value', 'window_start' and 'window_end'.
                    Empty (no columns) when there are no spikes.
    """
    if columns_to_analyze is None:
        columns_to_analyze = [
            'EMG 1 (mV)', 'ACC X (G)', 'ACC Y (G)', 'ACC Z (G)',
            'GYRO X (deg/s)', 'GYRO Y (deg/s)', 'GYRO Z (deg/s)'
        ]

    spike_flag_col = f'{col}_spike_flag'
    # Row positions (not labels) of the flagged rows.
    flagged_positions = np.flatnonzero((df[spike_flag_col] == 1).to_numpy()).tolist()
    last_pos = len(df) - 1
    results = []

    for pos in flagged_positions:
        start_pos = max(0, pos - window)
        end_pos = min(last_pos, pos + window)
        window_df = df.iloc[start_pos:end_pos + 1]  # +1: iloc excludes the stop position
        window_stats = compute_window_metrics(window_df, columns_to_analyze, global_min_max=global_min_max, debug=debug)
        window_stats['spike_index'] = df.index[pos]
        window_stats['spike_column'] = col
        window_stats['spike_value'] = df[col].iloc[pos]
        window_stats['window_start'] = df.index[start_pos]
        window_stats['window_end'] = df.index[end_pos]
        results.append(window_stats)
    if debug:
        print(f"[analyze_spikes] Processed {len(flagged_positions)} spikes for column {col}.")
    return pd.DataFrame(results)


########################################
# Function: Compare spike windows from EMG and ACC/GYRO
########################################
def compare_spike_windows(emg_spikes_df, acc_gyro_spikes_df, debug=False):
    """
    Compares spike windows from EMG and ACC/GYRO and returns merged information.

    For every EMG spike, each ACC/GYRO spike whose 'spike_index' lies inside the
    EMG spike's [window_start, window_end] range (inclusive) produces one output row.

    Parameters:
      emg_spikes_df (pd.DataFrame): DataFrame from analyze_spikes for EMG.
      acc_gyro_spikes_df (pd.DataFrame): DataFrame from analyze_spikes for ACC/GYRO.
      debug (bool): If True, prints debug info.

    Returns:
      pd.DataFrame: One row per overlapping (EMG spike, ACC/GYRO spike) pair with the columns
                    'emg_spike_index', 'emg_spike_value', 'acc_gyro_spike_index',
                    'acc_gyro_spike_column', 'emg_window_avg' and 'acc_window_avg'
                    (None when 'avg_ACC X (G)' is absent). Empty (no columns) when there
                    is no overlap or either input is empty.
    """
    rows = []
    # analyze_spikes returns a DataFrame without any columns when it finds no spikes,
    # so the column lookups below must be skipped for empty inputs.
    if not emg_spikes_df.empty and not acc_gyro_spikes_df.empty:
        acc_spike_indices = acc_gyro_spikes_df['spike_index']
        for _, emg_row in emg_spikes_df.iterrows():
            # between() is inclusive on both ends by default.
            in_window = acc_spike_indices.between(emg_row['window_start'], emg_row['window_end'])
            for _, spike_row in acc_gyro_spikes_df[in_window].iterrows():
                rows.append({
                    'emg_spike_index': emg_row['spike_index'],
                    'emg_spike_value': emg_row['spike_value'],
                    'acc_gyro_spike_index': spike_row['spike_index'],
                    'acc_gyro_spike_column': spike_row['spike_column'],
                    'emg_window_avg': emg_row['avg_EMG 1 (mV)'],
                    'acc_window_avg': spike_row.get('avg_ACC X (G)', None)
                })
    if debug:
        print(f"[compare_spike_windows] Merged {len(rows)} overlapping spike events.")
    return pd.DataFrame(rows)


########################################
# Function: Mark throwing motion based on extreme flag windows
########################################
def mark_throwing_motion(df, extreme_flag_col='EMG_extreme_flag', window_time=1.3, debug=False):
    """
    Marks rows as part of the throwing motion based on extreme flag events.

    For each row where the specified extreme_flag_col is 1, all rows within ±(window_time/2) seconds 
    of that event's timestamp (inclusive) are marked by setting the column 'ThrowingMotion' to 1;
    every other row gets 0. Rows or events with a missing (NaT) Timestamp are never marked
    and never mark anything.

    A row is inside some event's window exactly when some event lies within
    ±(window_time/2) of the row, so all rows are resolved with two binary searches
    over the sorted event times instead of one full-frame mask per event.

    Parameters:
      df (pd.DataFrame): DataFrame with a 'Timestamp' column.
      extreme_flag_col (str): The column name that holds the extreme flag.
      window_time (float): Total duration (in seconds) for the throwing motion window.
      debug (bool): If True, prints detailed debug information (one line per event).

    Returns:
      pd.DataFrame: Copy of the DataFrame with an added 'ThrowingMotion' column.
    """
    df = df.copy()
    half_window = window_time / 2  # e.g., 0.65 seconds for a 1.3-second window

    # Debug: Show the number of extreme events.
    extreme_events = df.loc[df[extreme_flag_col] == 1, 'Timestamp']
    if debug:
        print(f"[mark_throwing_motion] Found {len(extreme_events)} extreme events. Using half window = {half_window} sec.")

    half_width = pd.Timedelta(seconds=half_window)
    timestamps = df['Timestamp'].to_numpy(dtype='datetime64[ns]')
    event_times = extreme_events.to_numpy(dtype='datetime64[ns]')
    event_times = np.sort(event_times[~np.isnat(event_times)])

    # Count the events inside [row time - half, row time + half]; any hit marks the row.
    half_width64 = half_width.to_timedelta64()
    first_hit = np.searchsorted(event_times, timestamps - half_width64, side='left')
    past_last_hit = np.searchsorted(event_times, timestamps + half_width64, side='right')
    marked = (past_last_hit > first_hit) & ~np.isnat(timestamps)
    df['ThrowingMotion'] = marked.astype('int64')

    if debug:
        for t in extreme_events:
            start = t - half_width
            end = t + half_width
            print(f"[mark_throwing_motion] Marking event at {t} (window: {start} to {end}).")
        total_marked = df['ThrowingMotion'].sum()
        print(f"[mark_throwing_motion] Total rows marked as ThrowingMotion: {total_marked}")
    else:
        print("mark_throwing_motion completed.")

    return df


########################################
# Function: Process a single CSV file
########################################
def process_file(file_path, debug=False):
    """
    Processes a single sensor CSV file:
      - Reads the file and its metadata.
      - Drops rows with blank ACC/GYRO values and converts those columns to numeric.
      - Adds spike flags for every ACC/GYRO column and for the first EMG column.
      - Adds EMG high/low flags and the fixed-window and dynamic-window extreme flags.
      - Marks throwing motion from the fixed-window extreme flag.

    Parameters:
      file_path (str): Path to the CSV file.
      debug (bool): If True, prints detailed debug output.

    Returns:
      pd.DataFrame: Processed DataFrame.

    Raises:
      KeyError: If no column name starts with 'EMG 1 (mV)'.
      Exception: Re-raises whatever pd.to_numeric raises for an ACC/GYRO column
                 that cannot be converted.
    """
    # Thresholds shared by the spike flags and the EMG high/low flags.
    spike_high = 1.0
    spike_low = -0.5

    if debug:
        print(f"\n[process_file] Processing file: {file_path}")
    else:
        print(f"Processing file: {os.path.basename(file_path)}")

    # Step 1: Read data and metadata.
    df, metadata = read_sensor_data_with_metadata(file_path, debug=debug)
    if debug:
        print(f"[process_file] DataFrame shape after reading: {df.shape}")
    else:
        print("Data read completed.")

    # Step 2: Display minimal summary if in debug mode.
    if debug:
        print(f"[process_file] Descriptive Statistics:\n{df.describe()}")
        print(f"[process_file] Data types:\n{df.dtypes}")
    else:
        print("Basic summary displayed.")

    # Step 3: Dynamically identify numeric sensor columns. Matching by prefix picks up
    # both plain names and names with a " - <sensor group>" suffix.
    base_names = ['ACC X (G)', 'ACC Y (G)', 'ACC Z (G)', 
                  'GYRO X (deg/s)', 'GYRO Y (deg/s)', 'GYRO Z (deg/s)']
    numeric_cols = []
    for base in base_names:
        matches = [col for col in df.columns if col.startswith(base)]
        numeric_cols.extend(matches)
    if debug:
        print(f"[process_file] Identified numeric sensor columns: {numeric_cols}")

    # Clean data: Remove rows with blank (empty or whitespace-only) numeric values.
    mask = df[numeric_cols].apply(lambda col: col.astype(str).str.strip() == '').any(axis=1)
    if debug:
        print(f"[process_file] Rows with blank numeric values: {mask.sum()}")
    # Take an explicit copy so the column assignments below write to an independent
    # frame rather than to a filtered slice of the original.
    df = df[~mask].copy()

    # Convert identified numeric columns to numeric type.
    for col in numeric_cols:
        try:
            df[col] = pd.to_numeric(df[col], errors='raise')
        except Exception as e:
            print(f"[process_file] Error converting column {col}: {e}")
            raise
    if debug:
        print(f"[process_file] Data shape after cleaning: {df.shape}")

    # Step 4: (Optional) Subset data; here we use the full dataset.
    print(f"[process_file] Data subset: {df.shape[0]} rows (full data used).")

    # (Optional) Compute overall min/max summary.
    min_max_df = pd.DataFrame({'min': df.min(), 'max': df.max()})
    if debug:
        print(f"[process_file] Overall min/max summary:\n{min_max_df}")
    else:
        print("Min/Max summary computed.")

    # Step 5: Create spike flags for ACC/GYRO columns.
    for col in numeric_cols:
        spike_flag_col = f'{col}_spike_flag'
        df[spike_flag_col] = ((df[col] > spike_high) | (df[col] < spike_low)).astype(int)
    print("Spike flags for ACC/GYRO created.")

    # Create spike flag for EMG (value > 1.0). Only the first matching EMG column is used.
    emg_base = 'EMG 1 (mV)'
    emg_matches = [col for col in df.columns if col.startswith(emg_base)]
    if emg_matches:
        emg_col = emg_matches[0]
    else:
        raise KeyError(f"No column found starting with '{emg_base}'")
    emg_spike_flag_col = f'{emg_col}_spike_flag'
    df[emg_spike_flag_col] = (df[emg_col] > spike_high).astype(int)

    # Additional EMG flags.
    df['EMG_high_flag'] = (df[emg_col] > spike_high).astype(int)
    df['EMG_low_flag'] = (df[emg_col] < spike_low).astype(int)
    if debug:
        print(f"[process_file] EMG_high_flag, EMG_low_flag added. Count >1.0: {df['EMG_high_flag'].sum()}, "
              f"Count <-0.5: {df['EMG_low_flag'].sum()}")

    # Step 6: Compute fixed-window extreme flag for EMG.
    df['EMG_extreme_flag'] = compute_emg_extreme_flag_window(df, window_time=1.3, column=emg_col, debug=debug)
    if debug:
        print(f"[process_file] Fixed-window extreme flag count: {df['EMG_extreme_flag'].sum()}")

    # Step 7: Count unique extreme events in fixed window (a new event starts wherever
    # a flagged row follows an unflagged one).
    unique_extreme_count = ((df['EMG_extreme_flag'] == 1) &
                            (df['EMG_extreme_flag'].shift(1).fillna(0) != 1)).sum()
    if debug:
        print(f"[process_file] Unique extreme events (fixed window): {unique_extreme_count}")

    # Step 8: Compute dynamic-window extreme flag for EMG.
    df['EMG_extreme_flag_dynamic'] = compute_emg_extreme_flag_dynamic_window(df, column=emg_col, debug=debug)
    if debug:
        print(f"[process_file] Dynamic-window extreme flag count: {df['EMG_extreme_flag_dynamic'].sum()}")
        unique_dynamic_extreme_count = ((df['EMG_extreme_flag_dynamic'] == 1) &
                                        (df['EMG_extreme_flag_dynamic'].shift(1).fillna(0) != 1)).sum()
        print(f"[process_file] Unique extreme events (dynamic window): {unique_dynamic_extreme_count}")

    # Step 9: Mark throwing motion based on fixed-window extreme flags.
    df = mark_throwing_motion(df, extreme_flag_col='EMG_extreme_flag', window_time=1.3, debug=debug)
    if debug:
        print(f"[process_file] ThrowingMotion rows count: {df['ThrowingMotion'].sum()}")

    print("File processing completed.\n")
    return df


########################################
# Main function: Process all files in a folder and output a single Parquet file
########################################
def main(debug=False, input_folder='./data/raw/', output_file='./data/processed/processed_pitch_data.parquet'):
    """
    Processes all CSV files in the specified folder, stacks them into one DataFrame,
    and writes the output to a Parquet file.

    Files are processed in sorted path order so the row order of the stacked
    result does not depend on the order the filesystem lists them in. Each
    file's rows get a 'SourceFile' column holding the file's base name.

    Parameters:
      debug (bool): If True, prints detailed debug information.
      input_folder (str): Folder containing the CSV files.
      output_file (str): Path for the output Parquet file. Its parent folder is
                         created if needed.

    Returns:
      pd.DataFrame: Final processed DataFrame.

    Raises:
      FileNotFoundError: If input_folder does not exist or contains no CSV files.
    """
    # Ensure input folder exists.
    if not os.path.isdir(input_folder):
        raise FileNotFoundError(f"Input folder '{input_folder}' does not exist.")

    # Find all CSV files in the folder (sorted for a deterministic processing order).
    csv_files = sorted(glob.glob(os.path.join(input_folder, '*.csv')))
    if not csv_files:
        raise FileNotFoundError("No CSV files found in the input folder.")

    if debug:
        print(f"[main] Found {len(csv_files)} CSV files in '{input_folder}'.")
    else:
        print(f"Found {len(csv_files)} CSV file(s).")

    processed_dfs = []
    for file in csv_files:
        df = process_file(file, debug=debug)
        # Record which file each row came from.
        df['SourceFile'] = os.path.basename(file)
        processed_dfs.append(df)

    # Stack all DataFrames (row-wise).
    final_df = pd.concat(processed_dfs, ignore_index=True)
    if debug:
        print(f"[main] Final stacked DataFrame shape: {final_df.shape}")
    else:
        print("All files processed and stacked.")

    # Save final DataFrame to Parquet.
    output_dir = os.path.dirname(output_file)
    # A bare file name has no directory part; os.makedirs('') would raise.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    final_df.to_parquet(output_file, index=False)
    print(f"Final processed data saved to: {output_file}")

    return final_df


# Run the module when executed as a script.
if __name__ == "__main__":
    # Script entry point: adjust the three settings below, then run the pipeline.
    verbose = True  # True -> detailed output, False -> minimal output
    raw_data_folder = '../../data/raw/three_sensored_emg_data/'  # folder holding the input CSV files
    parquet_target = '../../data/processed/emg_pitch_data_processed.parquet'  # where the result is written

    processed_df = main(debug=verbose, input_folder=raw_data_folder, output_file=parquet_target)


[main] Found 1 CSV files in '../../data/raw/three_sensored_emg_data/'.

[process_file] Processing file: ../../data/raw/three_sensored_emg_data\2-14-25_bullpen_1.csv
[read_sensor_data_with_metadata] SensorGroups: ['FDS (81770)', 'FDS (81770)', 'FDS (81770)', 'FDS (81770)', 'FDS (81770)', 'FDS (81770)', 'FDS (81770)', 'FCU (81728)', 'FCU (81728)', 'FCU (81728)', 'FCU (81728)', 'FCU (81728)', 'FCU (81728)', 'FCU (81728)', 'FCR (81745)']
[read_sensor_data_with_metadata] SensorModes: ['sensor mode: 50', 'sensor mode: 50', 'sensor mode: 50', 'sensor mode: 50', 'sensor mode: 50', 'sensor mode: 50', 'sensor mode: 50', 'sensor mode: 50', 'sensor mode: 50', 'sensor mode: 50', 'sensor mode: 50', 'sensor mode: 50', 'sensor mode: 50', 'sensor mode: 50', 'sensor mode: 40']
[read_sensor_data_with_metadata] New column names set (updated dataset, sensor group only).


  df = pd.read_csv(StringIO(data_str), header=None, names=new_col_names)


[read_sensor_data_with_metadata] Final DataFrame shape: (182675, 19)
[read_sensor_data_with_metadata] Final column names: ['EMG 1 (mV) - FDS (81770)', 'ACC X (G) - FDS (81770)', 'ACC Y (G) - FDS (81770)', 'ACC Z (G) - FDS (81770)', 'GYRO X (deg/s) - FDS (81770)', 'GYRO Y (deg/s) - FDS (81770)', 'GYRO Z (deg/s) - FDS (81770)', 'EMG 1 (mV) - FCU (81728)', 'ACC X (G) - FCU (81728)', 'ACC Y (G) - FCU (81728)', 'ACC Z (G) - FCU (81728)', 'GYRO X (deg/s) - FCU (81728)', 'GYRO Y (deg/s) - FCU (81728)', 'GYRO Z (deg/s) - FCU (81728)', 'EMG 1 (mV) - FCR (81745)', 'Application', 'Date/Time', 'Collection Length (seconds)', 'Timestamp']
[process_file] DataFrame shape after reading: (182675, 19)
[process_file] Descriptive Statistics:
       EMG 1 (mV) - FDS (81770)  EMG 1 (mV) - FCU (81728)  \
count             182675.000000             182675.000000   
mean                  -0.004528                 -0.009749   
min                   -3.431678                 -1.700481   
25%                   -0.