In [28]:
import pandas as pd
import numpy as np



In [None]:
## Extract Table from CSV
#
# The file is scanned line by line for the first line containing `table_marker`;
# that line is taken as the first header row. Column names are read from the
# `target_header_num`-th header row, and the data is loaded starting
# `total_headers_num` lines after the marker line.

# Parameters
file_path = 'data/SampleRoadAcceleration.csv'
table_marker = 'Line'
target_header_num = 3 # Should be 1 or more
total_headers_num = 4
limit_lines_search = 300

if target_header_num < 1:
    raise ValueError(f"target_header_num must be 1 or more, got {target_header_num}.")

# Reset every result of the search up front: on a fresh kernel these names do
# not exist yet, and on a re-run they must not keep values from a previous file.
first_header_row = None
target_header_row = None
table_row = None
columns = None

# Loop
flag_marker = False
with open(file_path) as f:
    for i, line in enumerate(f):
        if not flag_marker and table_marker in line:
            # Assignment of the row positions, all relative to the marker line
            first_header_row = i
            target_header_row = i + target_header_num - 1
            table_row = i + total_headers_num
            flag_marker = True

        # Only compare against the header row once the marker has located it;
        # with target_header_num == 1 this matches on the marker line itself.
        if flag_marker and i == target_header_row:
            # Header row extraction
            columns = [col.strip() for col in line.split(',')]
            break

        # The search limit applies to finding the marker, not to reaching the
        # header row after the marker has already been found.
        if not flag_marker and i > limit_lines_search:
            raise ValueError(f"Marker '{table_marker}' not found within the first {limit_lines_search} lines of the file.")

# The loop can also end because the file ran out of lines.
if not flag_marker:
    raise ValueError(f"Marker '{table_marker}' not found in '{file_path}'.")
if columns is None:
    raise ValueError(
        f"'{file_path}' ends before header row {target_header_num} "
        f"(expected at line index {target_header_row})."
    )

# DataFrame
df = pd.read_csv(file_path, skiprows=table_row, header=None, names=columns)

In [107]:
# Efficient, vectorized payload extraction for up to 64 bytes
#
# Rows are handled in two groups: "long" rows, where B1 holds the whole
# payload as space-separated tokens, and "short" rows, where the payload is
# already spread over the B1..B8 columns.

# Parameters
extended_payload_cols = [f'B{i}' for i in range(1, 65)]
current_payload_cols = [f'B{i}' for i in range(1, 9)]

# Work on one stripped text view of B1 for both the mask and the split. This
# keeps the `.str` accessor usable even when B1 was loaded with a numeric
# dtype, and stops leading/trailing padding from marking a row as "long".
b1_text = df['B1'].astype(str).str.strip()

# Identify rows with long payloads (space in B1)
long_payload_mask = b1_text.str.contains(' ')

# Split long payloads into bytes (vectorized) and rename columns to B1, B2, ..., B64.
# Splitting on whitespace runs (no explicit separator) avoids empty tokens when
# two bytes are separated by more than one space.
long_payload_bytes = b1_text[long_payload_mask].str.split(expand=True)

# Fail with an explicit message instead of a column-length mismatch when a
# payload has more tokens than there are target columns.
if long_payload_bytes.shape[1] > len(extended_payload_cols):
    raise ValueError(
        f"Longest payload has {long_payload_bytes.shape[1]} bytes, "
        f"but only {len(extended_payload_cols)} payload columns are supported."
    )
long_payload_bytes.columns = extended_payload_cols[:long_payload_bytes.shape[1]]

# For short payloads, take the existing B1-B8 columns as they are
short_payload_bytes = df.loc[~long_payload_mask, current_payload_cols]

# Pad both to 64 columns
def pad_to_64(df_bytes, n_cols=64):
    """Return ``df_bytes`` reindexed to exactly the columns ``B1`` .. ``B<n_cols>``.

    Columns missing from the input are added and filled with NaN; input
    columns whose name is not in that range are dropped. The rows and their
    index labels are kept as they are.

    Parameters
    ----------
    df_bytes : pandas.DataFrame
        Frame whose payload columns are named ``B1``, ``B2``, ...
    n_cols : int, default 64
        Number of payload columns in the result.

    Returns
    -------
    pandas.DataFrame
        A new frame with columns ``B1`` .. ``B<n_cols>`` in that order.
    """
    target_cols = [f'B{i}' for i in range(1, n_cols + 1)]
    return df_bytes.reindex(columns=target_cols, fill_value=np.nan)

# Bring both payload groups to the common 64-column layout
long_payload_bytes, short_payload_bytes = (
    pad_to_64(long_payload_bytes),
    pad_to_64(short_payload_bytes),
)

# Stack the two groups, then sort by index to restore the original row order
payload_df = pd.concat([long_payload_bytes, short_payload_bytes])
payload_df = payload_df.sort_index()

# Build the final column order: everything before the old payload block,
# then B1..B64, then everything that followed the old B1..B8 block
first_payload_idx = df.columns.get_loc(current_payload_cols[0])
leading_cols = list(df.columns[:first_payload_idx])
trailing_cols = list(df.columns[first_payload_idx + len(current_payload_cols):])
new_col_order = leading_cols + extended_payload_cols + trailing_cols

# Create the extended dataframe: swap the old payload columns for the padded
# ones (joined on the row index) and apply the column order computed above
df_extended = pd.concat(
    [df.drop(columns=current_payload_cols), payload_df],
    axis=1,
)[new_col_order]



In [142]:
# Select the columns to export and map them to their export names

# Payload columns are exported under plain numeric names ('1', '2', ...) kept as strings
numeric_payload_cols = [str(n) for n in range(1, len(extended_payload_cols) + 1)]

# Columns of interest, in export order
target_columns = ['Abs Time(Sec)', 'Network', 'Arb ID', *extended_payload_cols]

# Old name -> new name, for the three metadata columns and every payload column
rename_dict = {
    'Abs Time(Sec)': 'abs_time',
    'Network': 'network',
    'Arb ID': 'arb_id',
    **dict(zip(extended_payload_cols, numeric_payload_cols)),
}

# Create df_hex with renamed columns
df_hex = df_extended[target_columns].rename(columns=rename_dict)

# Number of non-NaN payload bytes per row, placed in a 'length' column
# immediately before 'arb_id'
payload_length = df_hex[numeric_payload_cols].notna().sum(axis=1).astype('uint8')
df_hex.insert(df_hex.columns.get_loc('arb_id'), 'length', payload_length)

In [143]:
# Conversion to int: all payload columns from hex text to integer.
# Missing bytes are preserved; the result uses the nullable 'UInt8' dtype.
def _hex_byte_to_int(value):
    """Convert one payload cell to its integer value, keeping missing cells as NaN.

    Strings are parsed as base-16. Non-string numbers are accepted as well:
    a payload column that contains only decimal digits may have been loaded by
    ``read_csv`` as int/float (e.g. the text ``10`` arriving as ``10`` or
    ``10.0``), and ``int(x, 16)`` rejects non-string input. In that case the
    digit text is rebuilt from the number and then parsed as base-16.

    Raises
    ------
    ValueError
        If the cell is a number with a fractional part, or a string that is
        not valid hexadecimal.
    """
    if pd.isna(value):
        return np.nan
    if isinstance(value, str):
        return int(value, 16)
    whole = int(value)
    if whole != value:
        raise ValueError(f"Payload value {value!r} is not a whole number and cannot be read as a hex byte.")
    return int(str(whole), 16)


df_int = df_hex.copy()
df_int[numeric_payload_cols] = (
    df_hex[numeric_payload_cols]
    .map(_hex_byte_to_int)
    .astype('UInt8')
)



In [151]:
## Export

# Destination of the integer-decoded table
export_file_path = 'data/SampleRoadAcceleration_All.csv'

# Write a copy of df_int to CSV, leaving the row index out of the file
df_export = df_int.copy()
df_export.to_csv(path_or_buf=export_file_path, index=False)