In [1]:
import os

import pandas as pd

# Location of the raw radar capture (CSV). The default is the original
# author's local path; set the RADAR_CSV environment variable to point the
# notebook at a copy stored elsewhere without editing this cell.
file_path = os.environ.get(
    "RADAR_CSV",
    r"/mnt/c/Users/sebas/Downloads/radar-parsingfiles/radar_full.csv",
)

# Load the capture and let pandas choose nullable extension dtypes per column.
data = pd.read_csv(file_path).convert_dtypes()


In [2]:
# Preview the first rows of the frame exactly as loaded from the CSV.
data.head()

Unnamed: 0,time,addr,bus,data
0,301.72,0x191,2,0xAEFF2E104400D69F
1,301.72,0x240,1,0x4843BF43857DD8FF
2,301.72,0x80,1,0x12E43C062E006D26
3,301.72,0x180,1,0xCFC0000000
4,301.72,0x260,130,0x00FFD6FF2EFE90FA


In [3]:
# Function to parse 'data' into individual bytes with error handling
def parse_data(row):
    """Split a hex payload string into its 8 individual byte values.

    Args:
        row: One value of the ``data`` column. A valid payload is the prefix
            ``"0x"`` followed by exactly 16 hexadecimal digits (18 characters
            in total).

    Returns:
        A list of 8 ints in ``range(256)``, leftmost byte first, or ``None``
        when ``row`` is not a string of exactly that shape (missing value,
        shorter/longer payload, wrong prefix, non-hex characters).
    """
    # Missing values (None / pd.NA / NaN) and any other non-string are skipped.
    if not isinstance(row, str):
        return None
    if len(row) != 18 or not row.startswith("0x"):
        return None

    hex_digits = row[2:]
    # Validate explicitly: int(pair, 16) would also accept a sign, surrounding
    # whitespace or non-ASCII digits and silently produce a bogus "byte".
    if any(ch not in "0123456789abcdefABCDEF" for ch in hex_digits):
        return None

    return list(bytes.fromhex(hex_digits))


# Parse the data field and keep only rows whose payload decoded to 8 bytes.
data["parsed_bytes"] = data["data"].apply(parse_data)
# .copy() detaches the filtered frame from the original, so the column
# assignments below write to a real frame rather than to a slice (this is what
# triggers pandas' SettingWithCopyWarning otherwise).
data = data[data["parsed_bytes"].notnull()].copy()

# Split parsed bytes into separate columns byte_0 .. byte_7.
byte_columns = [f"byte_{i}" for i in range(8)]
# Naming the columns up front keeps the assignment valid even when no row
# survived the filter above (an empty list would otherwise yield 0 columns).
data[byte_columns] = pd.DataFrame(
    data["parsed_bytes"].tolist(), index=data.index, columns=byte_columns
)

# Plain-int view of the byte columns, computed once and reused by every signal.
byte_values = data[byte_columns].astype(int)

# Decode signals based on bit structure.
# Each 16-bit field is built as high_byte * 256 + low_byte; LAT_DIST and
# LAT_SPEED additionally subtract a 32768 offset before scaling.
# NOTE(review): the same byte layout is applied to every addr — confirm it is
# valid for all message IDs in the capture.
data["ID"] = data["addr"]  # Assuming 'addr' is the ID
data["LONG_DIST"] = (byte_values["byte_2"] * 256 + byte_values["byte_3"]) * 0.03
data["LAT_DIST"] = (
    (byte_values["byte_4"] * 256 + byte_values["byte_5"]) - 32768
) * 0.018
data["SPEED"] = (
    byte_values["byte_6"] * 256 + byte_values["byte_7"]
) * 0.06944444444
data["LAT_SPEED"] = (
    (byte_values["byte_0"] * 256 + byte_values["byte_1"]) - 32768
) * 0.1
# Assuming RCS is directly in byte_7.
# NOTE(review): byte_7 is also the low byte of SPEED above — confirm which
# signal really owns it.
data["RCS"] = byte_values["byte_7"]

# Display the processed DataFrame
print(data[["ID", "LONG_DIST", "LAT_DIST", "SPEED", "LAT_SPEED", "RCS"]].head())

      ID  LONG_DIST  LAT_DIST        SPEED  LAT_SPEED  RCS
0  0x191     353.76  -276.480  3815.486111     1203.1  159
1  0x240    1468.89    25.290  3857.708333    -1426.9  255
2   0x80     460.98  -377.856  1940.416667    -2793.2   38
4  0x260    1651.17  -373.284  2577.361111    -3251.3  250
5  0x260    1651.17  -373.284  2577.361111    -3251.3  250


In [4]:
# Preview the frame again; at this point it also carries parsed_bytes, byte_0..byte_7 and the decoded signal columns.
data.head()

Unnamed: 0,time,addr,bus,data,parsed_bytes,byte_0,byte_1,byte_2,byte_3,byte_4,byte_5,byte_6,byte_7,ID,LONG_DIST,LAT_DIST,SPEED,LAT_SPEED,RCS
0,301.72,0x191,2,0xAEFF2E104400D69F,"[174, 255, 46, 16, 68, 0, 214, 159]",174,255,46,16,68,0,214,159,0x191,353.76,-276.48,3815.486111,1203.1,159
1,301.72,0x240,1,0x4843BF43857DD8FF,"[72, 67, 191, 67, 133, 125, 216, 255]",72,67,191,67,133,125,216,255,0x240,1468.89,25.29,3857.708333,-1426.9,255
2,301.72,0x80,1,0x12E43C062E006D26,"[18, 228, 60, 6, 46, 0, 109, 38]",18,228,60,6,46,0,109,38,0x80,460.98,-377.856,1940.416667,-2793.2,38
4,301.72,0x260,130,0x00FFD6FF2EFE90FA,"[0, 255, 214, 255, 46, 254, 144, 250]",0,255,214,255,46,254,144,250,0x260,1651.17,-373.284,2577.361111,-3251.3,250
5,301.72,0x260,0,0x00FFD6FF2EFE90FA,"[0, 255, 214, 255, 46, 254, 144, 250]",0,255,214,255,46,254,144,250,0x260,1651.17,-373.284,2577.361111,-3251.3,250


In [5]:
# TODO: plot LONG_DIST over 'time' for addr 0x191 using seaborn, restricted to 331-333 s (not implemented yet)


In [6]:
# Time events
# 300-306s: Car on left (coming towards)
# 315-320s: Changing to left lane (getting away from right)
# 321-340s: At intersections (cars moving left to right)
# 341-345s: Further away cars moving from right to left

# Annotated time windows in seconds; both ends of each window are inclusive.
time_events = dict(
    car_on_left=(300, 306),
    changing_to_left_lane=(315, 320),
    at_intersections_close_l_to_r=(321, 340),
    at_intersections_far_r_to_l=(341, 345),
)

# Work on a copy so `data` itself is left untouched, then tag every row with
# the event whose window contains its timestamp. Rows that fall in none of
# the windows are left without a label.
df = data.copy()
for event, (start, end) in time_events.items():
    df.loc[df["time"].between(start, end), "event"] = event

# Keep only the 300-345 s span (inclusive); everything earlier or later is dropped.
df = df[df["time"].between(300, 345)]

df.head()

Unnamed: 0,time,addr,bus,data,parsed_bytes,byte_0,byte_1,byte_2,byte_3,byte_4,byte_5,byte_6,byte_7,ID,LONG_DIST,LAT_DIST,SPEED,LAT_SPEED,RCS,event
0,301.72,0x191,2,0xAEFF2E104400D69F,"[174, 255, 46, 16, 68, 0, 214, 159]",174,255,46,16,68,0,214,159,0x191,353.76,-276.48,3815.486111,1203.1,159,car_on_left
1,301.72,0x240,1,0x4843BF43857DD8FF,"[72, 67, 191, 67, 133, 125, 216, 255]",72,67,191,67,133,125,216,255,0x240,1468.89,25.29,3857.708333,-1426.9,255,car_on_left
2,301.72,0x80,1,0x12E43C062E006D26,"[18, 228, 60, 6, 46, 0, 109, 38]",18,228,60,6,46,0,109,38,0x80,460.98,-377.856,1940.416667,-2793.2,38,car_on_left
4,301.72,0x260,130,0x00FFD6FF2EFE90FA,"[0, 255, 214, 255, 46, 254, 144, 250]",0,255,214,255,46,254,144,250,0x260,1651.17,-373.284,2577.361111,-3251.3,250,car_on_left
5,301.72,0x260,0,0x00FFD6FF2EFE90FA,"[0, 255, 214, 255, 46, 254, 144, 250]",0,255,214,255,46,254,144,250,0x260,1651.17,-373.284,2577.361111,-3251.3,250,car_on_left


In [7]:
print(f"shape1: {df.shape}")

# Filter out addr's where all bytes are stable (don't change over time).
# For every addr, count the distinct values seen in each of the 8 byte
# columns; an addr is "stable" when every one of those counts equals 1.
# Done with the vectorized groupby nunique instead of a per-group Python
# callable, so it does not rely on apply()'s `include_groups` keyword.
byte_cols = [f"byte_{i}" for i in range(8)]
stable_addrs = df.groupby("addr")[byte_cols].nunique().eq(1).all(axis=1)

# stable_addrs is a boolean Series indexed by addr; indexing it with itself
# keeps only the True entries, whose index holds the addr values to drop.
df = df[~df["addr"].isin(stable_addrs[stable_addrs].index)]
print(f"shape2: {df.shape}")


shape1: (200498, 20)
shape2: (180798, 20)


In [8]:
# Filter out rows where the number of 0's in parsed_bytes is greater than 5.
# The missing-value check lives inside the callable: apply() visits every row,
# so a separate `.notnull() &` mask cannot stop `.count` from being called on
# a missing entry. Rows without a parsed byte list are dropped.
df = df[
    df["parsed_bytes"]
    .apply(lambda x: isinstance(x, (list, tuple)) and x.count(0) <= 5)
    .astype(bool)
]


In [9]:
# Renumber the remaining rows 0..n-1; drop=True discards the old index instead of keeping it as a column.
df = df.reset_index(drop=True)

In [10]:
# Persist the preprocessed frame, then preview it. The preview must be the
# cell's last expression, otherwise the notebook does not render it.
df.to_parquet('preprocessed_full.parquet')
df.head()

In [11]:
# Narrow df to the 331-334 s window (both ends inclusive) and write that
# subset out as a small sample file. Note that df itself is replaced by the
# subset from here on.
df = df[df["time"].between(331, 334)]
df.to_parquet('preprocessed_sample.parquet')

In [None]:
# import matplotlib.pyplot as plt
# import pandas as pd
# import seaborn as sns
# from matplotlib.backends.backend_pdf import PdfPages


# def plot_radar_variables_with_events(df, output_filename="radar_plots_with_events.pdf"):
#     """
#     Create plots for radar variables, with each address in a separate figure and distinct colors.
#     Highlight time periods based on the 'event' column.

#     Args:
#         df: DataFrame containing radar data
#         output_filename: Name of the output PDF file
#     """
#     # Variables to plot with assigned colors
#     variables_colors = {
#         "LONG_DIST": "#FF6B6B",  # Coral Red
#         "LAT_DIST": "#4ECDC4",  # Turquoise
#         "SPEED": "#45B7D1",  # Sky Blue
#         "LAT_SPEED": "#96CEB4",  # Sage Green
#         "RCS": "#FFBE0B",  # Golden Yellow
#     }

#     # Define colors for events
#     event_colors = {
#         "car_on_left": "#FFDDC1",  # Light Coral
#         "changing_to_left_lane": "#C1FFD7",  # Light Green
#         "at_intersections_close_l_to_r": "#C1D4FF",  # Light Blue
#         "at_intersections_far_r_to_l": "#FFC1E0",  # Light Pink
#     }

#     # Create separate DataFrames for numeric variables
#     df_numeric = df.melt(
#         id_vars=["time", "addr", "event"],
#         value_vars=list(variables_colors.keys()),
#         var_name="Variable",
#         value_name="Value",
#     )

#     # Ensure all values are numeric
#     df_numeric["Value"] = pd.to_numeric(df_numeric["Value"], errors="coerce")

#     # Remove any rows with NaN values
#     df_numeric = df_numeric.dropna()

#     # Initialize PDF
#     with PdfPages(output_filename) as pdf:
#         # Plot for each unique address
#         for addr in df["addr"].unique():
#             # Filter data for the current address
#             subset = df_numeric[df_numeric["addr"] == addr]
#             event_subset = df[df["addr"] == addr]

#             if len(subset) > 0:  # Only create plot if there's data
#                 # Create figure with subplots for each variable
#                 fig, axes = plt.subplots(
#                     len(variables_colors), 1, figsize=(12, 3 * len(variables_colors))
#                 )
#                 fig.suptitle(f"Address: {addr}", y=1.02, fontsize=16)

#                 # Plot each variable separately
#                 for idx, (var, color) in enumerate(variables_colors.items()):
#                     var_data = subset[subset["Variable"] == var]
#                     current_ax = axes[idx] if len(variables_colors) > 1 else axes

#                     # Highlight time intervals for events
#                     for event, event_color in event_colors.items():
#                         event_times = event_subset[event_subset["event"] == event]
#                         for _, row in event_times.iterrows():
#                             start_time = row["time"]
#                             end_time = row["time"]
#                             current_ax.axvspan(
#                                 start_time,
#                                 end_time,
#                                 color=event_color,
#                                 alpha=0.3,
#                                 label=event
#                                 if idx == 0
#                                 else "",  # Avoid duplicate legends
#                             )

#                     # Scatter plot
#                     sns.scatterplot(
#                         data=var_data,
#                         x="time",
#                         y="Value",
#                         ax=current_ax,
#                         color=color,
#                         alpha=0.6,
#                         label=var,
#                         s=50,  # Point size
#                     )

#                     # Line plot
#                     sns.lineplot(
#                         data=var_data,
#                         x="time",
#                         y="Value",
#                         ax=current_ax,
#                         color=color,
#                         alpha=0.3,
#                         linewidth=2,
#                     )

#                     # Set labels and style
#                     current_ax.set_title(f"{var}", color=color, fontsize=12, pad=10)
#                     current_ax.set_xlabel("Time (s)", fontsize=10)
#                     current_ax.set_ylabel(var, color=color, fontsize=10)

#                     # Add grid with custom style
#                     current_ax.grid(True, linestyle="--", alpha=0.3)

#                     # Style the spines
#                     for spine in current_ax.spines.values():
#                         spine.set_edgecolor(color)
#                         spine.set_linewidth(2)

#                     # Style the tick parameters
#                     current_ax.tick_params(colors="gray")

#                     # Set background color
#                     current_ax.set_facecolor("#f8f9fa")

#                 # Add legend for events
#                 handles, labels = current_ax.get_legend_handles_labels()
#                 current_ax.legend(
#                     handles=handles,
#                     labels=labels + list(event_colors.keys()),
#                     bbox_to_anchor=(1.05, 1),
#                     loc="upper left",
#                 )

#                 # Adjust layout
#                 plt.tight_layout()

#                 # Save to PDF with high DPI
#                 pdf.savefig(fig, bbox_inches="tight", dpi=300)
#                 plt.close()

#     print(f"PDF saved as {output_filename}")


# # Usage
# plot_radar_variables_with_events(df, "radar_analysis_with_events_colored.pdf")
