# Preprocessing for Curb Data

In [3]:
# Enable autoload for just updated files
%load_ext autoreload
%autoreload 2

In [4]:
import sys
import numpy as np
sys.path.append('../../')   # Add parent directory to Python path
from utils.preprocessing import *
from utils.segmentation import *
from utils.visualization import *

# P1 Asphalt Road E1-E3

In [44]:
# Missing-value check and trimming for the E1 recording.
# Load the raw E1 handlebar accelerometer CSV and report missing values.
df_one = pd.read_csv('../../data/RoadRoughness/Raw/Asphalt/P1/E1/HandleBar/Accelerometer/Accelerometer.0.csv')
print(df_one.isnull().sum())   # per-column count of missing values
print(df_one.shape)            # size of the raw recording
# Parse the NTP column so it can be compared against a Timestamp below.
df_one['NTP'] = pd.to_datetime(df_one['NTP'])
# Drop every sample recorded before 16:11:01.5 (rows at or after it are kept).
start = pd.to_datetime("2024-11-25 16:11:01.5")
df_filtered = df_one[df_one['NTP'] >= start].copy()
# Print the trimmed size explicitly: a bare `df_filtered.shape` in the middle
# of a cell is evaluated and thrown away, so it was never displayed. This also
# matches how the E2 and E3 cells report their trimmed shape.
print(df_filtered.shape)
# Save the trimmed recording next to the raw file.
df_filtered.to_csv('../../data/RoadRoughness/Raw/Asphalt/P1/E1/HandleBar/Accelerometer/Accelerometer_filtered.csv', index=False)


Date         0
NTP          0
GNSS-Time    0
Acc-X        0
Acc-Y        0
Acc-Z        0
dtype: int64
(10144, 6)


In [None]:
# E2: read the raw handlebar accelerometer CSV, report missing values,
# then discard the samples recorded before the chosen start time.
e2_raw_csv = '../../data/RoadRoughness/Raw/Asphalt/P1/E2/HandleBar/Accelerometer/Accelerometer.0.csv'
e2_filtered_csv = '../../data/RoadRoughness/Raw/Asphalt/P1/E2/HandleBar/Accelerometer/Accelerometer_filtered.csv'

df_two = pd.read_csv(e2_raw_csv)
print(df_two.isna().sum())   # missing values per column
print(df_two.shape)          # raw size

# Convert NTP to datetimes so the cut-off comparison is chronological.
df_two['NTP'] = pd.to_datetime(df_two['NTP'])

# Keep rows at or after 16:12:35.5; everything earlier is dropped.
start = pd.to_datetime("2024-11-25 16:12:35.5")
df_two_filtered = df_two.loc[df_two['NTP'] >= start].copy()
print(df_two_filtered.shape)

# Write the trimmed recording alongside the raw file.
df_two_filtered.to_csv(e2_filtered_csv, index=False)

Date         0
NTP          0
GNSS-Time    0
Acc-X        0
Acc-Y        0
Acc-Z        0
dtype: int64
(9519, 6)


## Load the E3 Data and handle missing values

In [None]:
# E3: load the raw handlebar accelerometer recording and summarise it.
df_three = pd.read_csv(
    '../../data/RoadRoughness/Raw/Asphalt/P1/E3/HandleBar/Accelerometer/Accelerometer.0.csv'
)
print(df_three.isnull().sum())   # missing values per column
print(df_three.shape)            # raw size

# Parse the NTP timestamps, then build a mask of rows at or after 16:14:14.
df_three['NTP'] = pd.to_datetime(df_three['NTP'])
start = pd.to_datetime("2024-11-25 16:14:14")
e3_keep_mask = df_three['NTP'].ge(start)

# Apply the mask on a copy so the trimmed frame is independent of df_three.
df_three_filtered = df_three[e3_keep_mask].copy()
print(df_three_filtered.shape)

# Persist the trimmed recording as CSV without the index column.
df_three_filtered.to_csv(
    '../../data/RoadRoughness/Raw/Asphalt/P1/E3/HandleBar/Accelerometer/Accelerometer_filtered.csv',
    index=False,
)

Date         0
NTP          0
GNSS-Time    0
Acc-X        0
Acc-Y        0
Acc-Z        0
dtype: int64
(10707, 6)


# Combine the filtered E1-E3 data

In [None]:
# Reload the three trimmed recordings from disk, in E1, E2, E3 order.
filtered_csv_paths = [
    '../../data/RoadRoughness/Raw/Asphalt/P1/E1/HandleBar/Accelerometer/Accelerometer_filtered.csv',
    '../../data/RoadRoughness/Raw/Asphalt/P1/E2/HandleBar/Accelerometer/Accelerometer_filtered.csv',
    '../../data/RoadRoughness/Raw/Asphalt/P1/E3/HandleBar/Accelerometer/Accelerometer_filtered.csv',
]
df_filtered_one, df_filtered_two, df_filtered_three = map(pd.read_csv, filtered_csv_paths)

# Stack them end to end with a fresh 0..n-1 index.
# NOTE(review): the three runs are joined back to back, so any time gap between
# them stays in the combined series - confirm the later downsampling and
# windowing steps are meant to run across those joins.
frames_in_order = [df_filtered_one, df_filtered_two, df_filtered_three]
df_filtered_combined = pd.concat(frames_in_order, ignore_index=True)

# Save the combined recording and report its size.
df_filtered_combined.to_csv(
    '../../data/RoadRoughness/Raw/Asphalt/P1/Accelerometer_filtered_combined.csv',
    index=False,
)
print(df_filtered_combined.shape)

In [30]:
# Plot the concatenated E1-E3 recording. The second argument is passed as None;
# a later cell passes a title string in that position, so this presumably means
# "no title" - TODO confirm against utils.visualization.
plot_accelerometer_data(df_filtered_combined,None)

# Downsampling

In [None]:
# Reload the combined recording and keep only the timestamp and the three
# acceleration axes.
df = pd.read_csv('../../data/RoadRoughness/Raw/Asphalt/P1/Accelerometer_filtered_combined.csv')
df_selected = df.loc[:, ['NTP', 'Acc-X', 'Acc-Y', 'Acc-Z']].copy()

# Arguments shared by both downsampling calls: NTP is the timestamp column and
# no categorical attributes are passed.
downsample_common = dict(timestamp_col='NTP', categorical_attributes=None)

# Downsample the same selection to 100 Hz; the helper is given an output path
# for the result.
df_100hz = downsample_to_frequency(
    df_selected,
    target_hz=100,
    output_path='../../data/RoadRoughness/Raw/Asphalt/P1/Accelerometer_filtered_combined_100hz.csv',
    **downsample_common,
)

# Same again at 30 Hz.
df_30hz = downsample_to_frequency(
    df_selected,
    target_hz=30,
    output_path='../../data/RoadRoughness/Raw/Asphalt/P1/Accelerometer_filtered_combined_30hz.csv',
    **downsample_common,
)

# Plot the combined data at 100 Hz and 30 Hz

In [None]:
# Plot both downsampled versions of the combined recording.
# The titles used to say "P3", but every frame in this section is read from the
# .../Asphalt/P1/ directory, so the figures were mislabelled; they now say "P1".
name = 'P1 Handlebar Accelerometer Data of 100Hz'
plot_accelerometer_data(df_100hz, name)
name = 'P1 Handlebar Accelerometer Data of 30Hz'
plot_accelerometer_data(df_30hz, name)

# 100HZ, 0.5s window size, 50% overlapping

In [None]:
# Reload the 100 Hz version of the combined recording.
df_combined_100hz = pd.read_csv(
    '../../data/RoadRoughness/Raw/Asphalt/P1/Accelerometer_filtered_combined_100hz.csv'
)

# Cut the three acceleration axes into overlapping windows:
# 50 samples per window (0.5 s at 100 Hz), overlap passed as 50 for 50% overlap.
acc_channels = ['Acc-X', 'Acc-Y', 'Acc-Z']
segments = segment_acceleration_data_overlapping_numpy(
    df_combined_100hz, window_size=50, overlap=50, channels=acc_channels
)
print(segments.shape)

# Store the windows in an .npz archive under the key "segments".
segments_npz_path = '../../data/RoadRoughness/Raw/Asphalt/P1/segments_100hz_0.5s_50overlap.npz'
np.savez(segments_npz_path, segments=segments)

# 30HZ, 0.5s window size, 50% overlapping

In [None]:
# Reload the 30 Hz version of the combined recording.
combined_30hz_csv = '../../data/RoadRoughness/Raw/Asphalt/P1/Accelerometer_filtered_combined_30hz.csv'
df_combined_30hz = pd.read_csv(combined_30hz_csv)

# Windowing settings: 15 samples per window (0.5 s at 30 Hz), overlap passed
# as 50 for 50% overlap, over the three acceleration axes.
window_settings = {
    'window_size': 15,
    'overlap': 50,
    'channels': ['Acc-X', 'Acc-Y', 'Acc-Z'],
}
segments = segment_acceleration_data_overlapping_numpy(df_combined_30hz, **window_settings)
print(segments.shape)

# Save the windows to an .npz archive under the key "segments".
np.savez(
    '../../data/RoadRoughness/Raw/Asphalt/P1/segments_30hz_0.5s_50overlap.npz',
    segments=segments,
)

(953, 15, 3)


# 100HZ, 1s window size, 50% overlapping

In [5]:
# Source recording: the combined data at 100 Hz.
df_combined_100hz = pd.read_csv(
    '../../data/RoadRoughness/Raw/Asphalt/P1/Accelerometer_filtered_combined_100hz.csv'
)

# One-second windows: 100 samples at 100 Hz, overlap passed as 50 (50% overlap).
samples_per_window = 100
overlap_value = 50
segments = segment_acceleration_data_overlapping_numpy(
    df_combined_100hz,
    window_size=samples_per_window,
    overlap=overlap_value,
    channels=['Acc-X', 'Acc-Y', 'Acc-Z'],
)
print(segments.shape)

# Write the windows to disk; the array is stored under the key "segments".
np.savez('../../data/RoadRoughness/Raw/Asphalt/P1/segments_100hz_1s_50overlap.npz', segments=segments)

(440, 100, 3)


# 30HZ, 1s window size, 50% overlapping

In [7]:
# Source recording: the combined data at 30 Hz.
df_combined_30hz = pd.read_csv(
    '../../data/RoadRoughness/Raw/Asphalt/P1/Accelerometer_filtered_combined_30hz.csv'
)

# One-second windows: 30 samples at 30 Hz, overlap passed as 50 (50% overlap),
# taken over the X, Y and Z acceleration columns.
axis_columns = ['Acc-X', 'Acc-Y', 'Acc-Z']
segments = segment_acceleration_data_overlapping_numpy(
    df_combined_30hz,
    channels=axis_columns,
    window_size=30,
    overlap=50,
)
print(segments.shape)

# Save the result as an .npz archive with the array under the key "segments".
output_npz = '../../data/RoadRoughness/Raw/Asphalt/P1/segments_30hz_1s_50overlap.npz'
np.savez(output_npz, segments=segments)

(444, 30, 3)
