# Preprocessing for Curb Data

In [2]:
# Load IPython's autoreload extension. Mode 2 reloads all imported modules before
# each cell executes, so edits to the local utils modules take effect without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
import sys
import numpy as np
import random
sys.path.append('../../')   # Add parent directory to Python path
from utils.preprocessing import *
from utils.segmentation import *
from utils.visualization import *

# P3

In [3]:
# Fill missing values in the ESP1 and ESP2 recordings, then combine both into one annotated file.
# Step 1: load the merged ESP1 accelerometer recording.
df_one= pd.read_csv('../../data/Curb/P3/handlebar/Accelerometer/Accelerometer_data_merged_ESP1.csv')  
output = '../../data/Curb/P3/handlebar/Accelerometer/Accelerometer_data_merged_ESP1_filled_missing_values.csv'
# Fill the gaps in ESP1. The helper's return value is not used; later steps keep working with df_one.
# NOTE(review): presumably the helper fills df_one in place and writes the result to `output` — confirm in utils.preprocessing.
fill_missing_values_curb(df_one, output)
# Step 2: repeat the same load-and-fill procedure for the ESP2 recording.
df_two= pd.read_csv('../../data/Curb/P3/handlebar/Accelerometer/Accelerometer_data_merged_ESP2.csv')  
output = '../../data/Curb/P3/handlebar/Accelerometer/Accelerometer_data_merged_ESP2_filled_missing_values.csv'
fill_missing_values_curb(df_two,output)
# Sanity check: count the remaining NaNs per acceleration axis.
# NOTE(review): only ESP2 (df_two) is checked here; ESP1 (df_one) is not verified.
print(df_two[['Acc-X', 'Acc-Y', 'Acc-Z']].isnull().sum())
# Step 3: combine the ESP1 and ESP2 activities; `output` now holds the path for the combined CSV.
# NOTE(review): how the annotation is reconciled between the two recordings is decided inside combine_activities_curb.
output = '../../data/Curb/P3/handlebar/Accelerometer/Accelerometer_data_combined.csv'
df_combined = combine_activities_curb(df_one,df_two,output)

Acc-X    0
Acc-Y    0
Acc-Z    0
dtype: int64


In [None]:
# Downsample the combined recording to 100 Hz and to 30 Hz.
df = pd.read_csv('../../data/Curb/P3/handlebar/Accelerometer/Accelerometer_data_combined.csv')

# Keep only the timestamp, the three acceleration axes and the label column.
df_selected = df[['NTP', 'Acc-X', 'Acc-Y', 'Acc-Z', 'curb_scene']].copy()
categorical_attributes = ['curb_scene']

# Arguments that are identical for both target frequencies.
shared_kwargs = dict(
    timestamp_col='NTP',
    categorical_attributes=categorical_attributes,
)

df_100hz = downsample_to_frequency(
    df_selected,
    target_hz=100,
    output_path='../../data/Curb/P3/handlebar/Accelerometer/Accelerometer_data_combined_100hz.csv',
    **shared_kwargs,
)
df_30hz = downsample_to_frequency(
    df_selected,
    target_hz=30,
    output_path='../../data/Curb/P3/handlebar/Accelerometer/Accelerometer_data_combined_30hz.csv',
    **shared_kwargs,
)

curb_scene
0.0    383229
1.0     10853
Name: count, dtype: int64

## 100 Hz, curb scenes 0 and 1, window size 0.5 s, 50% overlap

In [None]:
# Build a class-balanced set of 0.5 s windows (50% overlap) from the 100 Hz data.
df_combined_100hz = pd.read_csv('../../data/Curb/P3/handlebar/Accelerometer/Accelerometer_data_combined_100hz.csv')
# Split the rows by label before windowing, so each window contains a single class.
# NOTE(review): masking by label joins non-adjacent stretches of the recording, so a
# window may straddle a time gap unless the segmentation helper accounts for it — confirm.
df_combined_100hz_0 = df_combined_100hz[df_combined_100hz['curb_scene'] == 0]
df_combined_100hz_1 = df_combined_100hz[df_combined_100hz['curb_scene'] == 1]
# Segmentation into 50% overlapping windows
segments_0 = segment_acceleration_data_overlapping_numpy(
    df_combined_100hz_0,
    window_size=50,   # 0.5s at 100Hz
    overlap=50,       # 50% overlap
    channels=['Acc-X', 'Acc-Y', 'Acc-Z'],
)
segments_1 = segment_acceleration_data_overlapping_numpy(
    df_combined_100hz_1,
    window_size=50,   # 0.5s at 100Hz
    overlap=50,       # 50% overlap
    channels=['Acc-X', 'Acc-Y', 'Acc-Z'],
)
# Class 0 (normal riding) yields far more windows than class 1, so undersample it
# to the class-1 count. A locally seeded generator makes the selection reproducible
# across re-runs instead of depending on NumPy's unseeded global random state.
rng = np.random.default_rng(42)
num_segments = segments_0.shape[0]
indices = rng.choice(num_segments, size=segments_1.shape[0], replace=False)
selected_segments = segments_0[indices]
# Save both classes to one .npz archive under the keys 'segments_1' and 'segments_0'.
np.savez(
    '../../data/Curb/P3/handlebar/Accelerometer/segments_100hz_0.5s_50overlap.npz',
    segments_1 = segments_1,
    segments_0=selected_segments
)

## 30 Hz, curb scenes 0 and 1, window size 0.5 s, 50% overlap

In [None]:
# Build a class-balanced set of 0.5 s windows (50% overlap) from the 30 Hz data.
df_combined_30hz = pd.read_csv('../../data/Curb/P3/handlebar/Accelerometer/Accelerometer_data_combined_30hz.csv')
# Split the rows by label before windowing, so each window contains a single class.
# NOTE(review): masking by label joins non-adjacent stretches of the recording, so a
# window may straddle a time gap unless the segmentation helper accounts for it — confirm.
df_combined_30hz_0 = df_combined_30hz[df_combined_30hz['curb_scene'] == 0]
df_combined_30hz_1 = df_combined_30hz[df_combined_30hz['curb_scene'] == 1]
# Segmentation into 50% overlapping windows
segments_0 = segment_acceleration_data_overlapping_numpy(
    df_combined_30hz_0,
    window_size=15,   # 0.5s at 30Hz
    overlap=50,       # 50% overlap
    channels=['Acc-X', 'Acc-Y', 'Acc-Z'],
)
segments_1 = segment_acceleration_data_overlapping_numpy(
    df_combined_30hz_1,
    window_size=15,   # 0.5s at 30Hz
    overlap=50,       # 50% overlap
    channels=['Acc-X', 'Acc-Y', 'Acc-Z'],
)
# Class 0 (normal riding) yields far more windows than class 1, so undersample it
# to the class-1 count. A locally seeded generator makes the selection reproducible
# across re-runs instead of depending on NumPy's unseeded global random state.
rng = np.random.default_rng(42)
num_segments = segments_0.shape[0]
indices = rng.choice(num_segments, size=segments_1.shape[0], replace=False)
selected_segments = segments_0[indices]
# Save both classes to one .npz archive under the keys 'segments_1' and 'segments_0'.
np.savez(
    '../../data/Curb/P3/handlebar/Accelerometer/segments_30hz_0.5s_50overlap.npz',
    segments_1 = segments_1,
    segments_0=selected_segments
)

## 100 Hz, curb scenes 0 and 1, window size 1 s, 50% overlap

In [None]:
# Build a class-balanced set of 1 s windows (50% overlap) from the 100 Hz data.
df_combined_100hz = pd.read_csv('../../data/Curb/P3/handlebar/Accelerometer/Accelerometer_data_combined_100hz.csv')
# Split the rows by label before windowing, so each window contains a single class.
# NOTE(review): masking by label joins non-adjacent stretches of the recording, so a
# window may straddle a time gap unless the segmentation helper accounts for it — confirm.
df_combined_100hz_0 = df_combined_100hz[df_combined_100hz['curb_scene'] == 0]
df_combined_100hz_1 = df_combined_100hz[df_combined_100hz['curb_scene'] == 1]
# Segmentation into 50% overlapping windows
segments_0 = segment_acceleration_data_overlapping_numpy(
    df_combined_100hz_0,
    window_size=100,   # 1s at 100Hz
    overlap=50,       # 50% overlap
    channels=['Acc-X', 'Acc-Y', 'Acc-Z'],
)
segments_1 = segment_acceleration_data_overlapping_numpy(
    df_combined_100hz_1,
    window_size=100,   # 1s at 100Hz
    overlap=50,       # 50% overlap
    channels=['Acc-X', 'Acc-Y', 'Acc-Z'],
)
# Class 0 (normal riding) yields far more windows than class 1, so undersample it
# to the class-1 count. A locally seeded generator makes the selection reproducible
# across re-runs instead of depending on NumPy's unseeded global random state.
rng = np.random.default_rng(42)
num_segments = segments_0.shape[0]
indices = rng.choice(num_segments, size=segments_1.shape[0], replace=False)
selected_segments = segments_0[indices]
# Save both classes to one .npz archive under the keys 'segments_1' and 'segments_0'.
np.savez(
    '../../data/Curb/P3/handlebar/Accelerometer/segments_100hz_1s_50overlap.npz',
    segments_1 = segments_1,
    segments_0=selected_segments
)

## 30 Hz, curb scenes 0 and 1, window size 1 s, 50% overlap

In [5]:
# Build a class-balanced set of 1 s windows (50% overlap) from the 30 Hz data.
df_combined_30hz = pd.read_csv('../../data/Curb/P3/handlebar/Accelerometer/Accelerometer_data_combined_30hz.csv')
# Split the rows by label before windowing, so each window contains a single class.
# NOTE(review): masking by label joins non-adjacent stretches of the recording, so a
# window may straddle a time gap unless the segmentation helper accounts for it — confirm.
df_combined_30hz_0 = df_combined_30hz[df_combined_30hz['curb_scene'] == 0]
df_combined_30hz_1 = df_combined_30hz[df_combined_30hz['curb_scene'] == 1]
# Segmentation into 50% overlapping windows
segments_0 = segment_acceleration_data_overlapping_numpy(
    df_combined_30hz_0,
    window_size=30,   # 1s at 30Hz
    overlap=50,       # 50% overlap
    channels=['Acc-X', 'Acc-Y', 'Acc-Z'],
)
segments_1 = segment_acceleration_data_overlapping_numpy(
    df_combined_30hz_1,
    window_size=30,   # 1s at 30Hz
    overlap=50,       # 50% overlap
    channels=['Acc-X', 'Acc-Y', 'Acc-Z'],
)
# Class 0 (normal riding) yields far more windows than class 1, so undersample it
# to the class-1 count. A locally seeded generator makes the selection reproducible
# across re-runs instead of depending on NumPy's unseeded global random state.
rng = np.random.default_rng(42)
num_segments = segments_0.shape[0]
indices = rng.choice(num_segments, size=segments_1.shape[0], replace=False)
selected_segments = segments_0[indices]
# Save both classes to one .npz archive under the keys 'segments_1' and 'segments_0'.
np.savez(
    '../../data/Curb/P3/handlebar/Accelerometer/segments_30hz_1s_50overlap.npz',
    segments_1 = segments_1,
    segments_0=selected_segments
)