## Combine all datasets and Train/Test Split

In [19]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [20]:
import sys
import numpy as np
# Make the directory two levels above this notebook importable so that the
# `utils` package below can be resolved.
sys.path.append('../../')   # Add the grandparent directory to the Python path
# NOTE(review): wildcard imports hide which names each module contributes;
# consider importing only the names this notebook actually uses.
from utils.preprocessing import *
from utils.segmentation import *
from utils.visualization import *
# Seed NumPy's global RNG; select_random_samples draws from it through
# np.random.choice, so this fixes which segments get sub-sampled.
np.random.seed(42)  # For reproducibility

In [21]:
def select_random_samples(data, n=190, rng=None):
    """Return ``n`` distinct entries of ``data`` chosen at random along axis 0.

    Parameters
    ----------
    data : numpy.ndarray
        Array to sample from; entries are picked along its first axis.
    n : int, default 190
        Number of entries to draw, without replacement.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Random source to draw from. When omitted, NumPy's global RNG is used,
        so a prior ``np.random.seed(...)`` call determines the result.

    Returns
    -------
    numpy.ndarray
        ``data[indices]`` for the drawn indices, in the order they were drawn.

    Raises
    ------
    ValueError
        If ``n`` is larger than the number of entries in ``data``.
    """
    available = data.shape[0]
    if n > available:
        # Fail with an explicit message instead of NumPy's generic one.
        raise ValueError(
            f"Cannot draw {n} samples without replacement from only {available} entries"
        )
    # Fall back to the module-level RNG so existing seeded callers are unaffected.
    source = np.random if rng is None else rng
    indices = source.choice(available, n, replace=False)
    return data[indices]

## 1. Combine all datasets

In [None]:

# File name shared by every segmented recording used below.
SEGMENTS_FILE = 'segments_100hz_0.5s_50overlap.npz'
CURB_DIR = '../../data/Curb/P3/handlebar/Accelerometer'
ROUGHNESS_DIR = '../../data/RoadRoughness/Raw'
# Number of segments kept per road-surface class.
N_SAMPLES_PER_SURFACE = 190


def _load_npz_arrays(path, *keys):
    """Read the arrays stored under ``keys`` in an .npz archive, then close it."""
    # np.load keeps the archive open until closed; the context manager
    # releases the file handle once the requested arrays are in memory.
    with np.load(path) as archive:
        return tuple(archive[key] for key in keys)


def _load_surface_sample(surface):
    """Load the P1 segments of one road surface and sub-sample them."""
    (segments,) = _load_npz_arrays(
        f'{ROUGHNESS_DIR}/{surface}/P1/{SEGMENTS_FILE}', 'segments'
    )
    return select_random_samples(segments, N_SAMPLES_PER_SURFACE)


# Curb recordings: both stored arrays are used in full (no sub-sampling).
data_curb_0, data_curb_1 = _load_npz_arrays(
    f'{CURB_DIR}/{SEGMENTS_FILE}', 'segments_0', 'segments_1'
)

# Road-surface recordings. Keep this order: each call consumes the random
# stream used by select_random_samples, so reordering changes the selection.
data_asphalt = _load_surface_sample('Asphalt')
data_cobblestone = _load_surface_sample('Cobblestone')
data_compact_gravel = _load_surface_sample('CompactGravel')
data_Dirt = _load_surface_sample('Dirt')
data_PavingStone = _load_surface_sample('PavingStone')



In [24]:
# Pair each surface's segment array with its class label.
datasets = list(zip(
    (
        data_curb_0,
        data_curb_1,
        data_asphalt,
        data_cobblestone,
        data_compact_gravel,
        data_Dirt,
        data_PavingStone,
    ),
    (
        "curb_0",
        "curb_1",
        "asphalt",
        "cobblestone",
        "compact_gravel",
        "dirt",
        "paving_stone",
    ),
))

# Flatten into one list of (sensor_values, label) tuples, one per segment,
# keeping the dataset order above.
combined_dataset = []
for data, label in datasets:
    combined_dataset.extend((segment, label) for segment in data)

In [None]:
import pickle
from pathlib import Path

# Persist the combined (sensor_values, label) list to disk.
combined_path = Path('../../data/TrainTest/combined_dataset.pkl')
# Create the output folder if it is missing; opening the file for writing
# would otherwise fail with FileNotFoundError.
combined_path.parent.mkdir(parents=True, exist_ok=True)
with combined_path.open('wb') as f:
    pickle.dump(combined_dataset, f)

In [25]:
from collections import Counter

# Overall size of the combined dataset.
total_samples = len(combined_dataset)
print("Number of samples:", total_samples)

# Shape of the sensor array held by the first (sensor_values, label) pair.
first_segment = combined_dataset[0][0]
print("Shape of first sample's sensor values:", first_segment.shape)

# Tally how many segments carry each label (second element of every pair).
label_counts = Counter(pair[1] for pair in combined_dataset)
print("Number of samples per label:")
for label in label_counts:
    print(f"{label}: {label_counts[label]}")

Number of samples: 1326
Shape of first sample's sensor values: (50, 3)
Number of samples per label:
curb_0: 188
curb_1: 188
asphalt: 190
cobblestone: 190
compact_gravel: 190
dirt: 190
paving_stone: 190


## 2. Train Test Split


In [None]:
from sklearn.model_selection import train_test_split

# Labels in dataset order, passed as the stratify argument of the split.
stratify_labels = [pair[1] for pair in combined_dataset]

# Hold out 20% of the samples for testing; the remaining 80% form the train set.
train_set, test_set = train_test_split(
    combined_dataset,
    test_size=0.2,
    random_state=42,
    shuffle=True,
    stratify=stratify_labels,
)
for subset in (train_set, test_set):
    print(len(subset))

1060
266


In [29]:
# Display the first training example: a (sensor_values, label) tuple.
train_set[0]

(array([[-2.94318687,  4.45824211,  5.85580769],
        [-2.94290638,  4.458214  ,  5.85604172],
        [-2.94262588,  4.45818589,  5.85627574],
        [-2.94234539,  4.45815778,  5.85650977],
        [-2.9420649 ,  4.45812967,  5.85674379],
        [-2.94178441,  4.45810157,  5.85697782],
        [-2.94150391,  4.45807346,  5.85721184],
        [-2.94122342,  4.45804535,  5.85744586],
        [-2.94094293,  4.45801724,  5.85767989],
        [-2.94066243,  4.45798913,  5.85791391],
        [-2.94038194,  4.45796102,  5.85814794],
        [-2.94010145,  4.45793292,  5.85838196],
        [-2.93982095,  4.45790481,  5.85861599],
        [-2.93954046,  4.4578767 ,  5.85885001],
        [-2.93925997,  4.45784859,  5.85908403],
        [-2.93897947,  4.45782048,  5.85931806],
        [-2.93869898,  4.45779237,  5.85955208],
        [-2.93841849,  4.45776426,  5.85978611],
        [-2.93813799,  4.45773616,  5.86002013],
        [-2.9378575 ,  4.45770805,  5.86025416],
        [-2.93757701