## Combine all datasets and Train Test Split

In [1]:
# Enable IPython's autoreload extension. Mode 2 reloads imported modules before
# each cell is executed, so edits to local modules are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Standard library / NumPy dependencies used throughout the notebook.
import sys
import numpy as np
import pickle

# '../../' is resolved against the current working directory and points two
# levels up; presumably that is where the `utils` package lives (verify).
sys.path.append('../../')   # Make the directory two levels up importable
# NOTE(review): the wildcard imports below hide which helper names this notebook
# actually relies on; consider importing the needed names explicitly.
from utils.preprocessing import *
from utils.segmentation import *
from utils.visualization import *
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split

# Seeds NumPy's global RNG. Results are only reproducible when the cells are
# executed in order starting from this one.
np.random.seed(42)  # For reproducibility


In [7]:
def select_random_samples(data, n=1, rng=None):
    """Return ``n`` distinct entries of ``data`` drawn at random along axis 0.

    Parameters
    ----------
    data : np.ndarray
        Array to sample from; entries are picked along the first axis.
    n : int, default 1
        Number of entries to draw. Drawing is done without replacement, so
        no entry is returned twice.
    rng : np.random.Generator, optional
        Explicit source of randomness. When omitted, NumPy's global random
        state (the one seeded through ``np.random.seed``) is used, which is
        the original behaviour of this helper.

    Returns
    -------
    np.ndarray
        ``data`` indexed with the drawn indices, in draw order.

    Raises
    ------
    ValueError
        Propagated from NumPy's ``choice`` when ``n`` exceeds
        ``data.shape[0]`` (sampling is without replacement).
    """
    # Fall back to the legacy global RNG so existing callers keep their
    # seed-dependent results; a Generator exposes the same ``choice`` call.
    sampler = np.random if rng is None else rng
    indices = sampler.choice(data.shape[0], n, replace=False)
    return data[indices]

## 1. Combine all datasets

### 1.1 Curb

In [None]:
# Load the curb accelerometer segments (handlebar sensor) for every participant.
# Each .npz archive is opened in a `with` block so its file handle is closed
# as soon as the requested arrays have been read.
CURB_DIR = '../../data/Curb'

# P3: one archive stores both scenes under the keys 'segments_0' / 'segments_1'.
with np.load(f'{CURB_DIR}/P3/handlebar/Accelerometer/segments_100hz_0.5s_50overlap.npz') as data:
    curb_p3_0 = data['segments_0']
    curb_p3_1 = data['segments_1']

# P6, P11, P12, P18, P21: one archive per scene, array stored under 'segments'.
curb_segments = {}
for participant in ('P6', 'P11', 'P12', 'P18', 'P21'):
    for scene in (0, 1):
        scene_path = (f'{CURB_DIR}/{participant}/handlebar/Accelerometer/'
                      f'Accelerometer_data_combined_100hz_scene{scene}_segments.npz')
        with np.load(scene_path) as data:
            curb_segments[participant, scene] = data['segments']

# Expose the arrays under the individual names used by the following cells.
curb_p6_0, curb_p6_1 = curb_segments['P6', 0], curb_segments['P6', 1]
curb_p11_0, curb_p11_1 = curb_segments['P11', 0], curb_segments['P11', 1]
curb_p12_0, curb_p12_1 = curb_segments['P12', 0], curb_segments['P12', 1]
curb_p18_0, curb_p18_1 = curb_segments['P18', 0], curb_segments['P18', 1]
curb_p21_0, curb_p21_1 = curb_segments['P21', 0], curb_segments['P21', 1]

In [8]:
# Balance the curb data per participant, then pool all participants.
# P3 is not resampled in this cell; its two scenes are used exactly as loaded.

# 1. The scene 1 size of each participant is the number of scene 0 segments to keep.
p6_count, p11_count, p12_count, p18_count, p21_count = [
    scene_1.shape[0]
    for scene_1 in (curb_p6_1, curb_p11_1, curb_p12_1, curb_p18_1, curb_p21_1)
]

# 2. Subsample scene 0 down to that size. The draws happen in the order
#    P6, P11, P12, P18, P21, each one consuming NumPy's global random state.
curb_p6_0, curb_p11_0, curb_p12_0, curb_p18_0, curb_p21_0 = [
    select_random_samples(scene_0, keep)
    for scene_0, keep in (
        (curb_p6_0, p6_count),
        (curb_p11_0, p11_count),
        (curb_p12_0, p12_count),
        (curb_p18_0, p18_count),
        (curb_p21_0, p21_count),
    )
]

# 3. Show the resulting shapes so the balance can be checked by eye.
curb_pairs = [
    ("P3", curb_p3_0, curb_p3_1),
    ("P6", curb_p6_0, curb_p6_1),
    ("P11", curb_p11_0, curb_p11_1),
    ("P12", curb_p12_0, curb_p12_1),
    ("P18", curb_p18_0, curb_p18_1),
    ("P21", curb_p21_0, curb_p21_1),
]
print("After balancing:")
for participant, scene_0, scene_1 in curb_pairs:
    print(f"Curb {participant} (scene 0):", scene_0.shape)
    print(f"Curb {participant} (scene 1):", scene_1.shape)

# 4. Pool all participants, one array per scene (stacked along the first axis).
data_curb_0 = np.concatenate([scene_0 for participant, scene_0, scene_1 in curb_pairs])
data_curb_1 = np.concatenate([scene_1 for participant, scene_0, scene_1 in curb_pairs])

print(f"Total curb scene 0 samples: {data_curb_0.shape[0]}")
print(f"Total curb scene 1 samples: {data_curb_1.shape[0]}")

After balancing:
Curb P3 (scene 0): (188, 50, 3)
Curb P3 (scene 1): (188, 50, 3)
Curb P6 (scene 0): (167, 50, 3)
Curb P6 (scene 1): (167, 50, 3)
Curb P11 (scene 0): (250, 50, 3)
Curb P11 (scene 1): (250, 50, 3)
Curb P12 (scene 0): (193, 50, 3)
Curb P12 (scene 1): (193, 50, 3)
Curb P18 (scene 0): (167, 50, 3)
Curb P18 (scene 1): (167, 50, 3)
Curb P21 (scene 0): (224, 50, 3)
Curb P21 (scene 1): (224, 50, 3)
Total curb scene 0 samples: 1189
Total curb scene 1 samples: 1189


In [11]:
# Persist both pooled curb arrays in a single pickle, keyed by scene.
curb_payload = {'scene_0': data_curb_0, 'scene_1': data_curb_1}
with open('../data_more/curb_0.5s_combined_all.pkl', 'wb') as pkl_file:
    pickle.dump(curb_payload, pkl_file)

### 1.2 Asphalt

In [16]:
# Load the asphalt segments of participants P1-P10.
ASPHALT_DIR = '../../data/RoadRoughness/Raw/Asphalt'

asphalt_by_participant = []
for participant in range(1, 11):
    # The context manager closes the .npz file handle once the array is read.
    with np.load(f'{ASPHALT_DIR}/P{participant}/segments_100hz_0.5s_50overlap.npz') as data:
        asphalt_by_participant.append(data['segments'])

# Expose the arrays under the individual names used by the following cells.
(asphalt_p1, asphalt_p2, asphalt_p3, asphalt_p4, asphalt_p5,
 asphalt_p6, asphalt_p7, asphalt_p8, asphalt_p9, asphalt_p10) = asphalt_by_participant

In [17]:
# Report the shape of each participant's asphalt array and the overall segment count.
asphalt_arrays = [asphalt_p1, asphalt_p2, asphalt_p3, asphalt_p4, asphalt_p5,
                  asphalt_p6, asphalt_p7, asphalt_p8, asphalt_p9, asphalt_p10]
asphalt_total = 0
for participant, segments in enumerate(asphalt_arrays, start=1):
    print(f"P{participant}:", segments.shape)
    asphalt_total += segments.shape[0]
print("\nTotal samples:", asphalt_total)

P1: (74, 50, 3)
P2: (120, 50, 3)
P3: (81, 50, 3)
P4: (74, 50, 3)
P5: (114, 50, 3)
P6: (106, 50, 3)
P7: (78, 50, 3)
P8: (86, 50, 3)
P9: (190, 50, 3)
P10: (93, 50, 3)

Total samples: 1016


In [18]:
# Stack the asphalt arrays of all participants along the first axis and pickle the result.
asphalt_parts = (asphalt_p1, asphalt_p2, asphalt_p3, asphalt_p4, asphalt_p5,
                 asphalt_p6, asphalt_p7, asphalt_p8, asphalt_p9, asphalt_p10)
data_asphalt = np.concatenate(asphalt_parts, axis=0)
with open('../data_more/asphalt_0.5s_combined_all.pkl', 'wb') as pkl_file:
    pickle.dump(data_asphalt, pkl_file)

### 1.3 Cobblestone

In [30]:
# Load the cobblestone segments of participants P1-P10.
COBBLESTONE_DIR = '../../data/RoadRoughness/Raw/Cobblestone'

cobblestone_by_participant = []
for participant in range(1, 11):
    # The context manager closes the .npz file handle once the array is read.
    with np.load(f'{COBBLESTONE_DIR}/P{participant}/segments_100hz_0.5s_50overlap.npz') as data:
        cobblestone_by_participant.append(data['segments'])

# Expose the arrays under the individual names used by the following cells.
(cobblestone_p1, cobblestone_p2, cobblestone_p3, cobblestone_p4, cobblestone_p5,
 cobblestone_p6, cobblestone_p7, cobblestone_p8, cobblestone_p9,
 cobblestone_p10) = cobblestone_by_participant

In [31]:
# Report the shape of each participant's cobblestone array and the overall segment count.
cobblestone_arrays = [cobblestone_p1, cobblestone_p2, cobblestone_p3, cobblestone_p4,
                      cobblestone_p5, cobblestone_p6, cobblestone_p7, cobblestone_p8,
                      cobblestone_p9, cobblestone_p10]
print("Cobblestone shapes:")
cobblestone_total = 0
for participant, segments in enumerate(cobblestone_arrays, start=1):
    print(f"P{participant}:", segments.shape)
    cobblestone_total += segments.shape[0]
print("\nTotal samples:", cobblestone_total)

Cobblestone shapes:
P1: (59, 50, 3)
P2: (55, 50, 3)
P3: (49, 50, 3)
P4: (71, 50, 3)
P5: (51, 50, 3)
P6: (62, 50, 3)
P7: (47, 50, 3)
P8: (67, 50, 3)
P9: (67, 50, 3)
P10: (75, 50, 3)

Total samples: 603


In [21]:
# Stack the cobblestone arrays of all participants along the first axis and pickle the result.
cobblestone_parts = (cobblestone_p1, cobblestone_p2, cobblestone_p3, cobblestone_p4,
                     cobblestone_p5, cobblestone_p6, cobblestone_p7, cobblestone_p8,
                     cobblestone_p9, cobblestone_p10)
data_cobblestone = np.concatenate(cobblestone_parts, axis=0)
with open('../data_more/cobblestone_0.5s_combined_all.pkl', 'wb') as pkl_file:
    pickle.dump(data_cobblestone, pkl_file)

### 1.4 CompactGravel

In [24]:
# Load the compact gravel segments of participants P1-P10.
COMPACTGRAVEL_DIR = '../../data/RoadRoughness/Raw/CompactGravel'

compactgravel_by_participant = []
for participant in range(1, 11):
    # The context manager closes the .npz file handle once the array is read.
    with np.load(f'{COMPACTGRAVEL_DIR}/P{participant}/segments_100hz_0.5s_50overlap.npz') as data:
        compactgravel_by_participant.append(data['segments'])

# Expose the arrays under the individual names used by the following cells.
(compactgravel_p1, compactgravel_p2, compactgravel_p3, compactgravel_p4,
 compactgravel_p5, compactgravel_p6, compactgravel_p7, compactgravel_p8,
 compactgravel_p9, compactgravel_p10) = compactgravel_by_participant

In [25]:
# Report the shape of each participant's compact gravel array and the overall segment count.
compactgravel_arrays = [compactgravel_p1, compactgravel_p2, compactgravel_p3,
                        compactgravel_p4, compactgravel_p5, compactgravel_p6,
                        compactgravel_p7, compactgravel_p8, compactgravel_p9,
                        compactgravel_p10]
print("CompactGravel shapes:")
compactgravel_total = 0
for participant, segments in enumerate(compactgravel_arrays, start=1):
    print(f"P{participant}:", segments.shape)
    compactgravel_total += segments.shape[0]
print("\nTotal samples:", compactgravel_total)

CompactGravel shapes:
P1: (66, 50, 3)
P2: (71, 50, 3)
P3: (99, 50, 3)
P4: (59, 50, 3)
P5: (51, 50, 3)
P6: (59, 50, 3)
P7: (79, 50, 3)
P8: (64, 50, 3)
P9: (61, 50, 3)
P10: (79, 50, 3)

Total samples: 688


In [None]:
# NOTE(review): this combine-and-save step is disabled (fully commented out),
# unlike the curb, asphalt and cobblestone sections, so this cell neither creates
# `data_compactgravel` nor writes '../data_more/compactgravel_0.5s_combined_all.pkl'.
# Confirm whether that is intentional before relying on either.
# # combine and save all compact gravel data
# data_compactgravel = np.concatenate([compactgravel_p1, compactgravel_p2, compactgravel_p3, compactgravel_p4, compactgravel_p5, 
#                                    compactgravel_p6, compactgravel_p7, compactgravel_p8, compactgravel_p9, compactgravel_p10])
# with open('../data_more/compactgravel_0.5s_combined_all.pkl', 'wb') as f:
#     pickle.dump(data_compactgravel, f)


### 1.5 Dirt

In [32]:
# Load the dirt segments of participants P1-P10.
DIRT_DIR = '../../data/RoadRoughness/Raw/Dirt'

dirt_by_participant = []
for participant in range(1, 11):
    # The context manager closes the .npz file handle once the array is read.
    with np.load(f'{DIRT_DIR}/P{participant}/segments_100hz_0.5s_50overlap.npz') as data:
        dirt_by_participant.append(data['segments'])

# Expose the arrays under the individual names used by the following cells.
(dirt_p1, dirt_p2, dirt_p3, dirt_p4, dirt_p5,
 dirt_p6, dirt_p7, dirt_p8, dirt_p9, dirt_p10) = dirt_by_participant

In [33]:
# Report the shape of each participant's dirt array and the overall segment count.
dirt_arrays = [dirt_p1, dirt_p2, dirt_p3, dirt_p4, dirt_p5,
               dirt_p6, dirt_p7, dirt_p8, dirt_p9, dirt_p10]
print("Dirt shapes:")
dirt_total = 0
for participant, segments in enumerate(dirt_arrays, start=1):
    print(f"P{participant}:", segments.shape)
    dirt_total += segments.shape[0]
print("\nTotal samples:", dirt_total)

Dirt shapes:
P1: (93, 50, 3)
P2: (74, 50, 3)
P3: (75, 50, 3)
P4: (89, 50, 3)
P5: (63, 50, 3)
P6: (100, 50, 3)
P7: (67, 50, 3)
P8: (79, 50, 3)
P9: (83, 50, 3)
P10: (67, 50, 3)

Total samples: 790


In [None]:
# NOTE(review): this combine-and-save step is disabled (fully commented out),
# unlike the curb, asphalt and cobblestone sections, so this cell neither creates
# `data_dirt` nor writes '../data_more/dirt_0.5s_combined_all.pkl'.
# Confirm whether that is intentional before relying on either.
# # combine and save all dirt data
# data_dirt = np.concatenate([dirt_p1, dirt_p2, dirt_p3, dirt_p4, dirt_p5, 
#                            dirt_p6, dirt_p7, dirt_p8, dirt_p9, dirt_p10])
# with open('../data_more/dirt_0.5s_combined_all.pkl', 'wb') as f:
#     pickle.dump(data_dirt, f)

### 1.6 PavingStone

In [34]:
# Load the paving stone segments of participants P1-P10.
PAVINGSTONE_DIR = '../../data/RoadRoughness/Raw/PavingStone'

pavingstone_by_participant = []
for participant in range(1, 11):
    # The context manager closes the .npz file handle once the array is read.
    with np.load(f'{PAVINGSTONE_DIR}/P{participant}/segments_100hz_0.5s_50overlap.npz') as data:
        pavingstone_by_participant.append(data['segments'])

# Expose the arrays under the individual names used by the following cells.
(pavingstone_p1, pavingstone_p2, pavingstone_p3, pavingstone_p4, pavingstone_p5,
 pavingstone_p6, pavingstone_p7, pavingstone_p8, pavingstone_p9,
 pavingstone_p10) = pavingstone_by_participant

In [35]:
# Report the shape of each participant's paving stone array and the overall segment count.
pavingstone_arrays = [pavingstone_p1, pavingstone_p2, pavingstone_p3, pavingstone_p4,
                      pavingstone_p5, pavingstone_p6, pavingstone_p7, pavingstone_p8,
                      pavingstone_p9, pavingstone_p10]
print("PavingStone shapes:")
pavingstone_total = 0
for participant, segments in enumerate(pavingstone_arrays, start=1):
    print(f"P{participant}:", segments.shape)
    pavingstone_total += segments.shape[0]
print("\nTotal samples:", pavingstone_total)

PavingStone shapes:
P1: (81, 50, 3)
P2: (83, 50, 3)
P3: (72, 50, 3)
P4: (67, 50, 3)
P5: (57, 50, 3)
P6: (89, 50, 3)
P7: (80, 50, 3)
P8: (83, 50, 3)
P9: (75, 50, 3)
P10: (79, 50, 3)

Total samples: 766


In [None]:
# NOTE(review): this combine-and-save step is disabled (fully commented out),
# unlike the curb, asphalt and cobblestone sections, so this cell neither creates
# `data_pavingstone` nor writes '../data_more/pavingstone_0.5s_combined_all.pkl'.
# Confirm whether that is intentional before relying on either.
# # combine and save all paving stone data
# data_pavingstone = np.concatenate([pavingstone_p1, pavingstone_p2, pavingstone_p3, pavingstone_p4, pavingstone_p5, 
#                                  pavingstone_p6, pavingstone_p7, pavingstone_p8, pavingstone_p9, pavingstone_p10])
# with open('../data_more/pavingstone_0.5s_combined_all.pkl', 'wb') as f:
#     pickle.dump(data_pavingstone, f)