# **UAH Driveset**

## Data Preprocessing for one trip

In [None]:
import pandas as pd
import os

# datasets\UAH-DRIVESET-v1\D1\20151110175712-16km-D1-NORMAL1-SECONDARY

# Working directory the notebook is executed from
current_dir = os.getcwd()

# Root directory: three levels above the working directory
root_dir = os.path.abspath(
    os.path.join(current_dir, *([os.pardir] * 3))
)

# Location of the UAH-DriveSet data below the root directory
dataset_dir = os.path.join(root_dir, 'datasets', 'UAH-DRIVESET-v1')

### **Import Data (UAH-Driveset dataset)**

Sensor labels found in http://www.robesafe.uah.es/personal/eduardo.romera/pdfs/Romera16itsc.pdf

In [None]:

#****************************************************#
# RAW ACCELEROMETER - 10Hz
# 1) Timestamp (seconds)
# 2) Boolean of system activated (1 if >50km/h)
# 3) Acceleration in X (Gs)
# 4) Acceleration in Y (Gs)
# 5) Acceleration in Z (Gs)
# 6) Acceleration in X filtered by KF (Gs)
# 7) Acceleration in Y filtered by KF (Gs)
# 8) Acceleration in Z filtered by KF (Gs)
# 9) Roll (degrees)
# 10) Pitch (degrees)
# 11) Yaw (degrees)
#****************************************************#
# Folder of the single trip processed in this section
folder_path = os.path.join(dataset_dir, 'D1', '20151110175712-16km-D1-NORMAL1-SECONDARY')

accel_path = os.path.join(folder_path, 'RAW_ACCELEROMETERS.txt')
gps_path = os.path.join(folder_path, 'RAW_GPS.txt')

# The file is whitespace-separated and has no header row.
# sep=r'\s+' is the documented replacement for delim_whitespace=True,
# which is deprecated since pandas 2.2.
raw_accelerometer = pd.read_csv(accel_path, sep=r'\s+', header=None)

# One label per file column, in file order.
# NOTE: columns 9-11 hold roll/pitch/yaw angles (see the header above) even
# though they are labelled gyroscope*Axis here.
raw_accelerometer.columns = [
    'Timestamp_Accel',
    'SystemActivated',
    'accelerometerXAxis',
    'accelerometerYAxis',
    'accelerometerZAxis',
    'AccelX_KF',
    'AccelY_KF',
    'AccelZ_KF',
    'gyroscopeXAxis',
    'gyroscopeYAxis',
    'gyroscopeZAxis',
]


#****************************************************#
# RAW GPS - 1Hz
# 1) Timestamp (seconds)
# 2) Speed (km/h)
# 3) Latitude coordinate (degrees)
# 4) Longitude coordinate (degrees)
# 5) Altitude (meters)
# 6) Vertical accuracy (degrees)
# 7) Horizontal accuracy (degrees)
# 8) Course (degrees)
# 9) Difcourse: course variation (degrees)
# 10) Lanex dist state [internal val]
# 11) Lanex history [internal val]
#
# Labels for columns 10 and 11 are taken from the dataset reader:
# https://github.com/Eromera/uah_driveset_reader/blob/master/driveset_reader.py
#     elif (i == 10):
#         self.columnInfo.setText('Lanex dist state [internal val]')
#     elif (i == 11):
#         self.columnInfo.setText('Lanex history [internal val]')
#****************************************************#
# The file is whitespace-separated and has no header row.
# sep=r'\s+' is the documented replacement for delim_whitespace=True,
# which is deprecated since pandas 2.2.
raw_gps = pd.read_csv(gps_path, sep=r'\s+', header=None)

# Twelve labels are assigned although the header above lists eleven fields;
# the extra trailing column is labelled 'dropcolumn' and discarded below.
raw_gps.columns = [
    'Timestamp_GPS',
    'Speed',
    'latitude',
    'longitude',
    'Altitude',
    'VerticalAccuracy',
    'HorizontalAccuracy',
    'Course',
    'Difcourse',
    'LanexDistState',
    'LanexHistory',
    'dropcolumn',
]

# Drop the two internal Lanex values and the extra trailing column
raw_gps = raw_gps.drop(columns=['LanexDistState', 'LanexHistory', 'dropcolumn'])


# Independent working copies; the raw_* frames are left as loaded
df_accel, df_gps = raw_accelerometer.copy(), raw_gps.copy()

# Preview the first rows of each frame
print(df_accel.head(), df_gps.head(), sep='\n')

### **Sampling**
#### Upsample the GPS data to 10Hz

GPS data = 1Hz \
Accelerometer data = 10Hz

In [None]:
# The GPS log is sampled at 1Hz and the accelerometer log at 10Hz.
# Repeat every GPS row ten times so both frames share the 10Hz rate,
# then renumber the rows from zero.
df_gps_upsampled = (
    df_gps
    .reindex(df_gps.index.repeat(10))
    .reset_index(drop=True)
)

# Continue with an independent copy of the upsampled frame
df_gps = df_gps_upsampled.copy()

### **Synchronize**

In [None]:
# Earliest timestamp of each stream
min_timestamp_accel = df_accel['Timestamp_Accel'].min()
min_timestamp_gps = df_gps['Timestamp_GPS'].min()

# Trim the stream that starts earlier so that its first row is the sample
# closest in time to the start of the other stream.
# argmin() returns a row *position*, which is what iloc expects. idxmin()
# returns an index *label*, which only matches the position while the index
# is a fresh 0..n-1 range (not the case once this cell has trimmed a frame).
if min_timestamp_accel < min_timestamp_gps:
    row_number_accel = df_accel['Timestamp_Accel'].sub(min_timestamp_gps).abs().argmin()
    df_accel = df_accel.iloc[row_number_accel:]
elif min_timestamp_gps < min_timestamp_accel:
    row_number_gps = df_gps['Timestamp_GPS'].sub(min_timestamp_accel).abs().argmin()
    df_gps = df_gps.iloc[row_number_gps:]

# Truncate the longer frame so both have the same number of rows
min_length = min(len(df_accel), len(df_gps))
df_accel = df_accel.iloc[:min_length]
df_gps = df_gps.iloc[:min_length]

# Combine side by side; rows are paired by position after both indexes are reset
df_combined = pd.concat([df_accel.reset_index(drop=True), df_gps.reset_index(drop=True)], axis=1)

# Save the synchronized data to CSV (path is relative to the working directory)
df_combined.to_csv('synchronized_data.csv', index=False)

In [None]:
# # Accelerometer data columns
# timestamp = df_accel['Timestamp_Accel']
# system_activated = df_accel['SystemActivated']
# accelX = df_accel['accelerometerXAxis']
# accelY = df_accel['accelerometerYAxis']
# accelZ = df_accel['accelerometerZAxis']
# accelX_KF = df_accel['AccelX_KF']
# accelY_KF = df_accel['AccelY_KF']
# accelZ_KF = df_accel['AccelZ_KF']
# gyrX_roll = df_accel['gyroscopeXAxis']
# gyrY_pitch = df_accel['gyroscopeYAxis']
# gyrZ_yaw = df_accel['gyroscopeZAxis']

# # GPS data columns
# timestamp_gps = df_gps['Timestamp_GPS']
# speed = df_gps['Speed']
# latitude = df_gps['latitude']
# longitude = df_gps['longitude']
# altitude = df_gps['Altitude']
# vertical_accuracy = df_gps['VerticalAccuracy']
# horizontal_accuracy = df_gps['HorizontalAccuracy']
# course = df_gps['Course']
# difcourse = df_gps['Difcourse']

## Generalized code for preprocessing

In [None]:
import pandas as pd
import os

# Get the current directory path
current_dir = os.getcwd()

# Go up three directories from the current directory
root_dir = os.path.abspath(os.path.join(current_dir, os.pardir, os.pardir, os.pardir))

# Raw dataset location and the folder that receives the processed CSV files
dataset_dir = os.path.join(root_dir, 'datasets', 'UAH-DRIVESET-v1')
output_dir = os.path.join(dataset_dir, 'UAH-Processed')

# Create the output directory (and any missing parents). exist_ok=True avoids
# the check-then-create race and makes re-running this cell a no-op.
os.makedirs(output_dir, exist_ok=True)

# Column labels are identical for every trip, so they are built once here
# rather than on every loop iteration.
accel_columns = [
    'Timestamp_Accel',
    'SystemActivated',
    'accelerometerXAxis',
    'accelerometerYAxis',
    'accelerometerZAxis',
    'AccelX_KF',
    'AccelY_KF',
    'AccelZ_KF',
    'gyroscopeXAxis',
    'gyroscopeYAxis',
    'gyroscopeZAxis',
]
gps_columns = [
    'Timestamp_GPS',
    'Speed',
    'latitude',
    'longitude',
    'Altitude',
    'VerticalAccuracy',
    'HorizontalAccuracy',
    'Course',
    'Difcourse',
    'LanexDistState',
    'LanexHistory',
    'dropcolumn',
]

# Visit every directory below the dataset directory; a directory is processed
# only when it contains both raw sensor files.
for root, dirs, files in os.walk(dataset_dir):
    for subfolder in dirs:
        subfolder_path = os.path.join(root, subfolder)

        # Construct paths to accelerometer and GPS files
        accel_path = os.path.join(subfolder_path, 'RAW_ACCELEROMETERS.txt')
        gps_path = os.path.join(subfolder_path, 'RAW_GPS.txt')

        # Skip directories that do not hold both files
        if not (os.path.exists(accel_path) and os.path.exists(gps_path)):
            continue

        # Read the whitespace-separated, header-less raw files.
        # sep=r'\s+' is the documented replacement for delim_whitespace=True,
        # which is deprecated since pandas 2.2.
        raw_accelerometer = pd.read_csv(accel_path, sep=r'\s+', header=None)
        raw_gps = pd.read_csv(gps_path, sep=r'\s+', header=None)

        # Label the columns and discard the GPS columns that are not used
        raw_accelerometer.columns = accel_columns
        raw_gps.columns = gps_columns
        raw_gps = raw_gps.drop(columns=['LanexDistState', 'LanexHistory', 'dropcolumn'])

        df_accel = raw_accelerometer.copy()
        df_gps = raw_gps.copy()

        # Repeat every GPS row ten times so the row count per second matches
        # the accelerometer frame, then renumber the rows from zero
        df_gps_upsampled = df_gps.reindex(df_gps.index.repeat(10)).reset_index(drop=True)
        df_gps = df_gps_upsampled.copy()

        # Earliest timestamp of each stream
        min_timestamp_accel = df_accel['Timestamp_Accel'].min()
        min_timestamp_gps = df_gps['Timestamp_GPS'].min()

        # Trim the stream that starts earlier so its first row is the sample
        # closest in time to the start of the other stream. argmin() returns
        # a row position, which is what iloc expects (idxmin() returns a label).
        if min_timestamp_accel < min_timestamp_gps:
            row_number_accel = df_accel['Timestamp_Accel'].sub(min_timestamp_gps).abs().argmin()
            df_accel = df_accel.iloc[row_number_accel:]
        elif min_timestamp_gps < min_timestamp_accel:
            row_number_gps = df_gps['Timestamp_GPS'].sub(min_timestamp_accel).abs().argmin()
            df_gps = df_gps.iloc[row_number_gps:]

        # Ensure that both dataframes have the same length
        min_length = min(len(df_accel), len(df_gps))
        df_accel = df_accel.iloc[:min_length]
        df_gps = df_gps.iloc[:min_length]

        # Combine side by side; rows are paired by position after both indexes are reset
        df_combined = pd.concat([df_accel.reset_index(drop=True), df_gps.reset_index(drop=True)], axis=1)

        # Save the synchronized data to a CSV file with the same name as the subfolder
        output_filename = os.path.join(output_dir, f'{subfolder}.csv')
        df_combined.to_csv(output_filename, index=False)
