<h1>1 - Raw Data Pre-Processing</h1>

This Jupyter Notebook pre-processes the raw data sampled through MPU-9250 sensors (accelerometer, gyroscope, magnetometer and temperature) and GPS, performing the following operations:
* GPS Data Correction:
    * Removes data from providers other than GPS
    * Calculates the time between samples
    * Calculates the distance between geodetic points
    * Searches for location samples whose time is out of order in the time sequence
    * Renames columns to more friendly names
    * Creates a map with the locations
* MPU-9250 Settings Correction:
    * Converts acceleration data from g to m/s^2
    * Joins settings from 0x68 and 0x69 into a file (one for right and another for left side)
    * Renames columns to more friendly names
* MPU-9250 Data Correction:
    * Resamples to 100Hz
    * Joins data from 0x68 and 0x69 synchronized in the same interval into a file (one for right and another for left side)
    * Synchronizes left and right data in the same interval
    * Renames columns to more friendly names
    * Converts acceleration data from g to m/s^2
* Synchronization of MPU and GPS data:
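    * Maps each MPU sample to a GPS sample based on its timestamp (adding the GPS timestamp, latitude, longitude and speed)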

<h4>Importing Packages</h4>

In [None]:
import os
import folium
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from dateutil import parser
from haversine import haversine, Unit
from geopy.distance import geodesic
from tqdm import tqdm, tqdm_notebook
pd.set_option("float_format", '{:0.10f}'.format)
pd.set_option('display.max_columns', 30)
%matplotlib inline
%run "1 - Raw Data Pre-Processing.py"

<h4>Parameters Definition</h4>

In [None]:
# Progress bar handle; the processing cells assign a tqdm bar to it before running
load_bar = None

# Resampling step in seconds (0.1 s = 10 Hz, 0.01 s = 100 Hz)
sampling_rate_interval = 0.01

# Number of decimal places of the step (0.1 -> 1, 0.01 -> 2); used to round timestamps
sampling_rate_interval_decimals = len(str(sampling_rate_interval).split(".", 1)[1])

# Gravity value in m/s², used to convert readings expressed in g
gravity = 9.80665

<h4>Utility Functions</h4>

In [None]:
# Convert a date/time string into a timestamp (float seconds)
def parseDateToTimestamp(value):
    """Parse ``value`` with ``dateutil.parser`` and return its timestamp.

    The result is whatever ``datetime.timestamp()`` yields for the parsed
    value, i.e. seconds as a float.
    """
    return parser.parse(value).timestamp()

# Convert a timestamp (seconds) into a date string
def parseTimestampToDate(value):
    """Format ``value`` as ``YYYY-MM-DDTHH:MM:SS`` in the local time zone.

    Fractional seconds are dropped by the output format.
    """
    layout = "%Y-%m-%dT%H:%M:%S"
    return datetime.fromtimestamp(value).strftime(layout)

# Limit a timestamp to the decimal places of the sampling step
def roundTimestamp(value):
    """Round ``value`` to ``sampling_rate_interval_decimals`` decimal places."""
    decimals = sampling_rate_interval_decimals
    return round(value, decimals)

# Compensate clock offsets/delays between devices
def shiftTime(data, time): # time in seconds
    """Add ``time`` seconds to ``data['timestamp']`` and return ``data``.

    The addition is applied in place, so the object passed in is the one
    that gets modified and returned.
    """
    offset_seconds = time
    data['timestamp'] += offset_seconds
    return data

# Create the folder (and any missing parent folders) if it doesn't exist
def createFolder(path):
    """Ensure the directory ``path`` exists.

    ``exist_ok=True`` makes the call a no-op when the directory is already
    there and avoids the race between checking for existence and creating.
    A ``FileExistsError`` is raised if ``path`` exists but is not a directory.
    """
    os.makedirs(path, exist_ok=True)
    
# Create (or overwrite) a README.md file
def createReadme(filePath, content):
    """Write ``content`` to ``filePath``, replacing any previous file.

    The file is written as UTF-8 so that non-ASCII text is stored the same
    way on every platform instead of depending on the locale's default
    encoding.
    """
    with open(filePath, "w", encoding="utf-8") as file:
        file.write(content)
        
# Translate the side name to English (used in output file names)
def translateSide(side):
    """Return ``"right"`` for ``"direita"`` and ``"left"`` for anything else."""
    if side == "direita":
        return "right"
    return "left"

<h4>Importing Raw Data</h4>

In [None]:
# NOTE(review): `workspace` is not defined in this notebook; presumably it is
# created by the script executed with %run in the imports cell - confirm.
settings = workspace["settings"]
dataset = workspace["datasets"]["saveiro-1"] # Dataset selection

# Per-side time offsets (added to the MPU timestamps by shiftTime below)
mpu_data_direita_shift_time = dataset['right_shift']
mpu_data_esquerda_shift_time = dataset['left_shift']
# Input/output folders; the output folder is created and receives a README.md
readFolder = os.path.join(settings['readFolder'], dataset['readFolder'])
saveFolder = os.path.join(settings['saveFolder'], dataset['saveFolder'])
createFolder(saveFolder)
createReadme(os.path.join(saveFolder, "README.md"), dataset['readme'])

# GPS samples (comma-separated, unlike the ';'-separated MPU files)
raw_gps = pd.read_csv(os.path.join(readFolder, 'gps.csv'), float_precision="high")

# Right side ('direita'): data and settings of the 0x68 and 0x69 devices
raw_data_68_direita = pd.read_csv(os.path.join(readFolder, 'direita', 'data-set-mpu-0x68.csv'), delimiter=";", float_precision="high")
raw_data_69_direita = pd.read_csv(os.path.join(readFolder, 'direita', 'data-set-mpu-0x69.csv'), delimiter=";", float_precision="high")
settings_68_direita = pd.read_csv(os.path.join(readFolder, 'direita', 'settings-mpu-0x68.csv'), delimiter=";", float_precision="high")
settings_69_direita = pd.read_csv(os.path.join(readFolder, 'direita', 'settings-mpu-0x69.csv'), delimiter=";", float_precision="high")

# Apply the right-side time offset before any interval is computed
raw_data_68_direita = shiftTime(raw_data_68_direita, mpu_data_direita_shift_time)
raw_data_69_direita = shiftTime(raw_data_69_direita, mpu_data_direita_shift_time)

# Left side ('esquerda'): data and settings of the 0x68 and 0x69 devices
raw_data_68_esquerda = pd.read_csv(os.path.join(readFolder, 'esquerda', 'data-set-mpu-0x68.csv'), delimiter=";", float_precision="high")
raw_data_69_esquerda = pd.read_csv(os.path.join(readFolder, 'esquerda', 'data-set-mpu-0x69.csv'), delimiter=";", float_precision="high")
settings_68_esquerda = pd.read_csv(os.path.join(readFolder, 'esquerda', 'settings-mpu-0x68.csv'), delimiter=";", float_precision="high")
settings_69_esquerda = pd.read_csv(os.path.join(readFolder, 'esquerda', 'settings-mpu-0x69.csv'), delimiter=";", float_precision="high")

# Apply the left-side time offset before any interval is computed
raw_data_68_esquerda = shiftTime(raw_data_68_esquerda, mpu_data_esquerda_shift_time)
raw_data_69_esquerda = shiftTime(raw_data_69_esquerda, mpu_data_esquerda_shift_time)

# Placeholders for the processed results, filled in by the cells below
data_gps = None
data_direita = None
data_esquerda = None

settings_direita = None
settings_esquerda = None

# Common interval of the four MPU streams: it starts at the latest first
# sample and ends at the earliest last sample, both rounded to the sampling step.
iniTime = roundTimestamp(max(
    raw_data_68_direita['timestamp'].min(), 
    raw_data_69_direita['timestamp'].min(), 
    raw_data_68_esquerda['timestamp'].min(),
    raw_data_69_esquerda['timestamp'].min(),
))

endTime = roundTimestamp(min(
    raw_data_68_direita['timestamp'].max(), 
    raw_data_69_direita['timestamp'].max(), 
    raw_data_68_esquerda['timestamp'].max(),
    raw_data_69_esquerda['timestamp'].max(),
))   

<h2>GPS Data Correction</h2>

<h4>Data Visualization</h4>

In [None]:
# Summary statistics of the raw GPS samples
raw_gps.describe()

In [None]:
# First rows of the raw GPS samples
raw_gps.head()

In [None]:
# Last rows of the raw GPS samples
raw_gps.tail()

<h4>Processing</h4>

In [None]:
# Keeps only the samples reported by the 'gps' provider; other providers
# (such as network) introduce outliers.
def removeOtherProviders(data):
    """Return the rows of ``data`` whose ``provider`` is ``'gps'``.

    The result is re-indexed from zero and the number of discarded rows is
    printed. The input frame itself is not modified.
    """
    rows_before = len(data)
    only_gps = data.loc[data.provider == 'gps'].reset_index(drop=True)
    rows_after = len(only_gps)

    print("Removido", (rows_before - rows_after), "registros de outros provedores diferentes de GPS.")

    return only_gps

# Calculates the time between consecutive samples
def calcElapsedTime(timestamps):
    """Return a Series with the absolute gap between neighbouring timestamps.

    The first entry is always 0 because the first sample has no predecessor.
    Elements are read as ``timestamps[i]``, so a pandas Series is expected to
    carry the default 0..n-1 index.
    """
    gaps = [0]
    gaps.extend(
        abs(timestamps[k] - timestamps[k - 1])
        for k in range(1, len(timestamps))
    )
    return pd.Series(gaps)

# Calculates the distance between consecutive geodetic points
def calcDistance(latitudes, longitudes):
    """Return a Series with the geodesic distance, in meters, between each
    point and the previous one.

    The first entry is always 0 because the first point has no predecessor.
    Distances come from ``geopy.distance.geodesic`` (haversine was the
    alternative previously tried here).
    """
    meters = [0]

    for k in range(1, len(latitudes)):
        previous_point = (latitudes[k - 1], longitudes[k - 1])
        current_point = (latitudes[k], longitudes[k])
        meters.append(geodesic(previous_point, current_point).meters)

    return pd.Series(meters)

# Finds the samples whose time breaks the non-decreasing time sequence
def findOutliers(timestamps):
    """Return the positions whose timestamp is smaller than the previous one
    or greater than the next one.

    The first and last samples are compared only with the neighbour they have.
    """
    last = len(timestamps) - 1
    flagged = []

    for position in range(last + 1):
        current = timestamps[position]
        # A missing neighbour is replaced by the sample itself, which makes
        # the corresponding comparison always false.
        before = timestamps[position - 1] if position > 0 else current
        after = timestamps[position + 1] if position < last else current

        if before > current or current > after:
            flagged.append(position)

    return flagged

# Main method that processes GPS data
def processGPS(raw_gps):
    """Clean the raw GPS samples, derive extra columns and save them.

    Steps: keep only 'gps' provider rows, replace the ``time`` string column
    by a numeric ``timestamp`` placed first, rename the position/speed
    columns, append ``distance_meters`` and ``elapsed_time_seconds``, report
    how many samples are out of time order and write ``dataset_gps.csv``
    into ``saveFolder``. Out-of-order samples are only counted, not removed.

    Returns the processed DataFrame.
    """
    gps = removeOtherProviders(raw_gps)

    # Numeric timestamp first, then drop the textual time and rename columns
    epoch_seconds = gps['time'].apply(parseDateToTimestamp)
    gps.insert(0, 'timestamp', epoch_seconds, True)
    gps = gps.drop(columns=['time']).rename(columns={
        "lat": "latitude",
        "lon": "longitude",
        "speed": "speed_meters_per_second",
    })

    # Derived columns are appended at the end of the frame
    distances = calcDistance(gps['latitude'], gps['longitude'])
    gps.insert(len(gps.columns), 'distance_meters', distances, True)
    elapsed = calcElapsedTime(gps['timestamp'])
    gps.insert(len(gps.columns), 'elapsed_time_seconds', elapsed, True)

    print("Amount of Outliers =", len(findOutliers(gps['timestamp'])))

    file = os.path.join(saveFolder, "dataset_gps.csv")
    gps.to_csv(file, index=False)
    print("Saved in", file)
    return gps

# Method that creates a map with the route and saves it as HTML
def plotAndSaveMap(gps):
    """Draw the GPS track as a blue polyline on a folium map.

    The map is centred on the midpoint of the latitude/longitude ranges,
    saved as ``map.html`` inside ``saveFolder`` and returned.
    """
    latitude, longitude = gps['latitude'], gps['longitude']
    center = [
        (latitude.min() + latitude.max()) / 2,
        (longitude.min() + longitude.max()) / 2,
    ]

    # One (latitude, longitude) tuple per sample, in sample order
    route = list(map(tuple, gps[['latitude', 'longitude']].to_numpy()))

    maps = folium.Map(location=center, zoom_start=14)
    folium.PolyLine(route, color="blue", weight=2.5, opacity=1).add_to(maps)

    maps.save(os.path.join(saveFolder, "map.html"))
    return maps

In [None]:
# Process a copy so raw_gps stays untouched, then draw and save the route map
data_gps = processGPS(raw_gps.copy())
plotAndSaveMap(data_gps)

<h2>MPU-9250 Settings Correction</h2>

<h4>Data Visualization</h4>

In [None]:
# Raw settings of the right-side 0x68 device
settings_68_direita.head()

In [None]:
# Raw settings of the right-side 0x69 device
settings_69_direita.head()

In [None]:
# Raw settings of the left-side 0x68 device
settings_68_esquerda.head()

In [None]:
# Raw settings of the left-side 0x69 device
settings_69_esquerda.head()

<h4>Processing</h4>

In [None]:
# Convert the accelerometer bias settings from g to m/s^2
def convertAccelerationSettings(data):
    """Multiply the three ``accelerometer_bias_*`` columns by ``gravity``.

    The columns are updated on ``data`` itself, which is also returned.
    """
    for axis in ('x', 'y', 'z'):
        data['accelerometer_bias_' + axis] *= gravity

    return data

# Output columns of the settings dataset, in file order
_SETTINGS_COLUMNS = [
    'placement',
    'address_mpu',
    'address_ak',
    'gyroscope_full_scale',
    'accelerometer_full_scale',
    'magnetometer_full_scale',
    'gyroscope_resolution',
    'accelerometer_resolution',
    'magnetometer_resolution',
    'gyroscope_bias_x',
    'gyroscope_bias_y',
    'gyroscope_bias_z',
    'accelerometer_bias_x',
    'accelerometer_bias_y',
    'accelerometer_bias_z',
    'magnetometer_factory_sensitivity_x',
    'magnetometer_factory_sensitivity_y',
    'magnetometer_factory_sensitivity_z',
    'magnetometer_soft_iron_distortion_x',
    'magnetometer_soft_iron_distortion_y',
    'magnetometer_soft_iron_distortion_z',
    'magnetometer_hard_iron_distortion_x',
    'magnetometer_hard_iron_distortion_y',
    'magnetometer_hard_iron_distortion_z'
]


# Builds the settings row of one sensor placement from a raw settings frame
def _buildSettingsRow(source, placement, role, has_magnetometer):
    """Collect the first-row values of ``source`` for one placement.

    ``role`` is ``'master'`` or ``'slave'`` and selects the
    ``address_mpu_<role>`` and ``<sensor>_<role>_bias_<axis>`` columns.
    When ``has_magnetometer`` is false, ``address_ak`` and the magnetometer
    sensitivity/distortion fields are left out of the row.
    """
    def first(column):
        return source[column].values[0]

    axes = ('x', 'y', 'z')
    row = {
        'placement': placement,
        'address_mpu': first('address_mpu_' + role),
    }

    # Scale and resolution columns are shared by master and slave
    for column in (
        'gyroscope_full_scale',
        'accelerometer_full_scale',
        'magnetometer_full_scale',
        'gyroscope_resolution',
        'accelerometer_resolution',
        'magnetometer_resolution',
    ):
        row[column] = first(column)

    for sensor in ('gyroscope', 'accelerometer'):
        for axis in axes:
            row[sensor + '_bias_' + axis] = first(sensor + '_' + role + '_bias_' + axis)

    if has_magnetometer:
        row['address_ak'] = first('address_ak')
        for prop in ('factory_sensitivity', 'soft_iron_distortion', 'hard_iron_distortion'):
            for axis in axes:
                column = 'magnetometer_' + prop + '_' + axis
                row[column] = first(column)

    return row


# Main method that processes MPU settings
def processSettings(data_68, data_69, side):
    """Join the 0x68 and 0x69 settings of one side into a single dataset.

    Three rows are produced, in this order: ``dashboard`` (master of 0x69),
    ``above_suspension`` (master of 0x68) and ``below_suspension`` (slave of
    0x68, without the magnetometer-specific fields, which stay empty).
    Accelerometer biases are converted from g to m/s^2 and the result is
    written to ``dataset_settings_<side>.csv`` inside ``saveFolder``.

    The rows are assembled in one ``pd.DataFrame`` call because
    ``DataFrame.append`` no longer exists in pandas 2.x. ``dtype=object``
    keeps every value exactly as it was read from the source frames.

    Returns the resulting DataFrame.
    """
    rows = [
        _buildSettingsRow(data_69, 'dashboard', 'master', True),
        _buildSettingsRow(data_68, 'above_suspension', 'master', True),
        _buildSettingsRow(data_68, 'below_suspension', 'slave', False),
    ]
    data = pd.DataFrame(rows, columns=_SETTINGS_COLUMNS, dtype=object)

    # Acceleration parse
    data = convertAccelerationSettings(data)

    data = data.reset_index(drop=True)

    file = os.path.join(saveFolder, "dataset_settings_" + translateSide(side) + ".csv")
    data.to_csv(file, index=False)
    print("Saved in", file)

    return data

In [None]:
# Right side: join the 0x68/0x69 settings (copies keep the raw frames intact)
settings_direita = processSettings(settings_68_direita.copy(), settings_69_direita.copy(), 'direita')

In [None]:
# Left side: join the 0x68/0x69 settings (copies keep the raw frames intact)
settings_esquerda = processSettings(settings_68_esquerda.copy(), settings_69_esquerda.copy(), 'esquerda')

<h2>MPU-9250 Data Correction</h2>

<h4>Data Visualization</h4>

In [None]:
# Summary statistics of the right-side 0x68 samples (after the time shift)
raw_data_68_direita.describe()

In [None]:
# Summary statistics of the right-side 0x69 samples (after the time shift)
raw_data_69_direita.describe()

In [None]:
# Summary statistics of the left-side 0x68 samples (after the time shift)
raw_data_68_esquerda.describe()

In [None]:
# Summary statistics of the left-side 0x69 samples (after the time shift)
raw_data_69_esquerda.describe()

In [None]:
# First rows of the right-side 0x68 samples
raw_data_68_direita.head()

In [None]:
# First rows of the right-side 0x69 samples
raw_data_69_direita.head()

In [None]:
# First rows of the left-side 0x68 samples
raw_data_68_esquerda.head()

In [None]:
# First rows of the left-side 0x69 samples
raw_data_69_esquerda.head()

<h4>Processing</h4>

In [None]:
# Rounds the timestamps and merges the samples that end up duplicated
def fixTimestamps(data):
    """Round every timestamp to the sampling step and average duplicates.

    The ``timestamp`` column of ``data`` is rounded in place; rows that share
    the same rounded timestamp are then replaced by their column-wise mean.
    Returns a new frame indexed from zero.
    """
    # Round the timestamps
    data['timestamp'] = data['timestamp'].apply(roundTimestamp)

    # Group equal timestamps, keeping the mean of each group
    averaged = data.groupby(["timestamp"], as_index=False).mean()

    return averaged.reset_index(drop=True)

# Sequences samples from start to finish, at a fixed step interval
def sequencing(data):
    """Resample ``data`` onto the fixed grid from ``iniTime`` to ``endTime``.

    ``data`` is expected to hold one row per rounded timestamp, in increasing
    order (the output of ``fixTimestamps``). The grid advances by
    ``sampling_rate_interval``. When a row exists for the current instant it
    is used as is; otherwise a row is synthesised as the average of the last
    emitted row and the next available row. If nothing has been emitted yet,
    the next available row is used unchanged, so a missing sample at
    ``iniTime`` no longer raises ``IndexError``.

    Returns ``(new_points, dataFrame)``: the positions of the synthesised
    rows and the resampled DataFrame indexed from zero. The result is empty
    when ``data`` has no row at or after ``iniTime``.
    """
    total = len(data)
    ini = iniTime
    end = endTime
    new_points = []
    result = []

    # Skip the rows recorded before the common start instant, without ever
    # indexing past the end of the data.
    index = 0
    while index < total and roundTimestamp(data.iloc[index]['timestamp']) < ini:
        index += 1

    while ini <= end and index < total:

        series = data.iloc[index].copy()
        timestamp = roundTimestamp(series['timestamp'])

        if ini == timestamp:
            # A real sample exists for this instant: consume it
            index += 1

        else:
            # Gap: average the last emitted row with the next available one
            previous = result[-1] if result else series
            series = (previous.copy() + series) / 2.0
            new_points.append(len(result))

        # The timestamp always follows the grid, including for averaged rows
        series['timestamp'] = ini
        result.append(series)
        ini = roundTimestamp(ini + sampling_rate_interval)

        # The progress bar is optional; it is None until a cell creates it
        if load_bar is not None:
            load_bar.update(1)

    dataFrame = pd.DataFrame(result)
    dataFrame = dataFrame.reset_index(drop=True)
    return new_points, dataFrame

# Joins the 0x68 and 0x69 data into one frame with placement-based column labels
def mergeData(data_68, data_69):
    """Combine the readings of both devices under friendly column names.

    Naming: the 0x69 master becomes ``*_dashboard``, the 0x68 master becomes
    ``*_above_suspension`` and the 0x68 slave becomes ``*_below_suspension``.
    The slave columns of 0x69 are discarded. The output starts with the
    ``timestamp`` of 0x68, followed by acc, gyro, mag and temp columns, each
    group ordered dashboard / above / below (mag has no "below" columns).
    Columns are inserted one by one, so rows are aligned on the index of the
    first inserted column. The input frames are not modified.
    """
    axes = ('x', 'y', 'z')

    # Build the rename maps: raw column -> friendly column
    names_68 = {
        'master_temp': 'temp_above_suspension',
        'slave_temp': 'temp_below_suspension',
    }
    names_69 = {'master_temp': 'temp_dashboard'}
    for axis in axes:
        names_68['mag_' + axis] = 'mag_' + axis + '_above_suspension'
        names_69['mag_' + axis] = 'mag_' + axis + '_dashboard'
        for sensor in ('acc', 'gyro'):
            reading = sensor + '_' + axis
            names_68['master_' + reading] = reading + '_above_suspension'
            names_68['slave_' + reading] = reading + '_below_suspension'
            names_69['master_' + reading] = reading + '_dashboard'

    frame_68 = data_68.rename(columns=names_68)
    frame_69 = data_69.rename(columns=names_69)

    # The slave readings of 0x69 are not used
    unused = [
        'slave_' + sensor + '_' + axis
        for sensor in ('acc', 'gyro')
        for axis in axes
    ]
    unused.append('slave_temp')
    frame_69 = frame_69.drop(columns=unused)

    # Output layout: (column label, frame that provides it), in final order
    placements = (
        ('dashboard', frame_69),
        ('above_suspension', frame_68),
        ('below_suspension', frame_68),
    )
    layout = [('timestamp', frame_68)]
    for sensor in ('acc', 'gyro', 'mag'):
        for placement, frame in placements:
            if sensor == 'mag' and placement == 'below_suspension':
                continue
            layout.extend((sensor + '_' + axis + '_' + placement, frame) for axis in axes)
    layout.extend(('temp_' + placement, frame) for placement, frame in placements)

    merged = pd.DataFrame()
    for position, (label, frame) in enumerate(layout):
        merged.insert(position, label, frame[label], True)

    return merged

# Convert the accelerometer readings from g to m/s^2
def convertAcceleration(data):
    """Multiply the nine ``acc_<axis>_<placement>`` columns by ``gravity``.

    The columns are updated on ``data`` itself, which is also returned.
    """
    for placement in ('dashboard', 'above_suspension', 'below_suspension'):
        for axis in ('x', 'y', 'z'):
            data['acc_' + axis + '_' + placement] *= gravity

    return data
    
# Main method that processes the MPU data of one side
def processMPU(data_68, data_69, side):
    """Resample, merge, convert and save the MPU data of one side.

    Both frames have their timestamps fixed and are resampled onto the
    iniTime..endTime grid, then merged into a single frame whose
    accelerations are converted to m/s^2. The result is written to
    ``dataset_mpu_<side>.csv`` inside ``saveFolder`` and returned. The
    interval, the output sizes and the positions of the synthesised samples
    are printed along the way.
    """
    expected_samples = round((endTime - iniTime) / sampling_rate_interval) + 1

    print("Processing data from MPU on the", side, "side")
    print("Ini", iniTime)
    print("End", endTime)
    print("Step", sampling_rate_interval)
    print("Samples", expected_samples)

    # Correcting timestamps, then pairing 0x68 and 0x69 on the same grid
    data_68, data_69 = fixTimestamps(data_68), fixTimestamps(data_69)
    (points_68, data_68), (points_69, data_69) = sequencing(data_68), sequencing(data_69)

    # Joining both devices and converting acceleration from g to m/s^2
    data = convertAcceleration(mergeData(data_68, data_69))

    # Saving
    file = os.path.join(saveFolder, 'dataset_mpu_' + translateSide(side) + '.csv')
    data.to_csv(file, index=False)
    print("Saved in ", file)

    # Report
    print("Length Data 0x68 =", len(data_68))
    print("Length Data 0x69 =", len(data_69))
    print("\n", "Length New Points 0x68 =", len(points_68))
    print(points_68)
    print("\n","Length New Points 0x69 =", len(points_69))
    print(points_69)

    return data

In [None]:
# Progress-bar total for one processMPU call: sequencing() runs twice (0x68 and
# 0x69) and each run emits one sample per step from iniTime to endTime, both
# ends included - the same count processMPU prints as "Samples".
count_progress = (round((endTime - iniTime) / sampling_rate_interval) + 1) * 2

In [None]:
# Right side: a fresh progress bar (read by sequencing through the global
# load_bar), then the MPU processing on copies of the raw frames
load_bar = tqdm_notebook(total=count_progress)
data_direita = processMPU(raw_data_68_direita.copy(), raw_data_69_direita.copy(), 'direita')

In [None]:
# Left side: a fresh progress bar (read by sequencing through the global
# load_bar), then the MPU processing on copies of the raw frames
load_bar = tqdm_notebook(total=count_progress)
data_esquerda = processMPU(raw_data_68_esquerda.copy(), raw_data_69_esquerda.copy(), 'esquerda')

<h2>Synchronization of MPU and GPS data</h2>

<h4>Data Visualization</h4>

In [None]:
# First rows of the processed right-side MPU data
data_direita.head()

In [None]:
# First rows of the processed left-side MPU data
data_esquerda.head()

In [None]:
# First rows of the processed GPS data
data_gps.head()

<h4>Processing</h4>

In [None]:
# Maps GPS data based on the MPU sample timestamp
def mergeMPUGPS(data_mpu, data_gps, side):
    """Attach a GPS sample to every MPU sample and save the combined dataset.

    Both frames are walked once, front to back, and are addressed by label
    with ``.loc`` using 0-based counters. For each MPU row the GPS cursor
    advances while the last GPS timestamp read is <= the MPU timestamp; the
    GPS row at ``indexGPS - minusIndex`` is then taken.

    The columns ``timestamp_gps``, ``latitude``, ``longitude`` and ``speed``
    are appended to ``data_mpu`` (the frame passed in is modified) and the
    result is written to ``dataset_gps_mpu_<side>.csv`` inside ``saveFolder``.
    Nothing is returned.
    """

    columns = ['timestamp', 'latitude', 'longitude', 'speed_meters_per_second']
    result = []

    indexMPU = 0
    indexGPS = 0
    # Offset from the GPS cursor to the row to use: the cursor ends one past
    # the first GPS row that is later than the MPU sample, so 2 steps back is
    # the last GPS row not later than it.
    minusIndex = 2
    timestampMPU = 0
    timestampGPS = 0

    while indexMPU < len(data_mpu):

        timestampMPU = data_mpu.loc[indexMPU, 'timestamp']

        # Advance the GPS cursor until a GPS row later than the MPU sample
        # has been read (or the GPS data is exhausted).
        while timestampGPS <= timestampMPU and indexGPS < len(data_gps):
            timestampGPS = data_gps.loc[indexGPS, 'timestamp']
            indexGPS += 1

        # GPS data exhausted and the MPU sample is not earlier than the last
        # GPS row: from now on use the last GPS row (1 step back).
        if indexGPS == len(data_gps) and timestampMPU >= timestampGPS:
            minusIndex = 1

        # NOTE(review): if the first MPU sample is earlier than the first GPS
        # row, indexGPS - minusIndex is -1 here; with a default integer index
        # .loc[-1] presumably raises KeyError - confirm and decide how such
        # samples should be handled.
        series = data_gps.loc[indexGPS - minusIndex, columns] 
        result.append(series)
        indexMPU += 1

        load_bar.update(1)

    # One selected GPS row per MPU row, re-indexed to line up with data_mpu
    data = pd.DataFrame(result)
    data = data.reset_index(drop=True)
    data = data.rename(columns={ 'timestamp': 'timestamp_gps'})
    data = data.rename(columns={ 'speed_meters_per_second': 'speed'})

    # Append the GPS columns at the end of the MPU frame
    series = data['timestamp_gps']
    data_mpu.insert(len(data_mpu.columns), series.name, series, True)
    series = data['latitude']
    data_mpu.insert(len(data_mpu.columns), series.name, series, True)
    series = data['longitude']
    data_mpu.insert(len(data_mpu.columns), series.name, series, True)
    series = data['speed']
    data_mpu.insert(len(data_mpu.columns), series.name, series, True)

    file = os.path.join(saveFolder, "dataset_gps_mpu_" + translateSide(side) + ".csv")
    data_mpu.to_csv(file, index=False)
    print("Saved in ", file)

In [None]:
# Right side: one progress step per MPU row; copies keep the processed frames intact
load_bar = tqdm_notebook(total=len(data_direita))
mergeMPUGPS(data_direita.copy(), data_gps.copy(), "direita")

In [None]:
# Left side: one progress step per MPU row; copies keep the processed frames intact
load_bar = tqdm_notebook(total=len(data_esquerda))
mergeMPUGPS(data_esquerda.copy(), data_gps.copy(), "esquerda")