## Import Libraries

In [None]:
import pandas as pd
import numpy as np

## Load Raw Dataset

In [None]:
# Load the comma-delimited raw dump into a DataFrame.
# The path is a raw string because "\d" is not a valid escape sequence; a plain
# literal only works by accident and triggers a warning on newer Python versions.
# NOTE(review): the backslash separator makes this path Windows-specific.
df_raw = pd.read_table(r"RawData\dumpRawDataB5.txt", delimiter=",")

## Process Raw Dataset

In [None]:
def process_length(df_raw, time_step=0.04, unit_scale=0.001):
    """Add a cumulative 'B5_Length_Coordinate' column to ``df_raw``.

    Missing values in 'B5_Abs_Plaus_Time' are first replaced with 0.0. Rows are
    then walked in order. Within each run of rows sharing the same '_FileId',
    the first two rows whose 'B5_Abs_Plaus_Time' equals 1 get a length of 0;
    every later such row gets the previous length plus
    ``Snelheid_HA * time_step * unit_scale``. Rows whose 'B5_Abs_Plaus_Time'
    is not 1 get 0 and leave the running length untouched. The running length
    and the row counter restart whenever '_FileId' changes.

    Args:
        df_raw: DataFrame with '_FileId', 'Snelheid_HA' and
            'B5_Abs_Plaus_Time' columns. It is modified in place.
        time_step: Factor applied to the speed value (presumably the sample
            period of the recording -- TODO confirm unit).
        unit_scale: Second factor applied to the speed value (presumably a
            unit conversion -- TODO confirm).

    Returns:
        The same DataFrame object, with 'B5_Abs_Plaus_Time' filled and
        'B5_Length_Coordinate' added.
    """
    ### Fill in NA / NaN values in 'B5_Abs_Plaus_Time' column
    df_raw['B5_Abs_Plaus_Time'] = df_raw['B5_Abs_Plaus_Time'].fillna(0.0)

    length_list = []
    prev_length = 0.0
    curr_file = 0
    counter = 0   ### Number of rows with 'B5_Abs_Plaus_Time' == 1 seen so far in this file (stops at 2)

    rows = zip(df_raw['_FileId'], df_raw['Snelheid_HA'], df_raw['B5_Abs_Plaus_Time'])
    for file_id, speed, plaus in rows:
        ### A new '_FileId' restarts the accumulation; the row itself is then
        ### handled by the same logic as any other row.
        if file_id != curr_file:
            curr_file = file_id
            prev_length = 0.0
            counter = 0

        if plaus != 1.0:
            length_list.append(0)
            continue

        if counter > 1:
            ### Third or later plausible row of this file: accumulate length
            prev_length = prev_length + ((speed * time_step) * unit_scale)
            length_list.append(prev_length)
        else:
            ### First two plausible rows of a file contribute no length
            length_list.append(0)
            counter = counter + 1

    ### Add 'length_list' list as 'B5_Length_Coordinate' column
    df_raw['B5_Length_Coordinate'] = length_list

    ### Return processed dataframe
    return df_raw

In [None]:
def process_B5(df_length):
    """Add a 'B5' column equal to 'B5_Time_Based' multiplied by 0.1.

    Missing values in the result are replaced with 0.0. The multiplication is
    done on the whole column at once rather than element by element.

    Args:
        df_length: DataFrame with a 'B5_Time_Based' column. It is modified
            in place.

    Returns:
        The same DataFrame object, with the 'B5' column added.
    """
    ### Scale 'B5_Time_Based' and fill NA / NaN values in one step
    df_length['B5'] = (df_length['B5_Time_Based'] * 0.1).fillna(0.0)

    ### Return processed dataframe
    return df_length

In [None]:
# Run the two processing steps in sequence. Both functions modify the frame
# they receive and return that same object, so df_raw, df_length and
# df_processed all refer to one DataFrame after these calls.
df_length = process_length(df_raw)
df_processed = process_B5(df_length)

## Drop columns and save processed dataset to CSV

In [None]:
# Drop the four columns at positions 1 through 4. They are selected by
# position, not by name, so this depends on the column order of the input.
df_processed.drop(df_processed.columns[1:5], inplace=True, axis=1)
# Write the result with header and index. The path is a raw string because
# "\B" is not a valid escape sequence and a plain literal triggers a warning
# on newer Python versions.
df_processed.to_csv(r"RawData\B5_processed.csv", index=True, header=True)