In [2]:
import glob

import pandas as pd

# show more columns
pd.options.display.max_columns = 500

# show the large integer as a regular number
pd.options.display.float_format = '{:.0f}'.format

### Reading the data

In [3]:
file_list = glob.glob("../data/*.parquet")
dfs = []

for file in file_list:
    df = pd.read_parquet(path=file)
    df['file'] = file
    dfs.append(df)

vehicles_data = pd.concat(dfs)

In [6]:
logger_operations = pd.read_csv(filepath_or_buffer="../data/tires_vehicle_logger_operations.csv", sep=",")
operational_data = logger_operations.loc[logger_operations['loggerno'].isin([8750061, 8750076]), :]

### Preprocessing vehicles data

In [4]:
vehicles_data[['vin', 'loggerno', 'created_at', 'suffix']] = vehicles_data['file'].str.split('_', expand=True)
vehicles_data.drop(columns=['file', 'suffix'], inplace=True)
vehicles_data['vin'] = vehicles_data['vin'].str.replace(pat='^\./data/', repl='', regex=True)
vehicles_data['loggerno'] = vehicles_data['loggerno'].str.lstrip('0').astype(int)
vehicles_data['created_at'] = pd.to_datetime(vehicles_data['created_at'], format='%Y%m')

# drop unnecesary columns
if vehicles_data['ts'].equals(vehicles_data['ts_sec']):
    vehicles_data.drop(columns=['ts_sec'], inplace=True)
vehicles_data.drop(columns=['gps_long', 'gps_lat', 'alt', 'steeringwheel_angle', 'highres', 'vin'], inplace=True)

vehicles_data.head(3)

TypeError: int() argument must be a string, a bytes-like object or a real number, not 'NoneType'

### Unpivoting

In [8]:
def translate_wheel_position(wheel_position):
    translated_wheel_position = ''
    if wheel_position[0] == 'f':
        translated_wheel_position = '1' + wheel_position[1]
    else:
        translated_wheel_position = '2' + wheel_position[1]
    return translated_wheel_position.upper()

In [9]:
df_task01 = vehicles_data.melt(
    id_vars=['loggerno', 'created_at', 'temp_outside', 'tachometer_km', 'speed', 'ts', 'ts_int'], 
    var_name='variable', 
    value_name='value')

df_task01[['parameter', 'wheelpos']] = df_task01['variable'].str.split('_', expand=True)
df_task01.drop('variable', axis=1, inplace=True)

df_task01['wheelpos'] = df_task01['wheelpos'].apply(translate_wheel_position)

# add the value column to the end of the DataFrame
value_column = df_task01.pop('value')
df_task01 = df_task01.assign(value=value_column)

df_task01.head(20)

Unnamed: 0,loggerno,created_at,temp_outside,tachometer_km,speed,ts,ts_int,parameter,wheelpos,value
0,8750076,2021-12-01,,14581,0,2021-12-01 05:16:11,1638335771000000,wheelspeed,2R,0
1,8750076,2021-12-01,3.0,14581,0,2021-12-01 05:16:13,1638335773000000,wheelspeed,2R,0
2,8750076,2021-12-01,3.0,14581,0,2021-12-01 05:16:14,1638335774000000,wheelspeed,2R,0
3,8750076,2021-12-01,3.0,14581,0,2021-12-01 05:16:16,1638335776000000,wheelspeed,2R,0
4,8750076,2021-12-01,3.0,14581,0,2021-12-01 05:16:17,1638335777000000,wheelspeed,2R,0
5,8750076,2021-12-01,3.0,14581,0,2021-12-01 05:16:19,1638335779000000,wheelspeed,2R,0
6,8750076,2021-12-01,3.0,14581,0,2021-12-01 05:16:20,1638335780000000,wheelspeed,2R,0
7,8750076,2021-12-01,3.0,14581,0,2021-12-01 05:16:22,1638335782000000,wheelspeed,2R,0
8,8750076,2021-12-01,3.0,14581,0,2021-12-01 05:16:23,1638335783000000,wheelspeed,2R,0
9,8750076,2021-12-01,3.0,14581,0,2021-12-01 05:16:25,1638335785000000,wheelspeed,2R,0


### Write data

In [10]:
selected_columns = [
    'loggerno',
    'tireid',
    'created_at',
    'temp_outside',
    'tachometer_km',
    'speed',
    'ts',
    'ts_int',
    'parameter',
    'wheelpos',
    'value'
]

df_task01 = pd.merge(left=df_task01, right=operational_data, how='inner', on=['loggerno', 'wheelpos'])

df_task01 = df_task01.loc[:, selected_columns]

display(df_task01.head(10))

df_task01.to_parquet(path='../data/task01.parquet')

Unnamed: 0,loggerno,tireid,created_at,temp_outside,tachometer_km,speed,ts,ts_int,parameter,wheelpos,value
0,8750076,173553,2021-12-01,,14581,0,2021-12-01 05:16:11,1638335771000000,wheelspeed,2R,0
1,8750076,173553,2021-12-01,3.0,14581,0,2021-12-01 05:16:13,1638335773000000,wheelspeed,2R,0
2,8750076,173553,2021-12-01,3.0,14581,0,2021-12-01 05:16:14,1638335774000000,wheelspeed,2R,0
3,8750076,173553,2021-12-01,3.0,14581,0,2021-12-01 05:16:16,1638335776000000,wheelspeed,2R,0
4,8750076,173553,2021-12-01,3.0,14581,0,2021-12-01 05:16:17,1638335777000000,wheelspeed,2R,0
5,8750076,173553,2021-12-01,3.0,14581,0,2021-12-01 05:16:19,1638335779000000,wheelspeed,2R,0
6,8750076,173553,2021-12-01,3.0,14581,0,2021-12-01 05:16:20,1638335780000000,wheelspeed,2R,0
7,8750076,173553,2021-12-01,3.0,14581,0,2021-12-01 05:16:22,1638335782000000,wheelspeed,2R,0
8,8750076,173553,2021-12-01,3.0,14581,0,2021-12-01 05:16:23,1638335783000000,wheelspeed,2R,0
9,8750076,173553,2021-12-01,3.0,14581,0,2021-12-01 05:16:25,1638335785000000,wheelspeed,2R,0
