# Import Libraries

In [1]:
import os
import pickle

import numpy as np
import pandas as pd

from sklearn.preprocessing import MinMaxScaler

from sklearn.model_selection import train_test_split

import seaborn as sns

# Data Acquisition

In [2]:
# Read the gasoline and ethanol `*_data_shifted.csv` files into two DataFrames.
data_gasoline, data_ethanol = map(
    pd.read_csv,
    ("../data/gasoline_data_shifted.csv", "../data/ethanol_data_shifted.csv"),
)

In [3]:
# Preview the first five rows of the gasoline frame.
data_gasoline.head(n=5)

Unnamed: 0,EngineLoad_(t-2),EngineLoad_(t-1),EngineLoad_(t),EngineRPM_(t-2),EngineRPM_(t-1),EngineRPM_(t),SpeedOBD_(t-2),SpeedOBD_(t-1),SpeedOBD_(t),ThrottlePosition_(t-2),ThrottlePosition_(t-1),ThrottlePosition_(t),TimingAdvance_(t-2),TimingAdvance_(t-1),TimingAdvance_(t),FuelType
0,34.12,42.35,34.12,831.5,878.0,878.0,0,3.0,3.0,14.51,14.12,14.51,2.0,2.0,1.5,1
1,42.35,34.12,28.63,878.0,878.0,864.0,3,3.0,5.0,14.12,14.51,14.51,2.0,1.5,0.0,1
2,34.12,28.63,27.45,878.0,864.0,864.0,3,5.0,3.0,14.51,14.51,13.73,1.5,0.0,6.0,1
3,28.63,27.45,26.27,864.0,864.0,812.0,5,3.0,0.0,14.51,13.73,13.33,0.0,6.0,3.5,1
4,27.45,26.27,26.27,864.0,812.0,780.0,3,0.0,2.0,13.73,13.33,16.86,6.0,3.5,16.5,1


In [4]:
# Display the (rows, columns) dimensions of the gasoline frame.
data_gasoline.shape

(113317, 16)

In [5]:
# Preview the first five rows of the ethanol frame.
data_ethanol.head(n=5)

Unnamed: 0,EngineLoad_(t-2),EngineLoad_(t-1),EngineLoad_(t),EngineRPM_(t-2),EngineRPM_(t-1),EngineRPM_(t),SpeedOBD_(t-2),SpeedOBD_(t-1),SpeedOBD_(t),ThrottlePosition_(t-2),ThrottlePosition_(t-1),ThrottlePosition_(t),TimingAdvance_(t-2),TimingAdvance_(t-1),TimingAdvance_(t),FuelType
0,36.08,36.08,26.67,931.5,810.5,767.0,5.0,0.0,0.0,14.51,13.33,13.33,15.0,-3.0,-3.0,0
1,36.08,26.67,27.45,810.5,767.0,767.0,0.0,0.0,0.0,13.33,13.33,13.33,-3.0,-3.0,3.0,0
2,26.67,27.45,27.45,767.0,767.0,734.5,0.0,0.0,0.0,13.33,13.33,13.73,-3.0,3.0,-1.0,0
3,27.45,27.45,47.84,767.0,734.5,1347.5,0.0,0.0,0.0,13.33,13.73,16.08,3.0,-1.0,19.5,0
4,27.45,47.84,36.08,734.5,1347.5,1347.5,0.0,0.0,0.0,13.73,16.08,16.08,-1.0,19.5,19.5,0


In [6]:
# Display the (rows, columns) dimensions of the ethanol frame.
data_ethanol.shape

(92432, 16)

# New Dataset

In [7]:
# Stack the gasoline rows on top of the ethanol rows into one labelled dataset.
# Both inputs carry their own 0..N index, so `ignore_index=True` is needed to
# give the combined frame a fresh, unique RangeIndex; without it every label up
# to the shorter frame's length appears twice and label-based lookups/alignment
# become ambiguous. Row order and column values are unchanged.
fuel_data_shifted = pd.concat(
    [data_gasoline, data_ethanol],
    axis=0,
    ignore_index=True,
)

In [8]:
# Preview the first five rows of the combined frame.
fuel_data_shifted.head(n=5)

Unnamed: 0,EngineLoad_(t-2),EngineLoad_(t-1),EngineLoad_(t),EngineRPM_(t-2),EngineRPM_(t-1),EngineRPM_(t),SpeedOBD_(t-2),SpeedOBD_(t-1),SpeedOBD_(t),ThrottlePosition_(t-2),ThrottlePosition_(t-1),ThrottlePosition_(t),TimingAdvance_(t-2),TimingAdvance_(t-1),TimingAdvance_(t),FuelType
0,34.12,42.35,34.12,831.5,878.0,878.0,0.0,3.0,3.0,14.51,14.12,14.51,2.0,2.0,1.5,1
1,42.35,34.12,28.63,878.0,878.0,864.0,3.0,3.0,5.0,14.12,14.51,14.51,2.0,1.5,0.0,1
2,34.12,28.63,27.45,878.0,864.0,864.0,3.0,5.0,3.0,14.51,14.51,13.73,1.5,0.0,6.0,1
3,28.63,27.45,26.27,864.0,864.0,812.0,5.0,3.0,0.0,14.51,13.73,13.33,0.0,6.0,3.5,1
4,27.45,26.27,26.27,864.0,812.0,780.0,3.0,0.0,2.0,13.73,13.33,16.86,6.0,3.5,16.5,1


In [9]:
# Class balance of the combined dataset, expressed as fractions of all rows.
(
    fuel_data_shifted["FuelType"]
    .value_counts(normalize=True)
)

FuelType
1    0.550754
0    0.449246
Name: proportion, dtype: float64

In [10]:
# Stratified 80/10/10 split: hold out 20% of the rows first, then halve that
# hold-out into validation and test sets. Each call stratifies on FuelType so
# the class ratio is preserved, and uses a fixed seed for reproducibility.
train_data, temp_data = train_test_split(
    fuel_data_shifted,
    test_size=0.2,
    random_state=42,
    stratify=fuel_data_shifted["FuelType"],
)
val_data, test_data = train_test_split(
    temp_data,
    test_size=0.5,
    random_state=42,
    stratify=temp_data["FuelType"],
)

In [11]:
# Class balance of the training split, as fractions of its rows.
(
    train_data["FuelType"]
    .value_counts(normalize=True)
)

FuelType
1    0.550751
0    0.449249
Name: proportion, dtype: float64

In [12]:
# Class balance of the validation split, as fractions of its rows.
(
    val_data["FuelType"]
    .value_counts(normalize=True)
)

FuelType
1    0.550765
0    0.449235
Name: proportion, dtype: float64

In [13]:
# Class balance of the test split, as fractions of its rows.
(
    test_data["FuelType"]
    .value_counts(normalize=True)
)

FuelType
1    0.550765
0    0.449235
Name: proportion, dtype: float64

In [21]:
# Mean of every third column, starting at the first and leaving out the last
# column (the label); in this dataset these are the "(t-2)" readings.
train_data.loc[:, train_data.columns[:-1:3]].mean()

EngineLoad_(t-2)            28.804477
EngineRPM_(t-2)           1399.148364
SpeedOBD_(t-2)              32.155463
ThrottlePosition_(t-2)      17.957185
TimingAdvance_(t-2)          9.414875
dtype: float64

In [22]:
# Standard deviation of every third column, starting at the first and leaving
# out the last column (the label); in this dataset these are the "(t-2)" readings.
train_data.loc[:, train_data.columns[:-1:3]].std()

EngineLoad_(t-2)           15.827171
EngineRPM_(t-2)           519.767668
SpeedOBD_(t-2)             28.234655
ThrottlePosition_(t-2)      8.784691
TimingAdvance_(t-2)        10.843215
dtype: float64

# Scaling

In [14]:
# Fit the MinMaxScaler on the training split once and cache it on disk, so that
# later runs reuse exactly the same scaling parameters.
#
# The previous guard tested the path against `os.listdir()`, which returns the
# bare entry names of the current working directory. A string containing
# "../models/" can never match one of those names, so the scaler was refit and
# the pickle overwritten on every run, and the load branch was unreachable.
# `os.path.exists` checks the actual file. Delete the pickle to force a refit
# after the training data changes.
scaler_path = "../models/data-normalizer.pkl"

if not os.path.exists(scaler_path):
    scaler_shifted = MinMaxScaler()
    # Fit on the training split only, so validation/test statistics do not
    # influence the scaling parameters.
    normalized_train = scaler_shifted.fit_transform(train_data).astype(np.float32)

    # Create the target folder if it is missing so the dump cannot fail on a
    # fresh checkout.
    os.makedirs(os.path.dirname(scaler_path), exist_ok=True)
    with open(scaler_path, "wb") as f:
        pickle.dump(scaler_shifted, f)
else:
    # NOTE(review): pickle.load can execute arbitrary code embedded in the file;
    # only load a scaler written by this notebook, never an untrusted file.
    with open(scaler_path, "rb") as f:
        scaler_shifted = pickle.load(f)

In [15]:
# Apply the already-fitted scaler to each split and store the result as float32.
normalized_train, normalized_val, normalized_test = (
    scaler_shifted.transform(split).astype(np.float32)
    for split in (train_data, val_data, test_data)
)

# Saving

In [16]:
# Write the scaled training split to CSV, reusing the original column names and
# omitting the index column.
pd.DataFrame(data=normalized_train, columns=train_data.columns).to_csv(
    "../data/normalized_train.csv",
    index=False,
)

# Show the dimensions of the saved array as the cell output.
normalized_train.shape

(164599, 16)

In [17]:
# Write the scaled validation split to CSV, reusing the original column names
# and omitting the index column.
pd.DataFrame(data=normalized_val, columns=val_data.columns).to_csv(
    "../data/normalized_val.csv",
    index=False,
)

# Show the dimensions of the saved array as the cell output.
normalized_val.shape

(20575, 16)

In [18]:
# Write the scaled test split to CSV, reusing the original column names and
# omitting the index column.
pd.DataFrame(data=normalized_test, columns=test_data.columns).to_csv(
    "../data/normalized_test.csv",
    index=False,
)

# Show the dimensions of the saved array as the cell output.
normalized_test.shape

(20575, 16)

In [None]:
# Dump the fitted scaler's per-column statistics (min, max, range, scale factor)
# to a plain-text file, one labelled line group per statistic.
with open("../data/scaler_output.txt", 'w') as report_file:
    report_file.writelines((
        f"Min: {scaler_shifted.data_min_}\n",
        f"Max: {scaler_shifted.data_max_}\n",
        f"Interval: {scaler_shifted.data_range_}\n",
        f"Scale: {scaler_shifted.scale_}\n",
    ))

In [None]:
import pandas as pd

In [5]:
# Export the first 500 rows of the saved normalized test split to a CSV in the
# current directory, without the index column.
(
    pd.read_csv("../data/normalized_test.csv")
    .head(500)
    .to_csv("./normalized_test_freematics.csv", index=False)
)

In [4]:
# Export the first 500 rows of df_test.csv to a CSV in the current directory,
# without the index column.
# NOTE(review): "../data/df_test.csv" is not written anywhere in the visible
# notebook — confirm which step produces it.
(
    pd.read_csv("../data/df_test.csv")
    .head(500)
    .to_csv("./df_test_freematics.csv", index=False)
)