## Automotive OBD-II-Dataset
```
Preparing the data for use in deployment
``` 

In [16]:
# import required libraries
import pickle
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

In [17]:
# Load the raw OBD-II log into a DataFrame and preview the first rows.
# The path is relative to the notebook's working directory and uses forward
# slashes so it resolves on Windows, Linux and macOS alike (a backslash-separated
# literal is only treated as a directory path on Windows).
obd_data = pd.read_csv("OBD-II-Dataset/2017-07-05_Seat_Leon_RT_S_Stau.csv")
obd_data.head()

Unnamed: 0,Time,Engine Coolant Temperature [Â°C],Intake Manifold Absolute Pressure [kPa],Engine RPM [RPM],Vehicle Speed Sensor [km/h],Intake Air Temperature [Â°C],Air Flow Rate from Mass Flow Sensor [g/s],Absolute Throttle Position [%],Ambient Air Temperature [Â°C],Accelerator Pedal Position D [%],Accelerator Pedal Position E [%]
0,07:16:30.444,31,,,,,,,,,
1,07:16:30.535,31,96.0,,,,,,,,
2,07:16:30.625,31,96.0,0.0,,,,,,,
3,07:16:30.716,31,96.0,0.0,0.0,,,,,,
4,07:16:30.814,31,96.0,0.0,0.0,22.0,,,,,


In [18]:
# Drop every row that still has at least one missing value; the preview of the
# raw frame shows the first rows arriving with only some sensor columns filled.
# The result is rebound to the same name instead of using inplace=True, so the
# cell stays a plain "input -> output" expression and later cells keep working.
obd_data = obd_data.dropna(axis=0)

In [19]:
# Sanity check after the cleanup: number of missing values left in each column
# (every count is expected to be 0).
obd_data.isna().sum(axis="index")

Time                                         0
Engine Coolant Temperature [Â°C]             0
Intake Manifold Absolute Pressure [kPa]      0
Engine RPM [RPM]                             0
Vehicle Speed Sensor [km/h]                  0
Intake Air Temperature [Â°C]                 0
Air Flow Rate from Mass Flow Sensor [g/s]    0
Absolute Throttle Position [%]               0
Ambient Air Temperature [Â°C]                0
Accelerator Pedal Position D [%]             0
Accelerator Pedal Position E [%]             0
dtype: int64

In [20]:
# Columns kept as model inputs, in the order they will appear in the scaled array.
# "Time" and the two accelerator pedal position columns are intentionally left out.
# NOTE(review): the "Â°C" in several names looks like a mis-decoded "°C". These
# strings must match the CSV header exactly, so verify against the file before
# changing them.
features = [
    "Engine Coolant Temperature [Â°C]",
    "Intake Manifold Absolute Pressure [kPa]",
    "Engine RPM [RPM]",
    "Vehicle Speed Sensor [km/h]",
    "Intake Air Temperature [Â°C]",
    "Air Flow Rate from Mass Flow Sensor [g/s]",
    "Absolute Throttle Position [%]",
    "Ambient Air Temperature [Â°C]",
]

# All rows, restricted to the selected feature columns.
df = obd_data.loc[:, features]

In [21]:
# Preview the first rows of the selected feature columns.
df.head()

Unnamed: 0,Engine Coolant Temperature [Â°C],Intake Manifold Absolute Pressure [kPa],Engine RPM [RPM],Vehicle Speed Sensor [km/h],Intake Air Temperature [Â°C],Air Flow Rate from Mass Flow Sensor [g/s],Absolute Throttle Position [%],Ambient Air Temperature [Â°C]
9,31,96.0,0.0,0.0,22.0,0.91,89.0,21.0
10,31,96.0,0.0,0.0,22.0,0.91,89.0,21.0
11,31,96.0,0.0,0.0,22.0,0.91,89.0,21.0
12,31,96.0,0.0,0.0,22.0,0.91,89.0,21.0
13,31,96.0,0.0,0.0,22.0,0.91,89.0,21.0


In [22]:
# Summary statistics (count, mean, std, min, quartiles, max) for each feature.
# NOTE(review): in the recorded output, Intake Air Temperature spans -7 to 146
# while its quartiles sit between 23 and 32 — possibly sensor glitches; confirm,
# since a min/max based scaler fitted on this data will use those extremes.
df.describe()

Unnamed: 0,Engine Coolant Temperature [Â°C],Intake Manifold Absolute Pressure [kPa],Engine RPM [RPM],Vehicle Speed Sensor [km/h],Intake Air Temperature [Â°C],Air Flow Rate from Mass Flow Sensor [g/s],Absolute Throttle Position [%],Ambient Air Temperature [Â°C]
count,46340.0,46340.0,46340.0,46340.0,46340.0,46340.0,46340.0,46340.0
mean,84.916487,117.676306,1378.366055,49.577039,29.205762,18.009276,82.740356,19.512775
std,14.479516,24.174292,442.372368,37.189228,13.141223,11.244804,6.953171,1.666187
min,29.0,36.0,0.0,0.0,-7.0,0.0,13.7,16.0
25%,90.0,103.0,1054.0,17.0,23.0,9.91,83.5,18.0
50%,90.0,109.0,1443.0,42.0,28.0,15.52,83.5,20.0
75%,91.0,123.0,1689.0,83.0,32.0,22.63,83.5,21.0
max,94.0,231.0,2689.0,126.0,146.0,88.8,89.0,23.0


In [23]:
# Convert the feature DataFrame to a plain NumPy array (one row per sample, one
# column per entry of `features`); column labels are not carried over.

data = df.to_numpy()

In [24]:
# Peek at the first five rows of the raw (unscaled) array.
data[:5]

array([[31.  , 96.  ,  0.  ,  0.  , 22.  ,  0.91, 89.  , 21.  ],
       [31.  , 96.  ,  0.  ,  0.  , 22.  ,  0.91, 89.  , 21.  ],
       [31.  , 96.  ,  0.  ,  0.  , 22.  ,  0.91, 89.  , 21.  ],
       [31.  , 96.  ,  0.  ,  0.  , 22.  ,  0.91, 89.  , 21.  ],
       [31.  , 96.  ,  0.  ,  0.  , 22.  ,  0.91, 89.  , 21.  ]])

In [25]:
# Min-max scale each feature column. Fitting (learning the per-column min/max)
# and transforming are written as two explicit steps; the fitted `scaler`
# object is what gets saved at the end of the notebook.
scaler = MinMaxScaler()
scaler.fit(data)
data_scaled = scaler.transform(data)

# Show the first five scaled rows.
data_scaled[:5]

array([[0.03076923, 0.30769231, 0.        , 0.        , 0.18954248,
        0.01024775, 1.        , 0.71428571],
       [0.03076923, 0.30769231, 0.        , 0.        , 0.18954248,
        0.01024775, 1.        , 0.71428571],
       [0.03076923, 0.30769231, 0.        , 0.        , 0.18954248,
        0.01024775, 1.        , 0.71428571],
       [0.03076923, 0.30769231, 0.        , 0.        , 0.18954248,
        0.01024775, 1.        , 0.71428571],
       [0.03076923, 0.30769231, 0.        , 0.        , 0.18954248,
        0.01024775, 1.        , 0.71428571]])

In [26]:
# Shape of the scaled array: (number of samples, number of features).
data_scaled.shape

(46340, 8)

In [27]:
# Shape of a single scaled sample: a flat vector with one entry per feature.
data_scaled[0].shape

(8,)

In [29]:
# Persist the fitted scaler to disk so the same scaling can be re-applied
# outside this notebook (presumably by the deployment code — confirm the path
# it expects).
# The file is opened in a `with` block so the handle is flushed and closed even
# if pickling fails; previously the handle returned by open() was never closed.
# NOTE: pickle files can execute arbitrary code when loaded — only unpickle this
# file from a trusted location.
file_name = "obd_scaler.pkl"
with open(file_name, "wb") as scaler_file:
    pickle.dump(scaler, scaler_file)