In [1]:
# All libraries required
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

In [2]:
# Load in the data
files = ['opel_corsa_01.csv', 'opel_corsa_02.csv', 'peugeot_207_01.csv', 'peugeot_207_02.csv']

all_data = pd.DataFrame()
for file in files:
    # Read in each file
    car_data = pd.read_csv("./resources/"+file, sep=";")

    # Remove unrelated columns
    car_data = car_data.drop(columns=['Unnamed: 0'])

    # Concatenate the data together
    all_data = pd.concat([all_data, car_data], ignore_index=True, sort=False)

all_data

Unnamed: 0,AltitudeVariation,VehicleSpeedInstantaneous,VehicleSpeedAverage,VehicleSpeedVariance,VehicleSpeedVariation,LongitudinalAcceleration,EngineLoad,EngineCoolantTemperature,ManifoldAbsolutePressure,EngineRPM,MassAirFlow,IntakeAirTemperature,VerticalAcceleration,FuelConsumptionAverage,roadSurface,traffic,drivingStyle
0,-2.299988,25.670519,13.223501,121.592690,-2.476980,0.3555,4.705883,68.0,106.0,1796.0,15.810000,24.0,-0.1133,19.497335,SmoothCondition,LowCongestionCondition,EvenPaceStyle
1,-2.099976,24.094259,13.638919,120.422571,-1.576260,0.4492,10.588236,68.0,103.0,1689.0,14.650000,22.0,-0.1289,19.515722,SmoothCondition,LowCongestionCondition,EvenPaceStyle
2,-1.500000,22.743179,14.031043,118.456769,-1.351080,0.4258,27.450981,68.0,103.0,1599.0,11.850000,21.0,-0.1328,19.441765,SmoothCondition,LowCongestionCondition,EvenPaceStyle
3,0.100037,22.292820,14.171073,117.571308,-0.450359,0.4140,24.313726,69.0,104.0,1620.0,12.210000,20.0,-0.0859,19.388769,SmoothCondition,LowCongestionCondition,EvenPaceStyle
4,0.099976,23.643900,14.328954,117.074149,1.351080,0.3945,20.000000,69.0,104.0,1708.0,11.910000,21.0,-0.0664,19.301638,SmoothCondition,LowCongestionCondition,EvenPaceStyle
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23770,1.000000,28.799999,28.559999,57.190571,3.600000,-0.0292,25.882353,81.0,115.0,1755.5,20.469999,25.0,-0.1661,14.578003,SmoothCondition,LowCongestionCondition,EvenPaceStyle
23771,1.699997,30.599998,28.529999,57.010266,1.799999,-0.0304,11.764706,81.0,106.0,736.5,17.740000,25.0,-0.1987,14.585642,SmoothCondition,LowCongestionCondition,EvenPaceStyle
23772,1.800003,29.699999,28.499999,56.883045,-0.900000,-0.1684,98.039215,81.0,106.0,1254.0,9.520000,24.0,-0.1156,14.547294,SmoothCondition,LowCongestionCondition,EvenPaceStyle
23773,2.100006,29.699999,28.409999,56.160910,0.000000,-0.0644,79.607841,80.0,112.0,1254.0,14.910000,23.0,-0.0760,14.546828,SmoothCondition,LowCongestionCondition,EvenPaceStyle


In [3]:
# Determine X (input) and y (output) - y in this case, will be drivingStyle

# Drop any values that have NaN
all_data = all_data.dropna()

x_data = all_data.copy().drop(columns=['roadSurface', 'traffic', 'drivingStyle'])
y_data = all_data[['drivingStyle']]
print(x_data.head())
print(y_data.head())

   AltitudeVariation  VehicleSpeedInstantaneous  VehicleSpeedAverage  \
0          -2.299988                  25.670519            13.223501   
1          -2.099976                  24.094259            13.638919   
2          -1.500000                  22.743179            14.031043   
3           0.100037                  22.292820            14.171073   
4           0.099976                  23.643900            14.328954   

   VehicleSpeedVariance  VehicleSpeedVariation  LongitudinalAcceleration  \
0            121.592690              -2.476980                    0.3555   
1            120.422571              -1.576260                    0.4492   
2            118.456769              -1.351080                    0.4258   
3            117.571308              -0.450359                    0.4140   
4            117.074149               1.351080                    0.3945   

   EngineLoad  EngineCoolantTemperature  ManifoldAbsolutePressure  EngineRPM  \
0    4.705883                 

In [4]:
# Pre-processing

# Normalise values
x_data = x_data.to_numpy()
x_data = preprocessing.normalize(x_data)

# Label Encoder
le_driving_style = preprocessing.LabelEncoder()
le_driving_style.fit(y_data)
y_data = le_driving_style.transform(y_data)
y_data

{'classes_': array(['AggressiveStyle', 'EvenPaceStyle'], dtype=object)}


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


array([1, 1, 1, ..., 1, 1, 1])

In [5]:
# Split into different data sets
X_train, X_test, y_train, y_test = train_test_split(x_data, y_data)
X_train

array([[-3.21467973e-03,  1.60733714e-02,  1.93416229e-02, ...,
         4.52435640e-02, -1.53720962e-03,  1.63354058e-02],
       [ 5.82369228e-04,  1.02287549e-02,  7.18635684e-03, ...,
         1.74710768e-02,  2.09652922e-04,  1.92563576e-02],
       [ 3.63988162e-03,  5.24562257e-02,  5.74481532e-02, ...,
         6.23979706e-03,  7.69574971e-05,  5.54359488e-03],
       ...,
       [-3.52125826e-03,  5.54599063e-02,  5.40639767e-02, ...,
         4.65309888e-02, -1.79735376e-03,  1.10321464e-02],
       [ 3.29199756e-03,  4.16393113e-02,  3.89234132e-02, ...,
         2.13534933e-02, -6.22854708e-04,  5.08908105e-03],
       [ 3.35758801e-03,  4.89887349e-02,  4.82706471e-02, ...,
         8.27899796e-03,  7.87930634e-03,  5.90643053e-03]])

----
# Algorithm 1: Logistic Regression