## Automotive Diagnostics

##### Import libraries

In [1]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA


### Data Exploration

In [2]:
# Read the 2017-07-05 "Stau" recording into data1; later cells operate on this frame.
# (An alternative input that was tried here: "OBD-II-Dataset/annotated.csv".)
file_path = "OBD-II-Dataset/2017-07-05_Seat_Leon_RT_S_Stau.csv"
data1 = pd.read_csv(filepath_or_buffer=file_path)

# Preview the leading rows of the recording
data1.head(n=11)

Unnamed: 0,Time,Engine Coolant Temperature [Â°C],Intake Manifold Absolute Pressure [kPa],Engine RPM [RPM],Vehicle Speed Sensor [km/h],Intake Air Temperature [Â°C],Air Flow Rate from Mass Flow Sensor [g/s],Absolute Throttle Position [%],Ambient Air Temperature [Â°C],Accelerator Pedal Position D [%],Accelerator Pedal Position E [%]
0,07:16:30.444,31,,,,,,,,,
1,07:16:30.535,31,96.0,,,,,,,,
2,07:16:30.625,31,96.0,0.0,,,,,,,
3,07:16:30.716,31,96.0,0.0,0.0,,,,,,
4,07:16:30.814,31,96.0,0.0,0.0,22.0,,,,,
5,07:16:30.895,31,96.0,0.0,0.0,22.0,0.91,,,,
6,07:16:30.987,31,96.0,0.0,0.0,22.0,0.91,89.0,,,
7,07:16:31.075,31,96.0,0.0,0.0,22.0,0.91,89.0,21.0,,
8,07:16:31.167,31,96.0,0.0,0.0,22.0,0.91,89.0,21.0,14.1,
9,07:16:31.257,31,96.0,0.0,0.0,22.0,0.91,89.0,21.0,14.1,14.5


In [5]:
# Read the 2017-07-07 "Normal" recording into data2.
file_path2 = "OBD-II-Dataset/2017-07-07_Seat_Leon_RT_S_Normal.csv"
data2 = pd.read_csv(filepath_or_buffer=file_path2)

# Preview the leading rows of the recording
data2.head(n=11)

Unnamed: 0,Time,Engine Coolant Temperature [Â°C],Intake Manifold Absolute Pressure [kPa],Engine RPM [RPM],Vehicle Speed Sensor [km/h],Intake Air Temperature [Â°C],Air Flow Rate from Mass Flow Sensor [g/s],Absolute Throttle Position [%],Ambient Air Temperature [Â°C],Accelerator Pedal Position D [%],Accelerator Pedal Position E [%]
0,07:23:12.009,32,,,,,,,,,
1,07:23:12.099,32,96.0,,,,,,,,
2,07:23:12.189,32,96.0,0.0,,,,,,,
3,07:23:12.279,32,96.0,0.0,0.0,,,,,,
4,07:23:12.369,32,96.0,0.0,0.0,23.0,,,,,
5,07:23:12.459,32,96.0,0.0,0.0,23.0,0.91,,,,
6,07:23:12.549,32,96.0,0.0,0.0,23.0,0.91,89.0,,,
7,07:23:12.639,32,96.0,0.0,0.0,23.0,0.91,89.0,22.0,,
8,07:23:12.729,32,96.0,0.0,0.0,23.0,0.91,89.0,22.0,14.1,
9,07:23:12.819,32,96.0,0.0,0.0,23.0,0.91,89.0,22.0,14.1,14.5


In [7]:
# Read the 2017-07-11 "Frei" recording into data3.
# (An alternative input that was tried here:
#  "OBD-II-Dataset/2017-07-05_Seat_Leon_RT_S_Frei.csv".)
file_path3 = "OBD-II-Dataset/2017-07-11_Seat_Leon_S_RT_Frei.csv"
data3 = pd.read_csv(filepath_or_buffer=file_path3)

# Preview the leading rows of the recording
data3.head(n=11)

Unnamed: 0,Time,Engine Coolant Temperature [Â°C],Intake Manifold Absolute Pressure [kPa],Engine RPM [RPM],Vehicle Speed Sensor [km/h],Intake Air Temperature [Â°C],Air Flow Rate from Mass Flow Sensor [g/s],Absolute Throttle Position [%],Ambient Air Temperature [Â°C],Accelerator Pedal Position D [%],Accelerator Pedal Position E [%]
0,18:34:06.275,54,,,,,,,,,
1,18:34:06.365,54,99.0,,,,,,,,
2,18:34:06.457,54,99.0,769.0,,,,,,,
3,18:34:06.548,54,99.0,769.0,0.0,,,,,,
4,18:34:06.636,54,99.0,769.0,0.0,41.0,,,,,
5,18:34:06.726,54,99.0,769.0,0.0,41.0,11.91,,,,
6,18:34:06.816,54,99.0,769.0,0.0,41.0,11.91,83.1,,,
7,18:34:06.880,54,99.0,769.0,0.0,41.0,11.91,83.1,23.0,,
8,18:34:06.967,54,99.0,769.0,0.0,41.0,11.91,83.1,23.0,14.1,
9,18:34:07.057,54,99.0,769.0,0.0,41.0,11.91,83.1,23.0,14.1,14.5


In [None]:
# data2 = pd.read_csv("OBD-II-Dataset/2017-07-05_Seat_Leon_S_KA_Normal.csv")
# # data1 = pd.read_csv("OBD-II-Dataset/annotated.csv")
# data2.head()

In [None]:
# Summarise the frame: index range, column names, non-null counts, dtypes and memory usage
data1.info()

In [None]:
# Number of missing values in each column
data1.isna().sum()

In [None]:
# Total number of missing cells across the whole frame
data1.isna().sum().sum()

In [None]:
# Set time index
# "Time" holds time-of-day strings only (e.g. "07:16:30.444"). pd.to_datetime without a
# format would attach the date on which the notebook is executed, so the index would
# differ from run to run. Parse the strings as offsets from midnight instead and anchor
# them to a fixed date.
# NOTE(review): the anchor date is taken from the file name of the recording loaded
# into data1 - adjust it if a different file is loaded.
if "Time" in data1.columns:  # skip when the cell is re-run and "Time" is already the index
    recording_date = pd.Timestamp("2017-07-05")
    data1["Time"] = recording_date + pd.to_timedelta(data1["Time"])
    data1 = data1.set_index("Time")

#### Visualise each column in the dataset

In [None]:
# Draw every column as its own subplot, stacked in one tall figure
data1.plot(figsize=(14, 25), subplots=True)
plt.show()

In [None]:
# Non-null observations recorded for each column
non_null_counts = data1.count()
print(non_null_counts)

In [None]:
# Report the number of rows (samples) in the frame.
# The count is interpolated into the f-string so the message is built in one place
# (an f-string without a placeholder plus a second print argument adds a stray space).
print(f"the number of rows in the data is: {len(data1)}")

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for each numeric column
data1.describe()

In [None]:
# Remove data points with missing value(s).
# The preview of the recording shows that its first rows are only partially filled
# (one more column gets a value on each row). dropna() removes exactly the incomplete
# rows, wherever they occur, and keeps the first complete one. Unlike a positional
# slice such as iloc[10:], it gives the same result when the cell is run again.
data1 = data1.dropna()
data1.head()

In [None]:
## Replace the verbose sensor headers with short identifiers.
## The names are assigned by position, so their order must match the column order of data1.
data1.columns = [
    'Engine_Coolant_Temperature',
    'Intake_Manifold_Abs_Pressure',
    'Engine_RPM',
    'Vehicle_Speed',
    'Intake_Air_Temperature',
    'AirFlow_Rate',
    'Throttle_Position',
    'Air_Temperature',
    'Acc_Pedal_Pos_D',
    'Acc_Pedal_Pos_E',
]

In [None]:
# Pairwise correlation between all columns of data1, annotated with the coefficients.
plt.figure(figsize=(15, 5))
# data1.corr() already covers every column; no positional slice is needed
sns.heatmap(data1.corr(), annot=True)
plt.title('Correlation between sensor readings')
# Render explicitly so the cell shows the figure rather than the Axes repr
plt.show()

#### Assessing Engine Load

In [None]:
# Engine-load view: engine RPM against throttle position and against airflow rate
selected_columns = ['Engine_RPM', 'Throttle_Position', 'AirFlow_Rate']
engine_data = data1[selected_columns]

rpm = engine_data['Engine_RPM']
throttle = engine_data['Throttle_Position']
airflow = engine_data['AirFlow_Rate']

# One entry per figure: (x values, colour values, x label, colourbar label, title)
panels = [
    (throttle, airflow, 'Throttle Position', 'AirFlow Rate', 'Engine RPM vs Throttle Position'),
    (airflow, throttle, 'Airflow Rate', 'Throttle Position', 'Engine RPM vs Airflow Rate'),
]

for x_vals, colour_vals, x_label, bar_label, title in panels:
    # RPM is always on the y-axis; the remaining signal is encoded as point colour
    plt.figure(figsize=(10, 6))
    plt.scatter(x_vals, rpm, c=colour_vals, cmap='viridis', alpha=0.5)
    plt.colorbar(label=bar_label)
    plt.xlabel(x_label)
    plt.ylabel('Engine RPM')
    plt.title(title)
    plt.grid(True)
    plt.show()

In [None]:
# TODO: check which features affect the vehicle speed (not implemented yet)

### Fault detection

In [None]:
# Define thresholds or ranges for each feature to detect anomalies.
# These are example ranges (min, max); a reading outside its range is flagged.
# NOTE(review): the previews of the recordings show idle readings such as 0-769 RPM,
# 0.91-11.91 g/s airflow and 31-54 °C coolant, all of which fall outside the example
# ranges below - confirm the ranges before interpreting the results.
thresholds = {
    'Engine_Coolant_Temperature': (80, 100),  # Example: normal range is 80-100°C
    'Intake_Manifold_Abs_Pressure': (80, 120),  # Example: normal range is 80-120 kPa
    'Engine_RPM': (1000, 6000),  # Example: normal range is 1000-6000 RPM
    'Intake_Air_Temperature': (20, 40),  # Example: normal range is 20-40°C
    'AirFlow_Rate': (200, 500),  # Example: normal range is 200-500 g/s
    'Throttle_Position': (0, 100),  # Example: normal range is 0-100%
    'Air_Temperature': (-10, 40),  # Example: normal range is -10-40°C
    'Acc_Pedal_Pos_D': (0, 100),  # Example: normal range is 0-100%
    'Acc_Pedal_Pos_E': (0, 100),  # Example: normal range is 0-100%
}

# Perform diagnosis for each feature: diagnosis_results maps a feature name to the
# rows of data1 whose reading for that feature is out of range.
diagnosis_results = {}
for feature, (min_val, max_val) in thresholds.items():
    # Boolean mask of readings below the minimum or above the maximum
    out_of_range = (data1[feature] < min_val) | (data1[feature] > max_val)
    if out_of_range.any():
        diagnosis_results[feature] = data1[out_of_range]

# Print diagnosis results: a per-feature summary and a short sample of the flagged
# readings, rather than dumping every flagged row into the cell output.
total_rows = len(data1)
for feature, anomalies in diagnosis_results.items():
    min_val, max_val = thresholds[feature]
    share = len(anomalies) / total_rows  # total_rows > 0 whenever anything was flagged
    print(
        f"Anomalies detected for {feature}: {len(anomalies)} of {total_rows} rows "
        f"({share:.1%}) outside [{min_val}, {max_val}]"
    )
    print(anomalies[[feature]].head())
    print()

In [None]:

# Visualise anomalies for each feature.
# Uses the `thresholds` mapping (feature -> (min, max)) defined in the fault-detection
# cell above, so the ranges are maintained in a single place.
for feature, (min_val, max_val) in thresholds.items():
    values = data1[feature].dropna()

    # Split the readings into in-range and out-of-range groups
    is_anomaly = (values < min_val) | (values > max_val)
    normal_values = values[~is_anomaly]
    anomalous_values = values[is_anomaly]

    # Shared bin edges computed from all readings, so the bars of both histograms
    # line up and their heights are directly comparable
    bin_edges = np.histogram_bin_edges(values, bins=20)

    # Plot feature distribution with anomalies highlighted
    plt.figure(figsize=(10, 6))
    plt.hist(normal_values, bins=bin_edges, color='blue', alpha=0.7, label='Normal Range')
    plt.hist(anomalous_values, bins=bin_edges, color='red', alpha=0.7, label='Anomalies')
    plt.xlabel(feature)
    plt.ylabel('Frequency')
    plt.title(f'{feature} Distribution with Anomalies')
    plt.legend()
    plt.grid(True)
    plt.show()

### Driver Behaviour Analysis

In [None]:
# Columns used for the driver-behaviour view: speed, RPM and both accelerator pedal signals
relevant_cols = ['Vehicle_Speed', 'Engine_RPM', 'Acc_Pedal_Pos_D', 'Acc_Pedal_Pos_E']
df = data1[relevant_cols]

# (column, axis label) pairs drawn on the y-axis, one panel each
targets = [('Vehicle_Speed', 'Vehicle Speed'), ('Engine_RPM', 'Engine RPM')]

# One figure per accelerator pedal signal (D, then E), each with a speed panel
# on the left and an RPM panel on the right
for sensor in ('D', 'E'):
    pedal_label = f'Accelerator Position {sensor}'
    pedal_values = df[f'Acc_Pedal_Pos_{sensor}']

    plt.figure(figsize=(12, 6))
    for panel, (column, target_label) in enumerate(targets, start=1):
        plt.subplot(1, 2, panel)
        plt.scatter(pedal_values, df[column], alpha=0.5)
        plt.title(f'{pedal_label} vs {target_label}')
        plt.xlabel(pedal_label)
        plt.ylabel(target_label)

    plt.tight_layout()
    plt.show()


In [None]:
# Plotting vehicle speed against RPM.
# Only one panel is drawn, so the scatter uses the whole figure instead of the left
# half of a 1x2 subplot grid, and the figure is rendered explicitly.
plt.figure(figsize=(12, 6))
plt.scatter(df['Vehicle_Speed'], df['Engine_RPM'], alpha=0.5)
plt.title('Vehicle Speed vs Engine RPM')
plt.xlabel('Vehicle Speed')
plt.ylabel('Engine RPM')
plt.tight_layout()
plt.show()

### One-way ANOVA 
##### Compare vehicle speeds across the three traffic conditions (Normal, Free, and Busy)