In [None]:
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
from joblib import dump, load
import matplotlib.pyplot as plt

In [None]:
# Baseline recording used to fit the scaler and the anomaly model below.
# The path is relative to the notebook's working directory.
normal_df = pd.read_csv(filepath_or_buffer='../data/alfa-romeo-normal-data.csv')

# Preview the first rows to sanity-check the columns that were read.
normal_df.head()

In [None]:
# Standardize the normal recording. The fitted scaler is kept because the
# same scaling has to be applied to any data scored by the model later on.
scaler = StandardScaler()
scaler.fit(normal_df)
scaled_normal_df = scaler.transform(normal_df)

# Isolation Forest hyper-parameters; random_state is pinned so the fitted
# trees are reproducible across runs.
forest_params = {
    'n_estimators': 100,
    'contamination': 'auto',
    'random_state': 42,
}
iso_forest = IsolationForest(**forest_params)

# Fit on the standardized normal data only (last expression: the fitted
# estimator is shown as the cell output).
iso_forest.fit(scaled_normal_df)

In [None]:
# Persist both fitted artifacts so the scoring cells can restore them
# without re-running the training cell.
dump(value=scaler, filename='../models/scaler.joblib')
dump(value=iso_forest, filename='../models/iforest.joblib')

In [None]:
# Recording to be scored for anomalies; read from a path relative to the
# notebook's working directory.
faulty_df = pd.read_csv(filepath_or_buffer='../data/alfa-romeo-faulty-data.csv')

# Preview the first rows to sanity-check the columns that were read.
faulty_df.head()

In [None]:
# Restore the fitted scaler and Isolation Forest written by the save cell.
# NOTE: joblib.load unpickles the file, so only load artifacts you trust.
scaler = load(filename='../models/scaler.joblib')
model = load(filename='../models/iforest.joblib')

In [None]:
# Score the faulty recording with the scaler and model restored from disk.
#
# `faulty_df` is rebuilt from a feature-only view instead of being mutated in
# place, so this cell can be re-run without feeding the columns it added on a
# previous run back into the scaler.

# Parse the timestamps and keep them aside: 'Datetime' is not a model feature.
datetime_col = pd.to_datetime(faulty_df['Datetime'])

# Constant multipliers applied to the raw anomaly score. These are plain
# rescalings (not a statistical normalization), presumably chosen so the score
# is visible next to each injector signal when plotted -- confirm the factors.
score_display_factors = {
    'Anomaly_Score_IFR_Norm': 1000,
    'Anomaly_Score_IPW_Norm': 10,
    'Anomaly_Score_IT_Norm': 100,
}
derived_cols = ['Anomaly_Score', *score_display_factors, 'Anomaly_Label']

# Feature view: everything except the timestamp and any columns this cell
# produced on an earlier run (errors='ignore' makes the first run a no-op for
# the derived columns).
feature_df = faulty_df.drop(columns=['Datetime', *derived_cols], errors='ignore')

# Use transform(), NOT fit_transform(): the scaler must keep the mean/variance
# learned from the normal data. Re-fitting on the faulty data would rescale it
# differently from what the model was trained on and distort the scores.
scaled_faulty_df = scaler.transform(feature_df)

# Score with the model loaded from disk (`model`) rather than the in-memory
# training variable, so this section works without re-running the training cell.
# For IsolationForest, lower decision_function values are more anomalous and
# predict() returns -1 for outliers / +1 for inliers.
anomaly_scores = model.decision_function(scaled_faulty_df)
anomaly_labels = model.predict(scaled_faulty_df)

# Rebuild the frame: features, then the score columns, then the label, with
# 'Datetime' appended last (same column names and order as before).
faulty_df = feature_df.assign(
    Anomaly_Score=anomaly_scores,
    **{col: anomaly_scores * factor for col, factor in score_display_factors.items()},
    Anomaly_Label=anomaly_labels,
    Datetime=datetime_col,
)

In [None]:
# One panel per injector signal, each overlaid with the anomaly-score column
# that was rescaled for that signal.
fig, axs = plt.subplots(nrows=3, ncols=1, figsize=(10, 18))

# (rescaled score column, signal column, human-readable signal name)
panels = [
    ('Anomaly_Score_IFR_Norm', 'Injector_Flow_Rate', 'Injector Flow Rate'),
    ('Anomaly_Score_IPW_Norm', 'Injector_Pulse_Width', 'Injector Pulse Width'),
    ('Anomaly_Score_IT_Norm', 'Injector_Timing', 'Injector Timing'),
]

timestamps = faulty_df['Datetime']
for ax, (score_col, signal_col, signal_name) in zip(axs, panels):
    # Score first (solid), then the raw signal (dashed) on the same axes.
    ax.plot(timestamps, faulty_df[score_col], label='Anomaly Score (normalized)')
    ax.plot(timestamps, faulty_df[signal_col], label=signal_name, linestyle='--')
    ax.set_xlabel('Datetime')
    ax.set_ylabel('Values')
    ax.set_title(f'Anomaly Score and {signal_name} over Datetime')
    ax.legend()

plt.tight_layout()
plt.show()

In [None]:
# Engine RPM over the faulty recording, for visual reference against the
# anomaly-score panels.
rpm_fig, rpm_ax = plt.subplots(figsize=(10, 6))
rpm_ax.plot(faulty_df['Datetime'], faulty_df['Engine_RPM'], label='Engine RPM')
rpm_ax.set_xlabel('Datetime')
rpm_ax.set_ylabel('Values')
rpm_ax.set_title('Engine RPM over Datetime')
rpm_ax.legend()
rpm_fig.tight_layout()
plt.show()