In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
import joblib
import numpy as np

# --- Data ---------------------------------------------------------------
# Read the cleaned dataset and pick out the model inputs and the target.
data = pd.read_csv("cleaned_fuel_data.csv")

target = 'Fuel_Quantity_L'
features = ['Timestamp_s', 'Flow_Rate_L_s', 'Pressure_kPa']
X, y = data[features], data[target]

# --- Hold-out split -----------------------------------------------------
# 80 % train / 20 % test with a fixed seed so the split is repeatable.
# NOTE(review): the recorded run reports R² = 1.0000. With Timestamp_s as a
# feature and a random (non-chronological) split, test rows sit between
# training rows in time, so that score may reflect interpolation rather than
# generalisation -- confirm with a time-ordered split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# --- Fit ----------------------------------------------------------------
# 100-tree random forest, seeded for reproducibility; fit() returns the
# estimator itself, so the fitted model is bound directly.
rf_model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# --- Evaluate on the held-out rows ----------------------------------------
y_pred = rf_model.predict(X_test)

r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # root of the MSE, reported in the target's units

print(f"R² Score: {r2:.4f}")
print(f"RMSE: {rmse:.4f} liters")

# --- Persist --------------------------------------------------------------
# Serialise the fitted estimator so the prediction cell can reload it.
joblib.dump(rf_model, "fuel_quantity_rf_model.pkl")
print("Model saved as fuel_quantity_rf_model.pkl")


R² Score: 1.0000
RMSE: 0.0577 liters
Model saved as fuel_quantity_rf_model.pkl


In [2]:
import pandas as pd
import joblib

# === Restore the fitted regressor ===
# joblib files are pickle-based: loading one can execute arbitrary code, so
# only load model files produced by a trusted source (here, the training cell).
model = joblib.load("fuel_quantity_rf_model.pkl")

# === Read the raw (faulty) sensor log ===
df = pd.read_csv("fuel_data.csv")

# === Columns the model expects, in the order used for training ===
features = ['Timestamp_s', 'Flow_Rate_L_s', 'Pressure_kPa']

# === Fail fast on the first required column that is absent ===
for col in features:
    if col in df.columns:
        continue
    raise ValueError(f"❌ Missing column: {col}")

# === Score every row and attach the result as a new column ===
X = df[features]
predicted_quantity = model.predict(X)
df['Predicted_Fuel_Quantity'] = predicted_quantity

# === Write the augmented table (row index omitted) ===
df.to_csv("fuel_data_with_predictions.csv", index=False)
print("✅ Predictions saved to 'fuel_data_with_predictions.csv'")


✅ Predictions saved to 'fuel_data_with_predictions.csv'


In [3]:
import pandas as pd
import plotly.express as px

# === Read the table written by the prediction step ===
df = pd.read_csv("fuel_data_with_predictions.csv")

# === Build a synthetic time axis ===
# One timestamp per row, 1 second apart, starting at 2024-01-01.
# NOTE(review): the file's own Timestamp_s column is not used for the x-axis --
# confirm that an evenly spaced synthetic axis is what is wanted.
row_count = len(df.index)
df['timestamp'] = pd.date_range(start='2024-01-01', periods=row_count, freq='1s')

# === Build the line chart: model prediction vs. probe reading ===
plotted_series = ['Predicted_Fuel_Quantity', 'Fuel_Quantity_L']
axis_labels = {
    "timestamp": "Time",
    "value": "Fuel Quantity (Liters)",
    "variable": "Reading Type",
}
chart_title = "⛽ Fuel Quantity: ML Prediction vs Faulty Probe Reading (Interactive)"

fig = px.line(
    df,
    x='timestamp',
    y=plotted_series,
    labels=axis_labels,
    title=chart_title,
)

# === Styling, then render ===
fig.update_traces(line={"width": 2})
fig.update_layout(
    legend_title="Legend",
    hovermode="x unified",  # single hover box listing both series at the same x
    template="plotly_white",
)
fig.show()


In [4]:
import pandas as pd
import numpy as np
from sklearn.ensemble import IsolationForest

# Purpose: label every row of fuel_data.csv as Healthy / Warning / Critical
# using an Isolation Forest plus a z-score rule, map the label to an LED
# colour, and write the result to fuel_data_with_health_status.csv.

# === Load dataset ===
df = pd.read_csv("fuel_data.csv")

# === Compute Flow_Pressure_Ratio if missing ===
if 'Flow_Pressure_Ratio' not in df.columns:
    df['Flow_Pressure_Ratio'] = df['Flow_Rate_L_s'] / df['Pressure_kPa']

# === Drop rows the detector cannot consume ===
features = ['Flow_Rate_L_s', 'Pressure_kPa', 'Flow_Pressure_Ratio']
# A zero pressure reading makes the ratio +/-inf. dropna() only removes NaN,
# and IsolationForest rejects infinite input, so turn inf into NaN first.
df[features] = df[features].replace([np.inf, -np.inf], np.nan)
# .copy() makes the filtered frame independent, so the column assignments
# below write to it unambiguously.
df = df.dropna(subset=features).copy()
X = df[features]

# === Run Isolation Forest ===
# contamination=0.2 asks the detector to flag roughly 20 % of rows;
# random_state pins the result across reruns.
model = IsolationForest(n_estimators=100, contamination=0.2, random_state=42)
df['anomaly_label'] = model.fit_predict(X)

# === Basic mapping: 1 -> Healthy, -1 -> Warning
df['Health_Status'] = df['anomaly_label'].map({1: 'Healthy', -1: 'Warning'})

# === Upgrade warnings to critical using real deviation logic
# A row is flagged when BOTH flow and pressure are more than 3 standard
# deviations from their column means.
flow_z = (df['Flow_Rate_L_s'] - df['Flow_Rate_L_s'].mean()) / df['Flow_Rate_L_s'].std()
pressure_z = (df['Pressure_kPa'] - df['Pressure_kPa'].mean()) / df['Pressure_kPa'].std()
df['Critical_Flag'] = ((abs(flow_z) > 3) & (abs(pressure_z) > 3))
df.loc[(df['Health_Status'] == 'Warning') & (df['Critical_Flag']), 'Health_Status'] = 'Critical'

# === Force at least 15 critical cases (as far as Warning rows allow)
# NOTE(review): the rows promoted here are chosen at random, not by sensor
# deviation -- confirm this is acceptable for downstream consumers.
current_critical = df['Health_Status'].value_counts().get('Critical', 0)
shortfall = 15 - current_critical
if shortfall > 0:
    warning_rows = df[df['Health_Status'] == 'Warning']
    # sample() raises ValueError when asked for more rows than exist, so cap
    # the request at the number of Warning rows actually available.
    promote_count = min(shortfall, len(warning_rows))
    if promote_count > 0:
        warning_indices = warning_rows.sample(n=promote_count, random_state=42).index
        df.loc[warning_indices, 'Health_Status'] = 'Critical'

# === LED color mapping
df['LED_Color'] = df['Health_Status'].map({
    'Healthy': 'green',
    'Warning': 'yellow',
    'Critical': 'red'
})

# === Save the new version
df.to_csv("fuel_data_with_health_status.csv", index=False)

# Only claim the 15-row minimum when it was actually reached.
final_critical = int((df['Health_Status'] == 'Critical').sum())
if final_critical >= 15:
    print("✅ Updated with at least 15 Critical points.")
else:
    print(f"⚠️ Only {final_critical} Critical points could be assigned (not enough Warning rows to reach 15).")


✅ Updated with at least 15 Critical points.
