In [1]:
import numpy as np
import pandas as pd
import random
from datetime import datetime, timedelta

# Settings
num_records = 10000
num_engines = 10

# Start time
start_time = datetime.now()

data = []

for i in range(num_records):
    engine_id = random.randint(1, num_engines)

    # Generate a random timestamp
    timestamp = np.random.normal(150, 20)

    # Simulate sensor readings
    rpm = np.random.normal(1500, 200)
    oil_pressure = np.random.normal(60, 5)
    oil_temp = np.random.normal(90, 5)
    fuel_pressure = np.random.normal(45, 4)
    coolant_temp = np.random.normal(85, 3)
    vibration_level = np.random.normal(2, 0.5)
    exhaust_gas_temp = np.random.normal(450, 30)
    engine_load = np.random.normal(75, 10)
    ambient_temp = np.random.normal(25, 5)
    humidity = np.random.normal(50, 10)
    altitude = np.random.normal(300, 100)
    hours_since_maintenance = np.random.randint(0, 500)

    # Determine failure stage
    if (
        oil_pressure < 45 or oil_temp > 110 or vibration_level > 5 or
        exhaust_gas_temp > 550 or rpm > 1900
    ):
        failure_stage = "critical"
    elif (
        oil_pressure < 55 or oil_temp > 100 or vibration_level > 3.5 or
        exhaust_gas_temp > 500 or rpm > 1700
    ):
        failure_stage = "warning"
    else:
        failure_stage = "normal"

    data.append([
        timestamp, engine_id, rpm, oil_pressure, oil_temp, fuel_pressure,
        coolant_temp, vibration_level, exhaust_gas_temp, engine_load,
        ambient_temp, humidity, altitude, hours_since_maintenance, failure_stage
    ])

# Create DataFrame
columns = [
    "timestamp", "engine_id", "rpm", "oil_pressure", "oil_temp", "fuel_pressure",
    "coolant_temp", "vibration_level", "exhaust_gas_temp", "engine_load",
    "ambient_temp", "humidity", "altitude", "hours_since_maintenance", "failure_stage"
]

df = pd.DataFrame(data, columns=columns)

# Save to CSV
df.to_csv("synthetic_engine_failure_data.csv", index=False)
print("✅ Dataset generated with 'failure_stage' (normal, warning, critical)")




In [2]:
# Install necessary libraries
!pip install google-cloud-storage google-cloud-automl pandas scikit-learn




In [3]:
import pandas as pd

# Load your synthetic dataset (make sure it's uploaded to Colab or you can use Google Drive)
df = pd.read_csv('/content/synthetic_engine_failure_data.csv')
df.head()


Unnamed: 0,timestamp,engine_id,rpm,oil_pressure,oil_temp,fuel_pressure,coolant_temp,vibration_level,exhaust_gas_temp,engine_load,ambient_temp,humidity,altitude,hours_since_maintenance,failure_stage
0,144.01942,2,1310.700882,60.569928,83.843533,47.47248,83.707196,1.761181,467.100645,73.840074,21.16908,45.610068,119.625121,383,normal
1,172.42651,6,1665.317723,58.310903,92.428887,46.840408,80.504415,3.007184,429.719906,75.677189,26.387015,45.420377,432.627167,495,normal
2,159.917424,6,1801.86933,68.435872,88.206086,47.153639,80.906524,2.04806,425.582544,75.239509,41.450699,50.309952,296.295568,122,warning
3,173.992073,8,1338.836419,64.488183,93.714328,44.695445,85.328474,2.270133,439.692722,53.151742,26.199079,58.661522,210.001264,410,normal
4,143.631596,3,1465.922752,55.207949,92.151897,47.445636,83.746418,0.833563,411.961325,69.197511,24.022396,61.239598,307.296062,281,normal


In [4]:
import pandas as pd


# Identify numerical and categorical columns
numerical_columns = df.select_dtypes(include=['number']).columns
categorical_columns = df.select_dtypes(include=['object']).columns

# Fill missing values for numerical columns with the mean
df[numerical_columns] = df[numerical_columns].fillna(df[numerical_columns].mean())

# Fill missing values for categorical columns with the mode (most frequent value)
for col in categorical_columns:
    df[col].fillna(df[col].mode()[0], inplace=True)

# Now the dataset should be cleaned and ready for further processing
print(df.head())


    timestamp  engine_id          rpm  oil_pressure   oil_temp  fuel_pressure  \
0  144.019420          2  1310.700882     60.569928  83.843533      47.472480   
1  172.426510          6  1665.317723     58.310903  92.428887      46.840408   
2  159.917424          6  1801.869330     68.435872  88.206086      47.153639   
3  173.992073          8  1338.836419     64.488183  93.714328      44.695445   
4  143.631596          3  1465.922752     55.207949  92.151897      47.445636   

   coolant_temp  vibration_level  exhaust_gas_temp  engine_load  ambient_temp  \
0     83.707196         1.761181        467.100645    73.840074     21.169080   
1     80.504415         3.007184        429.719906    75.677189     26.387015   
2     80.906524         2.048060        425.582544    75.239509     41.450699   
3     85.328474         2.270133        439.692722    53.151742     26.199079   
4     83.746418         0.833563        411.961325    69.197511     24.022396   

    humidity    altitude  

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(df[col].mode()[0], inplace=True)


In [5]:
from sklearn.preprocessing import StandardScaler

# Scale numerical features
scaler = StandardScaler()
numerical_columns = ['timestamp','engine_id','oil_pressure', 'engine_load', 'vibration_level', 'humidity', 'coolant_temp','rpm','fuel_pressure','exhaust_gas_temp','ambient_temp','hours_since_maintenance','altitude']  # Example numerical features

df[numerical_columns] = scaler.fit_transform(df[numerical_columns])


In [6]:
print(df.columns.tolist())



['timestamp', 'engine_id', 'rpm', 'oil_pressure', 'oil_temp', 'fuel_pressure', 'coolant_temp', 'vibration_level', 'exhaust_gas_temp', 'engine_load', 'ambient_temp', 'humidity', 'altitude', 'hours_since_maintenance', 'failure_stage']


In [7]:
X = df.drop(columns=['failure_stage'])
y = df['failure_stage']
print(X)

      timestamp  engine_id       rpm  oil_pressure   oil_temp  fuel_pressure  \
0     -0.297207  -1.232005 -0.943136      0.112180  83.843533       0.613862   
1      1.118465   0.170870  0.819886     -0.341538  92.428887       0.456540   
2      0.495072   0.170870  1.498769      1.692027  88.206086       0.534503   
3      1.196485   0.872308 -0.803257      0.899147  93.714328      -0.077339   
4     -0.316534  -0.881286 -0.171431     -0.964755  92.151897       0.607180   
...         ...        ...       ...           ...        ...            ...   
9995   1.623024   1.223027  0.639976     -0.578211  94.539340      -0.678447   
9996  -0.914962   0.521589 -1.064264      0.130025  86.322522       0.557918   
9997   0.719512   1.573746  0.039241      0.085345  89.798168      -1.383561   
9998   0.478841  -1.582724 -3.364142      1.848113  85.608875       1.100043   
9999   0.055685   0.170870  0.431926     -1.626509  95.254100      -0.061353   

      coolant_temp  vibration_level  ex

In [8]:
!pip install flaml





In [9]:
import pandas as pd

# Load your dataset (assuming it's in your Colab environment or Google Drive)
df = pd.read_csv('/content/synthetic_engine_failure_data.csv')  # use the new CSV name



# Fill missing numerical values with column mean
numerical_columns = df.select_dtypes(include=['number']).columns
df[numerical_columns] = df[numerical_columns].fillna(df[numerical_columns].mean())

# Encode categorical variables
categorical_columns = df.select_dtypes(include=['object']).columns

# Make sure not to encode the target yet
categorical_columns = [col for col in categorical_columns if col != 'failure_stage']

for col in categorical_columns:
    df[col] = df[col].astype('category').cat.codes  # Encode categorical columns

# Encode target variable (failure_stage) into numeric labels
df['failure_stage'] = df['failure_stage'].map({'normal': 0, 'warning': 1, 'critical': 2})





In [10]:
# Uninstall numpy completely
!pip uninstall numpy -y

# Install compatible version of numpy
!pip install numpy==1.26.4 --quiet




Found existing installation: numpy 1.26.4
Uninstalling numpy-1.26.4:
  Successfully uninstalled numpy-1.26.4
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
thinc 8.3.6 requires numpy<3.0.0,>=2.0.0, but you have numpy 1.26.4 which is incompatible.[0m[31m
[0m

In [11]:
# Now that numpy is safe, install FLAML
!pip install flaml --quiet


In [13]:
from sklearn.model_selection import train_test_split
from flaml import AutoML
from sklearn.metrics import accuracy_score, classification_report
import pandas as pd

# Load and preprocess the dataset
df = pd.read_csv('/content/drive/MyDrive/synthetic_engine_failure_data.csv')

# Handle missing values in numerical columns
numerical_columns = df.select_dtypes(include=['number']).columns
df[numerical_columns] = df[numerical_columns].fillna(df[numerical_columns].mean())

# Encode categorical columns
categorical_columns = df.select_dtypes(include=['object']).columns
for col in categorical_columns:
    df[col] = df[col].astype('category').cat.codes


# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Initialize AutoML instance
automl = AutoML()

# Fit AutoML on training data
automl.fit(X_train=X_train, y_train=y_train, task='classification', time_budget=600)

# Get the best model
best_model = automl.model

# Predict on test data
y_pred = best_model.predict(X_test)



[flaml.automl.logger: 04-13 08:37:15] {1728} INFO - task = classification
[flaml.automl.logger: 04-13 08:37:15] {1739} INFO - Evaluation method: cv
[flaml.automl.logger: 04-13 08:37:15] {1838} INFO - Minimizing error metric: log_loss
[flaml.automl.logger: 04-13 08:37:15] {1955} INFO - List of ML learners in AutoML Run: ['lgbm', 'rf', 'xgboost', 'extra_tree', 'xgb_limitdepth', 'sgd', 'lrl1']
[flaml.automl.logger: 04-13 08:37:15] {2258} INFO - iteration 0, current learner lgbm
[flaml.automl.logger: 04-13 08:37:15] {2393} INFO - Estimated sufficient time budget=3261s. Estimated necessary time budget=75s.
[flaml.automl.logger: 04-13 08:37:15] {2442} INFO -  at 0.4s,	estimator lgbm's best error=0.3762,	best estimator lgbm's best error=0.3762
[flaml.automl.logger: 04-13 08:37:15] {2258} INFO - iteration 1, current learner lgbm
[flaml.automl.logger: 04-13 08:37:16] {2442} INFO -  at 0.7s,	estimator lgbm's best error=0.3762,	best estimator lgbm's best error=0.3762
[flaml.automl.logger: 04-13 0

INFO:flaml.tune.searcher.blendsearch:No low-cost partial config given to the search algorithm. For cost-frugal search, consider providing low-cost values for cost-related hps via 'low_cost_partial_config'. More info can be found at https://microsoft.github.io/FLAML/docs/FAQ#about-low_cost_partial_config-in-tune


[flaml.automl.logger: 04-13 08:37:16] {2442} INFO -  at 1.4s,	estimator sgd's best error=0.7394,	best estimator lgbm's best error=0.1608
[flaml.automl.logger: 04-13 08:37:16] {2258} INFO - iteration 4, current learner lgbm
[flaml.automl.logger: 04-13 08:37:18] {2442} INFO -  at 3.2s,	estimator lgbm's best error=0.0125,	best estimator lgbm's best error=0.0125
[flaml.automl.logger: 04-13 08:37:18] {2258} INFO - iteration 5, current learner sgd
[flaml.automl.logger: 04-13 08:37:30] {2442} INFO -  at 15.3s,	estimator sgd's best error=0.6760,	best estimator lgbm's best error=0.0125
[flaml.automl.logger: 04-13 08:37:30] {2258} INFO - iteration 6, current learner xgboost
[flaml.automl.logger: 04-13 08:37:31] {2442} INFO -  at 16.2s,	estimator xgboost's best error=0.6870,	best estimator lgbm's best error=0.0125
[flaml.automl.logger: 04-13 08:37:31] {2258} INFO - iteration 7, current learner extra_tree
[flaml.automl.logger: 04-13 08:37:32] {2442} INFO -  at 16.7s,	estimator extra_tree's best er

INFO:flaml.tune.searcher.blendsearch:No low-cost partial config given to the search algorithm. For cost-frugal search, consider providing low-cost values for cost-related hps via 'low_cost_partial_config'. More info can be found at https://microsoft.github.io/FLAML/docs/FAQ#about-low_cost_partial_config-in-tune


[flaml.automl.logger: 04-13 08:41:04] {2442} INFO -  at 229.2s,	estimator lrl1's best error=0.4837,	best estimator rf's best error=0.0060
[flaml.automl.logger: 04-13 08:41:04] {2258} INFO - iteration 100, current learner lrl1
[flaml.automl.logger: 04-13 08:41:07] {2442} INFO -  at 232.3s,	estimator lrl1's best error=0.4837,	best estimator rf's best error=0.0060
[flaml.automl.logger: 04-13 08:41:07] {2258} INFO - iteration 101, current learner lrl1
[flaml.automl.logger: 04-13 08:41:09] {2442} INFO -  at 234.7s,	estimator lrl1's best error=0.4837,	best estimator rf's best error=0.0060
[flaml.automl.logger: 04-13 08:41:09] {2258} INFO - iteration 102, current learner extra_tree
[flaml.automl.logger: 04-13 08:41:10] {2442} INFO -  at 235.0s,	estimator extra_tree's best error=0.6117,	best estimator rf's best error=0.0060
[flaml.automl.logger: 04-13 08:41:10] {2258} INFO - iteration 103, current learner lrl1
[flaml.automl.logger: 04-13 08:41:13] {2442} INFO -  at 238.0s,	estimator lrl1's bes

In [14]:
print("Best ML learner:", automl.best_estimator)


Best ML learner: rf


In [15]:
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder

# Initialize LabelEncoder
le = LabelEncoder()

# Assuming `y_test` is in string format, we need to encode it
if y_test.dtype == 'object':
    y_test = le.fit_transform(y_test)  # Encode string labels to integers

# Predict the test set results
y_pred = best_model.predict(X_test)

# Evaluate model performance
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy of the model: {accuracy * 100:.2f}%')

# Decode the predictions (if necessary)
y_pred_labels = le.inverse_transform(y_pred)  # Decode predictions back to strings

# Print detailed classification report
print("Classification Report:")
print(classification_report(le.inverse_transform(y_test), y_pred_labels))  # Decode y_test if necessary



Accuracy of the model: 99.85%
Classification Report:
              precision    recall  f1-score   support

    critical       0.98      1.00      0.99        53
      normal       1.00      1.00      1.00      1306

    accuracy                           1.00      2000
   macro avg       0.99      1.00      1.00      2000
weighted avg       1.00      1.00      1.00      2000



In [16]:
from sklearn.metrics import confusion_matrix, classification_report
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=["normal", "warning", "critical"]))


Confusion Matrix:
[[  53    0    0]
 [   0 1304    2]
 [   1    0  640]]

Classification Report:
              precision    recall  f1-score   support

      normal       0.98      1.00      0.99        53
    critical       1.00      1.00      1.00       641

    accuracy                           1.00      2000
   macro avg       0.99      1.00      1.00      2000
weighted avg       1.00      1.00      1.00      2000



In [17]:
print(f"Predictions on test set: {y_pred}")
print(f"Predictions count by class: {pd.Series(y_pred).value_counts()}")


Predictions on test set: [2 2 1 ... 1 1 2]
Predictions count by class: 1    1304
2     642
0      54
Name: count, dtype: int64


In [18]:
from sklearn.metrics import accuracy_score


# Predict on training data
train_preds = best_model.predict(X_train)
train_acc = accuracy_score(y_train, train_preds)

# Predict on testing data
test_preds = best_model.predict(X_test)
test_acc = accuracy_score(y_test, test_preds)

print(f"✅ Training Accuracy: {train_acc:.2f}")
print(f"✅ Testing Accuracy: {test_acc:.2f}")


✅ Training Accuracy: 0.00
✅ Testing Accuracy: 1.00


In [19]:
print(automl.best_config)   # Shows best hyperparameters
print(automl.best_estimator)  # Best model type (e.g., 'rf', 'xgboost')
print(automl.best_loss)     # Loss on validation set


{'n_estimators': 19, 'max_features': 1.0, 'max_leaves': 58, 'criterion': 'gini'}
rf
0.005998690400806235


In [20]:
import joblib

joblib.dump(automl, 'smartguard_model.pkl')


['smartguard_model.pkl']

In [21]:
new_data = X_test.iloc[:5]  # Or any new synthetic/real sample
predictions = automl.predict(new_data)
print(predictions)




In [22]:
probabilities = automl.predict_proba(new_data)
print(probabilities)


[[0. 0. 1.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]]


In [23]:
def predict_failure(input_df):
    model = joblib.load("smartguard_model.pkl")
    return model.predict(input_df)


In [24]:
pip install streamlit



In [25]:
print(y_train.value_counts())  # or np.bincount(y_train) if it's a NumPy array


failure_stage
normal      5222
critical     214
Name: count, dtype: int64


In [30]:
%%writefile dig.py
import streamlit as st
import pandas as pd
import joblib
import plotly.graph_objects as go
import folium
import streamlit.components.v1 as components
from datetime import datetime

# Load trained model
automl = joblib.load("smartguard_model.pkl")

# Required feature columns
feature_columns = [
    'timestamp', 'engine_id', 'rpm', 'oil_pressure', 'oil_temp',
    'fuel_pressure', 'coolant_temp', 'vibration_level',
    'exhaust_gas_temp', 'engine_load', 'ambient_temp',
    'humidity', 'altitude', 'hours_since_maintenance',
    'latitude', 'longitude'
]

# Streamlit config and style
st.set_page_config(page_title="SMARTGUARD Digital Twin", layout="wide")

st.markdown("""
    <style>
    .stApp {
        background-color: #001f3f;
        color: white;
    }
    h1, h2, h3, label, .stMarkdown, .stNumberInput > label {
        color: white !important;
    }
    .stButton > button {
        background-color: #0074D9;
        color: white;
        border-radius: 5px;
        padding: 10px;
    }
    </style>
""", unsafe_allow_html=True)

# Logo
st.image("WhatsApp Image 2025-04-13 at 07.16.33.jpeg", width=120)

# Title and description
st.markdown("<h1 style='text-align: center;'>🚛 SMARTGUARD: Military Engine Digital Twin</h1>", unsafe_allow_html=True)
st.markdown("<p style='text-align: center; font-size:18px;'>Monitor, Predict & Visualize Military Engine Health Using AI</p>", unsafe_allow_html=True)

# Clock
current_time = datetime.now().strftime("%H:%M:%S")
st.markdown(f"<h4 style='text-align:right; color:gray;'>🕒 {current_time}</h4>", unsafe_allow_html=True)

# File uploader
st.subheader("📄 Upload Engine Sensor Data (Excel Format)")
uploaded_file = st.file_uploader("Upload .xlsx file with all required columns", type=["xlsx"])

# Prediction function
def digital_twin_prediction(df):
    df = df[feature_columns]
    prediction = automl.predict(df)
    return prediction

# Map function
def create_geolocation_map(lat, lon):
    m = folium.Map(location=[lat, lon], zoom_start=12)
    folium.Marker([lat, lon], popup=f"Latitude: {lat}, Longitude: {lon}").add_to(m)
    return m

if uploaded_file is not None:
    df_input = pd.read_excel(uploaded_file)

    if set(feature_columns).issubset(df_input.columns):
        preds = digital_twin_prediction(df_input)
        df_input['prediction'] = preds

        label_map = {0: "normal", 1: "warning", 2: "critical"}
        emoji_map = {"normal": "🟢 NORMAL", "warning": "🟡 WARNING", "critical": "🔴 CRITICAL"}

        for idx, row in df_input.iterrows():
            label = label_map.get(row['prediction'], "unknown")
            emoji = emoji_map.get(label, "❓ UNKNOWN")

            st.markdown(f"<h3>🛠️ Engine ID: {int(row['engine_id'])} - Health Status: {emoji}</h3>", unsafe_allow_html=True)

            # Show 3D Visualization
            fig = go.Figure()
            fig.add_trace(go.Scatter3d(
                x=[row['rpm']],
                y=[row['vibration_level']],
                z=[row['engine_load']],
                mode='markers',
                marker=dict(size=10, color='red'),
                name='Sensor'
            ))
            fig.update_layout(
                scene=dict(
                    xaxis_title='RPM',
                    yaxis_title='Vibration',
                    zaxis_title='Engine Load'
                ),
                margin=dict(l=0, r=0, b=0, t=0),
                height=400
            )
            st.plotly_chart(fig, use_container_width=True)

            # Show Location Map
            st.subheader("📍 Engine Location on Map")
            map_obj = create_geolocation_map(row['latitude'], row['longitude'])
            components.html(map_obj._repr_html_(), height=400)
            st.markdown("---")

    else:
        st.error("❌ Uploaded file must contain all required columns:\n\n" + ", ".join(feature_columns))


Writing dig.py
