# Introduction
# Datasets
- https://www.kaggle.com/datasets/ziya07/smart-manufacturing-iot-cloud-monitoring-dataset

We used the Smart Manufacturing IoT Cloud Monitoring dataset because the machines it monitors behave much like a PC. It contains features such as temperature, vibration, humidity, pressure, and energy consumption, each of which can stand in for a real PC signal:
- Temperature: 
    - Represents the thermal state of PC components like CPU and GPU. 
    - Elevated, fluctuating, or sustained high temps often hint at cooling interface degradation or increased workload—just as in IoT systems, abnormal temperature readings indicate equipment stress or failure.

- Vibration: 
    - In PCs, fan vibration or hard-drive spin irregularities act like mechanical failure indicators—mirroring industrial settings where vibration spikes reveal mechanical faults in motors or structures.

- Humidity: 
    - While PCs are kept in controlled environments, ambient humidity still affects internal corrosion risk and electrical stability—just like environmental IoT sensor systems highlight humidity’s impact on electronic device reliability.

- Pressure: 
    - Though PCs don’t have internal pressure sensors, this can stand in for PSU voltage fluctuations or airflow pressure changes. 
    - In industrial IoT, pressure sensors track fluid or airflow—changes often signal blockages or system degradation.

- Energy consumption: 
    - Maps directly to PC power draw, reflecting CPU/GPU workloads or inefficiencies—as in IoT energy-monitoring systems, where spikes can signal abnormal component behavior.


# Edge Telemetry Anomaly Detection with LightGBM → ONNX

**Steps:**
1. Load SMART telemetry
2. Feature scaling
3. Train compact LightGBM binary classifier
4. Evaluate on holdout set
5. Export to ONNX & quantize
6. Benchmark size & latency
7. Demonstrate streaming inference

# Load Libraries

In [1]:
import warnings
warnings.filterwarnings("ignore")

import time
import os

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

from collections import Counter

import lightgbm as lgb

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support, roc_auc_score
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, roc_auc_score, confusion_matrix, balanced_accuracy_score
from sklearn.metrics import ConfusionMatrixDisplay

import onnxmltools
import onnxruntime as ort
from onnxmltools.convert import convert_lightgbm

# Code

In [None]:
# 1. Load dataset (example: SMART CSV)
# Timestamps are parsed on read; rows are then ordered chronologically
# within each machine and given a fresh 0..n-1 index.
data = (
    pd.read_csv('dataset/smart_manufacturing_data.csv', parse_dates=['timestamp'])
    .sort_values(['machine_id', 'timestamp'])
    .reset_index(drop=True)
)
data.head()

In [None]:
# Count the distinct machine ids, then tally the anomaly labels.
machine_ids = set(data['machine_id'].tolist())
print('Number of machines:', len(machine_ids))
Counter(data["anomaly_flag"].tolist())

In [None]:
# 2. Scale and split features
# The split is by machine id, so every machine's rows land entirely in
# either the training set or the test set.
machine_train = list(range(1, 41))  # 1-40
machine_test = list(range(41, 51))  # 41-50

features = ["temperature", "vibration", "humidity", "pressure", "energy_consumption"]

# Build each membership mask once and reuse it for features and labels.
in_train = data['machine_id'].isin(machine_train)
in_test = data['machine_id'].isin(machine_test)

X_train = data.loc[in_train, features]
X_test = data.loc[in_test, features]

y_train = data.loc[in_train, 'anomaly_flag']
y_test = data.loc[in_test, 'anomaly_flag']

In [None]:
# Report the row count, then the label distribution, of each split.
splits = (('Train:', y_train), ('Test :', y_test))

for split_label, labels in splits:
    print(split_label, len(labels))

for split_label, labels in splits:
    print(split_label, Counter(labels))

In [None]:
# Standardise the features. The scaler is fitted on the training split only
# and the same fitted transform is then applied to both splits.
# (MinMaxScaler() was the alternative tried here.)
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Display the training feature matrix (a bare trailing expression is echoed by the notebook).
X_train

In [None]:
# Display the training labels (a bare trailing expression is echoed by the notebook).
y_train

We first run a grid search to find the best parameters, which are:
Best hyperparameters: {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 20, 'num_leaves': 31}

Next, we rerun it without using gridsearch to allow us to optimize the model

In [None]:
# 3. Train LightGBM binary classifier
# Exhaustive search over a small hyperparameter grid, scored by F1 with
# 5-fold cross-validation on the training split.
param_grid = dict(
    num_leaves=[31, 50],
    learning_rate=[0.05, 0.1],
    n_estimators=[20, 30],
    max_depth=[3, 5],
)

model = lgb.LGBMClassifier()
clf = GridSearchCV(estimator=model, param_grid=param_grid, scoring='f1', cv=5)
clf.fit(X_train, y_train)

best_params = clf.best_params_
print("Best hyperparameters:", best_params)

# Held-out evaluation of the refitted best estimator.
y_pred = clf.predict(X_test)
positive_proba = clf.predict_proba(X_test)[:, 1]
prec, rec, f1, _ = precision_recall_fscore_support(y_test, y_pred, average='binary')
auc_value = roc_auc_score(y_test, positive_proba)

print('')
print(f'Prec={prec:.2f}, Rec={rec:.2f}, F1={f1:.2f}, AUC={auc_value:.2f}')


As expected, temperature and vibration are the top two features for detecting anomalies, which makes sense because they are the ones that show early signs of issues.

In [None]:
# FEATURE IMPORTANCE
# Rank the input features by the importance scores of the best estimator
# found by the grid search, and show them as a bar chart.
importances = clf.best_estimator_.feature_importances_
feat_imp = pd.Series(importances, index=features).sort_values(ascending=False)
feat_imp.plot(kind='bar', figsize=(6, 3))
# The estimator is an LGBMClassifier, so label the chart as LightGBM (it previously said XGBoost).
plt.title("LightGBM Feature Importances")
plt.show()

Rerun LGBM training with best parameters

In [None]:
# Refit a standalone classifier with the hyperparameters chosen by the grid search.
model = lgb.LGBMClassifier(learning_rate=0.1, max_depth=3, n_estimators=20, num_leaves=31)
model.fit(X_train, y_train)

# Predict with the refitted `model` itself, not the earlier grid-search object
# `clf`, so that precision/recall/F1 describe the same estimator as the AUC.
y_pred = model.predict(X_test)
y_proba = model.predict_proba(X_test)[:, 1]
prec, rec, f1, _ = precision_recall_fscore_support(y_test, y_pred, average='binary')

print('')
print(f'Prec={prec:.2f}, Rec={rec:.2f}, F1={f1:.2f}, AUC={roc_auc_score(y_test, y_proba):.2f}')

In [None]:
# Per-class precision / recall / F1 on the test split.
report_text = classification_report(y_test, y_pred)
print("Classification Report:\n", report_text)

In [None]:
# Raw counts first.
cm = confusion_matrix(y_test, y_pred)
print("Confusion matrix:\n", cm)

# Normalize by true labels: divide each row by its own total so rows sum to 1.
row_totals = cm.sum(axis=1, keepdims=True)
cm_normalized = cm.astype('float') / row_totals

disp = ConfusionMatrixDisplay(
    confusion_matrix=cm_normalized,
    display_labels=['Normal', 'Abnormal'],
)
disp.plot(cmap=plt.cm.Blues, values_format='.2%')
plt.title("Confusion Matrix")
plt.show()

In [None]:
# 4. Convert and quantize to ONNX
# `quantization` was referenced without ever being imported; bring the needed
# names into scope here so the cell runs on its own.
from onnxmltools.convert.common.data_types import FloatTensorType
from onnxruntime.quantization import quantize_dynamic

# One float input named 'input' with a dynamic batch dimension and one column per feature.
initial_types = [('input', FloatTensorType([None, X_train.shape[1]]))]
onnx_model = convert_lightgbm(model.booster_, initial_types=initial_types)
with open('model_fp32.onnx', 'wb') as f:
    f.write(onnx_model.SerializeToString())

# quantize_dynamic writes the quantized model to the output path, so its
# return value is not kept.
# NOTE(review): dynamic quantization may leave tree-ensemble operators
# untouched — confirm with the size comparison in the benchmark cell.
quantize_dynamic('model_fp32.onnx', 'model_int8.onnx')


In [None]:
# 5. Benchmark model size and inference latency
def bench(onnx_path, n_runs=10):
    """Measure an ONNX model's file size and average batch-inference latency.

    Args:
        onnx_path: Path to the ``.onnx`` file to load and time.
        n_runs: Number of timed inference passes to average over.

    Returns:
        Tuple ``(size_in_bytes, mean_seconds_per_run)`` where each run scores
        the first 1000 rows of ``X_test`` on the CPU execution provider.
    """
    sess = ort.InferenceSession(onnx_path, providers=['CPUExecutionProvider'])
    Xp = X_test[:1000].astype(np.float32)
    feed = {'input': Xp}

    # One untimed pass so one-off session start-up cost does not skew the average.
    sess.run(None, feed)

    # perf_counter is the monotonic, high-resolution clock meant for timing
    # short intervals; time.time() can jump and has coarser resolution.
    start = time.perf_counter()
    for _ in range(n_runs):
        sess.run(None, feed)
    elapsed = time.perf_counter() - start

    return os.path.getsize(onnx_path), elapsed / n_runs

# Benchmark both exported models and print (size, latency) for each.
for tag, model_path in (('FP32:', 'model_fp32.onnx'), ('INT8:', 'model_int8.onnx')):
    print(tag, bench(model_path))


In [None]:
# 6. Streaming inference simulation
# Feed test rows to the quantized model one at a time and flag every row
# whose anomaly probability exceeds the threshold.
sess = ort.InferenceSession('model_int8.onnx', providers=['CPUExecutionProvider'])
threshold = 0.8
stream_end = min(1050, len(X_test))  # never index past the end of the test set
for i in range(1000, stream_end):  # sample stream
    x = X_test[i].reshape(1, -1).astype(np.float32)
    # The second output carries the class probabilities. With the converter's
    # default ZipMap it is a list of {label: prob} dicts; without ZipMap it is
    # an (n, n_classes) array. Chained indexing [0][1] reads the positive-class
    # probability of the single row in both layouts, whereas the tuple index
    # [0, 1] fails on the list-of-dicts layout.
    prob = sess.run(None, {'input': x})[1][0][1]
    if prob > threshold:
        print(f'Anomaly at index {i}, score={prob:.2f}')


In [None]:
# 7. Plotting ROC curve
from sklearn.metrics import roc_curve, auc

# Positive-class scores from the retrained model drive the curve.
y_score = model.predict_proba(X_test)[:, 1]
fpr, tpr, _ = roc_curve(y_test, y_score)

roc_label = f"AUC={auc(fpr,tpr):.2f}"
plt.plot(fpr, tpr, label=roc_label)
plt.xlabel('FPR')
plt.ylabel('TPR')
plt.legend()
plt.title('ROC')
