In [None]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, f1_score
from xgboost import XGBClassifier

In [None]:
data_path = "/content/drive/MyDrive/Colab Notebooks/smoke_detection_iot.csv"
df = pd.read_csv(data_path)

print("Dataset shape:", df.shape)
print("Columns:", df.columns.tolist())

Dataset shape: (62630, 16)
Columns: ['Unnamed: 0', 'UTC', 'Temperature[C]', 'Humidity[%]', 'TVOC[ppb]', 'eCO2[ppm]', 'Raw H2', 'Raw Ethanol', 'Pressure[hPa]', 'PM1.0', 'PM2.5', 'NC0.5', 'NC1.0', 'NC2.5', 'CNT', 'Fire Alarm']


In [None]:
X = df.drop("Fire Alarm", axis=1)
y = df["Fire Alarm"]

In [None]:

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:

lr_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('lr', LogisticRegression(max_iter=1000, random_state=42))
])

lr_pipeline.fit(X_train, y_train)
y_pred_lr = lr_pipeline.predict(X_test)
lr_acc = accuracy_score(y_test, y_pred_lr)
lr_f1 = f1_score(y_test, y_pred_lr)
print("Accuracy:", round(lr_acc, 4))
print("F1 Score:", round(lr_f1, 4))


Accuracy: 0.988
F1 Score: 0.9916


In [None]:
from sklearn.metrics import mean_squared_error, r2_score
from xgboost import XGBRegressor

xgb_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('xgb', XGBRegressor(
        n_estimators=200,
        learning_rate=0.1,
        max_depth=5,
        random_state=42,
    ))
])


xgb_pipeline.fit(X_train, y_train)
y_pred_xgb = xgb_pipeline.predict(X_test)
xgb_mse = mean_squared_error(y_test, y_pred_xgb)
xgb_r2 = r2_score(y_test, y_pred_xgb)


print("Mean Squared Error:", round(xgb_mse, 4))
print("R2 Score:", round(xgb_r2, 4))


Mean Squared Error: 0.0001
R2 Score: 0.9996


In [None]:

import pickle
import sys
import time


# Serialize (save) both pipelines
with open("lr_pipeline.pkl", "wb") as f:
    pickle.dump(lr_pipeline, f)
with open("xgb_pipeline.pkl", "wb") as f:
    pickle.dump(xgb_pipeline, f)

lr_size_bytes = sys.getsizeof(pickle.dumps(lr_pipeline))
xgb_size_bytes = sys.getsizeof(pickle.dumps(xgb_pipeline))


lr_size_kb = lr_size_bytes / 1024
xgb_size_kb = xgb_size_bytes / 1024

print(f"LR-Pipeline: {lr_size_kb:.2f} KB")
print(f"XGB-Pipeline: {xgb_size_kb:.2f} KB")

LR-Pipeline: 1.80 KB
XGB-Pipeline: 431.04 KB


In [None]:
start_lr = time.time()
_ = lr_pipeline.predict(X_test)
end_lr = time.time()
lr_inference_time_total = end_lr - start_lr

start_xgb = time.time()
_ = xgb_pipeline.predict(X_test)
end_xgb = time.time()
xgb_inference_time_total = end_xgb - start_xgb

# Average single inference time (ms)
lr_inference_time_single = (lr_inference_time_total / len(X_test)) * 1000
xgb_inference_time_single = (xgb_inference_time_total / len(X_test)) * 1000

print("\n--- Inference Time ---")
print(f"LR-Pipeline: Total {lr_inference_time_total:.4f} s | Single {lr_inference_time_single:.4f} ms")
print(f"XGB-Pipeline: Total {xgb_inference_time_total:.4f} s | Single {xgb_inference_time_single:.4f} ms")



--- Inference Time ---
LR-Pipeline: Total 0.0222 s | Single 0.0018 ms
XGB-Pipeline: Total 0.2496 s | Single 0.0199 ms


In [None]:
import pandas as pd

summary = pd.DataFrame({
    "Model": ["LR-Pipeline", "XGB-Pipeline"],
    "Size (KB)": [round(lr_size_kb, 2), round(xgb_size_kb, 2)],
    "Total Inference Time (s)": [round(lr_inference_time_total, 4), round(xgb_inference_time_total, 4)],
    "Single Inference Time (ms)": [round(lr_inference_time_single, 4), round(xgb_inference_time_single, 4)]
})

print("\n--- Resource & Efficiency Summary ---")
display(summary)


--- Resource & Efficiency Summary ---


Unnamed: 0,Model,Size (KB),Total Inference Time (s),Single Inference Time (ms)
0,LR-Pipeline,1.8,0.0222,0.0018
1,XGB-Pipeline,431.04,0.2496,0.0199
