In [1]:
# 📦 Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# 📂 Load dataset
df = pd.read_csv("final_injected_anomalies.csv")

# 🧹 Drop unnecessary columns (assignment instead of inplace mutation)
df = df.drop(columns=["Timestamp"])

# 🧪 Select only Temperature as feature
X = df[["Temperature"]]
y = df["Anomaly"]

# 🧪 Train-test split on the RAW feature first.
# Splitting before scaling keeps the held-out rows out of the scaler's
# mean/std, so the evaluation below is not affected by preprocessing leakage.
# The row assignment depends only on the number of rows and random_state,
# so it is the same partition as splitting an already-scaled array.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# ⚖️ Standardize the temperature feature: fit on training rows only,
# then apply the same transformation to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full scaled feature matrix, kept under its original name for any later
# cell that still refers to it (uses the train-fitted scaler).
X_scaled = scaler.transform(X)


In [3]:
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.metrics import accuracy_score, classification_report

# 🔁 Logistic Regression on degree-2 polynomial features, balanced class weights
# NOTE: X_train is already standardized when it is created, so the 'scale'
# step standardizes it a second time before the polynomial expansion.
poly_expander = PolynomialFeatures(degree=2, interaction_only=False, include_bias=False)
balanced_logit = LogisticRegression(class_weight='balanced', C=0.5, solver='liblinear')
logistic_pipeline = Pipeline(steps=[
    ('scale', StandardScaler()),
    ('poly', poly_expander),
    ('clf', balanced_logit),
]).fit(X_train, y_train)  # fit() returns the fitted pipeline itself

# 🧠 Predict on the held-out split
y_pred = logistic_pipeline.predict(X_test)

# 📊 Evaluate: overall accuracy plus per-class precision / recall / F1
logreg_accuracy = accuracy_score(y_test, y_pred)
logreg_report = classification_report(y_test, y_pred)
print("Logistic Regression (Optimized)")
print("Accuracy:", logreg_accuracy)
print(logreg_report)


Logistic Regression (Optimized)
Accuracy: 0.915
              precision    recall  f1-score   support

           0       0.89      0.95      0.92      2675
           1       0.94      0.88      0.91      2725

    accuracy                           0.92      5400
   macro avg       0.92      0.92      0.91      5400
weighted avg       0.92      0.92      0.91      5400



In [5]:
from sklearn.ensemble import RandomForestClassifier

# 🔁 Train model: 100 trees, class-balanced weights, fixed seed for repeatable runs
rf_model = RandomForestClassifier(
    n_estimators=100,
    class_weight="balanced",
    random_state=42,
).fit(X_train, y_train)  # fit() returns the fitted estimator itself

# 🧠 Predict on the held-out split
y_pred = rf_model.predict(X_test)

# 📊 Evaluate: overall accuracy plus per-class precision / recall / F1
rf_accuracy = accuracy_score(y_test, y_pred)
rf_report = classification_report(y_test, y_pred)
print("Random Forest")
print("Accuracy:", rf_accuracy)
print(rf_report)


Random Forest
Accuracy: 0.9107407407407407
              precision    recall  f1-score   support

           0       0.91      0.91      0.91      2675
           1       0.91      0.91      0.91      2725

    accuracy                           0.91      5400
   macro avg       0.91      0.91      0.91      5400
weighted avg       0.91      0.91      0.91      5400



In [7]:
from xgboost import XGBClassifier

# ⚖️ Handle imbalance in anomaly data:
# weight positives by the ratio of negative (0) to positive (1) training labels.
n_negative = int((y_train == 0).sum())
n_positive = int((y_train == 1).sum())
scale_pos_weight = n_negative / n_positive

# 🔁 Train model
# `use_label_encoder` is intentionally not passed: the installed XGBoost
# ignores it and emits a "Parameters: { use_label_encoder } are not used" warning.
xgb_model = XGBClassifier(eval_metric="logloss", scale_pos_weight=scale_pos_weight)
xgb_model.fit(X_train, y_train)

# 🧠 Predict
y_pred = xgb_model.predict(X_test)

# 📊 Evaluate
print("XGBoost")
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


XGBoost
Accuracy: 0.9303703703703704
              precision    recall  f1-score   support

           0       0.94      0.92      0.93      2675
           1       0.92      0.94      0.93      2725

    accuracy                           0.93      5400
   macro avg       0.93      0.93      0.93      5400
weighted avg       0.93      0.93      0.93      5400



Parameters: { "use_label_encoder" } are not used.



In [43]:
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score, classification_report

# 🔁 Train with scaling and optimized parameters
# Unsupervised: only the features are passed to fit(); y_train is not used.
iso_forest = IsolationForest(n_estimators=250, contamination=0.3, max_samples='auto', random_state=42)
iso_pipeline = make_pipeline(StandardScaler(), iso_forest)
iso_pipeline.fit(X_train)

# 🧠 Predict
# The detector labels outliers as -1; remap so that -1 -> 1 (anomaly)
# and every other label -> 0, matching the encoding of y_test.
iso_raw_labels = iso_pipeline.predict(X_test)
y_pred = [int(label == -1) for label in iso_raw_labels]

# 📊 Evaluate against the ground-truth anomaly labels
iso_accuracy = accuracy_score(y_test, y_pred)
iso_report = classification_report(y_test, y_pred)
print("Isolation Forest (Optimized)")
print("Accuracy:", iso_accuracy)
print(iso_report)

Isolation Forest (Optimized)
Accuracy: 0.7124074074074074
              precision    recall  f1-score   support

           0       0.65      0.92      0.76      2675
           1       0.87      0.51      0.64      2725

    accuracy                           0.71      5400
   macro avg       0.76      0.71      0.70      5400
weighted avg       0.76      0.71      0.70      5400



In [51]:
from sklearn.svm import OneClassSVM
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score, classification_report

# 🔁 Train model with scaling and improved hyperparameters
# Unsupervised: only the features are passed to fit(); y_train is not used.
one_class_svm = OneClassSVM(nu=0.4, kernel="rbf", gamma="scale")
ocsvm_pipeline = make_pipeline(StandardScaler(), one_class_svm)
ocsvm_pipeline.fit(X_train)

# 🧠 Predict
# -1 = anomaly → 1; any other label → 0, matching the encoding of y_test.
ocsvm_raw_labels = ocsvm_pipeline.predict(X_test)
y_pred = [int(label == -1) for label in ocsvm_raw_labels]

# 📊 Evaluate against the ground-truth anomaly labels
ocsvm_accuracy = accuracy_score(y_test, y_pred)
ocsvm_report = classification_report(y_test, y_pred)
print("One-Class SVM (Optimized)")
print("Accuracy:", ocsvm_accuracy)
print(ocsvm_report)

One-Class SVM (Optimized)
Accuracy: 0.6464814814814814
              precision    recall  f1-score   support

           0       0.63      0.68      0.66      2675
           1       0.66      0.61      0.64      2725

    accuracy                           0.65      5400
   macro avg       0.65      0.65      0.65      5400
weighted avg       0.65      0.65      0.65      5400



In [37]:
import joblib
import os

# Make sure the output directory exists (no error if it is already there).
os.makedirs("saved_models_temperature_only", exist_ok=True)

# Save scaler and models
# Run this cell only after every training cell above has been executed in the
# current kernel session, so each pickle matches the metrics reported for it.
artifacts_to_save = [
    (scaler, "saved_models_temperature_only/scaler.pkl"),
    (logistic_pipeline, "saved_models_temperature_only/logistic_regression.pkl"),
    (rf_model, "saved_models_temperature_only/random_forest.pkl"),
    (xgb_model, "saved_models_temperature_only/xgboost.pkl"),
    (iso_pipeline, "saved_models_temperature_only/isolation_forest.pkl"),
    (ocsvm_pipeline, "saved_models_temperature_only/one_class_svm.pkl"),
]
for fitted_object, target_path in artifacts_to_save:
    joblib.dump(fitted_object, target_path)

print("✅ All temperature-only models saved!")


✅ All temperature-only models saved!
