# Problem Statement:
### Predict time interval of an HVAC system will overheat using `Machine Learninng`

In [203]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import roc_auc_score
import matplotlib.pyplot as plt
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional
from collections import Counter
from sklearn.utils import resample
from sklearn.utils import class_weight
from sklearn.metrics import precision_recall_curve
import optuna
from sklearn.metrics import f1_score
import joblib

In [204]:
!pip install tensorflow




In [205]:
from google.colab import files
uploaded = files.upload()


Saving HVAC.csv to HVAC (3).csv


In [206]:
df = pd.read_csv("HVAC.csv")

In [207]:
df.head()

Unnamed: 0,Timestamp,T_Supply,T_Return,SP_Return,T_Saturation,T_Outdoor,RH_Supply,RH_Return,RH_Outdoor,Energy,Power
0,2019-10-15 00:00:00+02:00,19.859999,20.469999,18.5,19.02,20.299999,71.110001,58.919998,79.5,0.0,0.0
1,2019-10-15 00:15:00+02:00,19.855,20.43,18.5,19.02,20.299999,71.32,59.0,82.0,0.0,0.0
2,2019-10-15 00:30:00+02:00,19.85,20.41,18.5,19.02,20.299999,71.470001,59.109997,79.5,0.0,0.0
3,2019-10-15 00:45:00+02:00,19.84,20.379999,18.5,19.08,20.299999,71.439995,59.309998,77.0,0.0,0.0
4,2019-10-15 01:00:00+02:00,19.83,20.35,18.5,19.08,20.299999,71.580002,59.559998,79.5,0.0,0.0


In [208]:
df.tail()

Unnamed: 0,Timestamp,T_Supply,T_Return,SP_Return,T_Saturation,T_Outdoor,RH_Supply,RH_Return,RH_Outdoor,Energy,Power
33883,2021-04-14 22:45:00+02:00,19.539999,20.004999,20.5,19.619999,14.7,39.02,27.93,57.0,0.0,0.0
33884,2021-04-14 23:00:00+02:00,19.52,19.949999,20.5,19.539999,13.7,39.02,28.09,57.0,0.0,0.0
33885,2021-04-14 23:15:00+02:00,19.43,19.955,20.5,19.42,13.7,39.399998,27.93,57.0,0.0,0.0
33886,2021-04-14 23:30:00+02:00,19.42,19.92,20.5,19.4,13.7,39.599998,28.039999,57.0,0.0,0.0
33887,2021-04-14 23:45:00+02:00,19.42,19.9,20.5,19.4,13.7,39.599998,28.15,57.0,0.0,0.0


In [209]:
df.shape

(33888, 11)

In [210]:
df.isnull().sum()

Unnamed: 0,0
Timestamp,0
T_Supply,0
T_Return,0
SP_Return,0
T_Saturation,0
T_Outdoor,0
RH_Supply,0
RH_Return,0
RH_Outdoor,0
Energy,0


In [211]:
df.isna().sum()

Unnamed: 0,0
Timestamp,0
T_Supply,0
T_Return,0
SP_Return,0
T_Saturation,0
T_Outdoor,0
RH_Supply,0
RH_Return,0
RH_Outdoor,0
Energy,0


##  HVAC Dataset Column Descriptions

| **Column Name** | **Meaning** |
|------------------|-------------|
| `Timestamp` | The date and time when the sensor readings were taken. This lets us organize the data as a time series (hourly, minutely, etc.). |
| `T_Supply` | Temperature of the air that the HVAC system is blowing into the room/home (cooled or heated air). In cooling mode, it should be cold — around 16–20°C. |
| `T_Return` | Temperature of the air coming back into the HVAC system from the room. It tells us how warm or cool the room currently is. |
| `SP_Return` | The set point temperature for the return air — basically, the target temperature set on the thermostat (e.g., 24°C). The HVAC system works to bring `T_Return` closer to `SP_Return`. i.e Desired room temperature (set by user/system)|
| `T_Saturation` | Temperature of the coolant/refrigerant coil inside the HVAC system. If this gets too high, it may indicate a risk of overheating or poor cooling performance. |
| `T_Outdoor` | Temperature outside the building. This is important because HVAC systems need to work harder if it's very hot (or cold) outside. |
| `RH_Supply` | the amount of moisture (water vapor) in the air, expressed as a percentage (%).RH_Supply is the humidity level of the cool/warm air coming from the HVAC system into your room.High humidity makes rooms feel warmer; too low causes dryness. Ideally balanced between 40–60%. Example : Your AC is supplying air at 18°C. That air contains some moisture (water vapor). RH_Supply = 55% → This means the air the AC is blowing has 55% humidity|
| `RH_Return` | Humidity of the air coming back from the room to the HVAC system. This reflects the moisture level inside the space. |
| `RH_Outdoor` | Humidity outside the building. Like outdoor temp, this affects how hard the HVAC system has to work. |
| `Energy` | Total energy consumed (usually in kWh) by the HVAC system over time. It increases as the system runs longer. Useful for energy efficiency analysis. |
| `Power` | Instantaneous power usage (usually in kW) — how much energy the system is using at that specific moment. Spikes may indicate strain or inefficiency. |


In [212]:
df[df['T_Supply'] > 30].shape

(58, 11)

In [213]:
def detect_mode(row):
    if row['Power'] == 0:
        return 'idle'
    elif row['T_Supply'] < row['T_Return']:
        return 'cooling'
    elif row['T_Supply'] > row['T_Return']:
        return 'heating'
    else:
        return 'unknown'

df['mode'] = df.apply(detect_mode, axis=1)


In [214]:
# Rolling mean of T_Supply
df['T_Supply_roll3'] = df['T_Supply'].rolling(3).mean()


In [215]:
def check_overheat(row):
    # Only apply logic if we have valid rolling avg
    if pd.isna(row['T_Supply_roll3']):
        return 0

    # Cooling mode: supply air should be cold; if hot, it's overheating /malfunctioning
    if row['mode'] == 'cooling' and row['T_Supply_roll3'] > 27:
        return 1

    # Heating mode: supply air should be hot; if too hot, it's overheating /malfunctioning
    elif row['mode'] == 'heating' and row['T_Supply_roll3'] < 20:  #  threshold
        return 1

    # Idle or normal conditions
    else:
        return 0

df['overheat_flag'] = df.apply(check_overheat, axis=1)


In [216]:
df[df['overheat_flag']==1].shape

(442, 14)

In [217]:
df[df['overheat_flag']!=1].shape

(33446, 14)

In [218]:
df.columns

Index(['Timestamp', 'T_Supply', 'T_Return', 'SP_Return', 'T_Saturation',
       'T_Outdoor', 'RH_Supply', 'RH_Return', 'RH_Outdoor', 'Energy', 'Power',
       'mode', 'T_Supply_roll3', 'overheat_flag'],
      dtype='object')

In [219]:
df['T_Return_lag1'] = df['T_Return'].shift(1)
df['Power_lag1'] = df['Power'].shift(1)
df['T_Saturation_lag1'] = df['T_Saturation'].shift(1)


In [220]:
df['delta_T_Return'] = df['T_Return'] - df['T_Return'].shift(1)
df['Power_roll3'] = df['Power'].rolling(3).mean()


In [221]:
# Converting to datetime with timezone handling
df['Timestamp'] = pd.to_datetime(df['Timestamp'], utc=True)

# extracting hour and dayofweek
df['hour'] = df['Timestamp'].dt.hour
df['dayofweek'] = df['Timestamp'].dt.dayofweek


In [222]:
pd.set_option('display.max_columns', None)

In [223]:
df.head()

Unnamed: 0,Timestamp,T_Supply,T_Return,SP_Return,T_Saturation,T_Outdoor,RH_Supply,RH_Return,RH_Outdoor,Energy,Power,mode,T_Supply_roll3,overheat_flag,T_Return_lag1,Power_lag1,T_Saturation_lag1,delta_T_Return,Power_roll3,hour,dayofweek
0,2019-10-14 22:00:00+00:00,19.859999,20.469999,18.5,19.02,20.299999,71.110001,58.919998,79.5,0.0,0.0,idle,,0,,,,,,22,0
1,2019-10-14 22:15:00+00:00,19.855,20.43,18.5,19.02,20.299999,71.32,59.0,82.0,0.0,0.0,idle,,0,20.469999,0.0,19.02,-0.039999,,22,0
2,2019-10-14 22:30:00+00:00,19.85,20.41,18.5,19.02,20.299999,71.470001,59.109997,79.5,0.0,0.0,idle,19.855,0,20.43,0.0,19.02,-0.02,0.0,22,0
3,2019-10-14 22:45:00+00:00,19.84,20.379999,18.5,19.08,20.299999,71.439995,59.309998,77.0,0.0,0.0,idle,19.848333,0,20.41,0.0,19.02,-0.030001,0.0,22,0
4,2019-10-14 23:00:00+00:00,19.83,20.35,18.5,19.08,20.299999,71.580002,59.559998,79.5,0.0,0.0,idle,19.84,0,20.379999,0.0,19.08,-0.029999,0.0,23,0


In [224]:
df.columns

Index(['Timestamp', 'T_Supply', 'T_Return', 'SP_Return', 'T_Saturation',
       'T_Outdoor', 'RH_Supply', 'RH_Return', 'RH_Outdoor', 'Energy', 'Power',
       'mode', 'T_Supply_roll3', 'overheat_flag', 'T_Return_lag1',
       'Power_lag1', 'T_Saturation_lag1', 'delta_T_Return', 'Power_roll3',
       'hour', 'dayofweek'],
      dtype='object')

In [225]:
df.isna().sum()

Unnamed: 0,0
Timestamp,0
T_Supply,0
T_Return,0
SP_Return,0
T_Saturation,0
T_Outdoor,0
RH_Supply,0
RH_Return,0
RH_Outdoor,0
Energy,0


In [226]:
df_model = df.dropna().copy()

In [227]:
df_model.isna().sum()

Unnamed: 0,0
Timestamp,0
T_Supply,0
T_Return,0
SP_Return,0
T_Saturation,0
T_Outdoor,0
RH_Supply,0
RH_Return,0
RH_Outdoor,0
Energy,0


### T_Supply and T_Supply_roll3 can leak data. we will not use those feature

In [228]:
features = [
    'T_Return',
    'SP_Return',
    'T_Saturation',
    'T_Outdoor',
    'RH_Return',
    'RH_Outdoor',
    'Power',
    'Energy',
]

X = df_model[features]
y = df_model['overheat_flag']



# Train test split
split_idx = int(len(df_model) * 0.8)
X_train, X_test = X.iloc[:split_idx], X.iloc[split_idx:]
y_train, y_test = y.iloc[:split_idx], y.iloc[split_idx:]

In [229]:
X_train.head()

Unnamed: 0,T_Return,SP_Return,T_Saturation,T_Outdoor,RH_Return,RH_Outdoor,Power,Energy
2,20.41,18.5,19.02,20.299999,59.109997,79.5,0.0,0.0
3,20.379999,18.5,19.08,20.299999,59.309998,77.0,0.0,0.0
4,20.35,18.5,19.08,20.299999,59.559998,79.5,0.0,0.0
5,20.32,18.5,19.1,20.299999,59.66,82.0,0.0,0.0
6,20.309999,18.5,19.119999,19.299999,59.759998,85.0,0.0,0.0


In [230]:
y_train.head()

Unnamed: 0,overheat_flag
2,0
3,0
4,0
5,0
6,0


In [231]:


model = RandomForestClassifier(class_weight='balanced', random_state=42)
model.fit(X_train, y_train)


y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))



              precision    recall  f1-score   support

           0       1.00      1.00      1.00      6745
           1       0.00      0.00      0.00        33

    accuracy                           0.99      6778
   macro avg       0.50      0.50      0.50      6778
weighted avg       0.99      0.99      0.99      6778

[[6743    2]
 [  33    0]]


### Observation:
* Model is too biased toward predicting class 0.



# Lets try XGBoost

In [232]:

# Computing class imbalance ratio
scale_pos_weight = len(y_train[y_train == 0]) / len(y_train[y_train == 1])

# Defining the model
xgb = XGBClassifier(
    scale_pos_weight=scale_pos_weight,
    eval_metric='logloss',
    random_state=42,
    n_estimators=100,
    max_depth=6,
    learning_rate=0.1
)

#  Fit the model
xgb.fit(X_train, y_train)

# Make predictions
y_pred = xgb.predict(X_test)
y_probs = xgb.predict_proba(X_test)[:, 1]

# Evaluate the model

print(" Classification Report:\n", classification_report(y_test, y_pred))
print(" Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print(" AUC Score:", roc_auc_score(y_test, y_probs))


 Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.98      0.99      6745
           1       0.13      0.52      0.21        33

    accuracy                           0.98      6778
   macro avg       0.56      0.75      0.60      6778
weighted avg       0.99      0.98      0.99      6778

 Confusion Matrix:
 [[6632  113]
 [  16   17]]
 AUC Score: 0.9720938068603006


* True Negatives (TN) — correctly predicted normal condition

* False Positives (FP) — wrongly predicted overheat when it wasn’t

* False Negatives (FN) — missed actual overheat events (concerning)

* True Positives (TP) — correctly predicted overheat

### Lets use LSTM

In [233]:
print("NaNs in X_train:\n", np.isnan(X_train).sum())
print("NaNs in y_train:\n", np.isnan(y_train).sum())


NaNs in X_train:
 T_Return        0
SP_Return       0
T_Saturation    0
T_Outdoor       0
RH_Return       0
RH_Outdoor      0
Power           0
Energy          0
dtype: int64
NaNs in y_train:
 0


In [234]:
def create_lstm_sequences(X, y, time_steps=5):
    Xs, ys = [], []
    for i in range(len(X) - time_steps):
        Xs.append(X[i:(i + time_steps)])
        ys.append(y[i + time_steps])
    return np.array(Xs), np.array(ys)

# Scale the data
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X_train)

# LSTM-ready data
time_steps = 5
X_lstm, y_lstm = create_lstm_sequences(X_scaled, y_train, time_steps=time_steps)

# Splitting
split = int(0.8 * len(X_lstm))
X_train_seq, X_test_seq = X_lstm[:split], X_lstm[split:]
y_train_seq, y_test_seq = y_lstm[:split], y_lstm[split:]


In [235]:

model = Sequential([
    LSTM(64, input_shape=(X_train_seq.shape[1], X_train_seq.shape[2]), return_sequences=False),
    Dropout(0.2),
    Dense(1, activation='sigmoid')
])

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

history = model.fit(X_train_seq, y_train_seq, epochs=10, batch_size=64, validation_split=0.1)


Epoch 1/10


  super().__init__(**kwargs)


[1m305/305[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 17ms/step - accuracy: 0.9849 - loss: 0.1460 - val_accuracy: 0.9852 - val_loss: 0.0720
Epoch 2/10
[1m305/305[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 6ms/step - accuracy: 0.9916 - loss: 0.0442 - val_accuracy: 0.9852 - val_loss: 0.0721
Epoch 3/10
[1m305/305[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.9907 - loss: 0.0357 - val_accuracy: 0.9852 - val_loss: 0.0824
Epoch 4/10
[1m305/305[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.9921 - loss: 0.0271 - val_accuracy: 0.9779 - val_loss: 0.0731
Epoch 5/10
[1m305/305[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9924 - loss: 0.0223 - val_accuracy: 0.9843 - val_loss: 0.0862
Epoch 6/10
[1m305/305[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.9915 - loss: 0.0261 - val_accuracy: 0.9816 - val_loss: 0.0858
Epoch 7/10
[1m305/305[0m [32m━━━━━━

In [236]:
y_pred_probs = model.predict(X_test_seq).flatten()
y_pred = (y_pred_probs > 0.5).astype(int)


print("Classification Report:\n", classification_report(y_test_seq, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test_seq, y_pred))
print("AUC Score:", roc_auc_score(y_test_seq, y_pred_probs))


[1m170/170[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
Classification Report:
               precision    recall  f1-score   support

           0       0.96      0.99      0.97      5203
           1       0.05      0.02      0.03       218

    accuracy                           0.95      5421
   macro avg       0.51      0.50      0.50      5421
weighted avg       0.92      0.95      0.93      5421

Confusion Matrix:
 [[5132   71]
 [ 214    4]]
AUC Score: 0.877546828135497


# Handling imbalance

In [237]:


class_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=np.unique(y_train_seq),
    y=y_train_seq
)
class_weights = dict(enumerate(class_weights))

history = model.fit(
    X_train_seq, y_train_seq,
    epochs=10,
    batch_size=64,
    validation_split=0.1,
    class_weight=class_weights
)


Epoch 1/10
[1m305/305[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - accuracy: 0.9037 - loss: 0.2578 - val_accuracy: 0.9410 - val_loss: 0.1669
Epoch 2/10
[1m305/305[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.9377 - loss: 0.1703 - val_accuracy: 0.9428 - val_loss: 0.1707
Epoch 3/10
[1m305/305[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.9455 - loss: 0.1562 - val_accuracy: 0.9212 - val_loss: 0.2476
Epoch 4/10
[1m305/305[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.9576 - loss: 0.1374 - val_accuracy: 0.8852 - val_loss: 0.2988
Epoch 5/10
[1m305/305[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.9364 - loss: 0.1544 - val_accuracy: 0.8631 - val_loss: 0.3195
Epoch 6/10
[1m305/305[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.9454 - loss: 0.1343 - val_accuracy: 0.8414 - val_loss: 0.3861
Epoch 7/10
[1m305/305[0m 

In [238]:


# Combining
X_pos = X_lstm[y_lstm == 1]
y_pos = y_lstm[y_lstm == 1]
X_neg = X_lstm[y_lstm == 0]
y_neg = y_lstm[y_lstm == 0]

X_pos_upsampled, y_pos_upsampled = resample(
    X_pos, y_pos,
    replace=True,
    n_samples=len(y_neg),
    random_state=42
)

# Concatenate
X_balanced = np.concatenate([X_neg, X_pos_upsampled])
y_balanced = np.concatenate([y_neg, y_pos_upsampled])


In [239]:


# Shuffle and split
X_train, X_test, y_train, y_test = train_test_split(
    X_balanced, y_balanced, test_size=0.2, random_state=42, shuffle=True
)


In [240]:

# Using Bidirectional LSTM
model = Sequential([
    Bidirectional(LSTM(64, return_sequences=False), input_shape=(X_train.shape[1], X_train.shape[2])),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

history = model.fit(
    X_train, y_train,
    epochs=10,
    batch_size=64,
    validation_split=0.1
)


Epoch 1/10


  super().__init__(**kwargs)


[1m601/601[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 11ms/step - accuracy: 0.8471 - loss: 0.3468 - val_accuracy: 0.9461 - val_loss: 0.1582
Epoch 2/10
[1m601/601[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 11ms/step - accuracy: 0.9490 - loss: 0.1410 - val_accuracy: 0.9583 - val_loss: 0.1255
Epoch 3/10
[1m601/601[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 9ms/step - accuracy: 0.9568 - loss: 0.1229 - val_accuracy: 0.9579 - val_loss: 0.1266
Epoch 4/10
[1m601/601[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 10ms/step - accuracy: 0.9574 - loss: 0.1174 - val_accuracy: 0.9663 - val_loss: 0.1024
Epoch 5/10
[1m601/601[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 10ms/step - accuracy: 0.9640 - loss: 0.1025 - val_accuracy: 0.9609 - val_loss: 0.0984
Epoch 6/10
[1m601/601[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 8ms/step - accuracy: 0.9677 - loss: 0.0955 - val_accuracy: 0.9698 - val_loss: 0.0889
Epoch 7/10
[1m601/601[0m [32m

In [241]:
y_pred_probs = model.predict(X_test).flatten()
y_pred = (y_pred_probs > 0.5).astype(int)


print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("AUC Score:", roc_auc_score(y_test, y_pred_probs))


[1m334/334[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
Classification Report:
               precision    recall  f1-score   support

           0       0.99      0.96      0.98      5277
           1       0.96      0.99      0.98      5401

    accuracy                           0.98     10678
   macro avg       0.98      0.98      0.98     10678
weighted avg       0.98      0.98      0.98     10678

Confusion Matrix:
 [[5077  200]
 [  60 5341]]
AUC Score: 0.9955109064825866


#Observation:
* We use oversampling but the problem is the time series is broken. Although it gives great result but it can not predict the trend. so we can not go forward with this.
* i see the 0 --> 33446 nos. and 1--> 442 nos. and the ratio is approx 75 : 1
this output is giving every 15 minute. so in one day 24* 4 = 96 data point.
so if we consider for 1 days and if it is found out at least single "1" in the whole day, the output will be 1 for that day. such we can balance the dataset. lets see what happen.




In [242]:
# Ensuring Timestamp is in datetime
df['Timestamp'] = pd.to_datetime(df['Timestamp'])

# Creating a 'date' column
df['date'] = df['Timestamp'].dt.date

# Daily aggregation
daily_df = df.groupby('date').agg({
    'T_Supply': 'mean',
    'T_Return': 'mean',
    'SP_Return': 'mean',
    'T_Saturation': 'mean',
    'T_Outdoor': 'mean',
    'RH_Supply': 'mean',
    'RH_Return': 'mean',
    'RH_Outdoor': 'mean',
    'Energy': 'sum',
    'Power': 'sum',
    'overheat_flag': 'max',
})

# Rename target
daily_df['target'] = daily_df['overheat_flag']
daily_df.drop(columns='overheat_flag', inplace=True)

# Now checking balance
print(daily_df['target'].value_counts())

target
0    216
1    145
Name: count, dtype: int64


In [243]:
X = daily_df.drop(columns=['target'])
y = daily_df['target']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)

model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
y_proba = model.predict_proba(X_test)[:, 1]

print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("AUC Score:", roc_auc_score(y_test, y_proba))

Classification Report:
               precision    recall  f1-score   support

           0       0.96      0.77      0.85        64
           1       0.32      0.78      0.45         9

    accuracy                           0.77        73
   macro avg       0.64      0.77      0.65        73
weighted avg       0.88      0.77      0.80        73

Confusion Matrix:
 [[49 15]
 [ 2  7]]
AUC Score: 0.875


# Observation:
* Here we got good result, but see one thing.
0 -->   216,
1 -->   145

so approx every 2 days there is a day with over heating. which is not adorable. now let do oversampling using `T-Smote`

In [244]:


#  TSMOTE function
def advanced_tsmote(X, y, minority_class=1, k=5, n_samples=None, random_state=42):
    np.random.seed(random_state)
    X = np.array(X)
    y = np.array(y)

    X_min = X[y == minority_class]
    y_min = y[y == minority_class]
    n_min = len(X_min)

    if n_samples is None:
        n_maj = len(y[y != minority_class])
        n_samples = n_maj - n_min

    X_min_flat = X_min.reshape(n_min, -1)

    from sklearn.neighbors import NearestNeighbors
    knn = NearestNeighbors(n_neighbors=k+1)
    knn.fit(X_min_flat)

    synthetic_samples = []

    for _ in range(n_samples):
        i = np.random.randint(0, n_min)
        xi = X_min[i]
        xi_flat = X_min_flat[i].reshape(1, -1)

        neighbors_idx = knn.kneighbors(xi_flat, return_distance=False)[0][1:]
        j = np.random.choice(neighbors_idx)
        xj = X_min[j]

        lambdas = np.random.uniform(0, 1, size=(xi.shape[0], 1))
        synthetic = xi + lambdas * (xj - xi)
        synthetic_samples.append(synthetic)

    X_syn = np.array(synthetic_samples)
    y_syn = np.full(X_syn.shape[0], minority_class)

    X_combined = np.concatenate([X, X_syn], axis=0)
    y_combined = np.concatenate([y, y_syn], axis=0)

    return X_combined, y_combined




In [245]:
df.head()

Unnamed: 0,Timestamp,T_Supply,T_Return,SP_Return,T_Saturation,T_Outdoor,RH_Supply,RH_Return,RH_Outdoor,Energy,Power,mode,T_Supply_roll3,overheat_flag,T_Return_lag1,Power_lag1,T_Saturation_lag1,delta_T_Return,Power_roll3,hour,dayofweek,date
0,2019-10-14 22:00:00+00:00,19.859999,20.469999,18.5,19.02,20.299999,71.110001,58.919998,79.5,0.0,0.0,idle,,0,,,,,,22,0,2019-10-14
1,2019-10-14 22:15:00+00:00,19.855,20.43,18.5,19.02,20.299999,71.32,59.0,82.0,0.0,0.0,idle,,0,20.469999,0.0,19.02,-0.039999,,22,0,2019-10-14
2,2019-10-14 22:30:00+00:00,19.85,20.41,18.5,19.02,20.299999,71.470001,59.109997,79.5,0.0,0.0,idle,19.855,0,20.43,0.0,19.02,-0.02,0.0,22,0,2019-10-14
3,2019-10-14 22:45:00+00:00,19.84,20.379999,18.5,19.08,20.299999,71.439995,59.309998,77.0,0.0,0.0,idle,19.848333,0,20.41,0.0,19.02,-0.030001,0.0,22,0,2019-10-14
4,2019-10-14 23:00:00+00:00,19.83,20.35,18.5,19.08,20.299999,71.580002,59.559998,79.5,0.0,0.0,idle,19.84,0,20.379999,0.0,19.08,-0.029999,0.0,23,0,2019-10-14


In [246]:
df.columns


Index(['Timestamp', 'T_Supply', 'T_Return', 'SP_Return', 'T_Saturation',
       'T_Outdoor', 'RH_Supply', 'RH_Return', 'RH_Outdoor', 'Energy', 'Power',
       'mode', 'T_Supply_roll3', 'overheat_flag', 'T_Return_lag1',
       'Power_lag1', 'T_Saturation_lag1', 'delta_T_Return', 'Power_roll3',
       'hour', 'dayofweek', 'date'],
      dtype='object')

In [247]:
dataset = df[ [
     'T_Return', 'SP_Return', 'T_Saturation',
       'T_Outdoor', 'RH_Supply', 'RH_Return', 'RH_Outdoor', 'Energy', 'Power','overheat_flag'
]]

In [248]:
dataset

Unnamed: 0,T_Return,SP_Return,T_Saturation,T_Outdoor,RH_Supply,RH_Return,RH_Outdoor,Energy,Power,overheat_flag
0,20.469999,18.5,19.020000,20.299999,71.110001,58.919998,79.5,0.0,0.0,0
1,20.430000,18.5,19.020000,20.299999,71.320000,59.000000,82.0,0.0,0.0,0
2,20.410000,18.5,19.020000,20.299999,71.470001,59.109997,79.5,0.0,0.0,0
3,20.379999,18.5,19.080000,20.299999,71.439995,59.309998,77.0,0.0,0.0,0
4,20.350000,18.5,19.080000,20.299999,71.580002,59.559998,79.5,0.0,0.0,0
...,...,...,...,...,...,...,...,...,...,...
33883,20.004999,20.5,19.619999,14.700000,39.020000,27.930000,57.0,0.0,0.0,0
33884,19.949999,20.5,19.539999,13.700000,39.020000,28.090000,57.0,0.0,0.0,0
33885,19.955000,20.5,19.420000,13.700000,39.399998,27.930000,57.0,0.0,0.0,0
33886,19.920000,20.5,19.400000,13.700000,39.599998,28.039999,57.0,0.0,0.0,0


In [249]:
train_dataset = dataset.iloc[:-1000]
test_dataset = dataset.iloc[-1000:]
train_dataset.to_csv("final_train_dataset.csv", index=False)
test_dataset.to_csv("final_test_dataset.csv", index=False)


In [250]:
train_dataset.head()

Unnamed: 0,T_Return,SP_Return,T_Saturation,T_Outdoor,RH_Supply,RH_Return,RH_Outdoor,Energy,Power,overheat_flag
0,20.469999,18.5,19.02,20.299999,71.110001,58.919998,79.5,0.0,0.0,0
1,20.43,18.5,19.02,20.299999,71.32,59.0,82.0,0.0,0.0,0
2,20.41,18.5,19.02,20.299999,71.470001,59.109997,79.5,0.0,0.0,0
3,20.379999,18.5,19.08,20.299999,71.439995,59.309998,77.0,0.0,0.0,0
4,20.35,18.5,19.08,20.299999,71.580002,59.559998,79.5,0.0,0.0,0


In [270]:


features = [
     'T_Return', 'SP_Return', 'T_Saturation',
       'T_Outdoor', 'RH_Supply', 'RH_Return', 'RH_Outdoor', 'Energy', 'Power',
]

target = 'overheat_flag'

df = train_dataset

X = df[features].values
y = df[target].values

# Train-test split (no shuffle to respect time order)
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0.2)

# Applying TSmote
X_train_reshaped = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))  # shape: (samples, timesteps, features=1)
X_resampled, y_resampled = advanced_tsmote(X_train_reshaped, y_train, k=5)
X_resampled = X_resampled.reshape(X_resampled.shape[0], -1)  # back to 2D

print("Before Oversampling:", Counter(y_train))
print("After  Oversampling:", Counter(y_resampled))



Before Oversampling: Counter({np.int64(0): 25948, np.int64(1): 362})
After  Oversampling: Counter({np.int64(0): 25948, np.int64(1): 25948})


In [268]:
X_train = X_resampled
y_train = y_resampled


In [272]:


# Model training
model = RandomForestClassifier(n_estimators=100, random_state=42, class_weight='balanced')
model.fit(X_resampled, y_resampled)

# Evaluation on untouched test set
y_pred = model.predict(X_test)
y_probs = model.predict_proba(X_test)[:, 1]

print("\nClassification Report Random Forest:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("AUC Score:", roc_auc_score(y_test, y_probs))



Classification Report Random Forest:
               precision    recall  f1-score   support

           0       0.99      0.99      0.99      6498
           1       0.33      0.42      0.37        80

    accuracy                           0.98      6578
   macro avg       0.66      0.71      0.68      6578
weighted avg       0.98      0.98      0.98      6578

Confusion Matrix:
 [[6429   69]
 [  46   34]]
AUC Score: 0.9638552246845182


In [274]:

# Applying XGBoost
model = XGBClassifier(n_estimators=100, random_state=42, use_label_encoder=False, eval_metric='logloss')
model.fit(X_resampled, y_resampled)

# Evaluation on untouched test set
y_pred = model.predict(X_test)
y_probs = model.predict_proba(X_test)[:, 1]

print("\nClassification Report XG Boost:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("AUC Score:", roc_auc_score(y_test, y_probs))


Parameters: { "use_label_encoder" } are not used.




Classification Report XG Boost:
               precision    recall  f1-score   support

           0       0.99      0.98      0.99      6498
           1       0.29      0.53      0.38        80

    accuracy                           0.98      6578
   macro avg       0.64      0.75      0.68      6578
weighted avg       0.99      0.98      0.98      6578

Confusion Matrix:
 [[6397  101]
 [  38   42]]
AUC Score: 0.9561095721760543


In [255]:
!pip install optuna xgboost




In [256]:
print(np.isnan(X).sum(), np.isinf(X).sum())
print(np.isnan(y).sum(), np.isinf(y).sum())


0 0
0 0


In [257]:
# Use resampled data
X = np.array(X_resampled)
y = np.array(y_resampled)

# Handling NaN and Inf values
print("Before cleanup:", np.isnan(X).sum(), np.isinf(X).sum())

# Drop rows with NaNs in X
mask = ~np.isnan(X).any(axis=1)
X = X[mask]
y = y[mask]

print("After cleanup:", np.isnan(X).sum(), np.isinf(X).sum())

# Scaling features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Reshape for LSTM: (samples, timesteps, features)
X_lstm = X_scaled.reshape((X_scaled.shape[0], X_scaled.shape[1], 1))

# Encode target if multi-class
num_classes = len(np.unique(y))
if num_classes > 2:
    y = to_categorical(y)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X_lstm, y, test_size=0.2, random_state=42, stratify=y if num_classes > 2 else None
)

# Build LSTM model
model = Sequential()
model.add(LSTM(64, input_shape=(X_train.shape[1], 1), return_sequences=False))
model.add(Dropout(0.2))

if num_classes > 2:
    model.add(Dense(num_classes, activation='softmax'))
    loss_fn = 'categorical_crossentropy'
else:
    model.add(Dense(1, activation='sigmoid'))
    loss_fn = 'binary_crossentropy'

model.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])

# Train the model
history = model.fit(
    X_train, y_train,
    epochs=20,
    batch_size=32,
    validation_data=(X_test, y_test),
    verbose=1
)

# Evaluate
y_pred = model.predict(X_test)

if num_classes > 2:
    y_pred_classes = np.argmax(y_pred, axis=1)
    y_true_classes = np.argmax(y_test, axis=1)
else:
    y_pred_classes = (y_pred > 0.5).astype("int32").flatten()
    y_true_classes = y_test

print("Classification Report:\n", classification_report(y_true_classes, y_pred_classes))
print("Confusion Matrix:\n", confusion_matrix(y_true_classes, y_pred_classes))


Before cleanup: 0 0
After cleanup: 0 0
Epoch 1/20


  super().__init__(**kwargs)


[1m1298/1298[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 16ms/step - accuracy: 0.8681 - loss: 0.3361 - val_accuracy: 0.9415 - val_loss: 0.1601
Epoch 2/20
[1m1298/1298[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 10ms/step - accuracy: 0.9397 - loss: 0.1647 - val_accuracy: 0.9421 - val_loss: 0.1494
Epoch 3/20
[1m1298/1298[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 8ms/step - accuracy: 0.9432 - loss: 0.1526 - val_accuracy: 0.9514 - val_loss: 0.1314
Epoch 4/20
[1m1298/1298[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 8ms/step - accuracy: 0.9478 - loss: 0.1409 - val_accuracy: 0.9537 - val_loss: 0.1211
Epoch 5/20
[1m1298/1298[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 7ms/step - accuracy: 0.9517 - loss: 0.1268 - val_accuracy: 0.9598 - val_loss: 0.1097
Epoch 6/20
[1m1298/1298[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 7ms/step - accuracy: 0.9560 - loss: 0.1196 - val_accuracy: 0.9640 - val_loss: 0.1031
Epoch 7/20
[1m1298/

### It is seem that LSTM does not perform well. XGBoost perform better at the moment. So lets go forward with XGBoost

In [305]:
X_resampled

array([[20.469999  , 18.5       , 19.02      , ..., 79.5       ,
         0.        ,  0.        ],
       [20.43      , 18.5       , 19.02      , ..., 82.        ,
         0.        ,  0.        ],
       [20.41      , 18.5       , 19.02      , ..., 79.5       ,
         0.        ,  0.        ],
       ...,
       [19.72356923, 22.5       , 19.199999  , ..., 94.        ,
         6.        ,  2.51292502],
       [18.58954738, 22.5       , 17.24695731, ..., 87.        ,
        12.        ,  4.91120026],
       [18.10380572, 22.5       , 17.79471073, ..., 86.        ,
        13.        ,  5.004     ]])

In [306]:
y_resampled

array([0, 0, 0, ..., 1, 1, 1])

In [307]:


X = X_resampled
y = y_resampled

# Apply MinMaxScaler BEFORE train-test split
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Split the data
X_train, X_val, y_train, y_val = train_test_split(
    X_scaled, y, test_size=0.2, random_state=35, stratify=y
)

# Optuna objective function
def objective(trial):
    param = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 500),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'gamma': trial.suggest_float('gamma', 0, 0.5),
        'scale_pos_weight': trial.suggest_float('scale_pos_weight', 1, 20),
        'use_label_encoder': False,
        'eval_metric': 'logloss'
    }

    model = XGBClassifier(**param)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_val)
    return f1_score(y_val, y_pred)

# Run Optuna study
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=200)

# Best parameters
best_params = study.best_params
print("Best Trial Parameters:", best_params)

# Final model with best parameters
final_model = XGBClassifier(
    **best_params,
    use_label_encoder=False,
    eval_metric='logloss'
)
final_model.fit(X_train, y_train)

# Evaluation
y_pred = final_model.predict(X_val)
print("Classification Report:\n", classification_report(y_val, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_val, y_pred))
print("AUC Score:", roc_auc_score(y_val, final_model.predict_proba(X_val)[:, 1]))


[I 2025-07-20 19:05:50,058] A new study created in memory with name: no-name-5b4f6871-cb51-4fa5-ac68-5ddb963e2074
Parameters: { "use_label_encoder" } are not used.

[I 2025-07-20 19:05:52,878] Trial 0 finished with value: 0.9924409147450005 and parameters: {'n_estimators': 390, 'learning_rate': 0.11478666251989682, 'max_depth': 7, 'subsample': 0.7769000424427462, 'colsample_bytree': 0.8211036960921007, 'gamma': 0.39489331670820776, 'scale_pos_weight': 13.918827000535133}. Best is trial 0 with value: 0.9924409147450005.
Parameters: { "use_label_encoder" } are not used.

[I 2025-07-20 19:05:55,590] Trial 1 finished with value: 0.9922495454980385 and parameters: {'n_estimators': 193, 'learning_rate': 0.17068665576491165, 'max_depth': 8, 'subsample': 0.8651675023597887, 'colsample_bytree': 0.5806045869943814, 'gamma': 0.3270387606423096, 'scale_pos_weight': 11.421596024889302}. Best is trial 0 with value: 0.9924409147450005.
Parameters: { "use_label_encoder" } are not used.

[I 2025-07-20 

Best Trial Parameters: {'n_estimators': 443, 'learning_rate': 0.17879417644294449, 'max_depth': 10, 'subsample': 0.9895095972609319, 'colsample_bytree': 0.7398148661666221, 'gamma': 0.032919258958368, 'scale_pos_weight': 1.0074304060789339}
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.99      1.00      5190
           1       0.99      1.00      1.00      5190

    accuracy                           1.00     10380
   macro avg       1.00      1.00      1.00     10380
weighted avg       1.00      1.00      1.00     10380

Confusion Matrix:
 [[5154   36]
 [   5 5185]]
AUC Score: 0.999835833695301


In [308]:
best_params = study.best_params
best_params

{'n_estimators': 443,
 'learning_rate': 0.17879417644294449,
 'max_depth': 10,
 'subsample': 0.9895095972609319,
 'colsample_bytree': 0.7398148661666221,
 'gamma': 0.032919258958368,
 'scale_pos_weight': 1.0074304060789339}

In [310]:

best_params = study.best_params

# Update best_params for binary classification
best_params.update({
    'use_label_encoder': False,
    'eval_metric': 'logloss',
    'objective': 'binary:logistic',
    'random_state': 35
})

# Scale  using MinMaxScaler
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Save the scaler
joblib.dump(scaler, "scaler.pkl")

# Train-test split
X_train, X_val, y_train, y_val = train_test_split(
    X_scaled, y, test_size=0.2, random_state=35, stratify=y
)

# Train the model
final_model = XGBClassifier(**best_params)
final_model.fit(X_train, y_train)

# Save the trained model
joblib.dump(final_model, "xgb_final_model.pkl")

# Make predictions
y_pred = final_model.predict(X_val)
y_proba = final_model.predict_proba(X_val)[:, 1]

# Evaluate
print("Classification Report:\n", classification_report(y_val, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_val, y_pred))
print("F1 Score:", f1_score(y_val, y_pred))
print("AUC Score:", roc_auc_score(y_val, y_proba))


Parameters: { "use_label_encoder" } are not used.



Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.99      1.00      5190
           1       0.99      1.00      1.00      5190

    accuracy                           1.00     10380
   macro avg       1.00      1.00      1.00     10380
weighted avg       1.00      1.00      1.00     10380

Confusion Matrix:
 [[5152   38]
 [   8 5182]]
F1 Score: 0.995581171950048
AUC Score: 0.9998238794777269


In [311]:
import joblib
joblib.dump(final_model, 'xgb_final_model.pkl')


['xgb_final_model.pkl']

In [312]:
import numpy as np
from datetime import timedelta

def predict_overheat(model, recent_data, timestep, timestep_unit):
    """
    Predict if overheating will occur in the future time window.

    Args:
    - model: trained XGBoost model
    - recent_data: DataFrame containing most recent data rows (15-min interval)
    - timestep: integer (how many units forward to predict)
    - timestep_unit: "minute", "hour", "day"

    Returns:
    - prediction: 0 or 1 (1 means overheat will occur)
    - expected_overheat_time: estimated datetime of overheating (if any)
    """

    interval_minutes = 15  # dataset frequency
    future_minutes = timestep

    if timestep_unit == "hour":
        future_minutes = timestep * 60
    elif timestep_unit == "day":
        future_minutes = timestep * 24 * 60

    n_steps = future_minutes // interval_minutes

    # Ensure enough data
    if len(recent_data) < n_steps:
        return "Not enough data", None

    X_input = recent_data[-n_steps:]  # most recent 'n_steps' rows
    preds = model.predict(X_input)

    if 1 in preds:
        overheat_index = np.where(preds == 1)[0][0]
        overheat_time = X_input.index[overheat_index]
        return 1, overheat_time
    else:
        return 0, None


In [328]:
import pandas as pd
import numpy as np
import joblib
from sklearn.utils import shuffle

# Load your model and MinMax scaler
model = joblib.load("xgb_final_model.pkl")
scaler = joblib.load("scaler.pkl")

# Load dataset
data = pd.read_csv("final_test_dataset.csv")

# Add new rows (same number of features as input, without output column)
new_rows = [
    [22.059999, 19.5, 20.219999, 14.3, 36.160000, 26.029999, 58.0, 14.0, 5.064],
    [20.754999, 19.5, 18.920000, 9.1, 49.039997, 39.160000, 93.0, 13.0, 5.136],
    [19.365000, 20.5, 18.660000, 7.1, 39.270000, 34.309998, 87.0, 13.0, 4.956],
    [20.660000, 20.5, 17.520000, 12.3, 41.299999, 34.169998, 81.0, 9.0, 4.968],
    [20.330000, 20.5, 17.139999, 8.1, 39.040001, 33.130001, 80.0, 10.0, 4.968],
]

# Drop output column
recent_data = data.iloc[:, :-1].copy()

# Append new rows
new_df = pd.DataFrame(new_rows, columns=recent_data.columns)
recent_data = pd.concat([recent_data, new_df], ignore_index=True)

# Shuffle the dataset
recent_data = shuffle(recent_data, random_state=42).reset_index(drop=True)

# Start infinite loop
while True:
    print("\n🔁 Predict Overheat - Type 'exit' to quit")

    time_unit = input("Enter time unit (minute/hour/day): ").strip().lower()
    if time_unit == "exit":
        print("👋 Exiting system.")
        break

    try:
        time_value = int(input(f"Enter number of {time_unit}s: ").strip())
    except ValueError:
        print("❌ Invalid number. Try again.")
        continue

    # Convert time to number of 15-minute steps
    if time_unit == "minute":
        steps = time_value // 15
    elif time_unit == "hour":
        steps = (time_value * 60) // 15
    elif time_unit == "day":
        steps = (time_value * 24 * 60) // 15
    else:
        print("❌ Invalid time unit! Use minute/hour/day.")
        continue

    if steps <= 0:
        print("❌ Time value too low to form steps.")
        continue
    elif steps > len(recent_data):
        print(f"⚠️ Not enough data for {steps} steps. Using last {len(recent_data)} steps instead.")
        steps = len(recent_data)

    # Select last `steps` rows
    recent_steps = recent_data.tail(steps)

    # Drop unwanted columns (like "index" if it exists)
    X = recent_steps.drop(columns=["index"], errors="ignore")

    # Scale features
    X_scaled = scaler.transform(X)

    # Predict
    y_pred = model.predict(X_scaled)

    if np.any(y_pred == 1):
        print("⚠️ System will OVERHEAT in this interval!")
    else:
        print("✅ System will NOT overheat in this interval.")



🔁 Predict Overheat - Type 'exit' to quit
Enter time unit (minute/hour/day): minute
Enter number of minutes: 30




✅ System will NOT overheat in this interval.

🔁 Predict Overheat - Type 'exit' to quit
Enter time unit (minute/hour/day): hour
Enter number of hours: 6




✅ System will NOT overheat in this interval.

🔁 Predict Overheat - Type 'exit' to quit
Enter time unit (minute/hour/day): day
Enter number of days: 1




✅ System will NOT overheat in this interval.

🔁 Predict Overheat - Type 'exit' to quit
Enter time unit (minute/hour/day): day
Enter number of days: 2




✅ System will NOT overheat in this interval.

🔁 Predict Overheat - Type 'exit' to quit
Enter time unit (minute/hour/day): day
Enter number of days: 3




✅ System will NOT overheat in this interval.

🔁 Predict Overheat - Type 'exit' to quit
Enter time unit (minute/hour/day): day
Enter number of days: 4




✅ System will NOT overheat in this interval.

🔁 Predict Overheat - Type 'exit' to quit
Enter time unit (minute/hour/day): day
Enter number of days: 5




⚠️ System will OVERHEAT in this interval!

🔁 Predict Overheat - Type 'exit' to quit
Enter time unit (minute/hour/day): day
Enter number of days: 10




⚠️ System will OVERHEAT in this interval!

🔁 Predict Overheat - Type 'exit' to quit
Enter time unit (minute/hour/day): exit
👋 Exiting system.
