In [4]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# -------------------------------------------------------
# 1. Load historical machine data
# -------------------------------------------------------
DATA_PATH = "Transformer_Data_Merged.csv"

# -------------------------------------------------------
# 2. Basic data cleaning
# -------------------------------------------------------
# Read the CSV and fill every missing value with 0 in one step.
df = pd.read_csv(DATA_PATH).fillna(0)

# These columns are used as divisors in the feature-engineering section,
# so a 0 is swapped for 1 to avoid divide-by-zero.
for _divisor_col in ("Scheduled_Hours", "MTBF_Hours", "MTTR_Hours"):
    df[_divisor_col] = df[_divisor_col].replace(0, 1)

# -------------------------------------------------------
# 3. Feature engineering (NEW RL-relevant features)
# -------------------------------------------------------

# Reliability indicators: reciprocals of MTBF and MTTR.
df["Failure_Rate"] = 1.0 / df["MTBF_Hours"]
df["Repair_Efficiency"] = 1.0 / df["MTTR_Hours"]

# Scheduled hours feed the next three features, so bind the column once.
scheduled_hours = df["Scheduled_Hours"]

# Operational stress: utilization weighted by scheduled hours.
df["Utilization_Stress"] = df["Utilization_Rate"] * scheduled_hours

# Downtime severity: downtime as a fraction of scheduled hours.
df["Downtime_Ratio"] = df["Downtime_Duration"] / scheduled_hours

# Cost intensity. The "+ 1" in each denominator keeps the ratio finite
# when the underlying quantity is 0.
df["Maintenance_Cost_Rate"] = df["Maintenance_Parts_Cost"] / (scheduled_hours + 1)

output_plus_one = df["Output_Quantity"] + 1
df["Energy_Cost_Rate"] = df["Energy_Consumption_kWh"] / output_plus_one

# Production quality degradation: rejects per (output + 1).
df["Reject_Rate"] = df["Reject_Quantity"] / output_plus_one

# Health index (compact summary signal): weighted sum of uptime and the
# complements of the failure, downtime and reject indicators.
# Weights are 0.35 / 0.25 / 0.20 / 0.20.
uptime_term = 0.35 * df["Uptime_Percentage"]
failure_term = 0.25 * (1 - df["Failure_Rate"])
downtime_term = 0.20 * (1 - df["Downtime_Ratio"])
reject_term = 0.20 * (1 - df["Reject_Rate"])
df["Health_Index"] = uptime_term + failure_term + downtime_term + reject_term

# -------------------------------------------------------
# 4. Define RL state features
# -------------------------------------------------------
# Columns that make up one state vector, in the order they appear in
# each row of state_array.
state_features = [
    "Health_Index",
    "Failure_Rate",
    "Repair_Efficiency",
    "Utilization_Stress",
    "Downtime_Ratio",
    "Maintenance_Cost_Rate",
    "Energy_Cost_Rate",
    "Reject_Rate",
    "Number_of_Breakdowns"
]

# Work on a copy so the scaler input is decoupled from df.
state_df = df[state_features].copy()

# -------------------------------------------------------
# 5. Normalize states (important for stable learning)
# -------------------------------------------------------
# MinMaxScaler rescales each column independently (to [0, 1] with its
# default feature_range). The fitted scaler is kept so the same scaling
# can be applied to new observations later.
scaler = MinMaxScaler()
# Fix: the original call was missing its closing parenthesis, which made
# the whole cell a SyntaxError.
state_array = scaler.fit_transform(state_df)

# -------------------------------------------------------
# 6. Final output
# -------------------------------------------------------
# Report the dimensions of the state matrix and show its first row as an
# example state vector.
state_shape = state_array.shape
first_state = state_array[0]
print("State vector shape:", state_shape)
print("Sample state vector (S_t):\n", first_state)

# Ready-to-use RL states
# state_array[t] → input to Q-learning / SARSA


[[0.94408808 0.01335515 0.42948975 ... 0.04141286 0.02030558 0.25      ]
 [0.77525063 0.0331225  0.22498554 ... 0.04331836 0.05013536 0.375     ]
 [0.70555561 0.05318945 0.42223981 ... 0.03536911 0.04539247 0.5       ]
 ...
 [0.89469432 0.08097612 0.75708502 ... 0.0233932  0.04044751 0.875     ]
 [0.93029    0.0136832  0.27803803 ... 0.04871743 0.03600356 0.25      ]
 [0.88036312 0.00147986 0.45466075 ... 0.02640509 0.02835466 0.        ]]
State vector shape: (2002, 9)
Sample state vector (S_t):
 [0.94408808 0.01335515 0.42948975 0.93250551 0.06749449 0.53643725
 0.04141286 0.02030558 0.25      ]
