In [27]:
import os
from sqlalchemy import create_engine, types
import pandas as pd
import datetime

In [28]:
# 1. Connect to the database.
# The DATABASE_URL environment variable wins when it is set; otherwise the
# localhost URL below is used.
# NOTE(review): the fallback URL embeds a username and password — confirm these
# are throwaway local-development credentials before sharing this notebook.
db_url = os.environ.get(
    "DATABASE_URL",
    "postgresql://wind_user:windy@localhost:5432/wind_db",
)
engine = create_engine(db_url)

In [29]:
# ──────────────────────────────────────────────────────────────────────────────
# 02. Load raw tables
# ──────────────────────────────────────────────────────────────────────────────
# Change "dgr_data" / "wtg_model" below if your tables are named differently.
ops_df = pd.read_sql_table(table_name="dgr_data", con=engine)
model_df = pd.read_sql_table(table_name="wtg_model", con=engine)


In [30]:
# Merge in capacity
# Every operations row should pick up at most one capacity value.
# validate="many_to_one" makes pandas raise MergeError when turbine_id is
# duplicated in model_df, instead of silently duplicating operations rows
# (duplicated rows would inflate any sum later computed over df).
df = ops_df.merge(
    model_df[["turbine_id", "capacity"]],
    on="turbine_id",
    how="left",
    validate="many_to_one",
)

In [31]:
# Columns holding time-like values (converted to float hours further down)
time_cols = [
    "operating_hrs",
    "lull_hrs",
    "fault_time",
    "pm_shut_down",
    "int_grid_down",
    "ext_grid_down",
]

In [37]:
# A helper to turn anything "time-like" into a float number of hours
def to_hours(val) -> float:
    """Convert a time-like value into a float number of hours.

    Inputs are tried in this order:

    * missing values (``None`` / ``NaN`` / ``NaT``) and ``""`` -> ``0.0``
    * ``int`` / ``float`` -> returned unchanged as ``float``
    * ``datetime.timedelta`` (``pandas.Timedelta`` is a subclass) -> total
      duration in hours, including any whole days
    * ``datetime.time`` -> hours elapsed since midnight (microseconds ignored)
    * clock strings ``"HH:MM:SS"`` / ``"HH:MM"``, optionally with AM/PM
    * duration strings ``"H:MM"`` / ``"H:MM:SS"`` whose hour field is not a
      valid clock hour, e.g. ``"24:00:00"`` or ``"36:15"``
    * plain numeric strings such as ``"3.5"``

    Args:
        val: A scalar of any of the kinds listed above.

    Returns:
        The value expressed in hours. Anything that matches none of the forms
        above yields ``0.0`` (best-effort: bad cells never raise).
    """
    if pd.isna(val) or val == "":
        return 0.0
    if isinstance(val, (int, float)):
        return float(val)
    # Durations: use the full length. str(timedelta) becomes "1 day, 0:00:00"
    # from 24h upwards, which none of the string formats below can parse.
    if isinstance(val, datetime.timedelta):
        return val.total_seconds() / 3600
    # If it's a datetime.time object:
    if isinstance(val, datetime.time):
        return val.hour + val.minute / 60 + val.second / 3600

    s = str(val).strip()
    # Try parsing as "HH:MM:SS AM/PM" or "HH:MM:SS" (and the variants without seconds)
    for fmt in ("%I:%M:%S %p", "%H:%M:%S", "%I:%M %p", "%H:%M"):
        try:
            dt = datetime.datetime.strptime(s, fmt)
            return dt.hour + dt.minute / 60 + dt.second / 3600
        except ValueError:
            continue

    # strptime only accepts clock hours 0-23, so a duration such as "24:00:00"
    # (a full day) lands here. Parse it as H:MM[:SS] with an unbounded hour field.
    fields = s.split(":")
    if len(fields) in (2, 3):
        try:
            hours, minutes, *rest = (int(field) for field in fields)
        except ValueError:
            pass  # not purely integer fields; fall through to the numeric fallback
        else:
            seconds = rest[0] if rest else 0
            if hours >= 0 and 0 <= minutes < 60 and 0 <= seconds < 60:
                return hours + minutes / 60 + seconds / 3600

    # Fallback: if it's just a number string
    try:
        return float(s)
    except ValueError:
        return 0.0

In [38]:
# Replace every time-like column with its float-hours equivalent
# (element-wise conversion through to_hours; df is modified in place).
for col in time_cols:
    df[col] = df[col].map(to_hours)


In [39]:
# Total downtime in hours: the row-wise sum of the four downtime columns,
# added left to right.
df["downtime_hrs"] = (
    df["fault_time"]
    .add(df["pm_shut_down"])
    .add(df["int_grid_down"])
    .add(df["ext_grid_down"])
)

In [40]:

# Availability = operating hours / (operating hours + downtime hours).
# Rows where both terms are 0 give 0/0, which pandas evaluates to NaN.
df["availability"] = df["operating_hrs"] / (
    df["operating_hrs"] + df["downtime_hrs"]
)

# PLF % = generated units / (capacity * 24 h) * 100.
# A zero capacity would make the division return +/-inf, and that value would
# be written to the database; map a zero denominator to NaN so the result is
# stored as missing instead.
# NOTE(review): the 24.0 assumes each row covers exactly one day and that
# gen_units and capacity use matching energy/power units — confirm.
df["plf_percent"] = (
    df["gen_units"] / (df["capacity"] * 24.0).replace(0, float("nan")) * 100
)

In [41]:
# Compute turbine-level MTTR & MTBF
# One row per turbine_id: summed operating hours, summed downtime hours, and
# the number of rows that recorded any downtime.
# NOTE(review): fault_events counts rows with downtime_hrs > 0, and downtime_hrs
# also contains pm_shut_down and grid-down time — confirm that "any downtime"
# is the intended definition of a fault event.
group = df.groupby("turbine_id").agg(
    total_operating_hrs=("operating_hrs", "sum"),
    total_downtime_hrs=("downtime_hrs", "sum"),
    fault_events=("downtime_hrs", lambda s: (s > 0).sum()),
).reset_index()

# Turbines with zero events have no defined MTTR/MTBF. Mask the zero counts
# with float NaN: replacing them with pd.NA turns the integer column into
# object dtype, and the resulting mttr/mtbf columns would be object too.
fault_event_counts = group["fault_events"].where(group["fault_events"] != 0)

group["mttr"] = group["total_downtime_hrs"] / fault_event_counts
group["mtbf"] = group["total_operating_hrs"] / fault_event_counts


In [42]:
# Attach each turbine's MTTR/MTBF to every row of df for that turbine
features = pd.merge(
    left=df,
    right=group[["turbine_id", "mttr", "mtbf"]],
    how="left",
    on="turbine_id",
)

In [43]:
# Persist the feature table to the database behind `engine`.
# if_exists="replace" drops any existing "wtg_features" table and recreates it.
features.to_sql(
    name="wtg_features",
    con=engine,
    if_exists="replace",
    index=False,
    # Give the derived metric columns an explicit SQL FLOAT type
    dtype={
        metric: types.Float
        for metric in ("downtime_hrs", "availability", "plf_percent", "mttr", "mtbf")
    },
)

print("✅ Re-created 'wtg_features' with corrected downtime. Columns:")
print(list(features.columns))

✅ Re-created 'wtg_features' with corrected downtime. Columns:
['dgr_id_no', 'log_date', 'turbine_id', 'gen_units', 'operating_hrs', 'avg_wind_speed', 'lull_hrs', 'fault_time', 'pm_shut_down', 'int_grid_down', 'ext_grid_down', 'nor_1', 'nor_2', 'remarks', 'capacity', 'operating_hrs_hrs', 'lull_hrs_hrs', 'fault_time_hrs', 'pm_shut_down_hrs', 'int_grid_down_hrs', 'ext_grid_down_hrs', 'downtime_hrs', 'availability', 'plf_percent', 'mttr', 'mtbf']
