In [None]:
from __future__ import print_function
import os, json, numpy as np, pandas as pd, tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Print floats with three decimals whenever pandas renders a frame.
pd.set_option("display.float_format", "{:.3f}".format)

# Locations tried, in order, for the training CSV: two absolute local paths,
# the working directory, then the public GitHub copy.
train_candidates = [
    "/mnt/data/bof_train.csv",
    "/content/bof_train.csv",
    "bof_train.csv",
    "https://raw.githubusercontent.com/nichepah/ml-colab/master/bof_train.csv",
]

# Same lookup order for the test CSV.
test_candidates = [
    "/mnt/data/bof_test.csv",
    "/content/bof_test.csv",
    "bof_test.csv",
    "https://raw.githubusercontent.com/nichepah/ml-colab/master/bof_test.csv",
]

# Model input columns. The order matters: it fixes the column order of the
# feature matrices built from these names.
FEATURES = [
    "scrap",
    "silicon",
    "Mn",
    "C",
    "S",
    "P",
    "hot_metal_weight",
    "blow_duration",
    "lime",
    "iron_ore",
    "dolo",
    "sinter",
    "O2",
    "basicity",
    "FeO",
]

def _read_first_ok(cands):
    last_e = None
    for p in cands:
        try:
            return pd.read_csv(p, sep=",")
        except Exception as e:
            last_e = e
    raise RuntimeError(str(last_e))

def _force_numeric(df):
    """Coerce the feature columns (and ``temp``, when present) to numeric.

    Columns named in ``FEATURES`` that are absent from ``df`` are skipped.
    Values that cannot be parsed become NaN. The frame is modified in place
    and the same object is returned.
    """
    wanted = list(FEATURES)
    if "temp" in df.columns:
        wanted.append("temp")

    for col in wanted:
        if col not in df.columns:
            continue
        df[col] = pd.to_numeric(df[col], errors="coerce")
    return df

# Load the first readable training CSV and coerce feature/target columns to numbers.
bof_temp_dataframe = _force_numeric(_read_first_ok(train_candidates))

# A row is unusable if the target or any model input is missing.
bof_temp_dataframe = bof_temp_dataframe.dropna(subset=FEATURES + ["temp"])

# Keep only rows inside the accepted operating window.
_in_window = (
    (bof_temp_dataframe["hot_metal_weight"] > 90)
    & (bof_temp_dataframe["temp"] > 1200)
    & (bof_temp_dataframe["blow_duration"] > 10)
    & (bof_temp_dataframe["blow_duration"] < 50)
)
bof_temp_dataframe = bof_temp_dataframe[_in_window]

# Shuffle the rows (unseeded, so the order differs between runs) and renumber
# them 0..n-1.
_shuffled_index = np.random.permutation(bof_temp_dataframe.index)
bof_temp_dataframe = bof_temp_dataframe.reindex(_shuffled_index).reset_index(drop=True)

def preprocess_features(df):
    """Build the model input frame from ``df``.

    Returns a new frame holding the ``FEATURES`` columns in that order, with
    ``hot_metal_weight`` multiplied by 1000. ``df`` itself is not modified.
    """
    scaled_weight = df["hot_metal_weight"] * 1000.0
    return df[FEATURES].assign(hot_metal_weight=scaled_weight)

def preprocess_targets(df):
    """Return an independent single-column frame holding the ``temp`` target."""
    target_only = df.loc[:, ["temp"]]
    return target_only.copy()

# --- Train / validation split ------------------------------------------------
# Nominal split sizes, used as-is when enough cleaned rows are available.
TRAIN_ROWS = 1000
VAL_ROWS = 320

n_rows = len(bof_temp_dataframe)
if n_rows >= TRAIN_ROWS + VAL_ROWS:
    n_train, n_val = TRAIN_ROWS, VAL_ROWS
else:
    # With fewer rows, head(1000) and tail(320) would overlap and leak
    # training rows into validation. Shrink both parts, keeping the same
    # proportion, so the two slices stay disjoint.
    n_train = n_rows * TRAIN_ROWS // (TRAIN_ROWS + VAL_ROWS)
    n_val = n_rows - n_train

if n_train == 0 or n_val == 0:
    raise ValueError(
        "not enough cleaned rows ({}) to build a train/validation split".format(n_rows)
    )

# First n_train rows for training, last n_val rows for validation.
_train_frame = bof_temp_dataframe.iloc[:n_train]
_val_frame = bof_temp_dataframe.iloc[n_rows - n_val:]

training_examples = preprocess_features(_train_frame)
training_targets = preprocess_targets(_train_frame)
validation_examples = preprocess_features(_val_frame)
validation_targets = preprocess_targets(_val_frame)

X_train = training_examples.values.astype(np.float32)
y_train = training_targets["temp"].values.astype(np.float32)
X_val = validation_examples.values.astype(np.float32)
y_val = validation_targets["temp"].values.astype(np.float32)

# --- Standardization ---------------------------------------------------------
# Statistics come from the training rows only and are reused for every other
# data set, so no validation/test information enters the scaling.
mu = X_train.mean(axis=0)
sigma = X_train.std(axis=0)
# Near-constant columns would divide by ~0; leave them unscaled instead.
sigma = np.where(sigma < 1e-6, 1.0, sigma)

X_train_s = (X_train - mu) / sigma
X_val_s = (X_val - mu) / sigma

# Take the input width from the standardized training matrix so the network
# always matches the feature columns instead of a hand-maintained constant.
n_features = X_train_s.shape[1]

# Small fully connected regressor: three ReLU hidden layers and a single
# linear output unit.
model = keras.Sequential([
    layers.Input(shape=(n_features,)),
    layers.Dense(15, activation="relu"),
    layers.Dense(10, activation="relu"),
    layers.Dense(5, activation="relu"),
    layers.Dense(1)
])

# Plain SGD with gradient-norm clipping; the target is passed unscaled.
opt = keras.optimizers.SGD(learning_rate=0.01, clipnorm=5.0)
model.compile(optimizer=opt, loss="mse", metrics=[keras.metrics.RootMeanSquaredError()])
model.fit(X_train_s, y_train, validation_data=(X_val_s, y_val), epochs=200, batch_size=20, verbose=1)

# Load the test CSV, coerce its columns to numbers, and drop rows with any
# missing model input.
bof_temp_test_data = _force_numeric(_read_first_ok(test_candidates))
bof_temp_test_data = bof_temp_test_data.dropna(subset=FEATURES)

# Apply the same feature transform and the training-set standardization.
test_examples = preprocess_features(bof_temp_test_data)
X_test = test_examples.values.astype(np.float32)
X_test_s = (X_test - mu) / sigma
pred = model.predict(X_test_s, verbose=0).ravel()

# Predictions are stored next to the inputs in a copy of the test frame.
out_df = bof_temp_test_data.copy()
out_df["p_temp"] = pred

# RMSE is only computed when the test file carries a "temp" column, and only
# over rows where both the true and predicted values are finite.
rmse = None
if "temp" in out_df.columns:
    y_true = pd.to_numeric(out_df["temp"], errors="coerce").values.astype(np.float32)
    m = np.isfinite(y_true) & np.isfinite(pred)
    if m.any():
        _residual = pred[m] - y_true[m]
        rmse = float(np.sqrt(np.mean(np.square(_residual))))

# Unit label for each model input, keyed by feature name.
UNITS = {
    "scrap": "tonne",
    "silicon": "%",
    "Mn": "%",
    "C": "%",
    "S": "%",
    "P": "%",
    "hot_metal_weight": "tonne",
    "blow_duration": "min",
    "lime": "kg",
    "iron_ore": "kg",
    "dolo": "kg",
    "sinter": "kg",
    "O2": "Nm3",
    "basicity": "ratio",
    "FeO": "%",
}

# Observed min/max of every feature over the cleaned training frame (before
# the feature transform is applied).
df_clean_ui = bof_temp_dataframe.copy()
ranges = {}
for f in FEATURES:
    _column = df_clean_ui[f]
    ranges[f] = dict(min=float(_column.min()), max=float(_column.max()))

# Persist the trained model and the test-set predictions.
os.makedirs("artifacts", exist_ok=True)
model.save("artifacts/bof_temp_model.keras")
out_df.to_csv("artifacts/pred.csv", index=False)

# evaluate() returns the loss followed by the compiled metrics; index 1 is the
# first metric value.
_val_scores = model.evaluate(X_val_s, y_val, verbose=0)

_transforms = {
    "hot_metal_weight": "UI in tonne; internally multiplied by 1000 (tonne→kg)",
    "standardization": "Inputs standardized using training mean/std (z-score) for stable inference",
}
_standardization_params = {
    "mean": list(map(float, mu.tolist())),
    "std": list(map(float, sigma.tolist())),
}
_training_summary = {
    "tf_version": tf.__version__,
    "rows_cleaned": int(len(bof_temp_dataframe)),
    "train_rows": int(len(training_examples)),
    "val_rows": int(len(validation_examples)),
    "val_rmse_last_epoch": float(_val_scores[1]),
}

# Everything a consumer needs to feed the saved model: feature order, units,
# observed ranges, input transforms, and the standardization constants.
schema = {
    "features_ordered": FEATURES,
    "units": UNITS,
    "ranges": ranges,
    "transforms": _transforms,
    "standardization_params": _standardization_params,
    "target": {"name": "temp", "unit": "C"},
    "training": _training_summary,
}

with open("artifacts/feature_schema.json", "w") as fp:
    json.dump(schema, fp, indent=2)

# Short run summary.
print("TF", tf.__version__)
print("X_train", X_train.shape, "y_train", y_train.shape)
print("X_val", X_val.shape, "y_val", y_val.shape)
print("pred[:5]", pred[:5])
print("test_rmse", rmse)
print("saved:", "artifacts/bof_temp_model.keras", "artifacts/feature_schema.json", "artifacts/pred.csv")


Epoch 1/200
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 10ms/step - loss: 2688099.7500 - root_mean_squared_error: 1639.4983 - val_loss: 2216762.7500 - val_root_mean_squared_error: 1488.8796
Epoch 2/200
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 1776857.2500 - root_mean_squared_error: 1328.1040 - val_loss: 250649.7031 - val_root_mean_squared_error: 500.6493
Epoch 3/200
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 90629.6875 - root_mean_squared_error: 297.6214 - val_loss: 179610.1250 - val_root_mean_squared_error: 423.8044
Epoch 4/200
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 18806.5000 - root_mean_squared_error: 135.1736 - val_loss: 99005.7266 - val_root_mean_squared_error: 314.6518
Epoch 5/200
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 11027.3242 - root_mean_squared_error: 104.6598 - val_loss: 77001.0156 - val_root_me