In [7]:
import sys
print(sys.path)


['/Users/allanzhang/Documents/GitHub/firecast_pipeline/examples', '/opt/anaconda3/lib/python312.zip', '/opt/anaconda3/lib/python3.12', '/opt/anaconda3/lib/python3.12/lib-dynload', '', '/opt/anaconda3/lib/python3.12/site-packages', '/opt/anaconda3/lib/python3.12/site-packages/aeosa', '/var/folders/v7/ppg9621s76ddb3zsdn52lt8m0000gn/T/tmpx0wtj1v3']


In [6]:
# ✅ Test paths
train_path = "example_data_train.xlsx"
test_path = "example_data_test.xlsx"

from regressorpipeline.train import train_fire_model, train_multiple_cnn_for_fire
from regressorpipeline.predict import (
    load_model_bundle,
    predict_fire_risk,
    predict_fire_risk_from_models,
)
from regressorpipeline.visualize import plot_fire_risk_surface_matplotlib
import pandas as pd
import numpy as np
import joblib
import torch 
import argparse
import pandas as pd
import numpy as np
import joblib
import torch
import statsmodels.api as sm
from sklearn.base import BaseEstimator


def load_model_bundle(model_path):
    bundle = joblib.load(model_path)
    models = bundle.get("models") or [bundle["model"]]
    scaler_X = bundle["scaler_X"]
    scaler_y = bundle["scaler_y"]
    feature_names = bundle.get("feature_names", None)
    return models, scaler_X, scaler_y, feature_names

def predict_fire_risk(model, scaler_X, scaler_y, input_path, feature_names=None, model_type=None):
    df = pd.read_excel(input_path, engine="openpyxl")
    X = np.log1p(df.select_dtypes(include=[np.number]))

    # Ensure feature alignment
    if feature_names is not None:
        X = X[feature_names]
    elif hasattr(scaler_X, "feature_names_in_"):
        X = X[scaler_X.feature_names_in_]

    X_scaled = scaler_X.transform(X)

    # PyTorch CNN
    if isinstance(model, torch.nn.Module) or model_type == "cnn":
        model.eval()
        with torch.no_grad():
            preds = model(torch.tensor(X_scaled, dtype=torch.float32).unsqueeze(1)).numpy()

    # statsmodels OLS
    elif isinstance(model, sm.regression.linear_model.RegressionResultsWrapper) or model_type == "ols":
        X_scaled = sm.add_constant(X_scaled, has_constant="add")
        preds = model.predict(X_scaled)

    # sklearn model
    else:
        preds = model.predict(X_scaled)

    preds = np.expm1(scaler_y.inverse_transform(preds.reshape(-1, 1))).ravel()
    return preds


def predict_fire_risk_from_models(models, scaler_X, scaler_y, input_path, feature_names=None, model_type=None):
    df = pd.read_excel(input_path, engine="openpyxl")
    X = np.log1p(df.select_dtypes(include=[np.number]))

    if feature_names is not None:
        X = X[feature_names]
    elif hasattr(scaler_X, "feature_names_in_"):
        X = X[scaler_X.feature_names_in_]

    X_scaled_raw = scaler_X.transform(X)
    preds_list = []

    for m in models:
        X_scaled = X_scaled_raw.copy()

        if isinstance(m, torch.nn.Module) or model_type == "cnn":
            m.eval()
            with torch.no_grad():
                pred = m(torch.tensor(X_scaled, dtype=torch.float32).unsqueeze(1)).numpy()
        elif isinstance(m, sm.regression.linear_model.RegressionResultsWrapper) or model_type == "ols":
            X_scaled = sm.add_constant(X_scaled, has_constant="add")
            pred = m.predict(X_scaled)
        else:
            pred = m.predict(X_scaled)

        preds_list.append(pred)

    avg_pred = np.mean(np.stack(preds_list, axis=0), axis=0)
    avg_pred = np.expm1(scaler_y.inverse_transform(avg_pred.reshape(-1, 1))).ravel()
    return avg_pred


# 🔧 1. Train and evaluate a single CNN model
model, metrics = train_fire_model("cnn", train_path, save=True)
print("Single CNN model metrics:", metrics)

# 🔧 2. Train CNN ensemble (3 runs) and save
models, metrics_list, ensemble_metrics = train_multiple_cnn_for_fire(
    train_path, n_runs=3, save=True
)
print("Ensemble metrics:", ensemble_metrics)

# 🔍 3. Load and test prediction (single CNN model)
models, scaler_X, scaler_y, feature_names = load_model_bundle("best_cnn_model.joblib")
preds = predict_fire_risk(
    models[0],
    scaler_X,
    scaler_y,
    test_path,
    feature_names=feature_names,
    model_type="cnn"
)
print("Single CNN Predictions:", preds[:5])

# 🔍 4. Load and test ensemble prediction
models, scaler_X, scaler_y, feature_names = load_model_bundle("cnn_ensemble.joblib")
avg_preds = predict_fire_risk_from_models(
    models,
    scaler_X,
    scaler_y,
    test_path,
    feature_names=feature_names,
    model_type="cnn"
)
print("Averaged Ensemble Predictions:", avg_preds[:5])

# 🔍 5. Load and test OLS model prediction
models, scaler_X, scaler_y, feature_names = load_model_bundle("best_ols_model.joblib")
ols_preds = predict_fire_risk(
    models[0],
    scaler_X,
    scaler_y,
    test_path,
    feature_names=feature_names,
    model_type="ols"
)
print("OLS Predictions:", ols_preds[:5])

# 📊 6. Generate CNN surface plot
df = pd.read_excel(test_path, engine="openpyxl")
X = np.log1p(df.select_dtypes(include="number"))
X = X[feature_names]  # Match training-time feature order
X_scaled_df = pd.DataFrame(
    scaler_X.transform(X),
    columns=feature_names
)

plot_fire_risk_surface_matplotlib(
    model=models[0],
    X_scaled_df=X_scaled_df,
    scaler_X=scaler_X,
    scaler_y=scaler_y,
    feat1_name="ThermalInertia",  # must match one of feature_names
    feat2_name="FuelLoadDensity",           # must match one of feature_names
    title="CNN Prediction Surface",
    save_path="examples/cnn_surface.html"
)
print("✅ Surface plot saved to examples/cnn_surface.html")


[I 2025-06-10 15:36:48,706] A new study created in memory with name: no-name-1dbdaf43-b20a-49ad-99aa-bdc093d190c2
[I 2025-06-10 15:36:48,797] Trial 0 finished with value: -0.7019086741775664 and parameters: {'num_filters1': 21, 'num_filters2': 53, 'fc1_size': 56, 'lr': 0.0016658762935723992}. Best is trial 0 with value: -0.7019086741775664.
[I 2025-06-10 15:36:48,830] Trial 1 finished with value: 5.872742568399288 and parameters: {'num_filters1': 24, 'num_filters2': 53, 'fc1_size': 39, 'lr': 0.0007672544663275102}. Best is trial 0 with value: -0.7019086741775664.
[I 2025-06-10 15:36:48,857] Trial 2 finished with value: -0.2162416408654405 and parameters: {'num_filters1': 25, 'num_filters2': 22, 'fc1_size': 92, 'lr': 0.0019361367232212546}. Best is trial 0 with value: -0.7019086741775664.
[I 2025-06-10 15:36:48,886] Trial 3 finished with value: -0.6072355671616028 and parameters: {'num_filters1': 8, 'num_filters2': 50, 'fc1_size': 46, 'lr': 0.00441475360968998}. Best is trial 0 with val


🔥 Model 'cnn' Evaluation:
R²: -0.2117
MAE: 0.1764
MSE: 0.0415
Single CNN model metrics: {'R²': -0.21168472251560355, 'MAE': 0.17638747, 'MSE': 0.041489262}


[I 2025-06-10 15:36:49,350] Trial 5 finished with value: 0.42671421287803324 and parameters: {'num_filters1': 22, 'num_filters2': 57, 'fc1_size': 78, 'lr': 0.006825170712019785}. Best is trial 1 with value: -0.265432044181232.
[I 2025-06-10 15:36:49,381] Trial 6 finished with value: 7.458017801676869 and parameters: {'num_filters1': 26, 'num_filters2': 45, 'fc1_size': 126, 'lr': 0.0002715345307540316}. Best is trial 1 with value: -0.265432044181232.
[I 2025-06-10 15:36:49,416] Trial 7 finished with value: 12.98674162656328 and parameters: {'num_filters1': 20, 'num_filters2': 63, 'fc1_size': 65, 'lr': 0.00013314443297515372}. Best is trial 1 with value: -0.265432044181232.
[I 2025-06-10 15:36:49,442] Trial 8 finished with value: 3.127709854947695 and parameters: {'num_filters1': 15, 'num_filters2': 27, 'fc1_size': 105, 'lr': 0.0007734264778643277}. Best is trial 1 with value: -0.265432044181232.
[I 2025-06-10 15:36:49,470] Trial 9 finished with value: 7.473788260133988 and parameters: {


🔥 CNN Ensemble Evaluation:
R²: 0.6607
MAE: 0.0968
MSE: 0.0116
Ensemble metrics: {'R²': 0.6607154768640405, 'MAE': 0.0968009505385254, 'MSE': 0.011617431574563783}
Single CNN Predictions: [4861.7197   4278.63       18.240555   94.019936  139.47878 ]
Averaged Ensemble Predictions: [4861.7197   4278.63       18.240555   94.019936  139.47878 ]
OLS Predictions: [23847.18709575  6598.14581948    24.10545527   101.12828635
    77.21530505]


TypeError: 'RegressionResultsWrapper' object is not callable

In [2]:
import joblib
bundle = joblib.load("best_cnn_model.joblib")
print(bundle.keys())
# Should include: model, scaler_X, scaler_y, feature_names
bundle.get("feature_names", None)  # Should return the list of feature names used during training

dict_keys(['model', 'scaler_X', 'scaler_y', 'feature_names'])


['T_ig',
 'ThermalInertia',
 'AverageEffectiveHeatofCombustion',
 'EffectiveHeatofGasification',
 'FuelLoadDensity']

In [3]:
out = load_model_bundle("best_cnn_model.joblib")
out

([CNNModel(
    (conv1): Conv1d(1, 31, kernel_size=(3,), stride=(1,), padding=(1,))
    (conv2): Conv1d(31, 41, kernel_size=(3,), stride=(1,), padding=(1,))
    (relu): ReLU()
    (fc2): Linear(in_features=53, out_features=1, bias=True)
    (fc1): Linear(in_features=205, out_features=53, bias=True)
  )],
 MinMaxScaler(),
 MinMaxScaler())