In [17]:
import numpy as np
import dice_ml
from dice_ml import Dice
import pandas as pd
import xgboost as xgb
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
import os

In [None]:
# ✅ 1. Load the saved model
# The pickle file is deserialized into `model`, which later cells use for predictions.
# NOTE(review): pickle.load can execute arbitrary code contained in the file —
# only load model files that come from a trusted source.
model_path = "C:/Users/hangang/Desktop/best_xgb_model_19_features.pkl"
with open(model_path, mode='rb') as model_file:
    model = pickle.load(model_file)

In [None]:
# ✅ 2. Load data
# Read the full CSV into `data`; every later cell selects its columns from this frame.
data_path = "C:/Users/hangang/Desktop/sci/02. LC-OCD for algal bloom/1_raw data/01_data_full.csv"
data = pd.read_csv(filepath_or_buffer=data_path, encoding='utf-8')

In [None]:
# ✅ 3. Variables used when training the model
# One entry per line so additions/removals show up as single-line diffs.
# NOTE(review): presumably the order must match the order used at training time — confirm.
shap_top_features = [
    'TN',
    'NO3-N',
    'BOD',
    'BODTOC ratio',
    'Molecularity',
    'SR',
    'TOC',
    'WT',
    'DO',
    'SS',
    'HS-N',
    'NH3-N',
    'HS',
    'COD',
    'PO4-P',
    'pH',
    'S275-295',
    'Aromaticity',
    'EC',
]

In [None]:
# ✅ 4. Variables available in the data
# Keep only the listed features that exist as columns in `data` (order preserved).
available_features = [feature for feature in shap_top_features if feature in data.columns]

# Report any listed feature that is absent instead of dropping it silently: a reduced
# feature set no longer matches the list above, and the mismatch would otherwise only
# show up later as per-row errors in the counterfactual loop.
missing_features = [feature for feature in shap_top_features if feature not in data.columns]
if missing_features:
    print(f"⚠️ features missing from data (excluded): {missing_features}")

X = data[available_features]  # feature matrix
y = data['Chl-a']             # outcome column

In [None]:
# ✅ 5. Set up the DiCE explainer
# DiCE receives the feature columns together with the outcome column in one frame.
full_data = data[[*available_features, 'Chl-a']]

# Data interface: every available feature is declared continuous; 'Chl-a' is the outcome.
d = dice_ml.Data(
    dataframe=full_data,
    continuous_features=available_features,
    outcome_name='Chl-a',
)

# Model interface: wrap the loaded model as a regressor using the "sklearn" backend.
m = dice_ml.Model(
    model=model,
    backend="sklearn",
    model_type='regressor',
)

# Explainer used by the counterfactual-generation cell.
exp = Dice(d, m)

In [None]:
# ✅ 6. Generating Counterfactual Scenarios for Chl-a > 12.2 cases
# For every row whose 'Chl-a' value in the data exceeds `desired_value`, request
# counterfactuals from DiCE with desired_range [0, desired_value]. Each entry appended to
# `cf_results` is one frame: the original row (with the model's prediction written into
# 'Chl-a') followed by its counterfactual rows, all tagged with `original_index`.
cf_results = []

desired_value = 12.2
high_chla_data = full_data[full_data['Chl-a'] > desired_value].copy()
high_chla_X = high_chla_data[available_features]

for position, idx in enumerate(high_chla_X.index):
    # Slice a one-row DataFrame rather than using iterrows() + to_frame().T:
    # iterrows() yields each row as a Series, which forces all columns into a single
    # common dtype (object when column dtypes are mixed). iloc[[position]] keeps each
    # column's dtype and still carries the original index label `idx`.
    query_instance = high_chla_X.iloc[[position]]
    try:
        pred = model.predict(query_instance)[0]

        # Copy of the original row with the model's prediction as the outcome value.
        query_instance_with_pred = query_instance.copy()
        query_instance_with_pred['Chl-a'] = pred

        cf = exp.generate_counterfactuals(query_instance, total_CFs=20, desired_range=[0, desired_value])
        cf_df = cf.cf_examples_list[0].final_cfs_df

        # Row 0 of each group is the original instance; the remaining rows are counterfactuals.
        combined = pd.concat([query_instance_with_pred, cf_df], ignore_index=True)
        combined['original_index'] = idx
        cf_results.append(combined)

    except Exception as e:
        # Best-effort: report the failing row and move on to the next one.
        print(f"⚠️ error (index {idx}): {e}")

In [None]:
# ✅ 7. Save results
# Concatenate the per-row result frames and write them to a single CSV file.
if cf_results:
    all_cf_df = pd.concat(cf_results, ignore_index=True)
    save_path = "C:/Users/hangang/Desktop/6_Counterfactual explanation/counterfactual_results_all.csv"
    # to_csv does not create missing folders, so make sure the output directory exists
    # first; otherwise the results of the (slow) loop above would be lost to an OSError.
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    all_cf_df.to_csv(save_path, index=False, encoding='utf-8-sig')
    print(f"✅ Finish the saving All Counterfactual results: {save_path}")
else:
    # Make the empty case visible instead of finishing without any output.
    print("⚠️ No counterfactual results were generated; nothing was saved.")
