In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import rasterio
import os
import sys
import time
import xarray as xr
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

from Water_Blance_Model import mYWBMnlS, abcdnlS, DWBMnlS, abcdnlS_RE, DWBMnlS_RE, mYWBMnlS_RE
from Rewrite_Func import nash_sutcliffe_efficiency, relative_error, kling_gupta_efficiency
from numba import float64, njit
from numba.experimental import jitclass
from netCDF4 import Dataset
from scipy.spatial.distance import mahalanobis
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import pairwise_distances
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.svm import SVR, SVC
from sklearn.multioutput import MultiOutputRegressor
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from xgboost import XGBRegressor, XGBClassifier
from scipy.ndimage import median_filter
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm.notebook import trange

# 定义流域信息

In [2]:
# Load the basin catalogue and expose the columns used throughout the notebook:
# station number plus the first/last year of the calibration and validation periods.
basin_info = pd.read_excel('../../Data/Basin_Selection/All_Selected_Basins.xlsx')
basin_list, cali_start_list, cali_end_list, vali_start_list, vali_end_list = (
    basin_info[column]
    for column in ('stat_num', 'cali_start', 'cali_end', 'vali_start', 'vali_end')
)

# 定义数据读取函数

In [3]:
# 集总式模型数据读取
def get_data_lumped(basin, basin_idx):
    """Load the forcing/runoff series of one basin, split into calibration and validation.

    Parameters
    ----------
    basin : station identifier used to build the ``NHC_{basin}.txt`` file name.
    basin_idx : position of the basin in the module-level ``cali_*_list`` /
        ``vali_*_list`` series, which hold the first and last year of each period.

    Returns
    -------
    (x_cali, y_cali, x_vali, y_vali) : NumPy arrays. The ``x`` arrays hold the
        columns PRE_CRU, TMP_CRU, PET_CRU (in that order); the ``y`` arrays hold RUN.
    """
    forcing_columns = ['PRE_CRU', 'TMP_CRU', 'PET_CRU']
    series = pd.read_csv(
        f"../../../2025_03_Hydrological_Models/Data/New_Hydro_Climatic/NHC_{basin}.txt",
        sep='\t',
        header=0,
        index_col='Time',
        parse_dates=['Time'],
    )

    # Each period runs from 1 January of its first year to 31 December of its last year.
    cali_window = slice(
        pd.to_datetime(f"{str(cali_start_list[basin_idx])}-01-01"),
        pd.to_datetime(f"{str(cali_end_list[basin_idx])}-12-31"),
    )
    vali_window = slice(
        pd.to_datetime(f"{str(vali_start_list[basin_idx])}-01-01"),
        pd.to_datetime(f"{str(vali_end_list[basin_idx])}-12-31"),
    )

    calibration = series.loc[cali_window]
    validation = series.loc[vali_window]

    return (
        calibration[forcing_columns].to_numpy(),
        calibration['RUN'].to_numpy(),
        validation[forcing_columns].to_numpy(),
        validation['RUN'].to_numpy(),
    )

# 获取流域属性

In [4]:
# Basin attribute table, indexed by the `stat_num` column.
Basin_Properties = pd.read_csv(
    "../../Data/Properties/Basin_Properties.txt",
    sep='\t',
    header=0,
    index_col='stat_num',
)

# 获取率定参数

In [5]:
# Best calibrated parameter sets of each model, one row per basin (indexed by `stat_num`).
params_mYWBM = pd.read_csv(
    "../../Data/Params/03_mYWBM_Best_Params_CF.txt",
    sep='\t', header=0, index_col='stat_num',
)
params_abcd = pd.read_csv(
    "../../Data/Params/03_abcd_Best_Params_CF.txt",
    sep='\t', header=0, index_col='stat_num',
)
params_DWBM = pd.read_csv(
    "../../Data/Params/03_DWBM_Best_Params_CF.txt",
    sep='\t', header=0, index_col='stat_num',
)
params_GmYWBM = pd.read_csv(
    "../../Data/Params/03_GmYWBM_Best_Params_CF.txt",
    sep='\t', header=0, index_col='stat_num',
)

# 参数移植方法

## 地理临近法（空间距离相似）

In [6]:
def get_params_by_SP_AM(basin_properties, params, N, lon, lat):
    """Spatial-proximity transplant: arithmetic mean of the N nearest donors.

    Parameters
    ----------
    basin_properties : np.ndarray
        Donor attributes; only column 0 (longitude) and column 1 (latitude) are read.
    params : np.ndarray
        Donor parameters, shape (n_basins, n_params), row-aligned with
        ``basin_properties``.
    N : int
        Number of donor basins to average.
    lon, lat : float
        Centroid longitude / latitude of the target basin.

    Returns
    -------
    np.ndarray
        Mean parameter vector of the N nearest donors. If the closest donor lies
        within 1e-5 of the target it is treated as the target itself and its
        parameters are returned unchanged.
    """
    # Straight-line distance in longitude/latitude space (no geodesic correction).
    dist_to_target = np.sqrt(
        (basin_properties[:, 0] - lon) ** 2 + (basin_properties[:, 1] - lat) ** 2
    )
    ranking = np.argsort(dist_to_target)

    closest = ranking[0]
    if dist_to_target[closest] <= 1e-5:
        return params[closest]

    return params[ranking[:N]].mean(axis=0)

def get_params_by_SP_IDW(basin_properties, params, N, lon, lat):
    """Spatial-proximity transplant: inverse-distance-weighted mean of the N nearest donors.

    Parameters
    ----------
    basin_properties : np.ndarray
        Donor attributes; only column 0 (longitude) and column 1 (latitude) are read.
    params : np.ndarray
        Donor parameters, shape (n_basins, n_params), row-aligned with
        ``basin_properties``.
    N : int
        Number of donor basins to combine.
    lon, lat : float
        Centroid longitude / latitude of the target basin.

    Returns
    -------
    np.ndarray
        Weighted mean of the N nearest donors' parameters, with weights
        proportional to 1 / distance. If the closest donor lies within 1e-5 of
        the target it is treated as the target itself and its parameters are
        returned unchanged.
    """
    # Straight-line distance in longitude/latitude space (no geodesic correction).
    separation = np.sqrt(
        (basin_properties[:, 0] - lon) ** 2 + (basin_properties[:, 1] - lat) ** 2
    )
    ranking = np.argsort(separation)

    if separation[ranking[0]] <= 1e-5:
        return params[ranking[0]]

    donors = ranking[:N]
    # The small offset guards against division by zero.
    inverse_distance = 1 / (separation[donors] + 1e-10)
    inverse_distance = inverse_distance / inverse_distance.sum()

    return np.sum(inverse_distance[:, np.newaxis] * params[donors], axis=0)

## 物理属性相似

In [7]:
def calculate_similarity(target_properties, basin_properties, method='mahalanobis'):
    """Rank candidate basins by their distance to a target basin.

    Parameters
    ----------
    target_properties : array-like
        Attributes of the target basin, either 1-D ``(n_features,)`` or 2-D
        ``(1, n_features)``; it is flattened internally.
    basin_properties : array-like
        Attributes of the candidate basins, shape ``(n_basins, n_features)``
        (NumPy array or pandas DataFrame).
    method : {'mahalanobis', 'cosine'}
        Distance used for the ranking.

    Returns
    -------
    np.ndarray
        Candidate row indices sorted from most similar (smallest distance) to
        least similar.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names.
    """
    # Converting up front lets DataFrames be handled row-wise and lets the
    # target be given as either a row vector or a (1, n_features) matrix.
    candidates = np.asarray(basin_properties, dtype=float)
    target = np.asarray(target_properties, dtype=float).ravel()

    if method == 'mahalanobis':
        # np.cov squeezes a single-feature covariance to a 0-d array; restore 2-D.
        covariance = np.atleast_2d(np.cov(candidates.T))
        try:
            inverse_cov = np.linalg.inv(covariance)
        except np.linalg.LinAlgError:
            # Singular covariance (e.g. collinear attributes): use the pseudo-inverse.
            inverse_cov = np.linalg.pinv(covariance)

        offsets = candidates - target
        # Row-wise quadratic form offsets[i] @ inverse_cov @ offsets[i].
        squared = np.einsum('ij,jk,ik->i', offsets, inverse_cov, offsets)
        # Clamp tiny negative values caused by round-off before the square root.
        distances = np.sqrt(np.maximum(squared, 0.0))

    elif method == 'cosine':
        dots = candidates @ target
        norms = np.linalg.norm(candidates, axis=1) * np.linalg.norm(target)
        # A zero-length vector gets similarity 0, i.e. cosine distance 1.
        similarity = np.divide(dots, norms, out=np.zeros_like(dots), where=norms > 0)
        distances = np.clip(1.0 - similarity, 0.0, 2.0)

    else:
        raise ValueError(
            f"Unknown similarity method {method!r}; expected 'mahalanobis' or 'cosine'."
        )

    # Ascending order: smallest distance first.
    return np.argsort(distances)

def get_params_by_PS(basin_properties, params, target_properties, N=5, method='mahalanobis'):
    """Physical-similarity transplant: mean parameters of the N most similar donors.

    Donor and target attributes are standardised with statistics fitted on the
    donors, projected onto the principal components that retain 95% of the
    donor variance, and ranked with ``calculate_similarity``.

    Parameters
    ----------
    basin_properties : array-like
        Donor attributes, shape (n_basins, n_features); NumPy array or DataFrame.
    params : array-like
        Donor parameters, shape (n_basins, n_params), row-aligned with
        ``basin_properties``; NumPy array or DataFrame.
    target_properties : array-like
        Attributes of the target basin, (n_features,) or (1, n_features);
        NumPy array or pandas Series.
    N : int
        Number of most-similar donors to average.
    method : {'mahalanobis', 'cosine'}
        Distance passed to ``calculate_similarity``.

    Returns
    -------
    np.ndarray
        Arithmetic mean of the parameters of the N most similar donors.
    """
    # Coerce to NumPy so positional row indexing works for DataFrame inputs and
    # a 1-D Series target is accepted by the scaler.
    donor_attributes = np.asarray(basin_properties)
    donor_params = np.asarray(params)
    target_attributes = np.asarray(target_properties).reshape(1, -1)

    # Standardise using donor statistics only.
    scaler = StandardScaler()
    donors_scaled = scaler.fit_transform(donor_attributes)
    target_scaled = scaler.transform(target_attributes)

    # Keep the principal components that explain 95% of the donor variance.
    pca = PCA(n_components=0.95)
    donors_reduced = pca.fit_transform(donors_scaled)
    target_reduced = pca.transform(target_scaled)

    # Pass the target as a 1-D row so both distance methods receive a vector.
    ranking = calculate_similarity(target_reduced[0], donors_reduced, method)

    return np.mean(donor_params[ranking[:N]], axis=0)

In [8]:
# basin_properties = np.random.rand(100, 19)  # 示例数据
# params = np.random.rand(100, 5)  # 假设每个流域有5个参数
# target_properties = np.random.rand(1, 19)  # 目标流域的19个属性

# # 使用流域物理属性相似法获取目标流域的参数
# target_params = get_params_by_PS(basin_properties, params, target_properties, N=5, method='mahalanobis')
# print("Physical Similarity Predicted Parameters:", target_params)

## 回归模型

In [9]:
def train_random_forest(basin_properties_scaled, params):
    """Fit one random-forest regressor per model parameter.

    Parameters
    ----------
    basin_properties_scaled : array-like
        Standardised donor attributes, shape (n_basins, n_features).
    params : array-like
        Donor parameters, shape (n_basins, n_params); one regressor is fitted
        per column.

    Returns
    -------
    MultiOutputRegressor
        The fitted multi-output random-forest model.
    """
    forest_settings = dict(
        n_estimators=100,
        max_depth=10,
        min_samples_split=5,
        min_samples_leaf=2,
        random_state=42,
        n_jobs=-1,
    )
    base_forest = RandomForestRegressor(**forest_settings)

    rf_model = MultiOutputRegressor(base_forest, n_jobs=-1)
    rf_model.fit(basin_properties_scaled, params)
    return rf_model

def train_svm(basin_properties_scaled, params):
    """Fit one RBF support-vector regressor per model parameter.

    Parameters
    ----------
    basin_properties_scaled : array-like
        Standardised donor attributes, shape (n_basins, n_features).
    params : array-like
        Donor parameters, shape (n_basins, n_params); one regressor is fitted
        per column.

    Returns
    -------
    MultiOutputRegressor
        The fitted multi-output SVR model.
    """
    base_svr = SVR(kernel='rbf', C=100, epsilon=0.01, gamma=0.1)

    svr_model = MultiOutputRegressor(base_svr, n_jobs=-1)
    svr_model.fit(basin_properties_scaled, params)
    return svr_model

def train_xgboost(basin_properties_scaled, params):
    """Fit one XGBoost regressor per model parameter.

    Parameters
    ----------
    basin_properties_scaled : array-like
        Standardised donor attributes, shape (n_basins, n_features).
    params : array-like
        Donor parameters, shape (n_basins, n_params); one regressor is fitted
        per column.

    Returns
    -------
    MultiOutputRegressor
        The fitted multi-output XGBoost model.
    """
    booster_settings = dict(
        n_estimators=100,
        learning_rate=0.1,
        max_depth=6,
        min_child_weight=3,
        gamma=0.1,
        colsample_bytree=0.8,
        subsample=0.8,
        reg_alpha=0.1,
        random_state=42,
        n_jobs=-1,
    )
    base_booster = XGBRegressor(**booster_settings)

    xgb_model = MultiOutputRegressor(base_booster, n_jobs=-1)
    xgb_model.fit(basin_properties_scaled, params)
    return xgb_model

def get_params_by_regression(target_properties_scaled, trained_models):
    """Predict the parameters of a target basin with a fitted regression model.

    Parameters
    ----------
    target_properties_scaled : array-like
        Standardised attributes of the target basin, passed straight to
        ``predict`` (the callers in this notebook use shape (1, n_features)).
    trained_models : object
        A fitted estimator exposing ``predict`` (e.g. the result of
        ``train_random_forest``, ``train_svm`` or ``train_xgboost``).

    Returns
    -------
    np.ndarray
        The prediction with all length-1 axes removed.
    """
    return np.squeeze(trained_models.predict(target_properties_scaled))

In [10]:
# # 示例数据
# basin_properties = np.random.rand(100, 19)  # 100个流域的19个属性数据
# params = np.random.rand(100, 5)  # 100个流域的参数数据，每个流域有5个参数
# target_properties = np.random.rand(1, 19)  # 目标流域的19个属性数据

# # 数据标准化
# scaler = StandardScaler()
# basin_properties_scaled = scaler.fit_transform(basin_properties)
# target_properties_scaled = scaler.transform(target_properties)

# rf_model  = train_random_forest(basin_properties_scaled, params)
# svm_model = train_svm(basin_properties_scaled, params)
# xgb_model = train_xgboost(basin_properties_scaled, params)

# rf_pred   = get_params_by_regression(target_properties_scaled, rf_model)
# svm_pred  = get_params_by_regression(target_properties_scaled, svm_model)
# xgb_pred  = get_params_by_regression(target_properties_scaled, xgb_model)
# print("Random Forest Predicted Parameters:", rf_pred)
# print("SVM Predicted Parameters:", svm_pred)
# print("XGBoost Predicted Parameters:", xgb_pred)

# 移植模型参数并计算效率系数

## 定义结果数组

### 模型运行效率

In [None]:
# NaN-filled metric tables: one row per basin and six columns.
# Presumably one column per parameter source (calibrated + transplant methods) — TODO confirm.
# Each generator step builds a separate array, so no two names share memory.

# mYWBM model
cali_kge_YM_list, vali_kge_YM_list, cali_re_YM_list, vali_re_YM_list = (
    np.full((len(basin_list), 6), np.nan) for _ in range(4)
)
# abcd model
cali_kge_AM_list, vali_kge_AM_list, cali_re_AM_list, vali_re_AM_list = (
    np.full((len(basin_list), 6), np.nan) for _ in range(4)
)
# DWBM model
cali_kge_DM_list, vali_kge_DM_list, cali_re_DM_list, vali_re_DM_list = (
    np.full((len(basin_list), 6), np.nan) for _ in range(4)
)

In [None]:
def _load_metric_table(path):
    """Read a tab-separated metric file (header row, first column as index) as a NumPy array."""
    return pd.read_csv(path, sep='\t', header=0, index_col=0).to_numpy()


# mYWBM
cali_kge_YM_list = _load_metric_table("../../Results/Metrics_Basin_Params_Transplant/1_mYWBM_Calibration_KGE.txt")
vali_kge_YM_list = _load_metric_table("../../Results/Metrics_Basin_Params_Transplant/1_mYWBM_Validation_KGE.txt")
cali_re_YM_list = _load_metric_table("../../Results/Metrics_Basin_Params_Transplant/1_mYWBM_Calibration_RE.txt")
vali_re_YM_list = _load_metric_table("../../Results/Metrics_Basin_Params_Transplant/1_mYWBM_Validation_RE.txt")

# abcd
cali_kge_AM_list = _load_metric_table("../../Results/Metrics_Basin_Params_Transplant/1_abcd_Calibration_KGE.txt")
vali_kge_AM_list = _load_metric_table("../../Results/Metrics_Basin_Params_Transplant/1_abcd_Validation_KGE.txt")
cali_re_AM_list = _load_metric_table("../../Results/Metrics_Basin_Params_Transplant/1_abcd_Calibration_RE.txt")
vali_re_AM_list = _load_metric_table("../../Results/Metrics_Basin_Params_Transplant/1_abcd_Validation_RE.txt")

# DWBM
cali_kge_DM_list = _load_metric_table("../../Results/Metrics_Basin_Params_Transplant/1_DWBM_Calibration_KGE.txt")
vali_kge_DM_list = _load_metric_table("../../Results/Metrics_Basin_Params_Transplant/1_DWBM_Validation_KGE.txt")
cali_re_DM_list = _load_metric_table("../../Results/Metrics_Basin_Params_Transplant/1_DWBM_Calibration_RE.txt")
vali_re_DM_list = _load_metric_table("../../Results/Metrics_Basin_Params_Transplant/1_DWBM_Validation_RE.txt")

### 移植的参数

In [None]:
# NaN-filled holders for the transplanted parameters: one row per basin, five
# columns, one array per (model, transplant method). Each generator step builds
# a separate array, so no two names share memory.

# mYWBM
(params_YM_SP_AM, params_YM_SP_IDW, params_YM_PS,
 params_YM_rf, params_YM_svm, params_YM_xgb) = (
    np.full((len(basin_list), 5), np.nan) for _ in range(6)
)

# abcd
(params_AM_SP_AM, params_AM_SP_IDW, params_AM_PS,
 params_AM_rf, params_AM_svm, params_AM_xgb) = (
    np.full((len(basin_list), 5), np.nan) for _ in range(6)
)

# DWBM
(params_DM_SP_AM, params_DM_SP_IDW, params_DM_PS,
 params_DM_rf, params_DM_svm, params_DM_xgb) = (
    np.full((len(basin_list), 5), np.nan) for _ in range(6)
)

# GmYWBM
(params_GYM_SP_AM, params_GYM_SP_IDW, params_GYM_PS,
 params_GYM_rf, params_GYM_svm, params_GYM_xgb) = (
    np.full((len(basin_list), 5), np.nan) for _ in range(6)
)

In [None]:
def _load_transplanted_params(path):
    """Read a tab-separated parameter file (header row, first column as index) as a NumPy array."""
    return pd.read_csv(path, sep='\t', header=0, index_col=0).to_numpy()


# mYWBM
params_YM_SP_AM = _load_transplanted_params("../../Results/Params_Transplant/mYWBM_SP_AM_Params.txt")
params_YM_SP_IDW = _load_transplanted_params("../../Results/Params_Transplant/mYWBM_SP_IDW_Params.txt")
params_YM_PS = _load_transplanted_params("../../Results/Params_Transplant/mYWBM_PS_Params.txt")
params_YM_rf = _load_transplanted_params("../../Results/Params_Transplant/mYWBM_RF_Params.txt")
params_YM_svm = _load_transplanted_params("../../Results/Params_Transplant/mYWBM_SVM_Params.txt")
params_YM_xgb = _load_transplanted_params("../../Results/Params_Transplant/mYWBM_XGB_Params.txt")

# abcd
params_AM_SP_AM = _load_transplanted_params("../../Results/Params_Transplant/abcd_SP_AM_Params.txt")
params_AM_SP_IDW = _load_transplanted_params("../../Results/Params_Transplant/abcd_SP_IDW_Params.txt")
params_AM_PS = _load_transplanted_params("../../Results/Params_Transplant/abcd_PS_Params.txt")
params_AM_rf = _load_transplanted_params("../../Results/Params_Transplant/abcd_RF_Params.txt")
params_AM_svm = _load_transplanted_params("../../Results/Params_Transplant/abcd_SVM_Params.txt")
params_AM_xgb = _load_transplanted_params("../../Results/Params_Transplant/abcd_XGB_Params.txt")

# DWBM
params_DM_SP_AM = _load_transplanted_params("../../Results/Params_Transplant/DWBM_SP_AM_Params.txt")
params_DM_SP_IDW = _load_transplanted_params("../../Results/Params_Transplant/DWBM_SP_IDW_Params.txt")
params_DM_PS = _load_transplanted_params("../../Results/Params_Transplant/DWBM_PS_Params.txt")
params_DM_rf = _load_transplanted_params("../../Results/Params_Transplant/DWBM_RF_Params.txt")
params_DM_svm = _load_transplanted_params("../../Results/Params_Transplant/DWBM_SVM_Params.txt")
params_DM_xgb = _load_transplanted_params("../../Results/Params_Transplant/DWBM_XGB_Params.txt")

# GmYWBM
params_GYM_SP_AM = _load_transplanted_params("../../Results/Params_Transplant/GmYWBM_SP_AM_Params.txt")
params_GYM_SP_IDW = _load_transplanted_params("../../Results/Params_Transplant/GmYWBM_SP_IDW_Params.txt")
params_GYM_PS = _load_transplanted_params("../../Results/Params_Transplant/GmYWBM_PS_Params.txt")
params_GYM_rf = _load_transplanted_params("../../Results/Params_Transplant/GmYWBM_RF_Params.txt")
params_GYM_svm = _load_transplanted_params("../../Results/Params_Transplant/GmYWBM_SVM_Params.txt")
params_GYM_xgb = _load_transplanted_params("../../Results/Params_Transplant/GmYWBM_XGB_Params.txt")

## 参数移植数据获取函数

In [36]:
def get_params(basin, Basin_Properties, model_params):
    """Split attributes and calibrated parameters into target basin vs. donor basins.

    Parameters
    ----------
    basin : index label of the target basin in both frames.
    Basin_Properties : pandas.DataFrame
        Basin attributes; must contain 'Longitude' and 'Latitude' columns.
    model_params : pandas.DataFrame
        Calibrated parameters, indexed like ``Basin_Properties``.

    Returns
    -------
    tuple
        ``(cali_params, basin_properties, rest_properties, rest_params,
        basin_lon, basin_lat)``: the target's calibrated parameters (1-D), the
        target's attributes as a (1, n_features) array, the attributes and
        parameters of all other basins, and the target's longitude / latitude.
        Longitude and latitude are NOT removed from the attribute arrays.
    """
    target_row = Basin_Properties.loc[basin]
    basin_lon = target_row['Longitude']
    basin_lat = target_row['Latitude']

    # Target basin: calibrated parameters and attributes as a single-row matrix.
    cali_params = model_params.loc[basin].to_numpy()
    basin_properties = target_row.to_numpy().reshape(1, -1)

    # Donor basins: everything except the target (drop returns a new frame).
    rest_properties = Basin_Properties.drop(index=basin).to_numpy()
    rest_params = model_params.drop(index=basin).to_numpy()

    return cali_params, basin_properties, rest_properties, rest_params, basin_lon, basin_lat
def cal_metrics(cali_obs, vali_obs, cali_sim, vali_sim):
    """Score simulated against observed runoff for both periods.

    Returns
    -------
    tuple
        ``(cali_kge, vali_kge, cali_re, vali_re)``: Kling-Gupta efficiency for
        the calibration and validation periods, followed by the relative error
        of each period multiplied by 100.
    """
    return (
        kling_gupta_efficiency(cali_obs, cali_sim),
        kling_gupta_efficiency(vali_obs, vali_sim),
        relative_error(cali_obs, cali_sim) * 100,
        relative_error(vali_obs, vali_sim) * 100,
    )
def clean_params(params, all_params, lower_bound, upper_bound):
    """Replace out-of-range parameter values with the donor column mean.

    Parameters
    ----------
    params : np.ndarray
        2-D array of parameter sets to check; it is modified IN PLACE.
    all_params : np.ndarray
        2-D array whose NaN-ignoring column means supply the replacement values.
    lower_bound, upper_bound : sequence
        Per-column inclusive limits, indexed by column position.

    Returns
    -------
    np.ndarray
        The same ``params`` object after replacement. NaN entries are left
        untouched because they compare false against both limits.
    """
    n_rows, n_cols = params.shape
    for row, col in np.ndindex(n_rows, n_cols):
        value = params[row, col]
        if value < lower_bound[col] or value > upper_bound[col]:
            params[row, col] = np.nanmean(all_params[:, col])
    return params

## 参数移植

In [None]:
# # for basin_idx in range(len(basin_list)):
def _transplant_one_model(basin, basin_idx, basin_attributes, calibrated_params,
                          similarity_outputs, regression_outputs=None, bounds=None):
    """Transplant the parameters of one hydrological model to one target basin.

    Writes row ``basin_idx`` of the three arrays in ``similarity_outputs``
    (spatial-proximity mean, spatial-proximity IDW, physical similarity). When
    ``regression_outputs`` is given, it also writes the random-forest, SVR and
    XGBoost predictions, clipped by ``clean_params`` to ``bounds``.
    """
    _, basin_properties, rest_properties, rest_params, basin_lon, basin_lat = get_params(
        basin, basin_attributes, calibrated_params)

    pred_params_SP_AM = get_params_by_SP_AM(rest_properties, rest_params, 8, basin_lon, basin_lat)
    pred_params_SP_IDW = get_params_by_SP_IDW(rest_properties, rest_params, 8, basin_lon, basin_lat)
    # NOTE(review): the physical-similarity call receives every attribute column,
    # including Longitude/Latitude, whereas the regression branch below drops the
    # first two columns — confirm this difference is intended.
    pred_params_PS = get_params_by_PS(rest_properties, rest_params, basin_properties,
                                      N=5, method='mahalanobis')

    out_sp_am, out_sp_idw, out_ps = similarity_outputs
    out_sp_am[basin_idx] = pred_params_SP_AM
    out_sp_idw[basin_idx] = pred_params_SP_IDW
    out_ps[basin_idx] = pred_params_PS

    if regression_outputs is None:
        return

    # Regression branch: standardise the non-geographic attributes (columns 2+)
    # with donor statistics, then fit and apply each regressor.
    lower_bound, upper_bound = bounds
    scaler = StandardScaler()
    source_properties_scaled = scaler.fit_transform(rest_properties[:, 2:])
    target_properties_scaled = scaler.transform(basin_properties[:, 2:])

    trainers = (train_random_forest, train_svm, train_xgboost)
    for trainer, out_array in zip(trainers, regression_outputs):
        model = trainer(source_properties_scaled, rest_params)
        predicted = get_params_by_regression(target_properties_scaled, model).reshape(1, -1)
        # Out-of-range predictions fall back to the donor column mean.
        out_array[basin_idx] = np.squeeze(
            clean_params(predicted, rest_params, lower_bound, upper_bound))


def process_basin(basin_idx, run_regression=False):
    """Transplant parameters of all four models to the basin at ``basin_idx``.

    For mYWBM, abcd, DWBM and GmYWBM the target basin is removed from the donor
    pool and its parameters are estimated from the remaining basins. Results are
    written into row ``basin_idx`` of the module-level ``params_*_SP_AM``,
    ``params_*_SP_IDW`` and ``params_*_PS`` arrays.

    Parameters
    ----------
    basin_idx : int
        Position of the target basin in ``basin_list``.
    run_regression : bool, default False
        When True, also train random-forest, SVR and XGBoost regressors on the
        donor basins and fill the ``params_*_rf`` / ``params_*_svm`` /
        ``params_*_xgb`` arrays. Off by default, so the default call fills only
        the spatial-proximity and physical-similarity arrays.
    """
    basin = str(basin_list[basin_idx])
    print(f"Processing basin {basin} ({basin_idx + 1}/{len(basin_list)})...")

    # Longitude/Latitude must stay first: the spatial-proximity functions read
    # columns 0 and 1 of the donor attribute array.
    attribute_columns = ['Longitude', 'Latitude', 'Climate', 'Clay', 'Silt', 'Sand', 'Slope', 'BFI',
                         'PRE', 'TMP', 'PET', 'TMAX', 'TMIN', 'AE', 'NDVI', 'TI']
    basin_attributes = Basin_Properties[attribute_columns]

    # Per-model (lower, upper) parameter limits used to clean regression output.
    mywbm_bounds = ([0, 0, 0.05, 100, 0], [2, 0.65, 0.95, 2000, 1])
    abcd_bounds = ([0, 100, 0, 0, 0], [1, 2000, 1, 1, 1])
    dwbm_bounds = ([0, 0, 100, 0, 0], [1, 1, 2000, 1, 1])

    # The output arrays are looked up here, at call time, because other cells
    # may rebind these module-level names after this function is defined.
    model_jobs = [
        (params_mYWBM, (params_YM_SP_AM, params_YM_SP_IDW, params_YM_PS), mywbm_bounds),
        (params_abcd, (params_AM_SP_AM, params_AM_SP_IDW, params_AM_PS), abcd_bounds),
        (params_DWBM, (params_DM_SP_AM, params_DM_SP_IDW, params_DM_PS), dwbm_bounds),
        (params_GmYWBM, (params_GYM_SP_AM, params_GYM_SP_IDW, params_GYM_PS), mywbm_bounds),
    ]
    if run_regression:
        regression_targets = [
            (params_YM_rf, params_YM_svm, params_YM_xgb),
            (params_AM_rf, params_AM_svm, params_AM_xgb),
            (params_DM_rf, params_DM_svm, params_DM_xgb),
            (params_GYM_rf, params_GYM_svm, params_GYM_xgb),
        ]
    else:
        regression_targets = [None] * len(model_jobs)

    for (calibrated_params, similarity_outputs, bounds), regression_outputs in zip(
            model_jobs, regression_targets):
        _transplant_one_model(basin, basin_idx, basin_attributes, calibrated_params,
                              similarity_outputs, regression_outputs, bounds)

# # 设置线程池大小（可以按 CPU 核数或手动指定）
# max_workers = 10

# with ThreadPoolExecutor(max_workers=max_workers) as executor:
#     futures = [executor.submit(process_basin, i) for i in range(len(basin_list))]
#     for future in as_completed(futures):
#         future.result()

# Run the transplant sequentially for every basin; each call writes row
# `basin_idx` of the module-level params_* arrays.
for basin_idx in range(len(basin_list)):
    process_basin(basin_idx)

In [None]:
# params_YM_spatial_proximity_df    = pd.DataFrame(params_YM_spatial_proximity, index=params_mYWBM.index, columns=params_mYWBM.columns)
# params_YM_physical_similarity_df  = pd.DataFrame(params_YM_physical_similarity, index=params_mYWBM.index, columns=params_mYWBM.columns)
# params_YM_rf_df                   = pd.DataFrame(params_YM_rf, index=params_mYWBM.index, columns=params_mYWBM.columns)
# params_YM_svm_df                  = pd.DataFrame(params_YM_svm, index=params_mYWBM.index, columns=params_mYWBM.columns)
# params_YM_xgb_df                  = pd.DataFrame(params_YM_xgb, index=params_mYWBM.index, columns=params_mYWBM.columns)

# params_AM_spatial_proximity_df    = pd.DataFrame(params_AM_spatial_proximity, index=params_abcd.index, columns=params_abcd.columns)
# params_AM_physical_similarity_df  = pd.DataFrame(params_AM_physical_similarity, index=params_abcd.index, columns=params_abcd.columns)
# params_AM_rf_df                   = pd.DataFrame(params_AM_rf, index=params_abcd.index, columns=params_abcd.columns)
# params_AM_svm_df                  = pd.DataFrame(params_AM_svm, index=params_abcd.index, columns=params_abcd.columns)
# params_AM_xgb_df                  = pd.DataFrame(params_AM_xgb, index=params_abcd.index, columns=params_abcd.columns)

# params_DM_spatial_proximity_df    = pd.DataFrame(params_DM_spatial_proximity, index=params_DWBM.index, columns=params_DWBM.columns)
# params_DM_physical_similarity_df  = pd.DataFrame(params_DM_physical_similarity, index=params_DWBM.index, columns=params_DWBM.columns)
# params_DM_rf_df                   = pd.DataFrame(params_DM_rf, index=params_DWBM.index, columns=params_DWBM.columns)
# params_DM_svm_df                  = pd.DataFrame(params_DM_svm, index=params_DWBM.index, columns=params_DWBM.columns)
# params_DM_xgb_df                  = pd.DataFrame(params_DM_xgb, index=params_DWBM.index, columns=params_DWBM.columns)

# params_YM_spatial_proximity_df.to_csv("../../Results/Params_Transplant/mYWBM_Spatial_Proximity_Params.txt", sep = '\t', float_format='%.2f')
# params_YM_physical_similarity_df.to_csv("../../Results/Params_Transplant/mYWBM_Physical_Similarity_Params.txt", sep = '\t', float_format='%.2f')
# params_YM_rf_df.to_csv("../../Results/Params_Transplant/mYWBM_RF_Params.txt", sep = '\t', float_format='%.2f')
# params_YM_svm_df.to_csv("../../Results/Params_Transplant/mYWBM_SVM_Params.txt", sep = '\t', float_format='%.2f')
# params_YM_xgb_df.to_csv("../../Results/Params_Transplant/mYWBM_XGB_Params.txt", sep = '\t', float_format='%.2f')

# params_AM_spatial_proximity_df.to_csv("../../Results/Params_Transplant/abcd_Spatial_Proximity_Params.txt", sep = '\t', float_format='%.2f')
# params_AM_physical_similarity_df.to_csv("../../Results/Params_Transplant/abcd_Physical_Similarity_Params.txt", sep = '\t', float_format='%.2f')
# params_AM_rf_df.to_csv("../../Results/Params_Transplant/abcd_RF_Params.txt", sep = '\t', float_format='%.2f')
# params_AM_svm_df.to_csv("../../Results/Params_Transplant/abcd_SVM_Params.txt", sep = '\t', float_format='%.2f')
# params_AM_xgb_df.to_csv("../../Results/Params_Transplant/abcd_XGB_Params.txt", sep = '\t', float_format='%.2f')

# params_DM_spatial_proximity_df.to_csv("../../Results/Params_Transplant/DWBM_Spatial_Proximity_Params.txt", sep = '\t', float_format='%.2f')
# params_DM_physical_similarity_df.to_csv("../../Results/Params_Transplant/DWBM_Physical_Similarity_Params.txt", sep = '\t', float_format='%.2f')
# params_DM_rf_df.to_csv("../../Results/Params_Transplant/DWBM_RF_Params.txt", sep = '\t', float_format='%.2f')
# params_DM_svm_df.to_csv("../../Results/Params_Transplant/DWBM_SVM_Params.txt", sep = '\t', float_format='%.2f')
# params_DM_xgb_df.to_csv("../../Results/Params_Transplant/DWBM_XGB_Params.txt", sep = '\t', float_format='%.2f')

## 读取参数移植结果并运行模型

In [None]:
def _load_params_frame(path):
    """Read a tab-separated parameter file into a DataFrame (header row, first column as index)."""
    return pd.read_csv(path, sep='\t', header=0, index_col=0)


# mYWBM
params_YM_spatial_proximity_df = _load_params_frame("../../Results/Params_Transplant/mYWBM_Spatial_Proximity_Params.txt")
params_YM_physical_similarity_df = _load_params_frame("../../Results/Params_Transplant/mYWBM_Physical_Similarity_Params.txt")
params_YM_rf_df = _load_params_frame("../../Results/Params_Transplant/mYWBM_RF_Params.txt")
params_YM_svm_df = _load_params_frame("../../Results/Params_Transplant/mYWBM_SVM_Params.txt")
params_YM_xgb_df = _load_params_frame("../../Results/Params_Transplant/mYWBM_XGB_Params.txt")

# abcd
params_AM_spatial_proximity_df = _load_params_frame("../../Results/Params_Transplant/abcd_Spatial_Proximity_Params.txt")
params_AM_physical_similarity_df = _load_params_frame("../../Results/Params_Transplant/abcd_Physical_Similarity_Params.txt")
params_AM_rf_df = _load_params_frame("../../Results/Params_Transplant/abcd_RF_Params.txt")
params_AM_svm_df = _load_params_frame("../../Results/Params_Transplant/abcd_SVM_Params.txt")
params_AM_xgb_df = _load_params_frame("../../Results/Params_Transplant/abcd_XGB_Params.txt")

# DWBM
params_DM_spatial_proximity_df = _load_params_frame("../../Results/Params_Transplant/DWBM_Spatial_Proximity_Params.txt")
params_DM_physical_similarity_df = _load_params_frame("../../Results/Params_Transplant/DWBM_Physical_Similarity_Params.txt")
params_DM_rf_df = _load_params_frame("../../Results/Params_Transplant/DWBM_RF_Params.txt")
params_DM_svm_df = _load_params_frame("../../Results/Params_Transplant/DWBM_SVM_Params.txt")
params_DM_xgb_df = _load_params_frame("../../Results/Params_Transplant/DWBM_XGB_Params.txt")

In [None]:
def run_models(fn_hm, basin_idx, x_cali, y_cali, x_vali, y_vali, cali_params_df, params_spatial_proximity_df, params_physical_similarity_df, params_rf_df, params_svm_df, params_xgb_df, cali_kge_list, vali_kge_list, cali_re_list, vali_re_list):
    """Run one hydrological model for one basin with six parameter sets and store the metrics.

    The parameter sets are taken, in this order, from: the calibrated table,
    spatial proximity, physical similarity, random forest, SVM and XGBoost.
    Set ``k`` is written to column ``k`` of row ``basin_idx`` in each of the
    four metric arrays, which are modified in place and also returned.

    Parameters
    ----------
    fn_hm : callable
        Model function, called as ``fn_hm(x, params)``.
    basin_idx : int
        Position of the basin in the module-level ``basin_list``; also the row
        written in the metric arrays.
    x_cali, y_cali, x_vali, y_vali
        Inputs / observations for the calibration and validation periods,
        passed through to ``fn_hm`` and ``cal_metrics``.
    cali_params_df, params_*_df : pandas.DataFrame
        Parameter tables that are looked up with ``.loc[str(basin id)]``.
    cali_kge_list, vali_kge_list, cali_re_list, vali_re_list
        2-D arrays indexed as ``[basin_idx, column]``.

    Returns
    -------
    tuple
        ``(cali_kge_list, vali_kge_list, cali_re_list, vali_re_list)``.
    """
    basin = str(basin_list[basin_idx])

    # Column order of the metric arrays follows the order of this tuple.
    param_tables = (
        cali_params_df,
        params_spatial_proximity_df,
        params_physical_similarity_df,
        params_rf_df,
        params_svm_df,
        params_xgb_df,
    )

    # Fetch every parameter vector first; each one is shifted by 1e-6
    # (presumably to keep parameters strictly away from zero -- TODO confirm).
    param_sets = [table.loc[basin].to_numpy() + 1e-6 for table in param_tables]

    # Simulate the calibration and validation periods for every parameter set.
    simulations = [(fn_hm(x_cali, params), fn_hm(x_vali, params)) for params in param_sets]

    # Compute KGE and RE for both periods and store them column by column.
    for col, (y_sim_cali, y_sim_vali) in enumerate(simulations):
        (cali_kge_list[basin_idx, col], vali_kge_list[basin_idx, col],
         cali_re_list[basin_idx, col], vali_re_list[basin_idx, col]) = cal_metrics(y_cali, y_vali, y_sim_cali, y_sim_vali)

    return cali_kge_list, vali_kge_list, cali_re_list, vali_re_list

In [None]:
for basin_idx in range(len(basin_list)):
    st = time.time()
    basin = str(basin_list[basin_idx])

    # The forcing/observation series depend only on the basin, not on the model,
    # so they are read once per basin instead of once per model. run_models
    # already reuses the same arrays for several fn_hm calls.
    x_cali, y_cali, x_vali, y_vali = get_data_lumped(basin, basin_idx)

    ## mYWBM
    fn_hm = mYWBMnlS
    cali_kge_YM_list, vali_kge_YM_list, cali_re_YM_list, vali_re_YM_list = run_models(fn_hm, basin_idx, x_cali, y_cali, x_vali, y_vali, 
                                                                                      params_mYWBM, params_YM_spatial_proximity_df, params_YM_physical_similarity_df, params_YM_rf_df, params_YM_svm_df, params_YM_xgb_df, 
                                                                                      cali_kge_YM_list, vali_kge_YM_list, cali_re_YM_list, vali_re_YM_list)

    ## abcd
    fn_hm = abcdnlS
    cali_kge_AM_list, vali_kge_AM_list, cali_re_AM_list, vali_re_AM_list = run_models(fn_hm, basin_idx, x_cali, y_cali, x_vali, y_vali,
                                                                                         params_abcd, params_AM_spatial_proximity_df, params_AM_physical_similarity_df, params_AM_rf_df, params_AM_svm_df, params_AM_xgb_df, 
                                                                                         cali_kge_AM_list, vali_kge_AM_list, cali_re_AM_list, vali_re_AM_list)

    ## DWBM
    fn_hm = DWBMnlS
    cali_kge_DM_list, vali_kge_DM_list, cali_re_DM_list, vali_re_DM_list = run_models(fn_hm, basin_idx, x_cali, y_cali, x_vali, y_vali,
                                                                                         params_DWBM, params_DM_spatial_proximity_df, params_DM_physical_similarity_df, params_DM_rf_df, params_DM_svm_df, params_DM_xgb_df,
                                                                                         cali_kge_DM_list, vali_kge_DM_list, cali_re_DM_list, vali_re_DM_list)
    et = time.time()
    print(f"Processing No. {basin_idx+1}. basin {basin} finished, time: {et-st:.2f}s")

# 保存数据

In [None]:
# One column per parameter source, in the order the metric arrays were filled.
_METHOD_COLUMNS = ['Calibrated', 'Spatial Proximity', 'Physical Similarity', 'Random Forest', 'SVM', 'XGBoost']


def _metrics_frame(metric_values, params_df):
    """Wrap a metric array in a DataFrame indexed like ``params_df``."""
    return pd.DataFrame(metric_values, index=params_df.index, columns=_METHOD_COLUMNS)


# mYWBM
cali_kge_YM_df = _metrics_frame(cali_kge_YM_list, params_mYWBM)
vali_kge_YM_df = _metrics_frame(vali_kge_YM_list, params_mYWBM)
cali_re_YM_df  = _metrics_frame(cali_re_YM_list, params_mYWBM)
vali_re_YM_df  = _metrics_frame(vali_re_YM_list, params_mYWBM)
# cali_kge_YM_df.to_csv(f"../../Results/Metrics_Basin_Params_Transplant/1_mYWBM_Calibration_KGE.txt", sep='\t', float_format='%.2f')
# vali_kge_YM_df.to_csv(f"../../Results/Metrics_Basin_Params_Transplant/1_mYWBM_Validation_KGE.txt", sep='\t', float_format='%.2f')
# cali_re_YM_df.to_csv(f"../../Results/Metrics_Basin_Params_Transplant/1_mYWBM_Calibration_RE.txt", sep='\t', float_format='%.2f')
# vali_re_YM_df.to_csv(f"../../Results/Metrics_Basin_Params_Transplant/1_mYWBM_Validation_RE.txt", sep='\t', float_format='%.2f')

# abcd
cali_kge_AM_df = _metrics_frame(cali_kge_AM_list, params_abcd)
vali_kge_AM_df = _metrics_frame(vali_kge_AM_list, params_abcd)
cali_re_AM_df  = _metrics_frame(cali_re_AM_list, params_abcd)
vali_re_AM_df  = _metrics_frame(vali_re_AM_list, params_abcd)
# cali_kge_AM_df.to_csv(f"../../Results/Metrics_Basin_Params_Transplant/1_abcd_Calibration_KGE.txt", sep='\t', float_format='%.2f')
# vali_kge_AM_df.to_csv(f"../../Results/Metrics_Basin_Params_Transplant/1_abcd_Validation_KGE.txt", sep='\t', float_format='%.2f')
# cali_re_AM_df.to_csv(f"../../Results/Metrics_Basin_Params_Transplant/1_abcd_Calibration_RE.txt", sep='\t', float_format='%.2f')
# vali_re_AM_df.to_csv(f"../../Results/Metrics_Basin_Params_Transplant/1_abcd_Validation_RE.txt", sep='\t', float_format='%.2f')

# DWBM
cali_kge_DM_df = _metrics_frame(cali_kge_DM_list, params_DWBM)
vali_kge_DM_df = _metrics_frame(vali_kge_DM_list, params_DWBM)
cali_re_DM_df  = _metrics_frame(cali_re_DM_list, params_DWBM)
vali_re_DM_df  = _metrics_frame(vali_re_DM_list, params_DWBM)
# cali_kge_DM_df.to_csv(f"../../Results/Metrics_Basin_Params_Transplant/1_DWBM_Calibration_KGE.txt", sep='\t', float_format='%.2f')
# vali_kge_DM_df.to_csv(f"../../Results/Metrics_Basin_Params_Transplant/1_DWBM_Validation_KGE.txt", sep='\t', float_format='%.2f')
# cali_re_DM_df.to_csv(f"../../Results/Metrics_Basin_Params_Transplant/1_DWBM_Calibration_RE.txt", sep='\t', float_format='%.2f')
# vali_re_DM_df.to_csv(f"../../Results/Metrics_Basin_Params_Transplant/1_DWBM_Validation_RE.txt", sep='\t', float_format='%.2f')

# 画图

In [None]:
def get_df_plot(basin_list, cali_df, vali_df):
    """Stack calibration/validation metrics into a long table for grouped box plots.

    Parameters
    ----------
    basin_list : sized
        Only its length is used; it sets how many rows each
        (method, period) group contributes to the label columns.
    cali_df, vali_df : pandas.DataFrame
        Metric tables with the columns 'Calibrated', 'Spatial Proximity',
        'Physical Similarity', 'Random Forest', 'SVM' and 'XGBoost'.

    Returns
    -------
    pandas.DataFrame
        Columns 'Value', 'Period' ('Cali' / 'Vali') and 'Method'. Rows are
        grouped by method, with the calibration block before the validation
        block inside each method.
    """
    methods = ['Calibrated', 'Spatial Proximity', 'Physical Similarity',
               'Random Forest', 'SVM', 'XGBoost']
    n_basins = len(basin_list)

    # Values: for every method, calibration column followed by validation column.
    values = np.concatenate([frame[method].values
                             for method in methods
                             for frame in (cali_df, vali_df)])

    # Labels mirror the layout of `values`.
    periods = [period
               for _ in methods
               for period in ('Cali', 'Vali')
               for _ in range(n_basins)]
    method_labels = [method for method in methods for _ in range(2 * n_basins)]

    return pd.DataFrame({'Value': values, 'Period': periods, 'Method': method_labels})

In [None]:
# Long-format KGE and RE tables, one pair per model (mYWBM, abcd, DWBM).
kge_YM_df_plot, re_YM_df_plot = (
    get_df_plot(basin_list, cali_kge_YM_df, vali_kge_YM_df),
    get_df_plot(basin_list, cali_re_YM_df, vali_re_YM_df),
)
kge_AM_df_plot, re_AM_df_plot = (
    get_df_plot(basin_list, cali_kge_AM_df, vali_kge_AM_df),
    get_df_plot(basin_list, cali_re_AM_df, vali_re_AM_df),
)
kge_DM_df_plot, re_DM_df_plot = (
    get_df_plot(basin_list, cali_kge_DM_df, vali_kge_DM_df),
    get_df_plot(basin_list, cali_re_DM_df, vali_re_DM_df),
)

In [None]:
def draw_boxplot(ax, data, x, y, hue, title, ylabel, ylim):
    """Draw a grouped seaborn box plot on ``ax`` and apply the shared panel styling.

    Parameters
    ----------
    ax : matplotlib axes
        Target axes.
    data : pandas.DataFrame
        Long-format table passed to ``sns.boxplot``.
    x, y, hue : str
        Column names for the category axis, the value axis and the grouping.
    title, ylabel : str
        Panel title (rendered bold) and y-axis label.
    ylim : tuple
        Limits passed to ``ax.set_ylim``.
    """
    # Outliers are hidden (showfliers=False); hue groups are drawn side by side.
    box_style = dict(palette='Set2', dodge=True, width=0.6, showfliers=False)
    sns.boxplot(data=data, x=x, y=y, hue=hue, ax=ax, **box_style)

    ax.set_title(title, fontweight='bold')
    ax.set_ylabel(ylabel)
    ax.set_ylim(ylim)

    # Horizontal dashed guide lines only.
    ax.grid(axis='y', linestyle='--', alpha=0.7)
    # Replace seaborn's legend with an untitled one in the upper-right corner.
    ax.legend(title='', loc='upper right')

In [None]:
fig, axs = plt.subplots(3, 2, figsize=(16, 12))

# Rows: mYWBM, DWBM, abcd. Left column: KGE, right column: RE.
(ax1_1, ax1_2), (ax2_1, ax2_2), (ax3_1, ax3_2) = axs

# (axes, data, title, y label, y limits) for each panel, in drawing order.
_panels = (
    (ax1_1, kge_YM_df_plot, 'Kling-Gupta Efficiency for mYWBM', 'KGE', (-0.5, 1.0)),
    (ax1_2, re_YM_df_plot,  'Relative Error for mYWBM',         'RE',  (-40, 40)),
    (ax2_1, kge_DM_df_plot, 'Kling-Gupta Efficiency for DWBM',  'KGE', (-0.5, 1.0)),
    (ax2_2, re_DM_df_plot,  'Relative Error for DWBM',          'RE',  (-40, 40)),
    (ax3_1, kge_AM_df_plot, 'Kling-Gupta Efficiency for abcd',  'KGE', (-0.5, 1.0)),
    (ax3_2, re_AM_df_plot,  'Relative Error for abcd',          'RE',  (-40, 40)),
)
for _ax, _table, _title, _ylabel, _ylim in _panels:
    draw_boxplot(_ax, _table, 'Method', 'Value', 'Period', _title, _ylabel, _ylim)

plt.tight_layout()
# fig.savefig("../../Images/Compare_Parameter_Estimation_Methods.jpg", dpi=300, bbox_inches='tight')

# 将参数移植到格点

In [12]:
# NetCDF reader for a single variable
def read_nc(filepath, var_name, flip=True):
    """Read one variable from a NetCDF file as a NumPy array.

    Parameters
    ----------
    filepath : str
        Path of the NetCDF file.
    var_name : str
        Name of the variable to read.
    flip : bool, default True
        If True, apply ``np.fliplr(np.rot90(data, k=3))`` to the array. For the
        default rotation axes this is equivalent to swapping the first two
        axes (a transpose for 2-D data). If False, the array is returned as stored.

    Returns
    -------
    numpy.ndarray
        Values of the variable, loaded into memory.

    Raises
    ------
    KeyError
        If ``var_name`` is not present in the file.
    """
    # The context manager closes the file even when the variable lookup fails.
    with xr.open_dataset(filepath) as ds:
        data = ds[var_name].values
    if flip:
        # rot90 with k=3 followed by a left-right flip == swap of axes 0 and 1.
        data = np.fliplr(np.rot90(data, k=3))
    return data

In [13]:
# Gridded attribute layers, one NetCDF file per attribute.
# All reads use read_nc's default flip=True.
grids_mask    = read_nc("../../Data/Grids_Prop/mask.nc", "mask")
grids_climate = read_nc("../../Data/Grids_Prop/Climate.nc", "Climate")
grids_clay    = read_nc("../../Data/Grids_Prop/clay.nc", "clay")
grids_silt    = read_nc("../../Data/Grids_Prop/silt.nc", "silt")
grids_sand    = read_nc("../../Data/Grids_Prop/sand.nc", "sand")
grids_slope   = read_nc("../../Data/Grids_Prop/Slope.nc", "Slope")
grids_bfi     = read_nc("../../Data/Grids_Prop/BFI.nc", "BFI")
grids_pre     = read_nc("../../Data/Grids_Prop/pre.nc", "pre")
grids_tmp     = read_nc("../../Data/Grids_Prop/tem.nc", "tem")
# PET is the only layer that is rescaled (x 30.4) -- presumably a daily value
# converted to a monthly total using the mean month length; TODO confirm units.
grids_pet     = read_nc("../../Data/Grids_Prop/pet.nc", "pet") * 30.4
grids_tmax    = read_nc("../../Data/Grids_Prop/tmx.nc", "tmx")
grids_tmin    = read_nc("../../Data/Grids_Prop/tmn.nc", "tmn")
grids_ndvi    = read_nc("../../Data/Grids_Prop/NDVI.nc", "NDVI")
grids_ti      = read_nc("../../Data/Grids_Prop/TI.nc", "TI")

In [14]:
# Flat cell ids for a 360 x 720 grid in row-major order, with the matching
# row (HANG) and column (LIE) index of every cell.
num_list = np.arange(0, 360*720)
hang_list, lie_list = np.divmod(num_list, 720)
mask_list = grids_mask.flatten()

# Flattened attribute layers, in the same cell order as num_list.
climate_list = grids_climate.flatten()
clay_list    = grids_clay.flatten()
silt_list    = grids_silt.flatten()
sand_list    = grids_sand.flatten()
slope_list   = grids_slope.flatten()
bfi_list     = grids_bfi.flatten()
pre_list     = grids_pre.flatten()
tmp_list     = grids_tmp.flatten()
pet_list     = grids_pet.flatten()
tmax_list    = grids_tmax.flatten()
tmin_list    = grids_tmin.flatten()
ndvi_list    = grids_ndvi.flatten()
ti_list      = grids_ti.flatten()

# Keep cells with MASK == 1, then drop rows containing any NaN, and finally
# index the table by the flat cell id.
grids_prop = (
    pd.DataFrame({
        'Climate': climate_list, 'Clay': clay_list, 'Silt': silt_list, 'Sand': sand_list, 'Slope': slope_list, 'BFI' : bfi_list,
        'PRE'    : pre_list,     'TMP' : tmp_list,  'PET' : pet_list,  'TMAX': tmax_list, 'TMIN' : tmin_list,  'NDVI': ndvi_list,
        'TI'     : ti_list,      'NUM' : num_list,  'HANG': hang_list, 'LIE' : lie_list,  'MASK' : mask_list
    })
    .loc[lambda frame: frame['MASK'] == 1]
    .reset_index(drop=True)
    .dropna()
    .reset_index(drop=True)
    .set_index('NUM')
)

In [15]:
# grids_prop.to_csv("../../Data/Grids_Prop/Grids_Properties.txt", sep='\t', float_format='%.4f', index=True)
# Attribute columns shared by the grid table and the basin table; the column
# order must be identical in both arrays.
_PROPERTY_COLUMNS = ['Climate', 'Clay', 'Silt', 'Sand', 'Slope', 'BFI', 'PRE', 'TMP', 'PET', 'TMAX', 'TMIN', 'NDVI', 'TI']

grids_prop_np  = grids_prop[_PROPERTY_COLUMNS].to_numpy()
basin_props_np = Basin_Properties[_PROPERTY_COLUMNS].to_numpy()

# Calibrated parameter tables as plain arrays.
params_mYWBM_np, params_abcd_np, params_DWBM_np = (
    params_mYWBM.to_numpy(),
    params_abcd.to_numpy(),
    params_DWBM.to_numpy(),
)

### 利用物理相似性移植和RF移植参数

In [None]:
# grids_params_YM_PS  = np.full((grids_prop.shape[0], 5), np.nan)
# grids_params_YM_rf  = np.full((grids_prop.shape[0], 5), np.nan)
# grids_params_AM_PS  = np.full((grids_prop.shape[0], 5), np.nan)
# grids_params_AM_rf  = np.full((grids_prop.shape[0], 5), np.nan)
# grids_params_DM_PS  = np.full((grids_prop.shape[0], 5), np.nan)
# grids_params_DM_rf  = np.full((grids_prop.shape[0], 5), np.nan)

# # 物理相似性移植
# for i in trange(grids_prop.shape[0]):
#     grids_params_YM_PS[i, :] = get_params_by_PS(basin_props_np, params_mYWBM_np, grids_prop_np[i].reshape(1, -1))
#     grids_params_AM_PS[i, :] = get_params_by_PS(basin_props_np, params_abcd_np, grids_prop_np[i].reshape(1, -1))
#     grids_params_DM_PS[i, :] = get_params_by_PS(basin_props_np, params_DWBM_np, grids_prop_np[i].reshape(1, -1))

In [16]:
# Previously saved physical-similarity parameters per grid cell:
# tab-separated, header row, first column as index.
_grid_table_opts = dict(sep='\t', header=0, index_col=0)

_ym_ps_table = pd.read_csv("../../Results/Grids_Params_Transplant/YM_PS.txt", **_grid_table_opts)
_am_ps_table = pd.read_csv("../../Results/Grids_Params_Transplant/AM_PS.txt", **_grid_table_opts)
_dm_ps_table = pd.read_csv("../../Results/Grids_Params_Transplant/DM_PS.txt", **_grid_table_opts)

# Select the five parameter columns of each model in a fixed order.
grids_params_YM_PS = _ym_ps_table[['Ks', 'Kg', 'alpha', 'smax', 'Ksn']].values
grids_params_AM_PS = _am_ps_table[['a', 'b', 'c', 'd', 'Ksn']].values
grids_params_DM_PS = _dm_ps_table[['alpha1', 'alpha2', 'smax', 'd', 'Ksn']].values

In [None]:
# ## 随机森林回归移植
# scaler = StandardScaler()
# source_properties_scaled = scaler.fit_transform(basin_props_np)
# target_properties_scaled = scaler.transform(grids_prop_np)

# rf_model_YM = train_random_forest(source_properties_scaled, params_mYWBM)
# rf_model_AM = train_random_forest(source_properties_scaled, params_abcd)
# rf_model_DM = train_random_forest(source_properties_scaled, params_DWBM)

# pred_params_YM_RF  = clean_params(get_params_by_regression(target_properties_scaled, rf_model_YM), params_mYWBM.to_numpy(), [0, 0, 0.05, 100, 0], [2, 0.65, 0.95, 1000, 1])
# pred_params_AM_RF  = clean_params(get_params_by_regression(target_properties_scaled, rf_model_AM), params_abcd.to_numpy(), [0, 100, 0, 0, 0], [1, 2000, 1, 1, 1])
# pred_params_DM_RF  = clean_params(get_params_by_regression(target_properties_scaled, rf_model_DM), params_DWBM.to_numpy(), [0, 0, 100, 0, 0], [1, 1, 2000, 1, 1])

### 把两组参数根据grids_loc的顺序放回去

In [None]:
# ks_grids_PS_YM    = np.full_like(grids_mask, np.nan, dtype=float)
# kg_grids_PS_YM    = np.full_like(grids_mask, np.nan, dtype=float)
# alpha_grids_PS_YM = np.full_like(grids_mask, np.nan, dtype=float)
# smax_grids_PS_YM  = np.full_like(grids_mask, np.nan, dtype=float)
# ksn_grids_PS_YM   = np.full_like(grids_mask, np.nan, dtype=float)

# ks_grids_RF_YM     = np.full_like(grids_mask, np.nan, dtype=float)
# kg_grids_RF_YM     = np.full_like(grids_mask, np.nan, dtype=float)
# alpha_grids_RF_YM  = np.full_like(grids_mask, np.nan, dtype=float)
# smax_grids_RF_YM   = np.full_like(grids_mask, np.nan, dtype=float)
# ksn_grids_RF_YM    = np.full_like(grids_mask, np.nan, dtype=float)

# a_grids_PS_AM    = np.full_like(grids_mask, np.nan, dtype=float)
# b_grids_PS_AM    = np.full_like(grids_mask, np.nan, dtype=float)
# c_grids_PS_AM    = np.full_like(grids_mask, np.nan, dtype=float)
# d_grids_PS_AM    = np.full_like(grids_mask, np.nan, dtype=float)
# ksn_grids_PS_AM  = np.full_like(grids_mask, np.nan, dtype=float)

# a_grids_RF_AM     = np.full_like(grids_mask, np.nan, dtype=float)
# b_grids_RF_AM     = np.full_like(grids_mask, np.nan, dtype=float)
# c_grids_RF_AM     = np.full_like(grids_mask, np.nan, dtype=float)
# d_grids_RF_AM     = np.full_like(grids_mask, np.nan, dtype=float)
# ksn_grids_RF_AM   = np.full_like(grids_mask, np.nan, dtype=float)

# alpha1_grids_PS_DM = np.full_like(grids_mask, np.nan, dtype=float)
# alpha2_grids_PS_DM = np.full_like(grids_mask, np.nan, dtype=float)
# smax_grids_PS_DM   = np.full_like(grids_mask, np.nan, dtype=float)
# d_grids_PS_DM      = np.full_like(grids_mask, np.nan, dtype=float)
# ksn_grids_PS_DM    = np.full_like(grids_mask, np.nan, dtype=float)

# alpha1_grids_RF_DM   = np.full_like(grids_mask, np.nan, dtype=float)
# alpha2_grids_RF_DM   = np.full_like(grids_mask, np.nan, dtype=float)
# smax_grids_RF_DM     = np.full_like(grids_mask, np.nan, dtype=float)
# d_grids_RF_DM        = np.full_like(grids_mask, np.nan, dtype=float)
# ksn_grids_RF_DM      = np.full_like(grids_mask, np.nan, dtype=float)

In [None]:
# ks_grids_PS_YM[grids_prop['HANG'].values, grids_prop['LIE'].values]    = grids_params_YM_PS[:, 0]
# kg_grids_PS_YM[grids_prop['HANG'].values, grids_prop['LIE'].values]    = grids_params_YM_PS[:, 1]
# alpha_grids_PS_YM[grids_prop['HANG'].values, grids_prop['LIE'].values] = grids_params_YM_PS[:, 2]
# smax_grids_PS_YM[grids_prop['HANG'].values, grids_prop['LIE'].values]  = grids_params_YM_PS[:, 3]
# ksn_grids_PS_YM[grids_prop['HANG'].values, grids_prop['LIE'].values]   = grids_params_YM_PS[:, 4]

# ks_grids_RF_YM[grids_prop['HANG'].values, grids_prop['LIE'].values]    = pred_params_YM_RF[:, 0]
# kg_grids_RF_YM[grids_prop['HANG'].values, grids_prop['LIE'].values]    = pred_params_YM_RF[:, 1]
# alpha_grids_RF_YM[grids_prop['HANG'].values, grids_prop['LIE'].values] = pred_params_YM_RF[:, 2]
# smax_grids_RF_YM[grids_prop['HANG'].values, grids_prop['LIE'].values]  = pred_params_YM_RF[:, 3]
# ksn_grids_RF_YM[grids_prop['HANG'].values, grids_prop['LIE'].values]   = pred_params_YM_RF[:, 4]

# a_grids_PS_AM[grids_prop['HANG'].values, grids_prop['LIE'].values]    = grids_params_AM_PS[:, 0]
# b_grids_PS_AM[grids_prop['HANG'].values, grids_prop['LIE'].values]    = grids_params_AM_PS[:, 1]
# c_grids_PS_AM[grids_prop['HANG'].values, grids_prop['LIE'].values]    = grids_params_AM_PS[:, 2]
# d_grids_PS_AM[grids_prop['HANG'].values, grids_prop['LIE'].values]    = grids_params_AM_PS[:, 3]
# ksn_grids_PS_AM[grids_prop['HANG'].values, grids_prop['LIE'].values]  = grids_params_AM_PS[:, 4]

# a_grids_RF_AM[grids_prop['HANG'].values, grids_prop['LIE'].values]    = pred_params_AM_RF[:, 0]
# b_grids_RF_AM[grids_prop['HANG'].values, grids_prop['LIE'].values]    = pred_params_AM_RF[:, 1]
# c_grids_RF_AM[grids_prop['HANG'].values, grids_prop['LIE'].values]    = pred_params_AM_RF[:, 2]
# d_grids_RF_AM[grids_prop['HANG'].values, grids_prop['LIE'].values]    = pred_params_AM_RF[:, 3]
# ksn_grids_RF_AM[grids_prop['HANG'].values, grids_prop['LIE'].values]  = pred_params_AM_RF[:, 4]

# alpha1_grids_PS_DM[grids_prop['HANG'].values, grids_prop['LIE'].values] = grids_params_DM_PS[:, 0]
# alpha2_grids_PS_DM[grids_prop['HANG'].values, grids_prop['LIE'].values] = grids_params_DM_PS[:, 1]
# smax_grids_PS_DM[grids_prop['HANG'].values, grids_prop['LIE'].values]   = grids_params_DM_PS[:, 2]
# d_grids_PS_DM[grids_prop['HANG'].values, grids_prop['LIE'].values]      = grids_params_DM_PS[:, 3]
# ksn_grids_PS_DM[grids_prop['HANG'].values, grids_prop['LIE'].values]    = grids_params_DM_PS[:, 4]

# alpha1_grids_RF_DM[grids_prop['HANG'].values, grids_prop['LIE'].values]    = pred_params_DM_RF[:, 0]
# alpha2_grids_RF_DM[grids_prop['HANG'].values, grids_prop['LIE'].values]    = pred_params_DM_RF[:, 1]
# smax_grids_RF_DM[grids_prop['HANG'].values, grids_prop['LIE'].values]      = pred_params_DM_RF[:, 2]
# d_grids_RF_DM[grids_prop['HANG'].values, grids_prop['LIE'].values]         = pred_params_DM_RF[:, 3]
# ksn_grids_RF_DM[grids_prop['HANG'].values, grids_prop['LIE'].values]       = pred_params_DM_RF[:, 4]

In [None]:
# fig, axs = plt.subplots(5, 6, figsize=(25, 13))
# plt.subplots_adjust(hspace=0.4, wspace=0.3)

# param_names = ['Ks', 'Kg', 'Alpha', 'Smax', 'Ksn']
# method_names = ['mY_Phy', 'mY_RF', 'AM_Phy', 'AM_RF', 'DM_Phy', 'DM_RF']

# # Data for plotting
# data_to_plot = [
#     [ks_grids_physical_similarity_YM,    ks_grids_rf_YM,    a_grids_physical_similarity_AM,   a_grids_rf_AM,   alpha1_grids_physical_similarity_DM, alpha1_grids_rf_DM],
#     [kg_grids_physical_similarity_YM,    kg_grids_rf_YM,    b_grids_physical_similarity_AM,   b_grids_rf_AM,   alpha2_grids_physical_similarity_DM, alpha2_grids_rf_DM],
#     [alpha_grids_physical_similarity_YM, alpha_grids_rf_YM, c_grids_physical_similarity_AM,   c_grids_rf_AM,   smax_grids_physical_similarity_DM,   smax_grids_rf_DM],
#     [smax_grids_physical_similarity_YM,  smax_grids_rf_YM,  d_grids_physical_similarity_AM,   d_grids_rf_AM,   d_grids_physical_similarity_DM,      d_grids_rf_DM],
#     [ksn_grids_physical_similarity_YM,   ksn_grids_rf_YM,   ksn_grids_physical_similarity_AM, ksn_grids_rf_AM, ksn_grids_physical_similarity_DM,    ksn_grids_rf_DM]
# ]

# for i in range(5):  # Rows for parameters
#     for j in range(6):  # Columns for methods
#         ax = axs[i, j]
#         im = ax.imshow(data_to_plot[i][j], cmap='viridis', aspect='auto')
#         ax.set_title(f'{param_names[i]} - {method_names[j]}')
#         ax.set_xlabel('Longitude Index')
#         ax.set_ylabel('Latitude Index')
#         fig.colorbar(im, ax=ax, orientation='vertical', fraction=0.046, pad=0.04)

# fig.suptitle('Spatial Distribution of Transplanted Parameters', fontsize=16, fontweight='bold')
# plt.tight_layout(rect=[0, 0, 1, 0.96]) # Adjust layout to make space for suptitle

### 写入nc文件

In [None]:
# def write_nc(filepath, lon, lat, ks, kg, alpha, smax, ksn):
#     # 创建一个新的NetCDF文件
#     ds = xr.Dataset(
#         {
#             'ks': (['lat', 'lon'], ks),
#             'kg': (['lat', 'lon'], kg),
#             'alpha': (['lat', 'lon'], alpha),
#             'smax': (['lat', 'lon'], smax),
#             'ksn': (['lat', 'lon'], ksn)
#         },
#         coords={
#             'lon': (['lon'], lon),
#             'lat': (['lat'], lat)
#         }
#     )
#     ds.to_netcdf(filepath, mode='w')
#     ds.close()

In [None]:
# grids_lon = xr.open_dataset("../../Data/Grids_Prop/mask.nc").longitude.values
# grids_lat = xr.open_dataset("../../Data/Grids_Prop/mask.nc").latitude.values

# write_nc("../../Results/Grids_Params_Transplant/YM_PS.nc", grids_lon, grids_lat,
#          ks_grids_PS_YM, kg_grids_PS_YM, alpha_grids_PS_YM,
#          smax_grids_PS_YM, ksn_grids_PS_YM)
# write_nc("../../Results/Grids_Params_Transplant/YM_RF.nc", grids_lon, grids_lat,
#          ks_grids_RF_YM, kg_grids_RF_YM, alpha_grids_RF_YM,
#          smax_grids_RF_YM, ksn_grids_RF_YM)
# write_nc("../../Results/Grids_Params_Transplant/AM_PS.nc", grids_lon, grids_lat,
#             a_grids_PS_AM, b_grids_PS_AM, c_grids_PS_AM,
#             d_grids_PS_AM, ksn_grids_PS_AM)
# write_nc("../../Results/Grids_Params_Transplant/AM_RF.nc", grids_lon, grids_lat,
#             a_grids_RF_AM, b_grids_RF_AM, c_grids_RF_AM,
#             d_grids_RF_AM, ksn_grids_RF_AM)
# write_nc("../../Results/Grids_Params_Transplant/DM_PS.nc", grids_lon, grids_lat,
#          alpha1_grids_PS_DM, alpha2_grids_PS_DM, smax_grids_PS_DM,
#          d_grids_PS_DM, ksn_grids_PS_DM)
# write_nc("../../Results/Grids_Params_Transplant/DM_RF.nc", grids_lon, grids_lat,
#          alpha1_grids_RF_DM, alpha2_grids_RF_DM, smax_grids_RF_DM,
#          d_grids_RF_DM, ksn_grids_RF_DM)

### 读取格点参数

In [17]:
def _read_param_layers(filepath):
    """Read the five parameter layers of one file, unflipped, in storage-name order.

    Every file is read with the variable names 'ks', 'kg', 'alpha', 'smax' and
    'ksn', whichever model it belongs to; the caller assigns model-specific names.
    """
    return tuple(read_nc(filepath, layer_name, False)
                 for layer_name in ("ks", "kg", "alpha", "smax", "ksn"))


# mYWBM: physical similarity, then random forest
(ks_grids_PS_YM, kg_grids_PS_YM, alpha_grids_PS_YM,
 smax_grids_PS_YM, ksn_grids_PS_YM) = _read_param_layers("../../Results/Grids_Params_Transplant/YM_PS.nc")

(ks_grids_rf_YM, kg_grids_rf_YM, alpha_grids_rf_YM,
 smax_grids_rf_YM, ksn_grids_rf_YM) = _read_param_layers("../../Results/Grids_Params_Transplant/YM_rf.nc")

# abcd: physical similarity, then random forest
(a_grids_PS_AM, b_grids_PS_AM, c_grids_PS_AM,
 d_grids_PS_AM, ksn_grids_PS_AM) = _read_param_layers("../../Results/Grids_Params_Transplant/AM_PS.nc")

(a_grids_rf_AM, b_grids_rf_AM, c_grids_rf_AM,
 d_grids_rf_AM, ksn_grids_rf_AM) = _read_param_layers("../../Results/Grids_Params_Transplant/AM_rf.nc")

# DWBM: physical similarity, then random forest
(alpha1_grids_PS_DM, alpha2_grids_PS_DM, smax_grids_PS_DM,
 d_grids_PS_DM, ksn_grids_PS_DM) = _read_param_layers("../../Results/Grids_Params_Transplant/DM_PS.nc")

(alpha1_grids_rf_DM, alpha2_grids_rf_DM, smax_grids_rf_DM,
 d_grids_rf_DM, ksn_grids_rf_DM) = _read_param_layers("../../Results/Grids_Params_Transplant/DM_rf.nc")

# 预估格点的最佳模型

In [None]:
# sim_results = pd.read_csv("../../Results/Best_Model_Transplant/Best_Model_Calibration.txt", sep="\t", index_col='stat_num')[['BM_3']]

# models = {
#     "RandomForest": RandomForestClassifier(n_estimators=200, random_state=42),
#     "SVM": Pipeline([("scaler", StandardScaler()), 
#                      ("clf", SVC(kernel="rbf", probability=True, random_state=42))]),
#     "XGBoost": XGBClassifier(n_estimators=300, max_depth=10, learning_rate=0.01, random_state=42),
#     "KNN": Pipeline([("scaler", StandardScaler()), 
#                      ("clf", KNeighborsClassifier(n_neighbors=15))])
# }

# scaler = StandardScaler()
# source_properties_scaled = scaler.fit_transform(basin_props_np)
# y_train = sim_results['BM_3'].values - 1

# target_properties_scaled = scaler.transform(grids_prop_np)

# trained_rf_model  = models['RandomForest'].fit(source_properties_scaled, y_train)
# trained_svm_model = models['SVM'].fit(source_properties_scaled, y_train)
# trained_xgb_model = models['XGBoost'].fit(source_properties_scaled, y_train)
# trained_knn_model = models['KNN'].fit(source_properties_scaled, y_train)

# pred_grids_best_model_rf  = trained_rf_model.predict(target_properties_scaled)
# pred_grids_best_model_svm = trained_svm_model.predict(target_properties_scaled)
# pred_grids_best_model_xgb = trained_xgb_model.predict(target_properties_scaled)
# pred_grids_best_model_knn = trained_knn_model.predict(target_properties_scaled)

In [None]:
# grids_best_model_rf  = np.full_like(grids_mask, np.nan, dtype=np.float32)
# grids_best_model_svm = np.full_like(grids_mask, np.nan, dtype=np.float32)
# grids_best_model_xgb = np.full_like(grids_mask, np.nan, dtype=np.float32)
# grids_best_model_knn = np.full_like(grids_mask, np.nan, dtype=np.float32)

# grids_best_model_rf[grids_prop['HANG'].values, grids_prop['LIE'].values]  = pred_grids_best_model_rf
# grids_best_model_svm[grids_prop['HANG'].values, grids_prop['LIE'].values] = pred_grids_best_model_svm
# grids_best_model_xgb[grids_prop['HANG'].values, grids_prop['LIE'].values] = pred_grids_best_model_xgb
# grids_best_model_knn[grids_prop['HANG'].values, grids_prop['LIE'].values] = pred_grids_best_model_knn

In [None]:
# def write_nc(filepath, lon, lat, best_model):
#     # 创建一个新的NetCDF文件
#     ds = xr.Dataset(
#         {
#             'best_model': (['lat', 'lon'], best_model),
#         },
#         coords={
#             'lon': (['lon'], lon),
#             'lat': (['lat'], lat)
#         }
#     )
#     ds.to_netcdf(filepath, mode='w')
#     ds.close()
# grids_lon = xr.open_dataset("../../Data/Grids_Prop/mask.nc").longitude.values
# grids_lat = xr.open_dataset("../../Data/Grids_Prop/mask.nc").latitude.values

# write_nc("../../Results/Grids_Params_Transplant/Best_Model_rf.nc", grids_lon, grids_lat, grids_best_model_rf)
# write_nc("../../Results/Grids_Params_Transplant/Best_Model_svm.nc", grids_lon, grids_lat, grids_best_model_svm)
# write_nc("../../Results/Grids_Params_Transplant/Best_Model_xgb.nc", grids_lon, grids_lat, grids_best_model_xgb)
# write_nc("../../Results/Grids_Params_Transplant/Best_Model_knn.nc", grids_lon, grids_lat, grids_best_model_knn)

In [18]:
# Stored best-model maps, one file per classifier; each is read unflipped
# from the 'best_model' variable.
Best_Model_rf, Best_Model_svm, Best_Model_xgb, Best_Model_knn = (
    read_nc(best_model_path, "best_model", False)
    for best_model_path in (
        "../../Results/Grids_Params_Transplant/Best_Model_rf.nc",
        "../../Results/Grids_Params_Transplant/Best_Model_svm.nc",
        "../../Results/Grids_Params_Transplant/Best_Model_xgb.nc",
        "../../Results/Grids_Params_Transplant/Best_Model_knn.nc",
    )
)

# 预估格点的模型权重

In [20]:
# Raw per-basin weights of the three models.
_raw_weight_cols = ['r_w_YM', 'r_w_AM', 'r_w_DM']
sim_results = pd.read_csv("../../Results/Weighted_Average/Weighted_Average_Results_AIAC.txt", sep="\t", index_col='stat_num')[_raw_weight_cols]

# Rows whose raw weights sum to zero get r_w_YM = 1, so their total becomes 1
# and the normalisation below does not produce 0/0 for them.
_all_zero_rows = sim_results[_raw_weight_cols].sum(axis=1) == 0
sim_results.loc[_all_zero_rows, 'r_w_YM'] = 1

# Normalise the raw weights by their row total.
_weight_total = sim_results['r_w_YM'] + sim_results['r_w_AM'] + sim_results['r_w_DM']
sim_results['new_w_YM'] = sim_results['r_w_YM'] / _weight_total
sim_results['new_w_AM'] = sim_results['r_w_AM'] / _weight_total
sim_results['new_w_DM'] = sim_results['r_w_DM'] / _weight_total

# Standardise with statistics fitted on the basins only, then reuse them for the grid cells.
scaler = StandardScaler()
source_properties_scaled = scaler.fit_transform(basin_props_np)
target_properties_scaled = scaler.transform(grids_prop_np)

# Regression targets: the three normalised weights per basin.
y_train = sim_results[['new_w_YM', 'new_w_AM', 'new_w_DM']].values

# Fit one regressor per algorithm ...
trained_rf_model, trained_svr_model, trained_xgb_model = (
    train_random_forest(source_properties_scaled, y_train),
    train_svm(source_properties_scaled, y_train),
    train_xgboost(source_properties_scaled, y_train),
)

# ... and predict the weights of every grid cell with each of them.
pred_grids_model_weight_rf, pred_grids_model_weight_svr, pred_grids_model_weight_xgb = (
    get_params_by_regression(target_properties_scaled, trained_rf_model),
    get_params_by_regression(target_properties_scaled, trained_svr_model),
    get_params_by_regression(target_properties_scaled, trained_xgb_model),
)

In [None]:
def _weights_to_grid(cell_weights):
    """Place per-cell weights on a NaN-filled float32 grid shaped like ``grids_mask``.

    Values are written at the (HANG, LIE) positions stored in ``grids_prop``;
    all other cells stay NaN.
    """
    weight_grid = np.full_like(grids_mask, np.nan, dtype=np.float32)
    weight_grid[grids_prop['HANG'].values, grids_prop['LIE'].values] = cell_weights
    return weight_grid


# Prediction columns 0, 1, 2 hold the YM, AM and DM weights respectively.
grids_model_weight_YM_rf, grids_model_weight_AM_rf, grids_model_weight_DM_rf = (
    _weights_to_grid(pred_grids_model_weight_rf[:, col]) for col in range(3)
)

grids_model_weight_YM_svr, grids_model_weight_AM_svr, grids_model_weight_DM_svr = (
    _weights_to_grid(pred_grids_model_weight_svr[:, col]) for col in range(3)
)

grids_model_weight_YM_xgb, grids_model_weight_AM_xgb, grids_model_weight_DM_xgb = (
    _weights_to_grid(pred_grids_model_weight_xgb[:, col]) for col in range(3)
)

In [None]:
def write_nc(filepath, lon, lat, weight_YM, weight_AM, weight_DM):
    """Write the three model-weight grids to a NetCDF file.

    Parameters
    ----------
    filepath : path of the NetCDF file to create (opened with mode 'w').
    lon, lat : 1-D coordinate values stored as the ``lon`` / ``lat`` coordinates.
    weight_YM, weight_AM, weight_DM : 2-D grids laid out as (lat, lon), stored
        as the variables ``weight_YM``, ``weight_AM`` and ``weight_DM``.

    Returns
    -------
    None
    """
    named_grids = (
        ('weight_YM', weight_YM),
        ('weight_AM', weight_AM),
        ('weight_DM', weight_DM),
    )
    # Every weight variable shares the (lat, lon) dimension order.
    data_vars = {label: (['lat', 'lon'], grid) for label, grid in named_grids}
    axes = {
        'lon': (['lon'], lon),
        'lat': (['lat'], lat),
    }

    out_ds = xr.Dataset(data_vars, coords=axes)
    out_ds.to_netcdf(filepath, mode='w')
    out_ds.close()

# Read both coordinate vectors from a single open of the mask file and close it
# afterwards (previously the file was opened twice and never closed).
# `.values` yields in-memory NumPy arrays, so they remain valid after the close.
with xr.open_dataset("../../Data/Grids_Prop/mask.nc") as mask_ds:
    grids_lon = mask_ds.longitude.values
    grids_lat = mask_ds.latitude.values

# One NetCDF file per regression method, each holding the YM / AM / DM weight grids.
write_nc("../../Results/Grids_Params_Transplant/Model_Weight_rf.nc", grids_lon, grids_lat,
         grids_model_weight_YM_rf, grids_model_weight_AM_rf, grids_model_weight_DM_rf)
write_nc("../../Results/Grids_Params_Transplant/Model_Weight_svr.nc", grids_lon, grids_lat,
         grids_model_weight_YM_svr, grids_model_weight_AM_svr, grids_model_weight_DM_svr)
write_nc("../../Results/Grids_Params_Transplant/Model_Weight_xgb.nc", grids_lon, grids_lat,
         grids_model_weight_YM_xgb, grids_model_weight_AM_xgb, grids_model_weight_DM_xgb)

# 气象数据驱动格点模型

In [None]:
def read_grid_params(filepath):
    """Read the five gridded model parameters from a NetCDF file.

    The variables ``ks``, ``kg``, ``alpha``, ``smax`` and ``ksn`` are read in
    that order and stacked along a new last axis.

    Parameters
    ----------
    filepath : path of the NetCDF parameter file.

    Returns
    -------
    numpy.ndarray
        Array whose last axis has length 5 (ks, kg, alpha, smax, ksn).

    Raises
    ------
    KeyError
        If one of the five variables is missing. The file is closed in that
        case as well, because it is opened through a context manager.
    """
    param_names = ('ks', 'kg', 'alpha', 'smax', 'ksn')
    with Dataset(filepath, 'r') as grids_param_dataset:
        # `.data` keeps the raw values of the returned array (any mask is dropped).
        param_layers = [
            grids_param_dataset.variables[name][:].data for name in param_names
        ]
    params_PhS_3d = np.stack(param_layers, axis=-1)
    return params_PhS_3d

def read_forcing_data(filepath, element):
    """Read one forcing variable from a NetCDF file with axes 1 and 2 swapped.

    Parameters
    ----------
    filepath : path of the NetCDF forcing file.
    element : name of the variable to read.

    Returns
    -------
    numpy.ndarray
        The variable's values (``.data`` of the array that was read) with
        axes 1 and 2 exchanged.

    Notes
    -----
    The file is opened through a context manager so the handle is always
    released; the original implementation never closed the dataset.
    """
    with Dataset(filepath, 'r') as forcing_dataset:
        # The values are fully read into memory here, so they stay usable
        # after the dataset is closed.
        forcing_data = forcing_dataset[element][:].data.swapaxes(1, 2)
    return forcing_data

# Per-grid parameter stacks for the YM, AM and DM models, read from the
# Grids_Params_Transplant results.
params_grids_YM_PhS, params_grids_AM_PhS, params_grids_DM_PhS = map(
    read_grid_params,
    (
        "../../Results/Grids_Params_Transplant/YM_PhS.nc",
        "../../Results/Grids_Params_Transplant/AM_PhS.nc",
        "../../Results/Grids_Params_Transplant/DM_PhS.nc",
    ),
)

# Forcing product and simulation period (first and last year).
dataset = "gswp3"
start_time, end_time = 1971, 2010
data_time = f"{start_time}_{end_time}"

# Month-start timestamps over the period, and their offsets in days from 1900-01-01.
time_series = pd.date_range(start=f"{start_time}-01", end=f"{end_time}-12", freq='MS')
time_delta = (time_series - pd.Timestamp('1900-01-01')).days.values

# Forcing cubes (pr, tas, pet) loaded from .npy files and cast to float32.
pr_data, tas_data, pet_data = (
    np.load(forcing_file).astype(np.float32)
    for forcing_file in (
        f"../../Data/forcing/pr_{dataset}_{data_time}.npy",
        f"../../Data/forcing/tas_{dataset}_{data_time}.npy",
        f"../../Data/forcing/pet_{dataset}_{data_time}.npy",
    )
)

In [None]:
# Six independent NaN-filled float32 cubes shaped like pr_data: the Rsim_* and
# Esim_* outputs of the YM, AM and DM models.
Rsim_YM, Rsim_AM, Rsim_DM, Esim_YM, Esim_AM, Esim_DM = (
    np.full_like(pr_data, np.nan, dtype=np.float32) for _ in range(6)
)

In [None]:
# Extract the integer row/column index of every grid cell once. The original
# loop called `grids_prop.iloc[i]` twice per iteration, which builds a full row
# Series each time; the column-wise conversion yields the same indices.
grid_rows = grids_prop['HANG'].values.astype(int)
grid_cols = grids_prop['LIE'].values.astype(int)

for hang, lie in zip(grid_rows, grid_cols):
    # Forcing series of this cell, each shifted by a small constant (1e-5),
    # as in the original code.
    pr   = pr_data[:, hang, lie] + 1e-5
    tas  = tas_data[:, hang, lie] + 1e-5
    pet  = pet_data[:, hang, lie] + 1e-5
    params_YM = params_grids_YM_PhS[hang, lie]
    params_AM = params_grids_AM_PhS[hang, lie]
    params_DM = params_grids_DM_PhS[hang, lie]
    # (time, 3) matrix with columns pr, tas, pet. Built with vstack(...).T
    # exactly as before so the memory layout passed to the models is unchanged.
    grid_forcing = np.vstack([pr, tas, pet]).T
    Rsim_YM[:, hang, lie], Esim_YM[:, hang, lie] = mYWBMnlS_RE(grid_forcing, params_YM)
    Rsim_AM[:, hang, lie], Esim_AM[:, hang, lie] = abcdnlS_RE(grid_forcing, params_AM)
    Rsim_DM[:, hang, lie], Esim_DM[:, hang, lie] = DWBMnlS_RE(grid_forcing, params_DM)

# Down-cast the results to float16 (half the size of float32).
# NOTE(review): float16 cannot hold magnitudes above 65504 and keeps about
# 3 significant digits; confirm this is acceptable for the simulated values.
Rsim_YM = Rsim_YM.astype(np.float16)
Rsim_AM = Rsim_AM.astype(np.float16)
Rsim_DM = Rsim_DM.astype(np.float16)
Esim_YM = Esim_YM.astype(np.float16)
Esim_AM = Esim_AM.astype(np.float16)
Esim_DM = Esim_DM.astype(np.float16)

In [None]:
# Persist the six simulation cubes, one .npy file per model and variable.
for sim_path, sim_cube in (
    (f"../../Results/Grids_Simulation_Results/Rsim_YM_{dataset}.npy", Rsim_YM),
    (f"../../Results/Grids_Simulation_Results/Rsim_AM_{dataset}.npy", Rsim_AM),
    (f"../../Results/Grids_Simulation_Results/Rsim_DM_{dataset}.npy", Rsim_DM),
    (f"../../Results/Grids_Simulation_Results/Esim_YM_{dataset}.npy", Esim_YM),
    (f"../../Results/Grids_Simulation_Results/Esim_AM_{dataset}.npy", Esim_AM),
    (f"../../Results/Grids_Simulation_Results/Esim_DM_{dataset}.npy", Esim_DM),
):
    np.save(sim_path, sim_cube)

# 格点模拟结果的加权平均

In [None]:
# Forcing product whose saved simulation results are loaded below.
dataset = "gswp3"

# Load the Rsim_* cubes of the three models.
Rsim_YM, Rsim_AM, Rsim_DM = map(
    np.load,
    (
        f"../../Results/Grids_Simulation_Results/Rsim_YM_{dataset}.npy",
        f"../../Results/Grids_Simulation_Results/Rsim_AM_{dataset}.npy",
        f"../../Results/Grids_Simulation_Results/Rsim_DM_{dataset}.npy",
    ),
)

# Load the Esim_* cubes of the three models.
Esim_YM, Esim_AM, Esim_DM = map(
    np.load,
    (
        f"../../Results/Grids_Simulation_Results/Esim_YM_{dataset}.npy",
        f"../../Results/Grids_Simulation_Results/Esim_AM_{dataset}.npy",
        f"../../Results/Grids_Simulation_Results/Esim_DM_{dataset}.npy",
    ),
)

In [None]:
# Arithmetic (equal-weight) mean of the three models
def _equal_weight_mean_f16(*members):
    """Average same-shaped arrays with a float32 accumulator; return float16.

    The inputs are float16 cubes. Adding them directly would round every
    intermediate sum to float16 and overflow to inf once a sum exceeds 65504,
    even when the mean itself is representable. Accumulating in float32 avoids
    both, and the result is cast to float16 only once at the end.
    """
    total = np.zeros(members[0].shape, dtype=np.float32)
    for member in members:
        total += member
    total /= len(members)
    return total.astype(np.float16)


Rsim_ArM = _equal_weight_mean_f16(Rsim_YM, Rsim_AM, Rsim_DM)
Esim_ArM = _equal_weight_mean_f16(Esim_YM, Esim_AM, Esim_DM)

np.save(f"../../Results/Grids_Simulation_Results/Rsim_ArM_{dataset}.npy", Rsim_ArM.astype(np.float16))
np.save(f"../../Results/Grids_Simulation_Results/Esim_ArM_{dataset}.npy", Esim_ArM.astype(np.float16))

In [None]:
# Weighted mean of the three models, using the SVR-predicted weight grids
_svr_weight_path = "../../Results/Grids_Params_Transplant/Model_Weight_svr.nc"
grids_model_weight_YM_svr, grids_model_weight_AM_svr, grids_model_weight_DM_svr = (
    read_nc(_svr_weight_path, weight_name, False)
    for weight_name in ("weight_YM", "weight_AM", "weight_DM")
)

# Each model's cube is scaled by its weight grid; the terms are added in the
# order YM, AM, DM.
Rsim_WeM_svr = (
    Rsim_YM * grids_model_weight_YM_svr
    + Rsim_AM * grids_model_weight_AM_svr
    + Rsim_DM * grids_model_weight_DM_svr
)
Esim_WeM_svr = (
    Esim_YM * grids_model_weight_YM_svr
    + Esim_AM * grids_model_weight_AM_svr
    + Esim_DM * grids_model_weight_DM_svr
)

np.save(f"../../Results/Grids_Simulation_Results/Rsim_WeM_svr_{dataset}.npy", Rsim_WeM_svr.astype(np.float16))
np.save(f"../../Results/Grids_Simulation_Results/Esim_WeM_svr_{dataset}.npy", Esim_WeM_svr.astype(np.float16))

In [None]:
# Automatically pick the best model for every grid cell
def _select_by_best_model(best_idx, members):
    """Pick, cell by cell, the member whose position equals ``best_idx``.

    Parameters
    ----------
    best_idx : array of model codes (0, 1, 2, ...), broadcastable against the
        member arrays.
    members : sequence of same-shaped arrays; ``members[k]`` is used wherever
        ``best_idx == k``.

    Returns
    -------
    numpy.ndarray
        float32 array holding the selected values; cells whose code matches
        no member stay NaN.

    Notes
    -----
    Selecting by index (instead of multiplying by 0/1 masks and summing) keeps
    a NaN or inf in a model that was not selected from contaminating the
    result, since ``nan * 0`` and ``inf * 0`` are both NaN.
    """
    best_idx = np.asarray(best_idx)
    picked = np.full(np.broadcast(best_idx, members[0]).shape, np.nan, dtype=np.float32)
    for model_id, member in enumerate(members):
        np.copyto(picked, member, where=(best_idx == model_id))
    return picked


grids_best_model_svm = read_nc("../../Results/Grids_Params_Transplant/Best_Model_svm.nc", "best_model", False)

# One-hot masks (0 -> YM, 1 -> AM, 2 -> DM), kept under their original names.
# NOTE(review): these names are also used for the SVR regression weights in the
# weighted-mean cell, so this assignment overwrites them.
grids_model_weight_YM_svr = (grids_best_model_svm == 0).astype(np.float32)
grids_model_weight_AM_svr = (grids_best_model_svm == 1).astype(np.float32)
grids_model_weight_DM_svr = (grids_best_model_svm == 2).astype(np.float32)

Rsim_BeM_svr = _select_by_best_model(grids_best_model_svm, (Rsim_YM, Rsim_AM, Rsim_DM))
Esim_BeM_svr = _select_by_best_model(grids_best_model_svm, (Esim_YM, Esim_AM, Esim_DM))

np.save(f"../../Results/Grids_Simulation_Results/Rsim_BeM_svr_{dataset}.npy", Rsim_BeM_svr.astype(np.float16))
np.save(f"../../Results/Grids_Simulation_Results/Esim_BeM_svr_{dataset}.npy", Esim_BeM_svr.astype(np.float16))