## **Import Dependencies**

In [13]:
import os
import subprocess
import sys
from glob import glob

import numpy as np
import optuna
import pandas as pd
import simplace
import yaml
from lxml import etree
from sklearn.metrics import mean_squared_error, r2_score
from tqdm import tqdm

import warnings
warnings.filterwarnings('ignore')

In [14]:
import jpype

# Report whether a JVM is already attached to this kernel.
print("JVM is running" if jpype.isJVMStarted() else "JVM is not running")

JVM is not running


In [15]:
# Simplace directory layout (relative paths, resolved against the current
# working directory).
install_dir = 'simplace_portable/workspace/'
work_dir = 'simulation-optuna/'
output_dir = 'simulation-optuna/output/'

# In-kernel Simplace start-up, kept for reference only (disabled):
# sp = simplace.initSimplace(install_dir, work_dir, output_dir, javaParameters=["-Xmx16384m"])
#
# solution_file = r'simulation-optuna/SimulationExperimentTemplateTest/solution/Soil3_Germany_AllKreis_Test.sol.xml'
# project_file = r'simulation-optuna/SimulationExperimentTemplateTest/project/Soil3_Germany_AllKreis.proj.xml'
# simplace.openProject(sp, solution_file, project_file)
# simplace.runProject(sp)

In [16]:
# Function to update single-value parameters (e.g., TBASEM)
def update_single_value_param(root, param_id, crop_name, new_value):
    """Overwrite the text of a crop's scalar ``<parameter>`` element(s).

    Selects every ``<parameter id=param_id>`` located inside a ``<crop>``
    element that has a ``<parameter id="CropName">`` child whose text equals
    ``crop_name`` and sets its text to ``str(new_value)``. Matching parameters
    that contain child elements are left untouched.

    Args:
        root: Element (or tree) to search; must provide an lxml-style
            ``xpath(expr, **variables)`` method.
        param_id: Value of the ``id`` attribute of the parameter to update.
        crop_name: Crop name compared with the ``CropName`` parameter text.
        new_value: Value to store; written as ``str(new_value)``.

    Returns:
        int: Number of parameter elements whose text was rewritten. ``0``
        means nothing in the document matched.
    """
    # Bind the names as XPath variables rather than formatting them into the
    # expression, so quotes in crop_name/param_id cannot break or alter the query.
    xpath = "//crop[parameter[@id='CropName' and text()=$crop_name]]/parameter[@id=$param_id]"
    updated = 0
    for param in root.xpath(xpath, crop_name=str(crop_name), param_id=str(param_id)):
        # len(element) is the number of children; only leaf parameters are scalar.
        if len(param) == 0:
            param.text = str(new_value)
            updated += 1
    return updated

In [17]:
# Function to update multi-value parameters (e.g., PHOTTB)
def update_multi_value_param(root, param_id, crop_name, new_values):
    """Replace the ``<value>`` children of a crop's table-style parameter(s).

    Selects every ``<parameter id=param_id>`` located inside a ``<crop>``
    element that has a ``<parameter id="CropName">`` child whose text equals
    ``crop_name``. For each match the existing ``<value>`` children are
    overwritten in order with ``str(v)`` for each ``v`` in ``new_values``;
    missing ``<value>`` elements are appended and surplus ones are removed, so
    the parameter ends up with exactly ``len(new_values)`` values.

    Args:
        root: Element (or tree) to search; must provide an lxml-style
            ``xpath(expr, **variables)`` method.
        param_id: Value of the ``id`` attribute of the parameter to update.
        crop_name: Crop name compared with the ``CropName`` parameter text.
        new_values: Iterable of the new values, in document order.

    Returns:
        int: Number of parameter elements that were rewritten. ``0`` means
        nothing in the document matched.
    """
    # Bind the names as XPath variables rather than formatting them into the
    # expression, so quotes in crop_name/param_id cannot break or alter the query.
    xpath = "//crop[parameter[@id='CropName' and text()=$crop_name]]/parameter[@id=$param_id]"
    new_texts = [str(val) for val in new_values]
    updated = 0
    for param in root.xpath(xpath, crop_name=str(crop_name), param_id=str(param_id)):
        value_elements = param.findall('value')
        # Overwrite the values that already exist ...
        for value_elem, text in zip(value_elements, new_texts):
            value_elem.text = text
        # ... append the ones that are missing ...
        for text in new_texts[len(value_elements):]:
            etree.SubElement(param, 'value').text = text
        # ... and drop any surplus existing <value> elements.
        for extra in value_elements[len(new_texts):]:
            param.remove(extra)
        updated += 1
    return updated

In [18]:
# Crop parameter XML that each trial rewrites, and the YAML file describing the
# search space. The XML path is assembled with os.path.join so the separator
# matches the host OS (a backslash-separated literal only resolves on Windows).
xml_path = os.path.join(
    'simulation-optuna',
    'SimulationExperimentTemplateTest',
    'data',
    'crop',
    'crop_cka_latest_USL_test.xml',
)
config_path = 'config.yaml'

In [19]:
def process_result(sim_dir, obs_dir, state='Brandenburg', location_csv='DE_DWD_UBN_Crop.csv'):
    """Build a per-year table of observed vs. simulated phenology for one state.

    Steps:
      1. Read every ``*.csv`` (``;``-delimited) in ``sim_dir`` and stack them.
      2. Keep ``projectid``, ``Year``, ``AnthesisDOY`` and ``MaturityDOY``;
         treat ``projectid`` as the location id and inner-join it with the
         rows of ``location_csv`` whose ``STATE_NAME`` equals ``state``.
      3. Per ``Year``, compute the mean (cast to int) and standard deviation of
         both DOY columns over the remaining locations.
      4. Inner-join those yearly statistics with the observed rows for
         ``state`` on ``Year``.

    Args:
        sim_dir: Directory holding the simulation output CSV files.
        obs_dir: Path of the observed phenology CSV *file* (despite the name).
            Must contain ``Flowering_DOY``, ``Harvest_DOY``, ``STATE_ID``,
            ``STATE_NAME`` and ``Year``.
        state: ``STATE_NAME`` value used to filter locations and observations.
        location_csv: CSV mapping ``Cell_ID`` to ``NUTS_ID``, ``NUTS_NAME``,
            ``STATE_ID`` and ``STATE_NAME``.

    Returns:
        pandas.DataFrame with the observed columns ``STATE_ID``,
        ``STATE_NAME``, ``Year``, ``AnthesisDOY``, ``MaturityDOY`` plus the
        simulated ``AnthesisDOY_sim``, ``MaturityDOY_sim``,
        ``AnthesisDOY_std_sim`` and ``MaturityDOY_std_sim``.

    Raises:
        FileNotFoundError: If ``sim_dir`` contains no ``*.csv`` file.
    """
    doy_cols = ['AnthesisDOY', 'MaturityDOY']

    # Fail fast with a clear message; otherwise an empty directory only shows
    # up later as a KeyError on the missing simulation columns.
    sim_paths = glob(os.path.join(sim_dir, '*.csv'))
    if not sim_paths:
        raise FileNotFoundError(f"No simulation CSV files found in '{sim_dir}'")

    # Locations belonging to the requested state
    locations = pd.read_csv(location_csv)
    locations = locations[['Cell_ID', 'NUTS_ID', 'NUTS_NAME', 'STATE_ID', 'STATE_NAME']].rename(
        columns={'Cell_ID': 'Location'}
    )
    locations = locations[locations['STATE_NAME'] == state]

    # Read all simulation outputs, then concatenate once (concatenating inside
    # the loop copies the accumulated frame on every iteration).
    sim_frames = [pd.read_csv(path, delimiter=';') for path in tqdm(sim_paths)]
    sim_data = pd.concat(sim_frames, ignore_index=True)

    sim_state = (
        sim_data[['projectid', 'Year'] + doy_cols]
        .rename(columns={'projectid': 'Location'})
        .merge(locations, on='Location', how='inner')
    )

    # Observed phenology for the requested state.
    # NOTE(review): the *_DOY columns are parsed as dates and converted to
    # day-of-year, so they presumably hold date strings - confirm against the data.
    obs = pd.read_csv(obs_dir)
    obs['AnthesisDOY'] = pd.to_datetime(obs['Flowering_DOY']).dt.day_of_year
    obs['MaturityDOY'] = pd.to_datetime(obs['Harvest_DOY']).dt.day_of_year
    obs = obs[['STATE_ID', 'STATE_NAME', 'Year'] + doy_cols]
    obs = obs[obs['STATE_NAME'] == state]

    # Yearly mean / standard deviation across locations
    by_year = sim_state[['Year'] + doy_cols].groupby(by='Year')
    sim_mean = by_year.mean().reset_index()
    # NOTE(review): the int cast truncates the mean towards zero and raises if
    # a yearly mean is NaN; kept as-is because downstream scores depend on it.
    sim_mean[doy_cols] = sim_mean[doy_cols].astype('int')
    sim_std = by_year.std().reset_index().rename(
        columns={'AnthesisDOY': 'AnthesisDOY_std', 'MaturityDOY': 'MaturityDOY_std'}
    )

    sim_summary = pd.merge(left=sim_mean, right=sim_std, on='Year', how='inner')
    # Suffix every column except the 'Year' join key with '_sim'
    sim_summary = sim_summary.rename(
        columns={col: f'{col}_sim' for col in sim_summary.columns[1:]}
    )

    return pd.merge(left=obs, right=sim_summary, on='Year', how='inner')

In [8]:
# process_result(
#     r'C:\HALDER\GITHUB\MSM-Research\Simplace-Python\simulation-optuna\output\yearly',
#     r'C:\HALDER\GITHUB\MSM-Research\Simplace-Python\phenology_WW_1999_2021.csv'
# )

In [20]:
def _suggest_param(trial, name, kind, low, high, precision):
    """Sample one value called ``name`` from ``trial`` within ``[low, high]``.

    ``kind`` selects ``trial.suggest_int`` ('int') or ``trial.suggest_float``
    ('float', rounded to ``precision`` decimals). Any other ``kind`` raises
    ``ValueError`` so a typo in the config cannot go unnoticed.
    """
    if kind == 'int':
        return trial.suggest_int(name, low, high)
    if kind == 'float':
        return round(trial.suggest_float(name, low, high), precision)
    raise ValueError(f"Unsupported parameter type {kind!r} for '{name}'; expected 'int' or 'float'")


def objective(trial, xml_path, config_path, sim_dir, obs_dir):
    """Optuna objective: write sampled crop parameters, run Simplace, score anthesis.

    For every trial the function
      1. parses ``xml_path`` and the YAML search space in ``config_path``,
      2. samples each entry of ``single_value_params`` / ``multi_value_params``
         and writes the values into the XML for ``config['crop_name']``,
      3. saves the XML back to ``xml_path`` (the file is overwritten in place),
      4. runs ``run_simplace.py`` in a child process,
      5. compares simulated and observed anthesis via ``process_result``.

    Args:
        trial: ``optuna.trial.Trial`` used to sample the parameters.
        xml_path: Crop parameter XML file; read and overwritten on every call.
        config_path: YAML file with ``crop_name`` and the parameter specs.
        sim_dir: Directory with the simulation output CSVs.
        obs_dir: Path of the observed phenology CSV file.

    Returns:
        float: RMSE between observed ``AnthesisDOY`` and ``AnthesisDOY_sim``.

    Raises:
        ValueError: If a spec has a ``type`` other than 'int'/'float', or if
            observed and simulated data share no year.
        RuntimeError: If ``run_simplace.py`` exits with a non-zero status.
    """
    # Load XML fresh every time
    tree = etree.parse(xml_path)
    root = tree.getroot()

    # Load YAML config
    with open(config_path, 'r') as f:
        config = yaml.safe_load(f)

    crop_name = config['crop_name']
    # `or {}` also covers sections that are present in the YAML but empty (None)
    single_value_specs = config.get('single_value_params') or {}
    multi_value_specs = config.get('multi_value_params') or {}

    # Suggest single value parameters
    for param_id, spec in single_value_specs.items():
        val = _suggest_param(
            trial, param_id, spec['type'], spec['low'], spec['high'], spec.get('precision', 5)
        )
        print(param_id, val)
        update_single_value_param(root, param_id, crop_name, val)

    # Suggest multi-value parameters
    for param_id, spec in multi_value_specs.items():
        kind = spec['type']
        # Validate before sampling: with an empty 'values' list the helper is
        # never called and an unsupported type would otherwise slip through.
        if kind not in ('int', 'float'):
            raise ValueError(
                f"Unsupported parameter type {kind!r} for '{param_id}'; expected 'int' or 'float'"
            )
        precision = spec.get('precision', 3)
        values = [
            _suggest_param(trial, f"{param_id}_{i}", kind, bounds['low'], bounds['high'], precision)
            for i, bounds in enumerate(spec['values'])
        ]
        update_multi_value_param(root, param_id, crop_name, values)
        print(param_id, values)

    # Save modified XML
    tree.write(xml_path, pretty_print=True, xml_declaration=True, encoding='UTF-8')

    # Run the simulation in a child process started with the interpreter that
    # runs this notebook. `run_simplace.py` is resolved against the current
    # working directory.
    # NOTE(review): presumably a subprocess is used because the JVM cannot be
    # restarted inside the kernel - confirm.
    completed = subprocess.run(
        [sys.executable, "run_simplace.py"], capture_output=True, text=True
    )
    if completed.returncode != 0:
        # Without this check a failed run would be scored against whatever
        # output files an earlier trial left behind.
        raise RuntimeError(
            f"run_simplace.py exited with code {completed.returncode}:\n{completed.stderr}"
        )

    comparison = process_result(sim_dir, obs_dir)
    if comparison.empty:
        raise ValueError("No overlapping years between observed and simulated data; cannot compute RMSE")

    rmse = np.sqrt(mean_squared_error(comparison['AnthesisDOY'], comparison['AnthesisDOY_sim']))

    return float(rmse)

In [21]:
# Directory with the yearly simulation CSVs and the observed phenology CSV file.
# The directory is assembled with os.path.join so the separator matches the
# host OS (a backslash-separated literal only resolves on Windows).
sim_dir = os.path.join('simulation-optuna', 'output', 'yearly')
obs_dir = 'phenology_WW_1999_2021.csv'

In [22]:
# Seed the sampler so a Restart & Run All proposes the same parameter sequence.
SEED = 42
study = optuna.create_study(
    direction='minimize',
    sampler=optuna.samplers.TPESampler(seed=SEED),
)
study.optimize(lambda trial: objective(trial, xml_path, config_path, sim_dir, obs_dir), n_trials=30)

[I 2025-05-22 17:43:32,389] A new study created in memory with name: no-name-e50560d5-68df-4587-b359-1260f79c83c3


TSUM1 818
TSUM2 1108


100%|██████████| 3/3 [00:00<00:00, 179.67it/s]
[I 2025-05-22 17:44:59,944] Trial 0 finished with value: 62.77876960440909 and parameters: {'TSUM1': 818, 'TSUM2': 1108}. Best is trial 0 with value: 62.77876960440909.


TSUM1 942
TSUM2 924


100%|██████████| 3/3 [00:00<?, ?it/s]
[I 2025-05-22 17:46:25,831] Trial 1 finished with value: 44.857261541956376 and parameters: {'TSUM1': 942, 'TSUM2': 924}. Best is trial 1 with value: 44.857261541956376.


TSUM1 1439
TSUM2 1483


100%|██████████| 3/3 [00:00<00:00, 179.62it/s]
[I 2025-05-22 17:47:52,974] Trial 2 finished with value: 13.868951874130811 and parameters: {'TSUM1': 1439, 'TSUM2': 1483}. Best is trial 2 with value: 13.868951874130811.


TSUM1 1015
TSUM2 832


100%|██████████| 3/3 [00:00<00:00, 177.87it/s]
[I 2025-05-22 17:49:23,643] Trial 3 finished with value: 36.961201749707875 and parameters: {'TSUM1': 1015, 'TSUM2': 832}. Best is trial 2 with value: 13.868951874130811.


TSUM1 1068
TSUM2 1182


100%|██████████| 3/3 [00:00<00:00, 108.03it/s]
[I 2025-05-22 17:50:53,651] Trial 4 finished with value: 31.67360717228696 and parameters: {'TSUM1': 1068, 'TSUM2': 1182}. Best is trial 2 with value: 13.868951874130811.


TSUM1 889
TSUM2 1317


100%|██████████| 3/3 [00:00<?, ?it/s]
[I 2025-05-22 17:52:19,605] Trial 5 finished with value: 52.563832829609474 and parameters: {'TSUM1': 889, 'TSUM2': 1317}. Best is trial 2 with value: 13.868951874130811.


TSUM1 1278
TSUM2 1185


100%|██████████| 3/3 [00:00<00:00, 913.99it/s]
[I 2025-05-22 17:53:46,451] Trial 6 finished with value: 17.11724276862369 and parameters: {'TSUM1': 1278, 'TSUM2': 1185}. Best is trial 2 with value: 13.868951874130811.


TSUM1 1387
TSUM2 1368


100%|██████████| 3/3 [00:00<00:00, 116.04it/s]
[I 2025-05-22 17:55:14,519] Trial 7 finished with value: 13.75088929930494 and parameters: {'TSUM1': 1387, 'TSUM2': 1368}. Best is trial 7 with value: 13.75088929930494.


TSUM1 1102
TSUM2 1273


100%|██████████| 3/3 [00:00<00:00, 274.60it/s]
[I 2025-05-22 17:56:41,153] Trial 8 finished with value: 28.563430557574684 and parameters: {'TSUM1': 1102, 'TSUM2': 1273}. Best is trial 7 with value: 13.75088929930494.


TSUM1 895
TSUM2 1328


100%|██████████| 3/3 [00:00<?, ?it/s]
[I 2025-05-22 17:58:06,918] Trial 9 finished with value: 51.686765609103794 and parameters: {'TSUM1': 895, 'TSUM2': 1328}. Best is trial 7 with value: 13.75088929930494.


TSUM1 1651
TSUM2 1680


100%|██████████| 3/3 [00:00<00:00, 320.43it/s]
[I 2025-05-22 17:59:34,005] Trial 10 finished with value: 13.997023493109763 and parameters: {'TSUM1': 1651, 'TSUM2': 1680}. Best is trial 7 with value: 13.75088929930494.


TSUM1 1446
TSUM2 1529


100%|██████████| 3/3 [00:00<00:00, 853.66it/s]
[I 2025-05-22 18:01:01,881] Trial 11 finished with value: 13.89557327071248 and parameters: {'TSUM1': 1446, 'TSUM2': 1529}. Best is trial 7 with value: 13.75088929930494.


TSUM1 1441
TSUM2 1495


100%|██████████| 3/3 [00:00<00:00, 278.88it/s]
[I 2025-05-22 18:02:29,806] Trial 12 finished with value: 13.887748745473159 and parameters: {'TSUM1': 1441, 'TSUM2': 1495}. Best is trial 7 with value: 13.75088929930494.


TSUM1 1388
TSUM2 1478


100%|██████████| 3/3 [00:00<?, ?it/s]
[I 2025-05-22 18:03:56,261] Trial 13 finished with value: 13.75088929930494 and parameters: {'TSUM1': 1388, 'TSUM2': 1478}. Best is trial 7 with value: 13.75088929930494.


TSUM1 1269
TSUM2 1675


100%|██████████| 3/3 [00:00<?, ?it/s]
[I 2025-05-22 18:05:24,379] Trial 14 finished with value: 17.608051518009297 and parameters: {'TSUM1': 1269, 'TSUM2': 1675}. Best is trial 7 with value: 13.75088929930494.


TSUM1 1601
TSUM2 1399


100%|██████████| 3/3 [00:00<00:00, 316.91it/s]
[I 2025-05-22 18:06:51,543] Trial 15 finished with value: 19.391840507307954 and parameters: {'TSUM1': 1601, 'TSUM2': 1399}. Best is trial 7 with value: 13.75088929930494.


TSUM1 1366
TSUM2 1552


100%|██████████| 3/3 [00:00<00:00, 333.19it/s]
[I 2025-05-22 18:08:20,423] Trial 16 finished with value: 14.186644055773634 and parameters: {'TSUM1': 1366, 'TSUM2': 1552}. Best is trial 7 with value: 13.75088929930494.


TSUM1 1536
TSUM2 1040


100%|██████████| 3/3 [00:00<00:00, 374.97it/s]
[I 2025-05-22 18:09:47,649] Trial 17 finished with value: 16.399893955118444 and parameters: {'TSUM1': 1536, 'TSUM2': 1040}. Best is trial 7 with value: 13.75088929930494.


TSUM1 1188
TSUM2 1364


100%|██████████| 3/3 [00:00<00:00, 437.97it/s]
[I 2025-05-22 18:11:15,182] Trial 18 finished with value: 22.12022485930211 and parameters: {'TSUM1': 1188, 'TSUM2': 1364}. Best is trial 7 with value: 13.75088929930494.


TSUM1 1344
TSUM2 1432


100%|██████████| 3/3 [00:00<00:00, 459.57it/s]
[I 2025-05-22 18:12:47,512] Trial 19 finished with value: 14.832396974191326 and parameters: {'TSUM1': 1344, 'TSUM2': 1432}. Best is trial 7 with value: 13.75088929930494.


TSUM1 1195
TSUM2 1208


100%|██████████| 3/3 [00:00<?, ?it/s]
[I 2025-05-22 18:14:19,173] Trial 20 finished with value: 21.66142634510359 and parameters: {'TSUM1': 1195, 'TSUM2': 1208}. Best is trial 7 with value: 13.75088929930494.


TSUM1 1493
TSUM2 1604


100%|██████████| 3/3 [00:00<00:00, 300.00it/s]
[I 2025-05-22 18:15:49,764] Trial 21 finished with value: 12.82575533838066 and parameters: {'TSUM1': 1493, 'TSUM2': 1604}. Best is trial 21 with value: 12.82575533838066.


TSUM1 1542
TSUM2 1599


100%|██████████| 3/3 [00:00<00:00, 187.33it/s]
[I 2025-05-22 18:17:19,219] Trial 22 finished with value: 14.207392441964851 and parameters: {'TSUM1': 1542, 'TSUM2': 1599}. Best is trial 21 with value: 12.82575533838066.


TSUM1 1695
TSUM2 1602


100%|██████████| 3/3 [00:00<00:00, 289.49it/s]
[I 2025-05-22 18:18:49,497] Trial 23 finished with value: 16.945614363422948 and parameters: {'TSUM1': 1695, 'TSUM2': 1602}. Best is trial 21 with value: 12.82575533838066.


TSUM1 1357
TSUM2 1418


100%|██████████| 3/3 [00:00<00:00, 174.27it/s]
[I 2025-05-22 18:20:19,432] Trial 24 finished with value: 14.38144940388561 and parameters: {'TSUM1': 1357, 'TSUM2': 1418}. Best is trial 21 with value: 12.82575533838066.


TSUM1 1505
TSUM2 1604


100%|██████████| 3/3 [00:00<?, ?it/s]
[I 2025-05-22 18:21:45,524] Trial 25 finished with value: 13.078608488673403 and parameters: {'TSUM1': 1505, 'TSUM2': 1604}. Best is trial 21 with value: 12.82575533838066.


TSUM1 1529
TSUM2 1614


100%|██████████| 3/3 [00:00<00:00, 380.80it/s]
[I 2025-05-22 18:23:14,983] Trial 26 finished with value: 13.807606599262597 and parameters: {'TSUM1': 1529, 'TSUM2': 1614}. Best is trial 21 with value: 12.82575533838066.


TSUM1 1484
TSUM2 1567


100%|██████████| 3/3 [00:00<00:00, 183.67it/s]
[I 2025-05-22 18:24:45,728] Trial 27 finished with value: 14.714677676742864 and parameters: {'TSUM1': 1484, 'TSUM2': 1567}. Best is trial 21 with value: 12.82575533838066.


TSUM1 1617
TSUM2 1691


100%|██████████| 3/3 [00:00<00:00, 172.70it/s]
[I 2025-05-22 18:26:17,881] Trial 28 finished with value: 12.832251036613439 and parameters: {'TSUM1': 1617, 'TSUM2': 1691}. Best is trial 21 with value: 12.82575533838066.


TSUM1 1603
TSUM2 1651


100%|██████████| 3/3 [00:00<00:00, 385.08it/s]
[I 2025-05-22 18:27:47,150] Trial 29 finished with value: 15.742061491431166 and parameters: {'TSUM1': 1603, 'TSUM2': 1651}. Best is trial 21 with value: 12.82575533838066.


In [12]:
# Parameter values of the best trial recorded by `study` so far.
study.best_params

{'TSUM1': 1199, 'TSUM2': 1003}

In [None]:
# Class
import yaml
from lxml import etree
import optuna

class SimplaceOptimizer:
    """Class-based wrapper around the Optuna parameter search for a crop XML.

    The YAML config is read once at construction; the XML is re-parsed for
    every trial. ``objective`` is meant to be passed to ``study.optimize``.

    NOTE: ``objective`` currently returns a placeholder score and does not run
    the simulation, but it still overwrites ``xml_path`` on every trial.
    """

    _SUPPORTED_TYPES = ('int', 'float')

    def __init__(self, xml_path, config_path):
        """Store the file paths and load the YAML search-space config."""
        self.xml_path = xml_path
        self.config_path = config_path
        self.config = self._load_config()

    def _load_config(self):
        """Return the parsed content of ``self.config_path``."""
        with open(self.config_path, 'r') as f:
            return yaml.safe_load(f)

    def _load_xml(self):
        """Parse ``self.xml_path`` and return ``(tree, root)``."""
        tree = etree.parse(self.xml_path)
        return tree, tree.getroot()

    @classmethod
    def _suggest(cls, trial, name, kind, low, high, precision):
        """Sample one value called ``name`` from ``trial`` within ``[low, high]``.

        'int' uses ``trial.suggest_int``; 'float' uses ``trial.suggest_float``
        rounded to ``precision`` decimals. Any other ``kind`` raises
        ``ValueError``.
        """
        if kind == 'int':
            return trial.suggest_int(name, low, high)
        if kind == 'float':
            return round(trial.suggest_float(name, low, high), precision)
        raise ValueError(
            f"Unsupported parameter type {kind!r} for '{name}'; expected one of {cls._SUPPORTED_TYPES}"
        )

    def _suggest_single_params(self, trial, root):
        """Sample every ``single_value_params`` entry and write it into ``root``.

        Raises:
            ValueError: If a spec's ``type`` is neither 'int' nor 'float'.
        """
        crop_name = self.config['crop_name']
        # `or {}` also covers a section that is present in the YAML but empty (None)
        single_specs = self.config.get('single_value_params') or {}

        for param_id, spec in single_specs.items():
            val = self._suggest(
                trial, param_id, spec['type'], spec['low'], spec['high'], spec.get('precision', 5)
            )
            update_single_value_param(root, param_id, crop_name, val)

    def _suggest_multi_params(self, trial, root):
        """Sample every ``multi_value_params`` entry and write it into ``root``.

        Each element ``i`` of a spec's ``values`` list is sampled under the
        name ``f"{param_id}_{i}"``.

        Raises:
            ValueError: If a spec's ``type`` is neither 'int' nor 'float'.
        """
        crop_name = self.config['crop_name']
        multi_specs = self.config.get('multi_value_params') or {}

        for param_id, spec in multi_specs.items():
            kind = spec['type']
            # Reject unknown types up front: passing an empty list on to
            # update_multi_value_param would strip every existing <value>.
            if kind not in self._SUPPORTED_TYPES:
                raise ValueError(
                    f"Unsupported parameter type {kind!r} for '{param_id}'; "
                    f"expected one of {self._SUPPORTED_TYPES}"
                )
            precision = spec.get('precision', 3)
            values = [
                self._suggest(trial, f"{param_id}_{i}", kind, bounds['low'], bounds['high'], precision)
                for i, bounds in enumerate(spec['values'])
            ]
            update_multi_value_param(root, param_id, crop_name, values)

    def objective(self, trial):
        """Write one trial's sampled parameters to the XML and return a score.

        The returned score is a placeholder sampled via
        ``trial.suggest_float("dummy_score", 0.7, 0.9)``; it does not depend on
        the sampled crop parameters.
        """
        tree, root = self._load_xml()
        self._suggest_single_params(trial, root)
        self._suggest_multi_params(trial, root)

        # Save updated XML (overwrites self.xml_path in place)
        tree.write(self.xml_path, pretty_print=True, xml_declaration=True, encoding='UTF-8')

        # TODO: run the model and compute a real score, e.g.
        # simulated_yield = run_simulation(self.xml_path)
        # score = evaluate(simulated_yield)
        score = trial.suggest_float("dummy_score", 0.7, 0.9)  # Replace with real score
        return score

In [46]:
# Run the class-based optimizer for 10 trials.
# NOTE(review): this rebinds `study`, replacing the study object created by the
# function-based run earlier in the notebook. The class objective returns a
# placeholder score, so the reported values do not reflect model fit.
optimizer = SimplaceOptimizer(xml_path, config_path)
study = optuna.create_study(direction="minimize")
study.optimize(optimizer.objective, n_trials=10)

[I 2025-05-22 02:11:20,052] A new study created in memory with name: no-name-d157e040-706c-4b7b-a223-eafdf5d4f700
[I 2025-05-22 02:11:20,074] Trial 0 finished with value: 0.7668463420921908 and parameters: {'TEFFMX': 39.66944619132069, 'TSUM1': 1682, 'TSUM2': 674, 'RUETableRUE_0': 2.664652745162674, 'RUETableRUE_1': 3.05148158148924, 'RUETableRUE_2': 3.341120846449469, 'RUETableRUE_3': 3.6887364731616197, 'dummy_score': 0.7668463420921908}. Best is trial 0 with value: 0.7668463420921908.
[I 2025-05-22 02:11:20,090] Trial 1 finished with value: 0.7973347957356189 and parameters: {'TEFFMX': 39.78551345243318, 'TSUM1': 2113, 'TSUM2': 459, 'RUETableRUE_0': 2.805025805893568, 'RUETableRUE_1': 3.324112052559183, 'RUETableRUE_2': 3.99324345378398, 'RUETableRUE_3': 3.5892865208038858, 'dummy_score': 0.7973347957356189}. Best is trial 0 with value: 0.7668463420921908.
[I 2025-05-22 02:11:20,105] Trial 2 finished with value: 0.7501892594061048 and parameters: {'TEFFMX': 32.153729912735834, 'TSUM