In [1]:
import optim
import simplace
import simplace_runner
import numpy as np
import pandas as pd
from tqdm import tqdm
from glob import glob
import os
from sklearn.metrics import mean_squared_error
import optuna
import yaml

In [2]:
def process_result(sim_dir=None, obs_path=None, location_path=None, state='Brandenburg'):
    """Build the observed-vs-simulated phenology table for one state.

    Reads every ``*.csv`` file in the simulation output directory, keeps the
    rows whose ``projectid`` matches a ``Cell_ID`` of ``state`` in the location
    table, aggregates anthesis/maturity day-of-year per ``Year`` (mean and
    standard deviation) and inner-joins the result with the observed
    phenology of the same state.

    Parameters
    ----------
    sim_dir : str, optional
        Directory holding the ``;``-delimited simulation CSV files.
        Defaults to ``simulation-optuna/output/yearly``.
    obs_path : str, optional
        CSV file with the observed phenology.
        Defaults to ``data/phenology_WW_1999_2021.csv``.
    location_path : str, optional
        CSV file mapping ``Cell_ID`` to NUTS region and state.
        Defaults to ``data/DE_DWD_UBN_Crop.csv``.
    state : str, optional
        Value of ``STATE_NAME`` to keep in both the location table and the
        observations. Defaults to ``'Brandenburg'``.

    Returns
    -------
    pandas.DataFrame
        One row per ``Year`` present in both sources, with the columns
        ``STATE_ID``, ``STATE_NAME``, ``Year``, ``AnthesisDOY``,
        ``MaturityDOY`` (observed), ``AnthesisDOY_sim``, ``MaturityDOY_sim``
        (yearly simulated mean, truncated to int), ``AnthesisDOY_std_sim``
        and ``MaturityDOY_std_sim`` (yearly simulated standard deviation).

    Raises
    ------
    FileNotFoundError
        If ``sim_dir`` contains no ``*.csv`` file.
    """
    # Paths are assembled with os.path.join so they resolve on every OS;
    # backslash-separated literals only work on Windows.
    if sim_dir is None:
        sim_dir = os.path.join('simulation-optuna', 'output', 'yearly')
    if obs_path is None:
        obs_path = os.path.join('data', 'phenology_WW_1999_2021.csv')
    if location_path is None:
        location_path = os.path.join('data', 'DE_DWD_UBN_Crop.csv')

    doy_columns = ['AnthesisDOY', 'MaturityDOY']

    # Grid cells of the requested state; 'Cell_ID' becomes the join key 'Location'.
    location_data = pd.read_csv(location_path)
    location_data = location_data[
        ['Cell_ID', 'NUTS_ID', 'NUTS_NAME', 'STATE_ID', 'STATE_NAME']
    ].rename(columns={'Cell_ID': 'Location'})
    location_data = location_data[location_data['STATE_NAME'] == state]

    # Read all simulation outputs and concatenate once (concatenating inside
    # the loop re-copies the accumulated frame on every iteration).
    sim_paths = glob(os.path.join(sim_dir, '*.csv'))
    if not sim_paths:
        raise FileNotFoundError(f'No simulation output (*.csv) found in {sim_dir!r}')
    sim_data = pd.concat(
        [pd.read_csv(path, delimiter=';') for path in tqdm(sim_paths)],
        ignore_index=True,
    )

    # Keep the phenology columns; the inner merge also restricts the
    # simulations to the cells of the requested state.
    sim_data_filtered = sim_data[['projectid', 'Year'] + doy_columns].rename(
        columns={'projectid': 'Location'}
    )
    sim_data_filtered = pd.merge(
        left=sim_data_filtered,
        right=location_data,
        on='Location',
        how='inner',
    )

    # Observed phenology. The source columns are parsed with pd.to_datetime and
    # converted to day of year.
    # NOTE(review): despite the '_DOY' suffix these columns are treated as
    # dates - presumably they hold date strings; verify against the data file.
    obs_phen_data = pd.read_csv(obs_path)
    obs_phen_data = obs_phen_data.assign(
        AnthesisDOY=pd.to_datetime(obs_phen_data['Flowering_DOY']).dt.day_of_year,
        MaturityDOY=pd.to_datetime(obs_phen_data['Harvest_DOY']).dt.day_of_year,
    )
    obs_phen_data = obs_phen_data[['STATE_ID', 'STATE_NAME', 'Year'] + doy_columns]
    obs_phen_data = obs_phen_data[obs_phen_data['STATE_NAME'] == state]

    # Yearly mean and standard deviation over all simulated cells.
    sim_data_grouped = sim_data_filtered[['Year'] + doy_columns].groupby(by='Year')
    sim_stats = sim_data_grouped.mean().reset_index()
    # NOTE(review): astype('int') truncates the mean instead of rounding it and
    # fails on NaN; kept as-is so previously computed losses stay comparable.
    sim_stats[doy_columns] = sim_stats[doy_columns].astype('int')
    sim_spread = sim_data_grouped.std().reset_index().rename(
        columns={col: f'{col}_std' for col in doy_columns}
    )
    sim_stats = pd.merge(left=sim_stats, right=sim_spread, on='Year', how='inner')
    # Every column except 'Year' gets the '_sim' suffix so it cannot collide
    # with the observed columns of the same name.
    sim_stats = sim_stats.rename(
        columns={col: f'{col}_sim' for col in sim_stats.columns[1:]}
    )

    # Observations are already restricted to `state` above.
    phen_comparison_df = pd.merge(
        left=obs_phen_data,
        right=sim_stats,
        on='Year',
        how='inner',
    )

    return phen_comparison_df

In [3]:
def loss_fn(df):
    """Return the root-mean-square error between observed and simulated anthesis.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame providing the columns ``AnthesisDOY`` (passed as the first
        argument to ``mean_squared_error``) and ``AnthesisDOY_sim`` (passed as
        the second).

    Returns
    -------
    float
        Square root of the mean squared error of the two columns.
    """
    observed = df['AnthesisDOY']
    simulated = df['AnthesisDOY_sim']
    return np.sqrt(mean_squared_error(observed, simulated))

In [4]:
# Paths used by the optimisation run, relative to the notebook's working directory.
# xml_path and config_path are the two positional arguments handed to
# optim.SimplaceOptimizer in the following cell.
xml_path = r'simulation-optuna/SimulationExperimentTemplateTest/data/crop/crop_cka_latest_USL_test.xml'
config_path = r'config.yaml'
# NOTE(review): process_result() does not read the two module-level names
# below; it resolves its own paths internally. They are not referenced in the
# visible part of the notebook - confirm whether a later cell needs them.
sim_dir = r'simulation-optuna/output/yearly'  
# NOTE(review): despite its name this is a file, and it lacks the 'data'
# directory prefix that process_result() uses for the same CSV.
obs_dir = r'phenology_WW_1999_2021.csv'

In [5]:
# Wire the crop-parameter XML, the YAML config and the two callbacks defined
# above (result post-processing and loss) into the optimizer.
optimizer = optim.SimplaceOptimizer(
    xml_path,
    config_path,
    process_result_fn=process_result,
    loss_fn=loss_fn,
    device='local',
    log_file='log.txt',
)

# Minimise the loss over three trials; with storage=None the study is not
# given a storage backend.
study = optimizer.run_optimization(
    direction='minimize',
    n_trials=3,
    study_name='Simplace Optimizer',
    storage=None,
)

[I 2025-05-23 18:36:42,765] A new study created in memory with name: Simplace Optimizer
2025-05-23 18:36:42,780 - INFO - Trial 0: Suggested parameters - {'TEFFMX': 34.672872786831334, 'TSUM1': 1010, 'TSUM2': 1570, 'RUETableRUE_0': 2.984892407870185, 'RUETableRUE_1': 3.990758075928243, 'RUETableRUE_2': 3.6093703141180975, 'RUETableRUE_3': 3.6424502530251135}
2025-05-23 18:36:42,781 - INFO - Starting Simplace subprocess...
2025-05-23 18:38:09,418 - INFO - Simplace subprocess completed successfully.
2025-05-23 18:38:09,419 - INFO - Trial 0: Processing output...
100%|██████████| 3/3 [00:00<00:00, 325.63it/s]
2025-05-23 18:38:09,661 - INFO - Trial 0 completed with loss: 37.2862
2025-05-23 18:38:09,661 - INFO - Best is trial 0 with value: 37.2862.

[I 2025-05-23 18:38:09,661] Trial 0 finished with value: 37.28620213383521 and parameters: {'TEFFMX': 34.672872786831334, 'TSUM1': 1010, 'TSUM2': 1570, 'RUETableRUE_0': 2.984892407870185, 'RUETableRUE_1': 3.990758075928243, 'RUETableRUE_2': 3.6093