In [1]:
import json, os, glob
import random
import pandas as pd
import numpy as np
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from scipy.stats import pearsonr
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
import xgboost as xgb
import optuna
from data_preparation.dataset_statewise import YDataset
import torch
import pickle as pkl
from data_preparation.utils import evalMetrics

## prepare data

In [2]:

# trucate time series
start_doy_idx = 11 
end_doy_idx = 38
kernel=5
feature_idx = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 12, 14]

npy_path = '/app/dev/spatial_encoding/data/composite_npy'
label_path= '/app/dev/spatial_encoding/data/composite_npy/labels.json'
norm_path = '/app/dev/spatial_encoding/2024_08/data_preparation/min_max_L2_U98_hist.json'

# years
train_years = list(range(2003, 2020))
test_years_d = [2020]
test_years_nd = [2021]

## support vector regression

In [7]:
def svr_train(seedlist, num_trial, out_dir, exp_name='svr'):
    
    # prepare output_dir 
    os.makedirs(out_dir, exist_ok=True)
    exp_params = {}
    
    # make directory for experiment
    output_dir = os.path.join(out_dir, exp_name)
    os.makedirs(output_dir, exist_ok=True)

    # -- collector for metrics
    metrics_train = []
    metrics_val = []
    metrics_test = []   

    for s in seedlist:
        np.random.seed(s)
        random.seed(s)
        torch.manual_seed(s)

        # Initialize YieldDataset with various parameters
        train_dataset = YDataset(npy_path, label_path, norm_path= norm_path, 
                        lookup=train_years, mode='train', seed=s, start_doy_idx=start_doy_idx, 
                        end_doy_idx=end_doy_idx, ignore_features= [9,10,11], 
                        kernel=kernel, feature_idx =feature_idx, 
                        smooth = True)

        # Initialize YieldDataset with various parameters
        val_dataset = YDataset(npy_path, label_path, norm_path= norm_path, 
                        lookup=train_years, mode='validation', seed=s, start_doy_idx=start_doy_idx, 
                        end_doy_idx=end_doy_idx, ignore_features= [9,10,11], 
                        kernel=kernel, feature_idx =feature_idx, 
                        smooth = True)

        # Initialize YieldDataset with various parameters
        test_dataset_d = YDataset(npy_path, label_path, norm_path= norm_path, 
                        lookup=test_years_d, mode=None, seed=s, start_doy_idx=start_doy_idx, 
                        end_doy_idx=end_doy_idx, ignore_features= [9,10,11], 
                        kernel=kernel, feature_idx =feature_idx, 
                        smooth = True)

        test_dataset_nd = YDataset(npy_path, label_path, norm_path= norm_path, 
                        lookup=test_years_nd, mode=None, seed=s, start_doy_idx=start_doy_idx, 
                        end_doy_idx=end_doy_idx, ignore_features= [9,10,11], 
                        kernel=kernel, feature_idx =feature_idx, 
                        smooth = True)

        # ----- create array for experiment settings
        X_t, y_t  = train_dataset.X.reshape(train_dataset.X.shape[0], -1), train_dataset.y
        X_v, y_v = val_dataset.X.reshape(val_dataset.X.shape[0], -1), val_dataset.y
        X_d, y_d = test_dataset_d.X.reshape(test_dataset_d.X.shape[0], -1), test_dataset_d.y
        X_nd, y_nd = test_dataset_nd.X.reshape(test_dataset_nd.X.shape[0], -1), test_dataset_nd.y


        # tune only once
        if s == seedlist[0]:

            # Support vector
            def objective(trial):
                params = {
                "C": trial.suggest_float("C", 0.2, 2.0, log=True),
                'kernel' : trial.suggest_categorical('kernel', ['rbf', 'linear', 'poly', 'sigmoid']), 
                'degree' : trial.suggest_int('degree', 2, 3),
                'gamma' : trial.suggest_categorical('gamma', ['auto', 'scale'])
                }

                clf = SVR(**params)
                clf.fit(X_t, y_t)

                # intermediate_value = clf.score(X_v, y_v)
                y_p = clf.predict(X_v)
                intermediate_value =  mean_squared_error(y_v, y_p)

                if trial.should_prune():
                    raise optuna.TrialPruned()

                return intermediate_value

            # optimize study
            study = optuna.create_study(direction="minimize")
            study.optimize(objective, n_trials=num_trial) 
            print(study.best_params)
            
            # save study
            study_df = study.trials_dataframe(attrs=('number', 'value', 'params', 'state'))
            study_df.to_csv(os.path.join(output_dir, 'study_params_'+'{}.csv'.format(exp_name)))
            

            # dump best parameters
            exp_params[exp_name] = study.best_params


        # use best params from tuning
        rf_opt = SVR(**study.best_params)
        rf_opt.fit(X_t, y_t)

        # predict training year
        y_pred_t = rf_opt.predict(X_t)

        # predict validation year
        y_pred_v = rf_opt.predict(X_v)

        #predict drought year
        y_pred_d = rf_opt.predict(X_d)
        pkl.dump(y_d, open(os.path.join(output_dir, 'y_true_test_data_{}.pkl'.format(test_years_d[0])), 'wb'))
        pkl.dump(y_pred_d, open(os.path.join(output_dir, 'y_pred_test_data_{}.pkl'.format(test_years_d[0])), 'wb'))


        #predict non-drought year
        y_pred_nd = rf_opt.predict(X_nd)
        pkl.dump(y_nd, open(os.path.join(output_dir, 'y_true_test_data_{}.pkl'.format(test_years_nd[0])), 'wb'))
        pkl.dump(y_pred_nd, open(os.path.join(output_dir, 'y_pred_test_data_{}.pkl'.format(test_years_nd[0])), 'wb'))


        # print('training metrics')
        mape, rmse, r2, r = evalMetrics(y_t, y_pred_t)
        metrics_train.append([exp_name, 'train', 0, str(s), mape, rmse, r2, r])
        # print('MAPE= {} , RMSE = {} , r2 ={} , R = {}'.format(mape, rmse, r2, r))

        # print('training metrics')
        mape, rmse, r2, r = evalMetrics(y_v, y_pred_v)
        metrics_val.append([exp_name, 'validation', 0, str(s), mape, rmse, r2, r])
        # print('MAPE= {} , RMSE = {} , r2 ={} , R = {}'.format(mape, rmse, r2, r))


        # # print('validation metrics d')
        mape, rmse, r2, r = evalMetrics(y_d, y_pred_d)
        # print('MAPE= {} , RMSE = {} , r2 ={} , R = {}'.format(mape, rmse, r2, r))
        metrics_test.append([ exp_name,'test_d', test_years_d[0], str(s), mape, rmse, r2, r])

        # print('validation metrics nd')
        mape, rmse, r2, r = evalMetrics(y_nd, y_pred_nd)
        # print('MAPE= {} , RMSE = {} , r2 ={} , R = {}'.format(mape, rmse, r2, r))
        metrics_test.append([ exp_name,'test nd', test_years_nd[0], str(s), mape, rmse, r2, r])


    
    results_df_train = pd.DataFrame(metrics_train, columns=['exp_name', 'mode', 'year','seed', 'MAPE', 'RMSE', 'R2', 'r'])
    results_df_val = pd.DataFrame(metrics_val, columns=['exp_name', 'mode', 'year','seed', 'MAPE', 'RMSE', 'R2', 'r'])
    results_df_test = pd.DataFrame(metrics_test, columns=['exp_name', 'mode', 'year','seed', 'MAPE', 'RMSE', 'R2', 'r'])
    df_combined = pd.concat([results_df_train, results_df_val, results_df_test], ignore_index=True)
    df_combined.to_csv(os.path.join(output_dir, '{}.csv'.format(exp_name)))

    # save exp parameters
    with open(os.path.join(output_dir, 'params.json'), 'w') as file:
        file.write(json.dumps(exp_params, indent=4))

    # save geoid
    pkl.dump(test_dataset_d.geoid , open(os.path.join(output_dir, 'geoid_{}.pkl'.format(test_years_d[0])), 'wb'))
    pkl.dump(test_dataset_nd.geoid , open(os.path.join(output_dir, 'geoid_{}.pkl'.format(test_years_nd[0])), 'wb'))
        
    print(exp_name, 'done')

In [8]:
output_dir = '/app/dev/spatial_encoding/2024_08/results'              

seedlist = [3407, 42, 1234]
num_trial=20

svr_train(seedlist, num_trial, output_dir, exp_name='svr')

[32m[I 2024-09-04 06:54:30,827][0m A new study created in memory with name: no-name-9db4ac7a-ce68-4b03-b521-29ce81ede602[0m
[32m[I 2024-09-04 06:54:33,730][0m Trial 0 finished with value: 869.5141525746448 and parameters: {'C': 0.842720166836167, 'kernel': 'sigmoid', 'degree': 2, 'gamma': 'scale'}. Best is trial 0 with value: 869.5141525746448.[0m
[32m[I 2024-09-04 06:54:35,664][0m Trial 1 finished with value: 225.56470918691528 and parameters: {'C': 0.3769908876812393, 'kernel': 'linear', 'degree': 2, 'gamma': 'auto'}. Best is trial 1 with value: 225.56470918691528.[0m
[32m[I 2024-09-04 06:54:38,570][0m Trial 2 finished with value: 851.1953817996629 and parameters: {'C': 1.9984728888970515, 'kernel': 'sigmoid', 'degree': 2, 'gamma': 'scale'}. Best is trial 1 with value: 225.56470918691528.[0m
[32m[I 2024-09-04 06:54:40,509][0m Trial 3 finished with value: 238.64670182125488 and parameters: {'C': 0.27135359424122035, 'kernel': 'linear', 'degree': 3, 'gamma': 'auto'}. Best

{'C': 1.9885572128091131, 'kernel': 'linear', 'degree': 3, 'gamma': 'auto'}
svr done


## random forest block

In [5]:
def rf_train(seedlist, num_trial, out_dir, exp_name='rf'):
    
    # prepare output_dir 
    os.makedirs(out_dir, exist_ok=True)
    exp_params = {}
    
    # make directory for experiment
    output_dir = os.path.join(out_dir, exp_name)
    os.makedirs(output_dir, exist_ok=True)

    # -- collector for metrics
    metrics_train = []
    metrics_val = []
    metrics_test = []   

    for s in seedlist:
        np.random.seed(s)
        random.seed(s)
        torch.manual_seed(s)

        # Initialize YieldDataset with various parameters
        train_dataset = YDataset(npy_path, label_path, norm_path= norm_path, 
                        lookup=train_years, mode='train', seed=s, start_doy_idx=start_doy_idx, 
                        end_doy_idx=end_doy_idx, ignore_features= [9,10,11], 
                        kernel=kernel, feature_idx =feature_idx, 
                        smooth = True)

        # Initialize YieldDataset with various parameters
        val_dataset = YDataset(npy_path, label_path, norm_path= norm_path, 
                        lookup=train_years, mode='validation', seed=s, start_doy_idx=start_doy_idx, 
                        end_doy_idx=end_doy_idx, ignore_features= [9,10,11], 
                        kernel=kernel, feature_idx =feature_idx, 
                        smooth = True)

        # Initialize YieldDataset with various parameters
        test_dataset_d = YDataset(npy_path, label_path, norm_path= norm_path, 
                        lookup=test_years_d, mode=None, seed=s, start_doy_idx=start_doy_idx, 
                        end_doy_idx=end_doy_idx, ignore_features= [9,10,11], 
                        kernel=kernel, feature_idx =feature_idx, 
                        smooth = True)

        test_dataset_nd = YDataset(npy_path, label_path, norm_path= norm_path, 
                        lookup=test_years_nd, mode=None, seed=s, start_doy_idx=start_doy_idx, 
                        end_doy_idx=end_doy_idx, ignore_features= [9,10,11], 
                        kernel=kernel, feature_idx =feature_idx, 
                        smooth = True)

        # ----- create array for experiment settings
        X_t, y_t  = train_dataset.X.reshape(train_dataset.X.shape[0], -1), train_dataset.y
        X_v, y_v = val_dataset.X.reshape(val_dataset.X.shape[0], -1), val_dataset.y
        X_d, y_d = test_dataset_d.X.reshape(test_dataset_d.X.shape[0], -1), test_dataset_d.y
        X_nd, y_nd = test_dataset_nd.X.reshape(test_dataset_nd.X.shape[0], -1), test_dataset_nd.y


        # tune only once
        if s == seedlist[0]:


            def objective(trial):

                params = {
                'n_estimators' : trial.suggest_int('n_estimators', 100, 500),
                'max_depth' : trial.suggest_int('max_depth', 2, 10),
                # 'max_features' : trial.suggest_categorical('max_features', ['auto', 'sqrt']), 
                'min_samples_split' : trial.suggest_int('min_samples_split', 3, 10),
                'bootstrap' : trial.suggest_categorical('bootstrap', [True, False]),
                'n_jobs' : trial.suggest_categorical('n_jobs', [-1]), 
                'random_state' : trial.suggest_categorical('random_state', [s])
                }

                clf = RandomForestRegressor(**params)
                clf.fit(X_t, y_t)

                # intermediate_value = clf.score(X_v, y_v)
                y_p = clf.predict(X_v)
                intermediate_value =  mean_squared_error(y_v, y_p)

                if trial.should_prune():
                    raise optuna.TrialPruned()

                return intermediate_value
            
            # optimize study
            study = optuna.create_study(direction="minimize")
            study.optimize(objective, n_trials=num_trial) 
            print(study.best_params)
            
            # save study
            study_df = study.trials_dataframe(attrs=('number', 'value', 'params', 'state'))
            study_df.to_csv(os.path.join(output_dir, 'study_params_'+'{}.csv'.format(exp_name)))
            

            # dump best parameters
            exp_params[exp_name] = study.best_params


        # use best params from tuning
        rf_opt = RandomForestRegressor(**study.best_params)
        rf_opt.fit(X_t, y_t)

        # predict training year
        y_pred_t = rf_opt.predict(X_t)

        # predict validation year
        y_pred_v = rf_opt.predict(X_v)

        #predict drought year
        y_pred_d = rf_opt.predict(X_d)
        pkl.dump(y_d, open(os.path.join(output_dir, 'y_true_test_data_{}.pkl'.format(test_years_d[0])), 'wb'))
        pkl.dump(y_pred_d, open(os.path.join(output_dir, 'y_pred_test_data_{}.pkl'.format(test_years_d[0])), 'wb'))


        #predict non-drought year
        y_pred_nd = rf_opt.predict(X_nd)
        pkl.dump(y_nd, open(os.path.join(output_dir, 'y_true_test_data_{}.pkl'.format(test_years_nd[0])), 'wb'))
        pkl.dump(y_pred_nd, open(os.path.join(output_dir, 'y_pred_test_data_{}.pkl'.format(test_years_nd[0])), 'wb'))


        # print('training metrics')
        mape, rmse, r2, r = evalMetrics(y_t, y_pred_t)
        metrics_train.append([exp_name, 'train', 0, str(s), mape, rmse, r2, r])
        # print('MAPE= {} , RMSE = {} , r2 ={} , R = {}'.format(mape, rmse, r2, r))

        # print('training metrics')
        mape, rmse, r2, r = evalMetrics(y_v, y_pred_v)
        metrics_val.append([exp_name, 'validation', 0, str(s), mape, rmse, r2, r])
        # print('MAPE= {} , RMSE = {} , r2 ={} , R = {}'.format(mape, rmse, r2, r))


        # # print('validation metrics d')
        mape, rmse, r2, r = evalMetrics(y_d, y_pred_d)
        # print('MAPE= {} , RMSE = {} , r2 ={} , R = {}'.format(mape, rmse, r2, r))
        metrics_test.append([ exp_name,'test_d', test_years_d[0], str(s), mape, rmse, r2, r])

        # print('validation metrics nd')
        mape, rmse, r2, r = evalMetrics(y_nd, y_pred_nd)
        # print('MAPE= {} , RMSE = {} , r2 ={} , R = {}'.format(mape, rmse, r2, r))
        metrics_test.append([ exp_name,'test nd', test_years_nd[0], str(s), mape, rmse, r2, r])


    
    results_df_train = pd.DataFrame(metrics_train, columns=['exp_name', 'mode', 'year','seed', 'MAPE', 'RMSE', 'R2', 'r'])
    results_df_val = pd.DataFrame(metrics_val, columns=['exp_name', 'mode', 'year','seed', 'MAPE', 'RMSE', 'R2', 'r'])
    results_df_test = pd.DataFrame(metrics_test, columns=['exp_name', 'mode', 'year','seed', 'MAPE', 'RMSE', 'R2', 'r'])
    df_combined = pd.concat([results_df_train, results_df_val, results_df_test], ignore_index=True)
    df_combined.to_csv(os.path.join(output_dir, '{}.csv'.format(exp_name)))

    # save exp parameters
    with open(os.path.join(output_dir, 'params.json'), 'w') as file:
        file.write(json.dumps(exp_params, indent=4))

    # save geoid
    pkl.dump(test_dataset_d.geoid , open(os.path.join(output_dir, 'geoid_{}.pkl'.format(test_years_d[0])), 'wb'))
    pkl.dump(test_dataset_nd.geoid , open(os.path.join(output_dir, 'geoid_{}.pkl'.format(test_years_nd[0])), 'wb'))
        
    print(exp_name, 'done')

In [6]:
output_dir = '/app/dev/spatial_encoding/2024_08/results'              

seedlist = [3407, 42, 1234]
num_trial=20

rf_train(seedlist, num_trial, output_dir, exp_name='rf2')

[32m[I 2024-09-04 07:15:55,652][0m A new study created in memory with name: no-name-77e5c5d3-e62c-4dc9-898c-748a992b1298[0m
[32m[I 2024-09-04 07:15:58,349][0m Trial 0 finished with value: 145.28029711661642 and parameters: {'n_estimators': 197, 'max_depth': 7, 'min_samples_split': 6, 'bootstrap': True, 'n_jobs': -1, 'random_state': 3407}. Best is trial 0 with value: 145.28029711661642.[0m
[32m[I 2024-09-04 07:16:01,748][0m Trial 1 finished with value: 193.33745255818963 and parameters: {'n_estimators': 325, 'max_depth': 5, 'min_samples_split': 5, 'bootstrap': True, 'n_jobs': -1, 'random_state': 3407}. Best is trial 0 with value: 145.28029711661642.[0m
[32m[I 2024-09-04 07:16:02,519][0m Trial 2 finished with value: 371.05561307417145 and parameters: {'n_estimators': 135, 'max_depth': 2, 'min_samples_split': 3, 'bootstrap': True, 'n_jobs': -1, 'random_state': 3407}. Best is trial 0 with value: 145.28029711661642.[0m
[32m[I 2024-09-04 07:16:05,949][0m Trial 3 finished with v

{'n_estimators': 253, 'max_depth': 10, 'min_samples_split': 7, 'bootstrap': True, 'n_jobs': -1, 'random_state': 3407}
rf2 done


## xgboost

In [8]:
def xgb_train(seedlist, num_trial, out_dir, exp_name='xgb'):
    
    # prepare output_dir 
    os.makedirs(out_dir, exist_ok=True)
    exp_params = {}
    
    # make directory for experiment
    output_dir = os.path.join(out_dir, exp_name)
    os.makedirs(output_dir, exist_ok=True)

    # -- collector for metrics
    metrics_train = []
    metrics_val = []
    metrics_test = []   

    for s in seedlist:
        np.random.seed(s)
        random.seed(s)
        torch.manual_seed(s)

        # Initialize YieldDataset with various parameters
        train_dataset = YDataset(npy_path, label_path, norm_path= norm_path, 
                        lookup=train_years, mode='train', seed=s, start_doy_idx=start_doy_idx, 
                        end_doy_idx=end_doy_idx, ignore_features= [9,10,11], 
                        kernel=kernel, feature_idx =feature_idx, 
                        smooth = True)

        # Initialize YieldDataset with various parameters
        val_dataset = YDataset(npy_path, label_path, norm_path= norm_path, 
                        lookup=train_years, mode='validation', seed=s, start_doy_idx=start_doy_idx, 
                        end_doy_idx=end_doy_idx, ignore_features= [9,10,11], 
                        kernel=kernel, feature_idx =feature_idx, 
                        smooth = True)

        # Initialize YieldDataset with various parameters
        test_dataset_d = YDataset(npy_path, label_path, norm_path= norm_path, 
                        lookup=test_years_d, mode=None, seed=s, start_doy_idx=start_doy_idx, 
                        end_doy_idx=end_doy_idx, ignore_features= [9,10,11], 
                        kernel=kernel, feature_idx =feature_idx, 
                        smooth = True)

        test_dataset_nd = YDataset(npy_path, label_path, norm_path= norm_path, 
                        lookup=test_years_nd, mode=None, seed=s, start_doy_idx=start_doy_idx, 
                        end_doy_idx=end_doy_idx, ignore_features= [9,10,11], 
                        kernel=kernel, feature_idx =feature_idx, 
                        smooth = True)

        # ----- create array for experiment settings
        X_t, y_t  = train_dataset.X.reshape(train_dataset.X.shape[0], -1), train_dataset.y
        X_v, y_v = val_dataset.X.reshape(val_dataset.X.shape[0], -1), val_dataset.y
        X_d, y_d = test_dataset_d.X.reshape(test_dataset_d.X.shape[0], -1), test_dataset_d.y
        X_nd, y_nd = test_dataset_nd.X.reshape(test_dataset_nd.X.shape[0], -1), test_dataset_nd.y
    



        # tune only once
        if s == seedlist[0]:

            def objective(trial):
                params = {
                    "verbosity": 0,
                    "objective": "reg:squarederror",
                    # use exact for small dataset.
                    # "tree_method": "exact",
                    # defines booster, gblinear for linear functions.
                    "booster": trial.suggest_categorical("booster", ["gbtree"]), #, "gblinear", "dart"
                    # L2 regularization weight.
                    "lambda": trial.suggest_float("lambda", 1e-8, 1.0, log=True),
                    # L1 regularization weight.
                    "alpha": trial.suggest_float("alpha", 1e-8, 1.0, log=True),
                    # sampling ratio for training data.
                    "subsample": trial.suggest_float("subsample", 0.6, 0.8), #0.6, 0.8
                    # sampling according to each tree.
                    "colsample_bytree": trial.suggest_float("colsample_bytree", 0.3, 0.5), #0.6, 0.8
                    'n_jobs' : trial.suggest_categorical('n_jobs', [30])
                }

                if params["booster"] in ["gbtree", "dart"]:
                    # number of estimators
                    params['n_estimators'] = trial.suggest_int('n_estimators', 100, 300)
                    # maximum depth of the tree, signifies complexity of the tree.
                    params["max_depth"] = trial.suggest_int("max_depth", 2, 5, step=1) 
                    # minimum child weight, larger the term more conservative the tree.
                    params["min_child_weight"] = trial.suggest_int("min_child_weight", 3, 8)
                    params["eta"] = trial.suggest_float("eta", 1e-4, 1e-2, log=True)
                    # # defines how selective algorithm is.
                    # params["gamma"] = trial.suggest_float("gamma", 1e-8, 1.0, log=True)
                    params["grow_policy"] = trial.suggest_categorical("grow_policy", ["depthwise", "lossguide"])

#                     if params["booster"] == "dart":
#                         params["sample_type"] = trial.suggest_categorical("sample_type", ["uniform", "weighted"])
#                         params["normalize_type"] = trial.suggest_categorical("normalize_type", ["tree", "forest"])
#                         # params["rate_drop"] = trial.suggest_float("rate_drop", 1e-8, 1.0, log=True)
#                         # params["skip_drop"] = trial.suggest_float("skip_drop", 1e-8, 1.0, log=True)


                clf = xgb.XGBRegressor(**params)
                clf.fit(X_t, y_t, eval_set=[(X_v, y_v)], verbose=True)

                # intermediate_value = clf.score(X_v, y_v)
                y_p = clf.predict(X_v)
                intermediate_value =  mean_squared_error(y_v, y_p)

                if trial.should_prune():
                    raise optuna.TrialPruned()

                return intermediate_value

            # optimize study
            study = optuna.create_study(direction="minimize")
            study.optimize(objective, n_trials=num_trial, timeout=600) 
            print(study.best_params)
            
            # save study
            study_df = study.trials_dataframe(attrs=('number', 'value', 'params', 'state'))
            study_df.to_csv(os.path.join(output_dir, 'study_params_'+'{}.csv'.format(exp_name)))
            

            # dump best parameters
            exp_params[exp_name] = study.best_params



        xg_opt = xgb.XGBRegressor(**study.best_params)
        xg_opt.fit(X_t, y_t)

        # predict training year
        y_pred_t = xg_opt.predict(X_t)

        # predict validation year
        y_pred_v = xg_opt.predict(X_v)

        #predict drought year
        y_pred_d = xg_opt.predict(X_d)
        pkl.dump(y_d, open(os.path.join(output_dir, 'y_true_test_data_{}.pkl'.format(test_years_d[0])), 'wb'))
        pkl.dump(y_pred_d, open(os.path.join(output_dir, 'y_pred_test_data_{}.pkl'.format(test_years_d[0])), 'wb'))


        #predict non-drought year
        y_pred_nd = xg_opt.predict(X_nd)
        pkl.dump(y_nd, open(os.path.join(output_dir, 'y_true_test_data_{}.pkl'.format(test_years_nd[0])), 'wb'))
        pkl.dump(y_pred_nd, open(os.path.join(output_dir, 'y_pred_test_data_{}.pkl'.format(test_years_nd[0])), 'wb'))


        # print('training metrics')
        mape, rmse, r2, r = evalMetrics(y_t, y_pred_t)
        metrics_train.append([exp_name, 'train', 0, str(s), mape, rmse, r2, r])
        # print('MAPE= {} , RMSE = {} , r2 ={} , R = {}'.format(mape, rmse, r2, r))

        # print('training metrics')
        mape, rmse, r2, r = evalMetrics(y_v, y_pred_v)
        metrics_val.append([exp_name, 'validation', 0, str(s), mape, rmse, r2, r])
        # print('MAPE= {} , RMSE = {} , r2 ={} , R = {}'.format(mape, rmse, r2, r))


        # # print('validation metrics d')
        mape, rmse, r2, r = evalMetrics(y_d, y_pred_d)
        # print('MAPE= {} , RMSE = {} , r2 ={} , R = {}'.format(mape, rmse, r2, r))
        metrics_test.append([ exp_name,'test_d', test_years_d[0], str(s), mape, rmse, r2, r])

        # print('validation metrics nd')
        mape, rmse, r2, r = evalMetrics(y_nd, y_pred_nd)
        # print('MAPE= {} , RMSE = {} , r2 ={} , R = {}'.format(mape, rmse, r2, r))
        metrics_test.append([ exp_name,'test nd', test_years_nd[0], str(s), mape, rmse, r2, r])


    
    results_df_train = pd.DataFrame(metrics_train, columns=['exp_name', 'mode', 'year','seed', 'MAPE', 'RMSE', 'R2', 'r'])
    results_df_val = pd.DataFrame(metrics_val, columns=['exp_name', 'mode', 'year','seed', 'MAPE', 'RMSE', 'R2', 'r'])
    results_df_test = pd.DataFrame(metrics_test, columns=['exp_name', 'mode', 'year','seed', 'MAPE', 'RMSE', 'R2', 'r'])
    df_combined = pd.concat([results_df_train, results_df_val, results_df_test], ignore_index=True)
    df_combined.to_csv(os.path.join(output_dir, '{}.csv'.format(exp_name)))

    # save exp parameters
    with open(os.path.join(output_dir, 'params.json'), 'w') as file:
        file.write(json.dumps(exp_params, indent=4))

    # save geoid
    pkl.dump(test_dataset_d.geoid , open(os.path.join(output_dir, 'geoid_{}.pkl'.format(test_years_d[0])), 'wb'))
    pkl.dump(test_dataset_nd.geoid , open(os.path.join(output_dir, 'geoid_{}.pkl'.format(test_years_nd[0])), 'wb'))
        
    print(exp_name, 'done')

In [9]:
output_dir = '/app/dev/spatial_encoding/2024_08/results'              

seedlist = [3407, 42, 1234]
num_trial=20

xgb_train(seedlist, num_trial, output_dir, exp_name='xgb_3')

[32m[I 2024-09-04 07:19:05,668][0m A new study created in memory with name: no-name-dbdb604d-2720-4db2-bf35-9194f75313c4[0m


[0]	validation_0-rmse:29.79261
[1]	validation_0-rmse:29.77793
[2]	validation_0-rmse:29.76299
[3]	validation_0-rmse:29.74747
[4]	validation_0-rmse:29.73317
[5]	validation_0-rmse:29.71836
[6]	validation_0-rmse:29.70410
[7]	validation_0-rmse:29.68904
[8]	validation_0-rmse:29.67469
[9]	validation_0-rmse:29.66002
[10]	validation_0-rmse:29.64498
[11]	validation_0-rmse:29.63013
[12]	validation_0-rmse:29.61524
[13]	validation_0-rmse:29.60183
[14]	validation_0-rmse:29.58709
[15]	validation_0-rmse:29.57237
[16]	validation_0-rmse:29.55833
[17]	validation_0-rmse:29.54356
[18]	validation_0-rmse:29.52989
[19]	validation_0-rmse:29.51575
[20]	validation_0-rmse:29.50101
[21]	validation_0-rmse:29.48622
[22]	validation_0-rmse:29.47195
[23]	validation_0-rmse:29.45825
[24]	validation_0-rmse:29.44384
[25]	validation_0-rmse:29.42882
[26]	validation_0-rmse:29.41428
[27]	validation_0-rmse:29.39975
[28]	validation_0-rmse:29.38556
[29]	validation_0-rmse:29.37076
[30]	validation_0-rmse:29.35626
[31]	validation_0-

[32m[I 2024-09-04 07:19:08,091][0m Trial 0 finished with value: 683.5522454455919 and parameters: {'booster': 'gbtree', 'lambda': 0.12189220735589204, 'alpha': 0.21995345897476404, 'subsample': 0.7885410333402273, 'colsample_bytree': 0.3717752337812597, 'n_jobs': 30, 'n_estimators': 277, 'max_depth': 4, 'min_child_weight': 6, 'eta': 0.000754809301343299, 'grow_policy': 'depthwise'}. Best is trial 0 with value: 683.5522454455919.[0m


[0]	validation_0-rmse:29.72538
[1]	validation_0-rmse:29.64547
[2]	validation_0-rmse:29.56206
[3]	validation_0-rmse:29.47791
[4]	validation_0-rmse:29.40093
[5]	validation_0-rmse:29.31869
[6]	validation_0-rmse:29.24297
[7]	validation_0-rmse:29.16076
[8]	validation_0-rmse:29.08092
[9]	validation_0-rmse:29.00770
[10]	validation_0-rmse:28.92649
[11]	validation_0-rmse:28.84668
[12]	validation_0-rmse:28.76857
[13]	validation_0-rmse:28.69645
[14]	validation_0-rmse:28.61765
[15]	validation_0-rmse:28.53679
[16]	validation_0-rmse:28.46613
[17]	validation_0-rmse:28.39055
[18]	validation_0-rmse:28.31928
[19]	validation_0-rmse:28.24266
[20]	validation_0-rmse:28.16522
[21]	validation_0-rmse:28.09105
[22]	validation_0-rmse:28.01756
[23]	validation_0-rmse:27.95100
[24]	validation_0-rmse:27.87583
[25]	validation_0-rmse:27.80000
[26]	validation_0-rmse:27.72807
[27]	validation_0-rmse:27.65453
[28]	validation_0-rmse:27.58167
[29]	validation_0-rmse:27.50967
[30]	validation_0-rmse:27.43954
[31]	validation_0-

[32m[I 2024-09-04 07:19:09,864][0m Trial 1 finished with value: 333.3888720508462 and parameters: {'booster': 'gbtree', 'lambda': 0.010464083689380104, 'alpha': 3.6897447161452e-07, 'subsample': 0.679529343918663, 'colsample_bytree': 0.39209706484854806, 'n_jobs': 30, 'n_estimators': 242, 'max_depth': 3, 'min_child_weight': 8, 'eta': 0.00470456886008781, 'grow_policy': 'lossguide'}. Best is trial 1 with value: 333.3888720508462.[0m


[0]	validation_0-rmse:29.77534
[1]	validation_0-rmse:29.74488
[2]	validation_0-rmse:29.71260
[3]	validation_0-rmse:29.68028
[4]	validation_0-rmse:29.65020
[5]	validation_0-rmse:29.61841
[6]	validation_0-rmse:29.58849
[7]	validation_0-rmse:29.55659
[8]	validation_0-rmse:29.52555
[9]	validation_0-rmse:29.49571
[10]	validation_0-rmse:29.46380
[11]	validation_0-rmse:29.43179
[12]	validation_0-rmse:29.40041
[13]	validation_0-rmse:29.37159
[14]	validation_0-rmse:29.34012
[15]	validation_0-rmse:29.30759
[16]	validation_0-rmse:29.27827
[17]	validation_0-rmse:29.24767
[18]	validation_0-rmse:29.21964
[19]	validation_0-rmse:29.18846
[20]	validation_0-rmse:29.15741
[21]	validation_0-rmse:29.12706
[22]	validation_0-rmse:29.09680
[23]	validation_0-rmse:29.06747
[24]	validation_0-rmse:29.03750
[25]	validation_0-rmse:29.00640
[26]	validation_0-rmse:28.97584
[27]	validation_0-rmse:28.94521
[28]	validation_0-rmse:28.91513
[29]	validation_0-rmse:28.88525
[30]	validation_0-rmse:28.85507
[31]	validation_0-

[32m[I 2024-09-04 07:19:10,991][0m Trial 2 finished with value: 664.119834934845 and parameters: {'booster': 'gbtree', 'lambda': 1.8338513244875968e-05, 'alpha': 5.262423625382603e-08, 'subsample': 0.7813070701255943, 'colsample_bytree': 0.40386281695218673, 'n_jobs': 30, 'n_estimators': 147, 'max_depth': 3, 'min_child_weight': 5, 'eta': 0.0018010472800814637, 'grow_policy': 'lossguide'}. Best is trial 1 with value: 333.3888720508462.[0m


[0]	validation_0-rmse:29.78554
[1]	validation_0-rmse:29.76494
[2]	validation_0-rmse:29.74429
[3]	validation_0-rmse:29.72288
[4]	validation_0-rmse:29.70424
[5]	validation_0-rmse:29.68352
[6]	validation_0-rmse:29.66316
[7]	validation_0-rmse:29.64228
[8]	validation_0-rmse:29.62186
[9]	validation_0-rmse:29.60099
[10]	validation_0-rmse:29.58072
[11]	validation_0-rmse:29.55968
[12]	validation_0-rmse:29.54242
[13]	validation_0-rmse:29.52486
[14]	validation_0-rmse:29.50442
[15]	validation_0-rmse:29.48375
[16]	validation_0-rmse:29.46669
[17]	validation_0-rmse:29.44697
[18]	validation_0-rmse:29.42758
[19]	validation_0-rmse:29.40821
[20]	validation_0-rmse:29.38741
[21]	validation_0-rmse:29.36682
[22]	validation_0-rmse:29.34651
[23]	validation_0-rmse:29.32759
[24]	validation_0-rmse:29.30794
[25]	validation_0-rmse:29.28884
[26]	validation_0-rmse:29.26793
[27]	validation_0-rmse:29.24801
[28]	validation_0-rmse:29.22800
[29]	validation_0-rmse:29.20891
[30]	validation_0-rmse:29.18865
[31]	validation_0-

[32m[I 2024-09-04 07:19:12,619][0m Trial 3 finished with value: 624.9541765551152 and parameters: {'booster': 'gbtree', 'lambda': 0.7196435811203767, 'alpha': 0.002308071365618072, 'subsample': 0.7649862664836082, 'colsample_bytree': 0.4870892979222391, 'n_jobs': 30, 'n_estimators': 295, 'max_depth': 2, 'min_child_weight': 6, 'eta': 0.0013918547248349107, 'grow_policy': 'depthwise'}. Best is trial 1 with value: 333.3888720508462.[0m


[0]	validation_0-rmse:29.79365
[1]	validation_0-rmse:29.77972
[2]	validation_0-rmse:29.76591
[3]	validation_0-rmse:29.75147
[4]	validation_0-rmse:29.73777
[5]	validation_0-rmse:29.72407
[6]	validation_0-rmse:29.71019
[7]	validation_0-rmse:29.69587
[8]	validation_0-rmse:29.68233
[9]	validation_0-rmse:29.66864
[10]	validation_0-rmse:29.65436
[11]	validation_0-rmse:29.64030
[12]	validation_0-rmse:29.62646
[13]	validation_0-rmse:29.61308
[14]	validation_0-rmse:29.60018
[15]	validation_0-rmse:29.58611
[16]	validation_0-rmse:29.57282
[17]	validation_0-rmse:29.55907
[18]	validation_0-rmse:29.54601
[19]	validation_0-rmse:29.53260
[20]	validation_0-rmse:29.51892
[21]	validation_0-rmse:29.50478
[22]	validation_0-rmse:29.49113
[23]	validation_0-rmse:29.47823
[24]	validation_0-rmse:29.46457
[25]	validation_0-rmse:29.45017
[26]	validation_0-rmse:29.43664
[27]	validation_0-rmse:29.42285
[28]	validation_0-rmse:29.40921
[29]	validation_0-rmse:29.39531
[30]	validation_0-rmse:29.38134
[31]	validation_0-

[32m[I 2024-09-04 07:19:14,779][0m Trial 4 finished with value: 688.1350150197596 and parameters: {'booster': 'gbtree', 'lambda': 0.11870210093287978, 'alpha': 0.37966428841910177, 'subsample': 0.7457256022822072, 'colsample_bytree': 0.31290489654673864, 'n_jobs': 30, 'n_estimators': 284, 'max_depth': 4, 'min_child_weight': 7, 'eta': 0.0007208670287933451, 'grow_policy': 'depthwise'}. Best is trial 1 with value: 333.3888720508462.[0m


[0]	validation_0-rmse:29.79618
[1]	validation_0-rmse:29.78578
[2]	validation_0-rmse:29.77522
[3]	validation_0-rmse:29.76420
[4]	validation_0-rmse:29.75464
[5]	validation_0-rmse:29.74400
[6]	validation_0-rmse:29.73363
[7]	validation_0-rmse:29.72284
[8]	validation_0-rmse:29.71228
[9]	validation_0-rmse:29.70155
[10]	validation_0-rmse:29.69099
[11]	validation_0-rmse:29.68009
[12]	validation_0-rmse:29.67107
[13]	validation_0-rmse:29.66207
[14]	validation_0-rmse:29.65151
[15]	validation_0-rmse:29.64075
[16]	validation_0-rmse:29.63199
[17]	validation_0-rmse:29.62180
[18]	validation_0-rmse:29.61165
[19]	validation_0-rmse:29.60154
[20]	validation_0-rmse:29.59068
[21]	validation_0-rmse:29.57997
[22]	validation_0-rmse:29.56942
[23]	validation_0-rmse:29.55950
[24]	validation_0-rmse:29.54933
[25]	validation_0-rmse:29.53931
[26]	validation_0-rmse:29.52833
[27]	validation_0-rmse:29.51815
[28]	validation_0-rmse:29.50764
[29]	validation_0-rmse:29.49757
[30]	validation_0-rmse:29.48698
[31]	validation_0-

[32m[I 2024-09-04 07:19:15,783][0m Trial 5 finished with value: 791.3247015678943 and parameters: {'booster': 'gbtree', 'lambda': 4.0045404956226215e-07, 'alpha': 3.7236039057360786e-08, 'subsample': 0.698784338464727, 'colsample_bytree': 0.48945876348825457, 'n_jobs': 30, 'n_estimators': 172, 'max_depth': 2, 'min_child_weight': 3, 'eta': 0.0007092564392866663, 'grow_policy': 'depthwise'}. Best is trial 1 with value: 333.3888720508462.[0m


[0]	validation_0-rmse:29.79198
[1]	validation_0-rmse:29.77710
[2]	validation_0-rmse:29.76135
[3]	validation_0-rmse:29.74558
[4]	validation_0-rmse:29.73074
[5]	validation_0-rmse:29.71514
[6]	validation_0-rmse:29.70055
[7]	validation_0-rmse:29.68490
[8]	validation_0-rmse:29.66956
[9]	validation_0-rmse:29.65498
[10]	validation_0-rmse:29.63933
[11]	validation_0-rmse:29.62370
[12]	validation_0-rmse:29.60827
[13]	validation_0-rmse:29.59406
[14]	validation_0-rmse:29.57841
[15]	validation_0-rmse:29.56238
[16]	validation_0-rmse:29.54787
[17]	validation_0-rmse:29.53266
[18]	validation_0-rmse:29.51871
[19]	validation_0-rmse:29.50318
[20]	validation_0-rmse:29.48806
[21]	validation_0-rmse:29.47290
[22]	validation_0-rmse:29.45780
[23]	validation_0-rmse:29.44325
[24]	validation_0-rmse:29.42803
[25]	validation_0-rmse:29.41249
[26]	validation_0-rmse:29.39731
[27]	validation_0-rmse:29.38209
[28]	validation_0-rmse:29.36699
[29]	validation_0-rmse:29.35190
[30]	validation_0-rmse:29.33683
[31]	validation_0-

[32m[I 2024-09-04 07:19:17,402][0m Trial 6 finished with value: 716.6432251524453 and parameters: {'booster': 'gbtree', 'lambda': 7.864830542996705e-08, 'alpha': 0.0037469964854943702, 'subsample': 0.7505937051657764, 'colsample_bytree': 0.4002355600671512, 'n_jobs': 30, 'n_estimators': 219, 'max_depth': 3, 'min_child_weight': 5, 'eta': 0.0008761631359522452, 'grow_policy': 'lossguide'}. Best is trial 1 with value: 333.3888720508462.[0m


[0]	validation_0-rmse:29.80427
[1]	validation_0-rmse:29.80142
[2]	validation_0-rmse:29.79852
[3]	validation_0-rmse:29.79556
[4]	validation_0-rmse:29.79274
[5]	validation_0-rmse:29.78985
[6]	validation_0-rmse:29.78694
[7]	validation_0-rmse:29.78405
[8]	validation_0-rmse:29.78121
[9]	validation_0-rmse:29.77836
[10]	validation_0-rmse:29.77548
[11]	validation_0-rmse:29.77258
[12]	validation_0-rmse:29.76964
[13]	validation_0-rmse:29.76687
[14]	validation_0-rmse:29.76394
[15]	validation_0-rmse:29.76103
[16]	validation_0-rmse:29.75824
[17]	validation_0-rmse:29.75539
[18]	validation_0-rmse:29.75252
[19]	validation_0-rmse:29.74963
[20]	validation_0-rmse:29.74669
[21]	validation_0-rmse:29.74382
[22]	validation_0-rmse:29.74106
[23]	validation_0-rmse:29.73817
[24]	validation_0-rmse:29.73528
[25]	validation_0-rmse:29.73228
[26]	validation_0-rmse:29.72943
[27]	validation_0-rmse:29.72650
[28]	validation_0-rmse:29.72364
[29]	validation_0-rmse:29.72074
[30]	validation_0-rmse:29.71780
[31]	validation_0-

[32m[I 2024-09-04 07:19:18,978][0m Trial 7 finished with value: 863.8473068725937 and parameters: {'booster': 'gbtree', 'lambda': 1.6136924065162066e-07, 'alpha': 1.2327122412256514e-06, 'subsample': 0.7172958662570347, 'colsample_bytree': 0.4646573937368295, 'n_jobs': 30, 'n_estimators': 146, 'max_depth': 5, 'min_child_weight': 8, 'eta': 0.0001357754217579402, 'grow_policy': 'depthwise'}. Best is trial 1 with value: 333.3888720508462.[0m


[0]	validation_0-rmse:29.77621
[1]	validation_0-rmse:29.74544
[2]	validation_0-rmse:29.71335
[3]	validation_0-rmse:29.68125
[4]	validation_0-rmse:29.65013
[5]	validation_0-rmse:29.61879
[6]	validation_0-rmse:29.58737
[7]	validation_0-rmse:29.55510
[8]	validation_0-rmse:29.52398
[9]	validation_0-rmse:29.49336
[10]	validation_0-rmse:29.46189
[11]	validation_0-rmse:29.42970
[12]	validation_0-rmse:29.39816
[13]	validation_0-rmse:29.36836
[14]	validation_0-rmse:29.33615
[15]	validation_0-rmse:29.30620
[16]	validation_0-rmse:29.27667
[17]	validation_0-rmse:29.24507
[18]	validation_0-rmse:29.21553
[19]	validation_0-rmse:29.18421
[20]	validation_0-rmse:29.15261
[21]	validation_0-rmse:29.12223
[22]	validation_0-rmse:29.09282
[23]	validation_0-rmse:29.06256
[24]	validation_0-rmse:29.03208
[25]	validation_0-rmse:29.00034
[26]	validation_0-rmse:28.96906
[27]	validation_0-rmse:28.93837
[28]	validation_0-rmse:28.90889
[29]	validation_0-rmse:28.87797
[30]	validation_0-rmse:28.84654
[31]	validation_0-

[32m[I 2024-09-04 07:19:23,843][0m Trial 8 finished with value: 516.5557854932644 and parameters: {'booster': 'gbtree', 'lambda': 3.569616657650857e-07, 'alpha': 2.509852040385433e-05, 'subsample': 0.6519790688548109, 'colsample_bytree': 0.41357268360028177, 'n_jobs': 30, 'n_estimators': 276, 'max_depth': 5, 'min_child_weight': 3, 'eta': 0.0014975919037236544, 'grow_policy': 'lossguide'}. Best is trial 1 with value: 333.3888720508462.[0m


[0]	validation_0-rmse:29.80175
[1]	validation_0-rmse:29.79636
[2]	validation_0-rmse:29.79083
[3]	validation_0-rmse:29.78534
[4]	validation_0-rmse:29.77995
[5]	validation_0-rmse:29.77453
[6]	validation_0-rmse:29.76905
[7]	validation_0-rmse:29.76350
[8]	validation_0-rmse:29.75813
[9]	validation_0-rmse:29.75270
[10]	validation_0-rmse:29.74715
[11]	validation_0-rmse:29.74154
[12]	validation_0-rmse:29.73602
[13]	validation_0-rmse:29.73080
[14]	validation_0-rmse:29.72525
[15]	validation_0-rmse:29.71975
[16]	validation_0-rmse:29.71445
[17]	validation_0-rmse:29.70896
[18]	validation_0-rmse:29.70361
[19]	validation_0-rmse:29.69815
[20]	validation_0-rmse:29.69258
[21]	validation_0-rmse:29.68717
[22]	validation_0-rmse:29.68192
[23]	validation_0-rmse:29.67661
[24]	validation_0-rmse:29.67123
[25]	validation_0-rmse:29.66561
[26]	validation_0-rmse:29.66023
[27]	validation_0-rmse:29.65478
[28]	validation_0-rmse:29.64941
[29]	validation_0-rmse:29.64393
[30]	validation_0-rmse:29.63845
[31]	validation_0-

[32m[I 2024-09-04 07:19:26,022][0m Trial 9 finished with value: 826.9027284031748 and parameters: {'booster': 'gbtree', 'lambda': 0.00019788357028912405, 'alpha': 3.8957446584156933e-07, 'subsample': 0.779703829800337, 'colsample_bytree': 0.46769241810843004, 'n_jobs': 30, 'n_estimators': 198, 'max_depth': 5, 'min_child_weight': 5, 'eta': 0.00025702200585130347, 'grow_policy': 'depthwise'}. Best is trial 1 with value: 333.3888720508462.[0m


[0]	validation_0-rmse:29.64419
[1]	validation_0-rmse:29.48373
[2]	validation_0-rmse:29.32009
[3]	validation_0-rmse:29.15891
[4]	validation_0-rmse:29.01063
[5]	validation_0-rmse:28.85456
[6]	validation_0-rmse:28.70538
[7]	validation_0-rmse:28.54971
[8]	validation_0-rmse:28.39700
[9]	validation_0-rmse:28.25419
[10]	validation_0-rmse:28.10077
[11]	validation_0-rmse:27.94950
[12]	validation_0-rmse:27.80081
[13]	validation_0-rmse:27.66630
[14]	validation_0-rmse:27.51863
[15]	validation_0-rmse:27.37811
[16]	validation_0-rmse:27.24767
[17]	validation_0-rmse:27.10992
[18]	validation_0-rmse:26.98065
[19]	validation_0-rmse:26.85331
[20]	validation_0-rmse:26.71933
[21]	validation_0-rmse:26.59056
[22]	validation_0-rmse:26.45818
[23]	validation_0-rmse:26.33951
[24]	validation_0-rmse:26.21238
[25]	validation_0-rmse:26.08138
[26]	validation_0-rmse:25.95563
[27]	validation_0-rmse:25.83064
[28]	validation_0-rmse:25.70318
[29]	validation_0-rmse:25.58135
[30]	validation_0-rmse:25.45674
[31]	validation_0-

[32m[I 2024-09-04 07:19:27,747][0m Trial 10 finished with value: 209.15414794657727 and parameters: {'booster': 'gbtree', 'lambda': 0.001655933148489609, 'alpha': 1.7656447717844103e-05, 'subsample': 0.6002457239658584, 'colsample_bytree': 0.3439319618019138, 'n_jobs': 30, 'n_estimators': 234, 'max_depth': 3, 'min_child_weight': 8, 'eta': 0.009339799780203377, 'grow_policy': 'lossguide'}. Best is trial 10 with value: 209.15414794657727.[0m


[0]	validation_0-rmse:29.65657
[1]	validation_0-rmse:29.50796
[2]	validation_0-rmse:29.35627
[3]	validation_0-rmse:29.20643
[4]	validation_0-rmse:29.06858
[5]	validation_0-rmse:28.92339
[6]	validation_0-rmse:28.78486
[7]	validation_0-rmse:28.63998
[8]	validation_0-rmse:28.49748
[9]	validation_0-rmse:28.36452
[10]	validation_0-rmse:28.22141
[11]	validation_0-rmse:28.08303
[12]	validation_0-rmse:27.94457
[13]	validation_0-rmse:27.82579
[14]	validation_0-rmse:27.68784
[15]	validation_0-rmse:27.55561
[16]	validation_0-rmse:27.43448
[17]	validation_0-rmse:27.30536
[18]	validation_0-rmse:27.18398
[19]	validation_0-rmse:27.06378
[20]	validation_0-rmse:26.93801
[21]	validation_0-rmse:26.81412
[22]	validation_0-rmse:26.68946
[23]	validation_0-rmse:26.58140
[24]	validation_0-rmse:26.45793
[25]	validation_0-rmse:26.33424
[26]	validation_0-rmse:26.21614
[27]	validation_0-rmse:26.09862
[28]	validation_0-rmse:25.97829
[29]	validation_0-rmse:25.86426
[30]	validation_0-rmse:25.75003
[31]	validation_0-

[32m[I 2024-09-04 07:19:29,517][0m Trial 11 finished with value: 223.10587136697234 and parameters: {'booster': 'gbtree', 'lambda': 0.0014796880937692411, 'alpha': 1.4889285842547077e-05, 'subsample': 0.6051330091405839, 'colsample_bytree': 0.3468125364203523, 'n_jobs': 30, 'n_estimators': 230, 'max_depth': 3, 'min_child_weight': 8, 'eta': 0.008644683127198542, 'grow_policy': 'lossguide'}. Best is trial 10 with value: 209.15414794657727.[0m


[0]	validation_0-rmse:29.63375
[1]	validation_0-rmse:29.46307
[2]	validation_0-rmse:29.29464
[3]	validation_0-rmse:29.12333
[4]	validation_0-rmse:28.96560
[5]	validation_0-rmse:28.79964
[6]	validation_0-rmse:28.64107
[7]	validation_0-rmse:28.47684
[8]	validation_0-rmse:28.31522
[9]	validation_0-rmse:28.16379
[10]	validation_0-rmse:28.00133
[11]	validation_0-rmse:27.84489
[12]	validation_0-rmse:27.68782
[13]	validation_0-rmse:27.54901
[14]	validation_0-rmse:27.39301
[15]	validation_0-rmse:27.24478
[16]	validation_0-rmse:27.10714
[17]	validation_0-rmse:26.96511
[18]	validation_0-rmse:26.82875
[19]	validation_0-rmse:26.69378
[20]	validation_0-rmse:26.55237
[21]	validation_0-rmse:26.41693
[22]	validation_0-rmse:26.27753
[23]	validation_0-rmse:26.15270
[24]	validation_0-rmse:26.01658
[25]	validation_0-rmse:25.87891
[26]	validation_0-rmse:25.74702
[27]	validation_0-rmse:25.61611
[28]	validation_0-rmse:25.48256
[29]	validation_0-rmse:25.35515
[30]	validation_0-rmse:25.22479
[31]	validation_0-

[32m[I 2024-09-04 07:19:31,398][0m Trial 12 finished with value: 197.61703704472308 and parameters: {'booster': 'gbtree', 'lambda': 0.0006479776917988095, 'alpha': 7.071310990057505e-05, 'subsample': 0.6013904066297826, 'colsample_bytree': 0.3309432759813565, 'n_jobs': 30, 'n_estimators': 240, 'max_depth': 3, 'min_child_weight': 7, 'eta': 0.009953987535067708, 'grow_policy': 'lossguide'}. Best is trial 12 with value: 197.61703704472308.[0m


[0]	validation_0-rmse:29.66291
[1]	validation_0-rmse:29.53893
[2]	validation_0-rmse:29.40537
[3]	validation_0-rmse:29.26850
[4]	validation_0-rmse:29.15189
[5]	validation_0-rmse:29.02804
[6]	validation_0-rmse:28.91821
[7]	validation_0-rmse:28.78811
[8]	validation_0-rmse:28.66429
[9]	validation_0-rmse:28.55375
[10]	validation_0-rmse:28.43099
[11]	validation_0-rmse:28.30132
[12]	validation_0-rmse:28.19574
[13]	validation_0-rmse:28.09235
[14]	validation_0-rmse:27.98874
[15]	validation_0-rmse:27.86687
[16]	validation_0-rmse:27.76397
[17]	validation_0-rmse:27.64860
[18]	validation_0-rmse:27.54687
[19]	validation_0-rmse:27.45128
[20]	validation_0-rmse:27.34948
[21]	validation_0-rmse:27.23845
[22]	validation_0-rmse:27.12707
[23]	validation_0-rmse:27.03357
[24]	validation_0-rmse:26.92398
[25]	validation_0-rmse:26.81867
[26]	validation_0-rmse:26.71183
[27]	validation_0-rmse:26.60648
[28]	validation_0-rmse:26.49807
[29]	validation_0-rmse:26.40105
[30]	validation_0-rmse:26.29175
[31]	validation_0-

[32m[I 2024-09-04 07:19:32,781][0m Trial 13 finished with value: 273.5997920316079 and parameters: {'booster': 'gbtree', 'lambda': 1.6533247682380633e-05, 'alpha': 0.0012860289180923294, 'subsample': 0.6048252086638578, 'colsample_bytree': 0.30197619075287574, 'n_jobs': 30, 'n_estimators': 252, 'max_depth': 2, 'min_child_weight': 7, 'eta': 0.009300058593401345, 'grow_policy': 'lossguide'}. Best is trial 12 with value: 197.61703704472308.[0m


[0]	validation_0-rmse:29.73617
[1]	validation_0-rmse:29.66271
[2]	validation_0-rmse:29.58752
[3]	validation_0-rmse:29.51281
[4]	validation_0-rmse:29.44311
[5]	validation_0-rmse:29.36895
[6]	validation_0-rmse:29.29971
[7]	validation_0-rmse:29.22747
[8]	validation_0-rmse:29.15645
[9]	validation_0-rmse:29.08803
[10]	validation_0-rmse:29.01563
[11]	validation_0-rmse:28.94407
[12]	validation_0-rmse:28.87346
[13]	validation_0-rmse:28.81005
[14]	validation_0-rmse:28.74087
[15]	validation_0-rmse:28.67315
[16]	validation_0-rmse:28.60846
[17]	validation_0-rmse:28.53957
[18]	validation_0-rmse:28.47414
[19]	validation_0-rmse:28.40868
[20]	validation_0-rmse:28.34155
[21]	validation_0-rmse:28.27506
[22]	validation_0-rmse:28.20843
[23]	validation_0-rmse:28.14573
[24]	validation_0-rmse:28.08038
[25]	validation_0-rmse:28.01172
[26]	validation_0-rmse:27.94537
[27]	validation_0-rmse:27.87957
[28]	validation_0-rmse:27.81477
[29]	validation_0-rmse:27.74813
[30]	validation_0-rmse:27.68418
[31]	validation_0-

[32m[I 2024-09-04 07:19:34,216][0m Trial 14 finished with value: 552.3334032334301 and parameters: {'booster': 'gbtree', 'lambda': 0.001410424588954646, 'alpha': 0.000175550772375756, 'subsample': 0.636474034487663, 'colsample_bytree': 0.33725849861681356, 'n_jobs': 30, 'n_estimators': 106, 'max_depth': 4, 'min_child_weight': 7, 'eta': 0.003723980673327944, 'grow_policy': 'lossguide'}. Best is trial 12 with value: 197.61703704472308.[0m


[0]	validation_0-rmse:29.74045
[1]	validation_0-rmse:29.67464
[2]	validation_0-rmse:29.60652
[3]	validation_0-rmse:29.53886
[4]	validation_0-rmse:29.47636
[5]	validation_0-rmse:29.41006
[6]	validation_0-rmse:29.34798
[7]	validation_0-rmse:29.28083
[8]	validation_0-rmse:29.21539
[9]	validation_0-rmse:29.15374
[10]	validation_0-rmse:29.08633
[11]	validation_0-rmse:29.02213
[12]	validation_0-rmse:28.95652
[13]	validation_0-rmse:28.89961
[14]	validation_0-rmse:28.83507
[15]	validation_0-rmse:28.77077
[16]	validation_0-rmse:28.71239
[17]	validation_0-rmse:28.64861
[18]	validation_0-rmse:28.58946
[19]	validation_0-rmse:28.53056
[20]	validation_0-rmse:28.46804
[21]	validation_0-rmse:28.40756
[22]	validation_0-rmse:28.34569
[23]	validation_0-rmse:28.29085
[24]	validation_0-rmse:28.22891
[25]	validation_0-rmse:28.16508
[26]	validation_0-rmse:28.10358
[27]	validation_0-rmse:28.04362
[28]	validation_0-rmse:27.98253
[29]	validation_0-rmse:27.92238
[30]	validation_0-rmse:27.86323
[31]	validation_0-

[32m[I 2024-09-04 07:19:36,121][0m Trial 15 finished with value: 427.9100129156499 and parameters: {'booster': 'gbtree', 'lambda': 0.0001525905784291988, 'alpha': 5.2255394970749885e-06, 'subsample': 0.6352055254837079, 'colsample_bytree': 0.3413956601540635, 'n_jobs': 30, 'n_estimators': 200, 'max_depth': 3, 'min_child_weight': 7, 'eta': 0.0038431811977193784, 'grow_policy': 'lossguide'}. Best is trial 12 with value: 197.61703704472308.[0m


[0]	validation_0-rmse:29.69831
[1]	validation_0-rmse:29.58679
[2]	validation_0-rmse:29.47556
[3]	validation_0-rmse:29.36083
[4]	validation_0-rmse:29.25416
[5]	validation_0-rmse:29.14483
[6]	validation_0-rmse:29.03674
[7]	validation_0-rmse:28.92567
[8]	validation_0-rmse:28.81887
[9]	validation_0-rmse:28.71536
[10]	validation_0-rmse:28.60547
[11]	validation_0-rmse:28.49744
[12]	validation_0-rmse:28.39211
[13]	validation_0-rmse:28.29302
[14]	validation_0-rmse:28.18899
[15]	validation_0-rmse:28.09022
[16]	validation_0-rmse:27.99437
[17]	validation_0-rmse:27.89380
[18]	validation_0-rmse:27.79641
[19]	validation_0-rmse:27.70258
[20]	validation_0-rmse:27.59907
[21]	validation_0-rmse:27.50075
[22]	validation_0-rmse:27.40199
[23]	validation_0-rmse:27.31061
[24]	validation_0-rmse:27.21501
[25]	validation_0-rmse:27.11423
[26]	validation_0-rmse:27.01696
[27]	validation_0-rmse:26.91984
[28]	validation_0-rmse:26.82554
[29]	validation_0-rmse:26.72914
[30]	validation_0-rmse:26.63374
[31]	validation_0-

[32m[I 2024-09-04 07:19:39,028][0m Trial 16 finished with value: 231.15311241965668 and parameters: {'booster': 'gbtree', 'lambda': 4.939345570729658e-06, 'alpha': 0.00010279068226740529, 'subsample': 0.6609648695075347, 'colsample_bytree': 0.3674292072138376, 'n_jobs': 30, 'n_estimators': 257, 'max_depth': 4, 'min_child_weight': 8, 'eta': 0.005736483030247414, 'grow_policy': 'lossguide'}. Best is trial 12 with value: 197.61703704472308.[0m


[0]	validation_0-rmse:29.75800
[1]	validation_0-rmse:29.71522
[2]	validation_0-rmse:29.66895
[3]	validation_0-rmse:29.62145
[4]	validation_0-rmse:29.57960
[5]	validation_0-rmse:29.53595
[6]	validation_0-rmse:29.49706
[7]	validation_0-rmse:29.45092
[8]	validation_0-rmse:29.40642
[9]	validation_0-rmse:29.36641
[10]	validation_0-rmse:29.32083
[11]	validation_0-rmse:29.27328
[12]	validation_0-rmse:29.23416
[13]	validation_0-rmse:29.19540
[14]	validation_0-rmse:29.15673
[15]	validation_0-rmse:29.11074
[16]	validation_0-rmse:29.07213
[17]	validation_0-rmse:29.02773
[18]	validation_0-rmse:28.98901
[19]	validation_0-rmse:28.95051
[20]	validation_0-rmse:28.91222
[21]	validation_0-rmse:28.86862
[22]	validation_0-rmse:28.82465
[23]	validation_0-rmse:28.78710
[24]	validation_0-rmse:28.74328
[25]	validation_0-rmse:28.70014
[26]	validation_0-rmse:28.65569
[27]	validation_0-rmse:28.61350
[28]	validation_0-rmse:28.56903
[29]	validation_0-rmse:28.52929
[30]	validation_0-rmse:28.48469
[31]	validation_0-

[32m[I 2024-09-04 07:19:40,212][0m Trial 17 finished with value: 527.1883963028054 and parameters: {'booster': 'gbtree', 'lambda': 0.0023659535154435802, 'alpha': 0.0005339041233585167, 'subsample': 0.6237758986771733, 'colsample_bytree': 0.3231280623153926, 'n_jobs': 30, 'n_estimators': 213, 'max_depth': 2, 'min_child_weight': 6, 'eta': 0.0031683099348420816, 'grow_policy': 'lossguide'}. Best is trial 12 with value: 197.61703704472308.[0m


[0]	validation_0-rmse:29.76653
[1]	validation_0-rmse:29.72621
[2]	validation_0-rmse:29.68483
[3]	validation_0-rmse:29.64371
[4]	validation_0-rmse:29.60560
[5]	validation_0-rmse:29.56509
[6]	validation_0-rmse:29.52740
[7]	validation_0-rmse:29.48597
[8]	validation_0-rmse:29.44545
[9]	validation_0-rmse:29.40761
[10]	validation_0-rmse:29.36626
[11]	validation_0-rmse:29.32479
[12]	validation_0-rmse:29.28422
[13]	validation_0-rmse:29.24675
[14]	validation_0-rmse:29.20679
[15]	validation_0-rmse:29.16727
[16]	validation_0-rmse:29.13073
[17]	validation_0-rmse:29.09096
[18]	validation_0-rmse:29.05428
[19]	validation_0-rmse:29.01759
[20]	validation_0-rmse:28.97885
[21]	validation_0-rmse:28.94055
[22]	validation_0-rmse:28.90164
[23]	validation_0-rmse:28.86764
[24]	validation_0-rmse:28.82814
[25]	validation_0-rmse:28.78878
[26]	validation_0-rmse:28.75050
[27]	validation_0-rmse:28.71171
[28]	validation_0-rmse:28.67250
[29]	validation_0-rmse:28.63523
[30]	validation_0-rmse:28.59674
[31]	validation_0-

[32m[I 2024-09-04 07:19:41,634][0m Trial 18 finished with value: 564.2653428278401 and parameters: {'booster': 'gbtree', 'lambda': 0.01794834184890126, 'alpha': 0.033399514566515145, 'subsample': 0.6003604414846564, 'colsample_bytree': 0.3667668385081121, 'n_jobs': 30, 'n_estimators': 187, 'max_depth': 3, 'min_child_weight': 4, 'eta': 0.0023262898796230958, 'grow_policy': 'lossguide'}. Best is trial 12 with value: 197.61703704472308.[0m


[0]	validation_0-rmse:29.69095
[1]	validation_0-rmse:29.57332
[2]	validation_0-rmse:29.45521
[3]	validation_0-rmse:29.33340
[4]	validation_0-rmse:29.22017
[5]	validation_0-rmse:29.10491
[6]	validation_0-rmse:28.98805
[7]	validation_0-rmse:28.87212
[8]	validation_0-rmse:28.75897
[9]	validation_0-rmse:28.64939
[10]	validation_0-rmse:28.53356
[11]	validation_0-rmse:28.42174
[12]	validation_0-rmse:28.31087
[13]	validation_0-rmse:28.20701
[14]	validation_0-rmse:28.09769
[15]	validation_0-rmse:27.99342
[16]	validation_0-rmse:27.89192
[17]	validation_0-rmse:27.78641
[18]	validation_0-rmse:27.68358
[19]	validation_0-rmse:27.58451
[20]	validation_0-rmse:27.47864
[21]	validation_0-rmse:27.37476
[22]	validation_0-rmse:27.27228
[23]	validation_0-rmse:27.17413
[24]	validation_0-rmse:27.07200
[25]	validation_0-rmse:26.96586
[26]	validation_0-rmse:26.86533
[27]	validation_0-rmse:26.76393
[28]	validation_0-rmse:26.66322
[29]	validation_0-rmse:26.56213
[30]	validation_0-rmse:26.46217
[31]	validation_0-

[32m[I 2024-09-04 07:19:44,936][0m Trial 19 finished with value: 215.91689079497448 and parameters: {'booster': 'gbtree', 'lambda': 0.00030548461775321996, 'alpha': 0.016041956970801474, 'subsample': 0.6664236376810446, 'colsample_bytree': 0.3514398938132568, 'n_jobs': 30, 'n_estimators': 263, 'max_depth': 4, 'min_child_weight': 7, 'eta': 0.0060816740120911246, 'grow_policy': 'lossguide'}. Best is trial 12 with value: 197.61703704472308.[0m


{'booster': 'gbtree', 'lambda': 0.0006479776917988095, 'alpha': 7.071310990057505e-05, 'subsample': 0.6013904066297826, 'colsample_bytree': 0.3309432759813565, 'n_jobs': 30, 'n_estimators': 240, 'max_depth': 3, 'min_child_weight': 7, 'eta': 0.009953987535067708, 'grow_policy': 'lossguide'}
xgb_3 done
