In [1]:
from typing import List
import optuna
import pymysql
import numpy as np
from optuna.pruners import BasePruner, MedianPruner
from optuna.study import Study
from optuna.trial import Trial, FrozenTrial, TrialState
import logging


pymysql.install_as_MySQLdb()

storage = "mysql://optuna_test:8fpftF5bEFCXXtE4@59.68.29.90:3306/optuna_test"
study_name: str="optuna_qm2"


class RepeatPruner(BasePruner):
    """reference: https://optuna.readthedocs.io/en/stable/tutorial/10_key_features/003_efficient_optimization_algorithms.html#pruning
    Args:
        BasePruner (_type_): _description_
    """
    def prune(self, study, trial)-> bool:
        # type: (Study, FrozenTrial) -> bool
        print(f'entering prune with {trial}')
        logging.info(f"enteringing prune")
        # trials: List[FrozenTrial] = study.get_trials(deepcopy=False)
        # 只从TrialState.PRUNED, TrialState.COMPLETE 比较参数是否相同，相同则去掉。
        trials: List[FrozenTrial] = study.get_trials(deepcopy=False, states=[TrialState.PRUNED, TrialState.COMPLETE])
        
        numbers=np.array([t.number for t in trials])
        bool_params= np.array([trial.params==t.params for t in trials]).astype(bool)
        #Don´t evaluate function if another with same params has been/is being evaluated before this one
        if np.sum(bool_params) > 1:
            if trial.number > np.min(numbers[bool_params]):
                print(f'pruning trial {trial.params}')
                return True
        
        return False

study: Study = optuna.create_study(storage=storage, study_name=study_name, load_if_exists=True, pruner=RepeatPruner())

def objective(trial: Trial)-> float:
    logging.info(f"entering objective")
    # x: float = trial.suggest_float("x", -10, 10)
    x: float = trial.suggest_int('x', 1, 5)
    
    if trial.should_prune():
        raise optuna.TrialPruned()
    return (x - 2) ** 2

# study = optuna.load_study(
#     study_name="optuna_test", storage=storage
# )
# study.optimize(objective, n_trials=10)


  from .autonotebook import tqdm as notebook_tqdm
[32m[I 2023-01-02 17:29:01,045][0m Using an existing study with name 'optuna_qm2' instead of creating a new one.[0m


In [2]:
study = optuna.create_study(storage=storage, study_name=study_name, load_if_exists=True)
print(study.best_params, study.best_value)
print(study.best_trial)

print(type(study.best_trial.user_attrs))

[32m[I 2023-01-02 17:29:07,107][0m Using an existing study with name 'optuna_qm2' instead of creating a new one.[0m


{'attension': 'location', 'att_rnn': 'none', 'dropout_qm': 0.04227927709303497, 'learning_rate': 3.956289765011354e-05, 'num_dropout': 3, 'rdrop_coef ': 0.09303041898774123, 'rnn': 'none', 'use_cls': 2, 'warmup_proportion': 0.02004791292960465, 'weight_decay': 0.08800486348628722} 0.41790000000000005
FrozenTrial(number=130, values=[0.41790000000000005], datetime_start=datetime.datetime(2023, 1, 1, 3, 54, 25), datetime_complete=datetime.datetime(2023, 1, 1, 4, 23, 58), params={'attension': 'location', 'att_rnn': 'none', 'dropout_qm': 0.04227927709303497, 'learning_rate': 3.956289765011354e-05, 'num_dropout': 3, 'rdrop_coef ': 0.09303041898774123, 'rnn': 'none', 'use_cls': 2, 'warmup_proportion': 0.02004791292960465, 'weight_decay': 0.08800486348628722}, distributions={'attension': CategoricalDistribution(choices=('additive', 'location')), 'att_rnn': CategoricalDistribution(choices=('lstm', 'gru', 'none')), 'dropout_qm': FloatDistribution(high=0.4, log=False, low=0.0, step=None), 'learni

In [7]:
study.trials_dataframe()

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_att_rnn,params_attension,params_dropout_qm,params_learning_rate,params_num_dropout,params_rdrop_coef,params_rnn,params_use_cls,params_warmup_proportion,params_weight_decay,user_attrs_test_accuracy,state
0,0,,2022-12-29 17:50:11,NaT,NaT,lstm,additive,0.288760,0.000008,1,0.112430,lstm,1,0.074513,0.012965,,RUNNING
1,1,,2022-12-29 17:52:28,NaT,NaT,none,additive,0.043396,0.000033,1,0.047986,gru,2,0.145524,0.010276,,RUNNING
2,2,,2022-12-29 17:53:29,2022-12-29 19:55:36,0 days 02:02:07,gru,location,0.257045,0.000044,2,0.021831,lstm,2,0.048927,0.179797,,FAIL
3,3,,2022-12-29 17:57:09,2022-12-29 19:50:03,0 days 01:52:54,none,additive,0.205334,0.000049,1,0.136082,none,1,0.024653,0.113042,,FAIL
4,4,,2022-12-29 20:05:45,2022-12-29 20:07:19,0 days 00:01:34,none,additive,0.021823,0.000013,3,0.155974,gru,3,0.140231,0.008767,,FAIL
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
229,229,0.5024,2023-01-01 16:06:19,2023-01-01 16:28:55,0 days 00:22:36,none,location,0.206793,0.000039,1,0.081216,none,2,0.015370,0.097614,0.5014,COMPLETE
230,230,0.4908,2023-01-01 16:15:19,2023-01-01 16:35:46,0 days 00:20:27,none,location,0.007813,0.000022,1,0.052408,none,0,0.031085,0.082359,0.4917,COMPLETE
231,231,0.5003,2023-01-01 16:28:55,2023-01-01 16:51:22,0 days 00:22:27,none,location,0.092464,0.000042,2,0.075573,none,0,0.028722,0.081431,0.5016,COMPLETE
232,232,,2023-01-01 16:35:46,NaT,NaT,none,location,0.195494,0.000008,1,0.090011,none,2,0.138468,0.000266,,RUNNING


In [4]:
from optuna.trial import TrialState

pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

all_trials = study.get_trials(deepcopy=False, states=None)
print("Study statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))


Study statistics: 
  Number of finished trials:  234
  Number of pruned trials:  19
  Number of complete trials:  192
Best trial:
  Value:  0.41790000000000005
  Params: 
    attension: location
    att_rnn: none
    dropout_qm: 0.04227927709303497
    learning_rate: 3.956289765011354e-05
    num_dropout: 3
    rdrop_coef : 0.09303041898774123
    rnn: none
    use_cls: 2
    warmup_proportion: 0.02004791292960465
    weight_decay: 0.08800486348628722


In [6]:
#tutorial: https://broutonlab.com/blog/efficient-hyperparameter-optimization-with-optuna-framework 

from optuna.visualization import plot_contour
from optuna.visualization import plot_edf
from optuna.visualization import plot_intermediate_values
from optuna.visualization import plot_optimization_history
from optuna.visualization import plot_parallel_coordinate
from optuna.visualization import plot_param_importances
from optuna.visualization import plot_slice

plot_optimization_history(study)

# plot_intermediate_values(study)
# plot_parallel_coordinate(study)
# plot_contour(study, params=['dropout_qm', 'learning_rate'])
# optuna.visualization.plot_param_importances(study)

# Problems fixes

##  NameError: name '_mysql' is not defined

* https://www.jianshu.com/p/1f0c8e3c438b
* import pymysql 
* pymysql.install_as_MySQLdb()



In [1]:
import os
os.path.expanduser('~/.rqalpha/bundle')

'C:\\Users\\73915/.rqalpha/bundle'