In [6]:
import lightgbm as lgb
import numpy as np
import pandas as pd
from ray import train, tune
from ray.tune.search.optuna import OptunaSearch
# from ray.tune.schedulers import ASHAScheduler
from ray.tune.search import ConcurrencyLimiter

In [7]:
def fit_mod(space):
    """Train one LightGBM model for a Ray Tune trial and report validation metrics.

    The feature and label frames are read from parquet, split 80/20 by index
    label, and the 20% hold-out is used as the single validation set.

    Args:
        space: Parameter dict for this trial. It is passed unchanged to
            ``lgb.train`` as the params argument, so it must request both the
            ``binary_logloss`` and ``auc`` metrics that are read back below.

    Reports:
        ``binary_logloss`` and ``auc`` taken from
        ``gbm.best_score['valid_0']``, sent through ``train.report``.
    """
    # A fixed seed gives every trial the same train/validation split. With the
    # unseeded global RNG each trial was scored on a different hold-out set, so
    # differences between trials mixed hyperparameter effects with split noise.
    split_seed = 0

    # ELT
    X = pd.read_parquet('/Volumes/Extreme SSD/rematch1_predictor/full_training_data/train_x.parquet')
    Y = pd.read_parquet('/Volumes/Extreme SSD/rematch1_predictor/full_training_data/full_data_y.parquet')

    size_of_train_set = round(0.8 * X.shape[0])
    rng = np.random.default_rng(split_seed)
    rows_for_train_set = rng.choice(X.index.to_numpy(), size=size_of_train_set, replace=False)
    # Everything not drawn for training becomes the validation rows.
    rows_for_test_set = np.setdiff1d(X.index, rows_for_train_set)

    # Y is selected with X's index labels.
    # NOTE(review): assumes X and Y share the same index — confirm.
    train_set = lgb.Dataset(X.loc[rows_for_train_set], Y.loc[rows_for_train_set])
    test_set = lgb.Dataset(X.loc[rows_for_test_set], Y.loc[rows_for_test_set])

    # Model
    gbm = lgb.train(
        space,
        train_set,
        valid_sets=[test_set],
    )

    # 'valid_0' is the key under which the single validation set's scores are read.
    validation_scores = gbm.best_score['valid_0']
    train.report(
        {
            "binary_logloss": validation_scores['binary_logloss'],
            "auc": validation_scores['auc']
        }
    )

In [8]:
# Parameter space given to the tuner; each trial's sampled dict is what
# fit_mod receives and forwards to lgb.train.
space = dict(
    # Sampled per trial.
    num_rounds=tune.randint(1, 500),
    # NOTE(review): uniform sampling over (0.0001, 1) puts roughly 90% of
    # trials at learning rates above 0.1; consider tune.loguniform — confirm intent.
    learning_rate=tune.uniform(0.0001, 1),
    min_data_in_leaf=tune.randint(1, 200),
    # Held constant for every trial.
    objective='binary',
    early_stopping_round=2,
    # Both metrics are read back from best_score inside fit_mod.
    metrics=['binary_logloss', 'auc'],
)

In [9]:
# asha_scheduler = ASHAScheduler(
#     time_attr='training_iteration',
#     metric='binary_logloss',
#     mode='min',
#     max_t=1000,
#     grace_period=50,
#     reduction_factor=3,
#     brackets=1,
# )

# Optuna-backed searcher minimising binary_logloss, wrapped so that at most
# one trial is in flight at a time.
search_alg = ConcurrencyLimiter(
    OptunaSearch(metric="binary_logloss", mode="min"),
    max_concurrent=1,
)

In [5]:
# Build the tuner: 1000 sampled configurations of `space`, each evaluated by
# fit_mod, with suggestions coming from `search_alg`. No scheduler is set.
tuner = tune.Tuner(
    trainable=fit_mod,
    param_space=space,
    tune_config=tune.TuneConfig(search_alg=search_alg, num_samples=1000),
    # NOTE(review): storage_path is an absolute, machine-specific location.
    run_config=train.RunConfig(
        name="gb_ray_tune",
        storage_path="/Users/andrewbartnof/Documents/rmi/rematch_ferc_eia1/clean_data/model_full_gradient_boost/ray_tune",
    ),
)

# Launch the experiment and keep the returned results object.
results = tuner.fit()

AssertionError: Optuna must be installed! Run `pip install optuna`.

In [None]:
# experiment_path = "/Users/andrewbartnof/Documents/rmi/rematch_ferc_eia1/clean_data/model_full_gradient_boost/ray_tune/gb_ray_tune"
# restored_tuner = tune.Tuner.restore(experiment_path, trainable=fit_mod)

In [None]:
# fn_results = '/Users/andrewbartnof/Documents/rmi/rematch_ferc_eia1/clean_data/model_full_gradient_boost/ray_tune/ray_tune_dataframe.csv'
# restored_tuner.get_results().get_dataframe().to_csv(fn_results)

In [11]:
# !jupyter nbconvert --to script ray_tune.ipynb

[NbConvertApp] Converting notebook ray_tune.ipynb to script
[NbConvertApp] Writing 2760 bytes to ray_tune.py
