In [2]:
from fugue_coiled import CoiledDaskClient

# Start a Dask client backed by Coiled with eight workers.
# NOTE(review): `software` looks like the name of a pre-built Coiled software
# environment and `environ` like environment variables set on the cluster —
# confirm against the fugue_coiled documentation.
client = CoiledDaskClient(
    n_workers=8,
    software="fugue-env",
    environ={"WANDB_START_METHOD": "thread"},
)

Output()



In [None]:
# client.close()  # uncomment and run to close the client once tuning is finished

In [3]:
from typing import Any, Callable, Dict, Tuple

import pandas as pd
from sklearn.datasets import fetch_california_housing, load_diabetes
from sklearn.metrics import mean_absolute_percentage_error, make_scorer
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from tune import Space, RandInt, Grid

def get_housing(func: Callable[..., Any], test_size: float = 0.2, random_state: int = 0):
    """Load a dataset as a single frame and split it into train/test parts.

    Args:
        func: Dataset loader that is called as ``func(as_frame=True)`` and
            returns a mapping with a ``"data"`` feature frame and a
            ``"target"`` entry (e.g. ``fetch_california_housing``).
        test_size: Forwarded to ``train_test_split``. Defaults to 0.2, the
            value that used to be hard-coded.
        random_state: Seed forwarded to ``train_test_split``. Defaults to 0,
            the value that used to be hard-coded.

    Returns:
        Whatever ``train_test_split`` returns for the combined frame; callers
        in this notebook unpack it as ``train, test``.
    """
    dataset = func(as_frame=True)
    # Attach the label as a column named "target" so features and label travel
    # together through the split; downstream code separates them again by
    # column position.
    frame = dataset["data"].assign(target=dataset["target"])
    return train_test_split(frame, test_size=test_size, random_state=random_state)
    
# Sanity check: run the loader on the California housing data and let the
# notebook display the returned train/test split.
get_housing(fetch_california_housing)

[       MedInc  HouseAge  AveRooms  AveBedrms  Population  AveOccup  Latitude  \
 12069  4.2386       6.0  7.723077   1.169231       228.0  3.507692     33.83   
 15925  4.3898      52.0  5.326622   1.100671      1485.0  3.322148     37.73   
 11162  3.9333      26.0  4.668478   1.046196      1022.0  2.777174     33.83   
 4904   1.4653      38.0  3.383495   1.009709       749.0  3.635922     34.01   
 4683   3.1765      52.0  4.119792   1.043403      1135.0  1.970486     34.08   
 ...       ...       ...       ...        ...         ...       ...       ...   
 13123  4.4125      20.0  6.000000   1.045662       712.0  3.251142     38.27   
 19648  2.9135      27.0  5.349282   0.933014       647.0  3.095694     37.48   
 9845   3.1977      31.0  3.641221   0.941476       704.0  1.791349     36.58   
 10799  5.6315      34.0  4.540598   1.064103      1052.0  2.247863     33.62   
 2732   1.3882      15.0  3.929530   1.100671      1024.0  3.436242     32.80   
 
        Longitude   target

# Define Objective Function and Search Space

In [4]:
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor

# get training data: keep the first element of the split as `train`; the
# second element (the held-out part) is discarded here
train, _ = get_housing(fetch_california_housing)

# define objective function
def objective(model:Any, **hp:Any) -> float:
    """Score one model configuration by 3-fold cross-validation on ``train``.

    Args:
        model: Estimator class (or any callable) that is instantiated as
            ``model(**hp)``.
        **hp: Hyperparameters forwarded unchanged to ``model``.

    Returns:
        The mean of the per-fold scores, where each fold is scored with
        ``mean_absolute_percentage_error`` wrapped by ``make_scorer``.
    """
    estimator = model(**hp)
    # The module-level `train` frame stores the label in its last column;
    # everything before it is treated as a feature.
    features, label = train.iloc[:, :-1], train.iloc[:, -1]
    mape_scorer = make_scorer(mean_absolute_percentage_error)
    fold_scores = cross_val_score(estimator, features, label, cv=3, scoring=mape_scorer)
    return fold_scores.mean()

# define search spaces: each Space fixes the estimator class under `model` and
# declares `n_estimators` as the tunable value, so its keys match the
# objective(model, **hp) signature.
# XGBoost: n_estimators taken from the grid values 100, 200 and 300
xgb_space = Space(model=XGBRegressor, n_estimators=Grid(100, 200, 300))
# LightGBM: n_estimators is a RandInt(100, 300) expression — presumably an
# integer chosen by the optimizer within that range; confirm bound inclusivity
# in the tune documentation
lgbm_space = Space(model=LGBMRegressor, n_estimators=RandInt(100, 300))

In [5]:
# get baseline scores: call the objective with no hyperparameters, so each
# estimator is built with its own default settings; these numbers are the
# reference the tuned configurations are compared against
print("XGB Baseline:", objective(XGBRegressor))
print("LGBM Baseline", objective(LGBMRegressor))

XGB Baseline: 0.18111303006212223
LGBM Baseline 0.17842243141239011


# Tuning

1. Evaluate the objective function
2. Over a hybrid search space
3. Apply Hyperopt for Bayesian Optimization 
4. Run tuning jobs in a distributed fashion on Dask
5. Track tuning results with wandb

In [6]:
from tune import suggest_for_noniterative_objective

# Tune `objective` over the combination of both model spaces, running the
# trials through the Dask client created above.
# NOTE(review): "hyperopt:5" presumably selects Hyperopt with a budget of 5
# iterations, and the "wandb:<name>" logger presumably reports to a
# Weights & Biases project of that name — confirm both in the tune docs.
result = suggest_for_noniterative_objective(
    objective,
    xgb_space + lgbm_space,
    local_optimizer="hyperopt:5",
    execution_engine=client,
    logger="wandb:CA_housing_tuning_scipy",
)

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/jovyan/.netrc
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/jovyan/.netrc
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/jovyan/.netrc


In [None]:
# Display the first entry of the tuning result (presumably the best trial —
# confirm the ordering in the tune documentation).
result[0]