In [None]:
from hypster import HypsterX

# Basic

In [None]:
# Placeholder: the dataset is left unspecified in this sketch.
data = ...

In [None]:
# Open design point: the list contents are undecided (`?` is a placeholder, not valid Python).
# NOTE(review): `transformations` is not passed to any call in this notebook.
transformations = [?]

In [None]:
# Simplest usage: construct the tuner from the data alone.
model = HypsterX(data)

In [None]:
# Fit with n_trials=100 (presumably the number of tuning trials — TODO confirm).
model.fit(n_trials=100)

In [None]:
# Q: How should `predict` be handled for FastAI-backed models?
# NOTE(review): `X_test` is not defined in any cell of this notebook.
model.predict(X_test)

# Advanced

In [None]:
from hypster.constraints import *

In [None]:
from hypster.samplers import CmaEsSampler
from hypster.callbacks.pruners import SuccessiveHalvingPruner
from hypster.callbacks import EarlyStopping, ReduceLROnPlateau

## Define X

In [None]:
# Constraints handed to the model constructor further down; these names are
# presumably provided by the star-import of hypster.constraints — TODO confirm.
constraints = [CPUOnly, InterpretableFeatures, 
               ExplainableModel, FastTraining]

In [None]:
# Q: How should the user specify models and frameworks?

In [None]:
# Open design point: the list contents are undecided (`?` is a placeholder, not valid Python).
# NOTE(review): `models` is not passed to any call in this notebook.
models = [?]

In [None]:
# Q: How should negation be handled, e.g. "no GPU"?
# One idea: exclude = [GPU, NeuralNetworks] (?)

In [None]:
# Sampler built with default arguments; passed to the model constructor further down.
sampler = CmaEsSampler()

### Common Callbacks

In [None]:
# Pruner built with default arguments.
pruner = SuccessiveHalvingPruner()

In [None]:
# Early stopping configured with tol=1e-5 and patience=5.
early_stopper = EarlyStopping(tol=1e-5, patience=5)

In [None]:
# Learning-rate reducer configured with lr_decay=0.1.
lr_reducer = ReduceLROnPlateau(lr_decay=0.1)

### Specific Callbacks

In [None]:
# Q: How should the user specify which framework each callback belongs to?
# Q: How can collisions between callback names from different packages be avoided?
# NOTE(review): `FastAICallback`, `BatchLossFilter` and `fp16` are not explicitly
# imported in this notebook; the trailing `...` marks "more callbacks here".
fastai_cbs = FastAICallback([BatchLossFilter, fp16, ...])

In [None]:
# Single list mixing the common callbacks with the framework-specific wrapper;
# this is what gets passed to the model constructor.
cbs = [pruner, early_stopper, lr_reducer, fastai_cbs]

## Model

In [None]:
# Advanced usage: data, constraints, sampler and callbacks are passed positionally.
# NOTE(review): `HyPSTERTabular` is not among the names explicitly imported in this
# notebook (only `HypsterX` is) — confirm the intended class name.
model = HyPSTERTabular(data, constraints, sampler, cbs)

In [None]:
# Fit with n_trials=100.
model.fit(n_trials=100)

In [None]:
# Unlike the basic example, this variant asks for class probabilities.
# NOTE(review): `X_test` is not defined in any cell of this notebook.
model.predict_proba(X_test)

# Advanced #2

In [None]:
from hypster.hps import *
from hypster.distributions import *

### Common HPs

In [None]:
# Candidates for common HPs: tree_depth, learning_rate, optimizers, regularization, ...

In [None]:
# Integer range 2..6; the distribution is passed as a class, not an instance.
tree_depth = TreeDepthHP(min=2, max=6, dist=UniformIntDistribution) 

In [None]:
# Q: Should the learning rate be driven by a distribution or by an LR finder?
# NOTE(review): `dist(?)=` is pseudo-syntax (not valid Python), and `start_lr` /
# `end_lr` are not defined in any cell of this notebook.
lr = LearningRateHP(start_lr, end_lr, dist(?)=LRFinder(...))

### Specific HPs

In [None]:
# Model-specific HP: targets the "l1_reg" parameter of `xgboost`, range 0.1..0.5, default 0.1.
# NOTE(review): `xgboost` is not imported in any cell of this notebook.
xgboost_l1_reg = LogUniformHP(model=xgboost, 
                              hp_name="l1_reg", 
                              start_value=0.1, end_value=0.5,
                              default=0.1)

# Build Model

In [None]:
# Hyperparameter overrides collected into one list for the model's `hp_override` argument.
hps = [lr, tree_depth, xgboost_l1_reg]

In [None]:
# Build the tuner from hyperparameter overrides only; in this variant the data is
# passed to `fit` rather than to the constructor.
# The class is spelled `HypsterX`, matching the import in the first cell, so the
# cell resolves on a fresh kernel.
model = HypsterX(hp_override=hps)

In [None]:
# Here the data is supplied at fit time (the constructor above received only hp_override).
model.fit(data, n_trials=100)

In [None]:
# NOTE(review): `X_test` is not defined in any cell of this notebook.
model.predict(X_test)

# FastAI Example

In [None]:
# NOTE(review): `pd` and `path` are not defined in any cell of this notebook, and the
# fastai names used in this section (TabularPandas, Learner, ...) are not imported
# by any earlier cell.
df = pd.read_csv(path/'adult.csv')

In [None]:
# Column roles and preprocessing steps handed to TabularPandas in the next cell.
cat_names = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race']
cont_names = ['age', 'fnlwgt', 'education-num']
procs = [Categorify, FillMissing, Normalize]
# RandomSplitter is built with default arguments and applied to the row indices of df.
splits = RandomSplitter()(range_of(df))

In [None]:
# "salary" is the target column.
to = TabularPandas(df, procs, cat_names, cont_names, y_names="salary", splits=splits)

In [None]:
# Training loader: bs=64, shuffle=True, drop_last=True.
trn_dl = TabDataLoader(to.train, bs=64, num_workers=0, shuffle=True, drop_last=True)
# Validation loader: bs=128; shuffle and drop_last are left at their defaults.
val_dl = TabDataLoader(to.valid, bs=128, num_workers=0)
dbunch = DataBunch(trn_dl, val_dl)
dbunch.show_batch()

In [None]:
# Positional arguments: embedding sizes derived from `to`, the number of continuous
# columns, then 2 and [200,100] — presumably output size and hidden-layer sizes; TODO confirm.
# NOTE(review): this rebinds `model`, which held the hypster tuner in earlier sections.
model = TabularModel(get_emb_sz(to), len(to.cont_names), 2, [200,100])

In [None]:
# Adam with wd=0.01 and eps=1e-5 pre-bound via partial; passed uncalled as the optimizer factory.
opt_func = partial(Adam, wd=0.01, eps=1e-5)
learn = Learner(dbunch, model, CrossEntropyLossFlat(), opt_func=opt_func, metrics=accuracy)

# Test Results

In [None]:
# Build a test loader from the test frame with the "salary" column dropped.
# NOTE(review): `test_df` is not defined in any cell of this notebook.
test_dl = learn.dls.test_dl(test_df.drop(['salary'], axis=1))

In [None]:
probs = learn.get_preds(dl=test_dl)

In [None]:
# Keep column 1 of the first element returned by get_preds — presumably the
# positive-class probability; TODO confirm class order.
# NOTE(review): this rebinds `probs`, so re-running only this cell would index the
# already-sliced result instead of the get_preds output.
probs = probs[0][:,1]

In [None]:
from sklearn.metrics import roc_auc_score

In [None]:
# NOTE(review): `scorer` is not used by any later cell in this notebook.
scorer = RocAuc()

In [None]:
# NOTE(review): `dep_var` is not defined in any cell of this notebook; the target
# column is spelled "salary" in the cells above.
roc_auc_score(test_df[dep_var], probs)

# Desired API

In [None]:
import numpy as np

## Preprocessing

In [None]:
# Integer hyperparameter over 1..100, used below as an argument of the constant fill strategy.
fill = HpInt(start_range=1, end_range=100)

In [None]:
# Preferred form: the options are the strategy objects themselves.
fill_strategy = HpOptions("fill_strategy", 
                          [FillStrategy.mode, 
                           FillStrategy.median, 
                           FillStrategy.constant(5, fill)])

In [None]:
# Alternative (less preferred):

In [None]:
# String labels are the options; a dict maps each label to its strategy object.
# NOTE(review): this rebinds `fill_strategy`, so on a top-to-bottom run this
# variant replaces the preferred one above.
fill_dict = {"mode" : FillStrategy.mode, "median" : FillStrategy.median, "constant" : FillStrategy.constant(5, fill)}
fill_strategy = HpOptions("fill_strategy", ["mode", "median", "constant"], fill_dict)

In [None]:
# Boolean hyperparameter named "missing_col_bool".
add_col = HpBool("missing_col_bool")

In [None]:
# Hyperparameter objects are passed where FillMissing takes concrete values.
imp = FillMissing(fill_strategy=fill_strategy, add_col=add_col)

In [None]:
# NOTE(review): `FillMissing` is rebound to the result of `prepare(...)` only after
# the cell above has already called it — confirm whether this should run first.
FillMissing = prepare(FillMissing)

#### Option A

In [None]:
# Option A: the float hyperparameter carries an explicit name ("norm_mean"), and the
# toggle is applied where the step is placed into `procs`.
norm = Normalize(mean=HpFloat("norm_mean", start=2, end=10, dist="uniform"))

In [None]:
procs = [Categorify, imp, HpToggle(norm)]

#### Option B

In [None]:
# Option B: the toggle wraps the step at definition time, and HpFloat is given no name.
# NOTE(review): `norm` and `procs` are rebound here, so on a top-to-bottom run
# Option B replaces Option A.
norm = HpToggle(Normalize(mean=HpFloat(start=2, end=10, dist="uniform")))

In [None]:
procs = [Categorify, imp, norm]

## DataBunch

In [None]:
# Same construction as the FastAI example, but `procs` now contains hyperparameter objects.
# NOTE(review): `train_df` and `dep_var` are not defined in any cell of this notebook.
to = TabularPandas(train_df, 
                   y_block = CategoryBlock(), 
                   y_names = dep_var,
                   splits = RandomSplitter()(range_of(train_df)),
                   cat_names = cat_names,
                   cont_names = cont_names,
                   procs = procs)

#### Option A

In [None]:
# Exponent for a power-of-two batch size (2**0 .. 2**8 if both bounds are inclusive — TODO confirm).
bs_pow = HpInt(0, 8)

In [None]:
# Two spellings of the same idea: arithmetic on a plain int vs. on an explicit HpConst.
dls = to.dataloaders(batch_size=2**bs_pow)
# or
dls = to.dataloaders(batch_size=HpConst(2)**bs_pow)

#### Option B

In [None]:
# Delegate the choice to a batch-size finder; its arguments are still unspecified (`...`).
dls = to.dataloaders(batch_size=HpBatchSizeFinder(...))

## Learner

In [None]:
from fastai2.metrics import *

In [None]:
# NOTE(review): this rebinds `cbs`, which held the hypster callback list in the "Advanced" section.
cbs = [TrackerCallback(monitor="roc_auc_score")]

### Number of Layers + Layer Sizes

#### Option A

In [None]:
# Option A: the layer size is a function-valued integer hyperparameter built from
# np.multiply, base_value=50 and the integer range 1..7
# (presumably yielding 50 * k — TODO confirm against HpFuncInt).
layer_size_hp = HpFuncInt(func=np.multiply, base_value=50, min_int=1, max_int=7)
# Variable-length list with 1 to 5 entries. The size hyperparameter is passed by
# keyword (`value=`, as in Option B) because a positional argument may not follow
# keyword arguments in a Python call.
layers = HpVarList(min_len=1, max_len=5, value=layer_size_hp)

#### Option B

In [None]:
# Option B: a plain integer hyperparameter over 1..7, scaled by ordinary arithmetic (50 * hp).
layer_size_hp = HpInt(min=1, max=7)
layers = HpVarList(min_len=1, max_len=5, value=50 * layer_size_hp) 
# TODO: decide how to distinguish one shared value for every item in the list
# from an independently chosen value per item.

### Optimizer

In [None]:
# Map each option label to an optimizer factory. All three values are left
# uncalled, in the same way `opt_func` is passed as an uncalled partial in the
# FastAI example, so every entry is the same kind of object. `Lamb` is referenced
# by its bare name, as in the other optimizer cells of this notebook.
opt_dict = {"SGD" : SGD, "ADAM" : Adam, "LAMB" : Lamb}

In [None]:
# String labels are the options; `opt_dict` maps each label to its optimizer.
# NOTE(review): the `fill_strategy` HpOptions calls pass a name string as the first
# argument, while here the option list comes first — confirm the intended signature.
optimizer = HpOptions(["SGD", "ADAM", "LAMB"], opt_dict)

In [None]:
# NOTE(review): this compares the HpOptions object to a string, and the "SGD" branch
# builds Adam and passes it to Lamb — intent unclear; it looks like a placeholder for
# hyperparameters that only apply when a given option is selected.
if optimizer == "SGD":
    sqr_mom = Adam()
    Lamb(sqr_mom)

#### Option B

In [None]:
def opt_name_to_opt(name):
    """Map an optimizer label to an optimizer factory.

    Args:
        name: Option label, e.g. "SGD", "ADAM" or "LAMB".

    Returns:
        ``SGD`` when ``name`` contains the substring "SGD", otherwise ``Adam``.
    """
    # NOTE(review): every label without "SGD" — including "LAMB" — maps to Adam;
    # confirm whether LAMB should get its own branch.
    return SGD if "SGD" in name else Adam

In [None]:
# Option B: string labels are the options; a function (instead of a dict) maps a label to an optimizer.
optimizer = HpOptions(["SGD", "ADAM", "LAMB"], opt_name_to_opt)

#### Option C

In [None]:
# Option C: the optimizer objects themselves are the options, with no label mapping.
# NOTE(review): `optimizer` is rebound by each option, so on a top-to-bottom run this one wins.
optimizer = HpOptions([SGD, Adam, Lamb])

### Init Learner

In [None]:
# Build the tabular learner from the dataloaders, tunable layer sizes, optimizer
# factory and callbacks defined in the cells above.
# Every keyword argument is comma-terminated so the call parses and the
# commented-out candidates can be re-enabled without breaking it.
# NOTE(review): `opt_func` is the Adam partial from the "FastAI Example" section; the
# `optimizer` hyperparameter defined under "Optimizer" is not passed here — confirm
# which one is intended.
learn = tabular_learner(dls, metrics=RocAuc(),
                        layers=layers,
                        #loss_func,
                        opt_func=opt_func,
                        cbs=cbs,
                        # Candidate arguments, not wired in yet:
                        #moms=(0.95, 0.85, 0.95)
                        #wd=None, wd_bn_bias=False, train_bn=True
                        #emb_szs=[],
                       )

### LR Finder

In [None]:
# Learning rate expressed as an LR-finder hyperparameter; `kwargs` is still unspecified (`...`).
# NOTE(review): this rebinds `lr`, which held a LearningRateHP in the "Advanced #2" section.
lr = HpLrFinder(finder_type="fastai", which="steep", kwargs=...)

In [None]:
# First positional argument is 3; the hyperparameter object is passed as the learning rate.
learn.fit_flat_cos(3, lr=lr)

In [None]:
# NOTE(review): no `lr` is passed here, unlike the call above — confirm that is intended.
learn.fit_one_cycle(1)