In [33]:
import fastai2
from fastai2.tabular.all import *
from fastai2.metrics import *

In [34]:
from sklearn.model_selection import train_test_split

In [35]:
import optuna

In [36]:
SEED = 42

# Read Data

In [5]:
path = untar_data(URLs.ADULT_SAMPLE)
path.ls()

(#3) [Path('/Users/giladrubin/.fastai/data/adult_sample/adult.csv'),Path('/Users/giladrubin/.fastai/data/adult_sample/export.pkl'),Path('/Users/giladrubin/.fastai/data/adult_sample/models')]

In [6]:
df = pd.read_csv(path/'adult.csv')
df.head()

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,salary
0,49,Private,101320,Assoc-acdm,12.0,Married-civ-spouse,,Wife,White,Female,0,1902,40,United-States,>=50k
1,44,Private,236746,Masters,14.0,Divorced,Exec-managerial,Not-in-family,White,Male,10520,0,45,United-States,>=50k
2,38,Private,96185,HS-grad,,Divorced,,Unmarried,Black,Female,0,0,32,United-States,<50k
3,38,Self-emp-inc,112847,Prof-school,15.0,Married-civ-spouse,Prof-specialty,Husband,Asian-Pac-Islander,Male,0,0,40,United-States,>=50k
4,42,Self-emp-not-inc,82297,7th-8th,,Married-civ-spouse,Other-service,Wife,Black,Female,0,0,50,United-States,<50k


In [7]:
cat_names = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race']

In [8]:
cont_names = ['age', 'fnlwgt', 'education-num']

In [9]:
dep_var = "salary"

In [10]:
train_df, test_df = train_test_split(df, test_size=0.6, 
                                     random_state=SEED, 
                                     stratify=df[dep_var])

# Preprocessing

In [11]:
cat = Categorify()

In [12]:
imp = FillMissing(fill_strategy=FillStrategy.mode, 
                  add_col=True)

In [13]:
norm = Normalize()

In [14]:
procs = [cat, imp, norm]

# DataBunch

In [15]:
to = TabularPandas(train_df, 
                   y_block = CategoryBlock(), 
                   y_names = dep_var,
                   splits = RandomSplitter()(range_of(train_df)),
                   cat_names = cat_names,
                   cont_names = cont_names,
                   procs = procs)

In [16]:
dls = to.dataloaders(batch_size=32)

# Learner

In [20]:
import fastai2

In [21]:
cbs = [TrackerCallback(monitor="roc_auc_score"), ReduceLROnPlateau("roc_auc_score", patience=3)]

In [22]:
class HpTabularLearner(object):
    def __init__(self, dls, metrics, opt_func, layers):
        self.dls = dls
        self.opt_func = opt_func
        self.layers = layers
        self.metrics = metrics

In [23]:
raw_tabular_learner = fastai2.tabular.learner.tabular_learner

In [24]:
#@delegates(fastai2.tabular.learner.tabular_learner)
def tabular_learner(dls, metrics, opt_func, layers, **kwargs):
    return HpTabularLearner(dls=dls, metrics=metrics, opt_func=opt_func, layers=layers)
#     return fastai2.tabular.learner.tabular_learner(dls, 
#                                                    opt_func=opt_func, 
#                                                    layers=layers,
#                                                    **kwargs)

In [25]:
learn = tabular_learner(dls=dls, 
                   metrics=RocAuc(),
                   layers=[1, 3, 50, 100], 
                   opt_func=[Adam, SGD, QHAdam],
                   #emb_szs=[], 
                   #loss_func,
                   #cbs=cbs,
                   #moms=(0.95, 0.85, 0.95),
                   #wd=None, wd_bn_bias=False, train_bn=True
                  )

# Optuna

In [26]:
from copy import deepcopy

In [27]:
def suggest_cat_hp(trial, name, lst):
    names = [str(f) for f in lst]
    items_dict = dict(zip(names, lst))
    chosen_hp = trial.suggest_categorical(name, names)
    return items_dict[chosen_hp]

In [28]:
def suggest_var_list(trial, lst_len_name, value_name, min_len, max_len, min_size, max_size):
    lst_len = trial.suggest_int(lst_len_name, min_len, max_len)
    lst_values = []
    for i in range(lst_len):
        value = trial.suggest_int(f"{value_name}_{i}", min_size, max_size)
        lst_values.append(value)
    return lst_values

In [29]:
class Objective(object):
    def __init__(self, learn):
        self.learn = learn
    def __call__(self, trial):
        learn = deepcopy(self.learn)
        opt_func = suggest_cat_hp(trial, "optimizers", learn.opt_func)
        metrics = learn.metrics
        layers = suggest_var_list(trial, "n_layers", "layer_size", 
                                  learn.layers[0], learn.layers[1], learn.layers[2], learn.layers[3])
        cbs = [TrackerCallback(monitor="roc_auc_score"), ReduceLROnPlateau("roc_auc_score", patience=3)]
        learner = raw_tabular_learner(self.learn.dls, metrics=metrics, opt_func = opt_func, layers=layers, cbs=cbs)
        learner.fit_one_cycle(1)
        return learner.cbs[3].best

In [30]:
def study_fit(learner, n_trials):
    optuna.logging.set_verbosity(0)
    pruner = optuna.pruners.NopPruner()
    study = optuna.create_study(direction="maximize", pruner=pruner)
    objective = Objective(learner)
    study.optimize(objective, n_trials=n_trials, timeout=600)
    return study

In [37]:
study = study_fit(learn, 3)

epoch,train_loss,valid_loss,roc_auc_score,time
0,0.358414,0.360441,0.720969,00:11


epoch,train_loss,valid_loss,roc_auc_score,time
0,0.565544,0.559923,0.715305,00:11


epoch,train_loss,valid_loss,roc_auc_score,time
0,0.537677,0.541244,0.727452,00:10


In [38]:
study.trials_dataframe()

Unnamed: 0,number,value,datetime_start,datetime_complete,params_layer_size_0,params_layer_size_1,params_layer_size_2,params_n_layers,params_optimizers,state
0,0,0.720969,2020-04-18 18:02:46.537192,2020-04-18 18:02:57.910488,96,81,,2,<function Adam at 0x1a28a8ac80>,COMPLETE
1,1,0.715305,2020-04-18 18:02:57.912408,2020-04-18 18:03:09.603327,94,74,73.0,3,<function SGD at 0x1a28a8a950>,COMPLETE
2,2,0.727452,2020-04-18 18:03:09.604640,2020-04-18 18:03:20.561747,83,83,,2,<function SGD at 0x1a28a8a950>,COMPLETE


In [None]:
#learn = ...
hps = HypsterExperiment(learn, learn.fit_one_cycle, n_trials=3) 
hps.fit()

In [84]:
import optuna

In [142]:
EPOCHS = 2

In [149]:
def objective(trial):
    #Q: How do I define fill value?
    cat = Categorify()
    fillstraFillStrategy.mode
    imp = FillMissing(fill_strategy=fill_strategy,
                      add_col=True)

    norm = Normalize(mean=5)
    procs = [cat, imp, norm]

    # DataBunch
    to = TabularPandas(train_df, 
                       y_block = CategoryBlock(), 
                       y_names = dep_var,
                       splits = RandomSplitter()(range_of(train_df)),
                       cat_names = cat_names,
                       cont_names = cont_names,
                       procs = procs)

    dls = to.dataloaders(batch_size=512)
    
    n_layers = trial.suggest_int("n_layers", 2, 5)
    layer_sizes = L()
    
    for i in range(n_layers):
        layer_size = trial.suggest_int("layer_size_{}".format(i), 1, 10)
        layer_sizes.append(50 * layer_size)

    learn = tabular_learner(dls, metrics=RocAuc(),
                            layers=layer_sizes, 
                            #emb_szs=[], 
                            #loss_func,
                            #opt_func, 
                            cbs=cbs,
                            #lr=0.001, moms=(0.95, 0.85, 0.95)
                            #wd=None, wd_bn_bias=False, train_bn=True
                           )
    
    learn.fit_flat_cos(EPOCHS)

    return learn.cbs[3].best

In [150]:
optuna.logging.set_verbosity(0)
pruner = optuna.pruners.NopPruner()
study = optuna.create_study(direction="maximize", pruner=pruner)
study.optimize(objective, n_trials=5, timeout=600)

epoch,train_loss,valid_loss,roc_auc_score,time
0,0.405053,0.476074,0.5,00:01
1,0.367246,0.395882,0.622816,00:01


epoch,train_loss,valid_loss,roc_auc_score,time
0,0.380378,0.460215,0.5386,00:02
1,0.356267,0.383154,0.686228,00:02


epoch,train_loss,valid_loss,roc_auc_score,time
0,0.383377,0.471449,0.507842,00:01
1,0.358828,0.389051,0.687395,00:01


epoch,train_loss,valid_loss,roc_auc_score,time
0,0.394768,0.472647,0.671344,00:01
1,0.363715,0.388233,0.732455,00:01


epoch,train_loss,valid_loss,roc_auc_score,time
0,0.383316,0.457935,0.591756,00:01
1,0.359925,0.386684,0.720578,00:01


In [151]:
print("Number of finished trials: {}".format(len(study.trials)))

Number of finished trials: 5


In [152]:
print("Best trial:")
trial = study.best_trial
print("  Value: {}".format(trial.value))

Best trial:
  Value: 0.7324549813617556


In [153]:
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

  Params: 
    n_layers: 2
    layer_size_0: 7
    layer_size_1: 3


# Test Results

In [154]:
test_dl = learn.dls.test_dl(test_df.drop(['salary'], axis=1))

In [155]:
probs = learn.get_preds(dl=test_dl)

In [156]:
probs = probs[0][:,1]

In [157]:
from sklearn.metrics import roc_auc_score

In [158]:
scorer = RocAuc()

In [159]:
roc_auc_score(test_df[dep_var], probs)

0.8787596624870792

# Desired API

In [160]:
import numpy as np

## Preprocessing

In [None]:
fill = HpInt(start_range=1, end_range=100)

In [None]:
fill_strategy = HpOptions("fill_strategy", 
                          [FillStrategy.mode, 
                           FillStrategy.median, 
                           FillStrategy.constant(5, fill)])

In [None]:
# or less preferred

In [None]:
fill_dict = {"mode" : FillStrategy.mode, "median" : FillStrategy.median, "constant" : FillStrategy.constant(5, fill)}
fill_strategy = HpOptions("fill_strategy", ["mode", "median", "constant"], fill_dict)

In [None]:
add_col = HpBool("missing_col_bool")

In [64]:
imp = FillMissing(fill_strategy=fill_strategy, add_col=add_col)

#### Option A

In [65]:
norm = Normalize(mean=HpFloat("norm_mean", start=2, end=10, dist="uniform"))

In [66]:
procs = [Categorify, imp, HpToggle(norm)]

#### Option B

In [None]:
norm = HpToggle(Normalize(mean=HpFloat(start=2, end=10, dist="uniform")))

In [82]:
procs = [Categorify, imp, norm]

## DataBunch

In [67]:
to = TabularPandas(train_df, 
                   y_block = CategoryBlock(), 
                   y_names = dep_var,
                   splits = RandomSplitter()(range_of(train_df)),
                   cat_names = cat_names,
                   cont_names = cont_names,
                   procs = procs)

#### Option A

In [None]:
bs_pow = HpInt(0, 8)

In [122]:
dls = to.dataloaders(batch_size=2**bs_pow)
#or
dls = to.dataloaders(batch_size=HpConst(2)**bs_pow)

#### Option B

In [122]:
dls = to.dataloaders(batch_size=HpBatchSizeFinder(...))

## Learner

In [123]:
from fastai2.metrics import *

In [124]:
cbs = [TrackerCallback(monitor="roc_auc_score")]

### #Layers + Layer Sizes

#### Option A

In [None]:
layer_size_hp = HpFuncInt(func=np.multiply, base_value=50, min_int=1, max_int=7)
layers = HpVarList(min_len=1, max_len=5, layer_size_hp)

#### Option B

In [None]:
layer_size_hp = HpInt(min=1, max=7)
layers = HpVarList(min_len=1, max_len=5, value=50 * layer_size_hp) 
#TODO: think of how to distinguish between same value for all items in list and different ones?

### Optimizer

In [None]:
opt_dict = {"SGD" : SGD, "ADAM" : Adam, "LAMB" : fastai2.optimizer.Lamb()}

In [None]:
optimizer = HpOptions(["SGD", "ADAM", "LAMB"], opt_dict)

In [None]:
if optimizer == "SGD":
    sqr_mom = Adam()
    Lamb(sqr_mom)

#### Option B

In [None]:
def opt_name_to_opt(name): if name.containts("SGD") return SGD else Adam

In [None]:
optimizer = HpOptions(["SGD", "ADAM", "LAMB"], opt_name_to_opt)

#### Option C

In [None]:
optimizer = HpOptions([SGD, Adam, Lamb])

### Init Learner

In [None]:
learn = tabular_learner(dls, metrics=RocAuc(),
                        layers=layers
                        
                        #loss_func,
                        opt_func = opt_func
                        cbs=cbs,
                        #moms=(0.95, 0.85, 0.95)
                        #wd=None, wd_bn_bias=False, train_bn=True
                        #emb_szs=[],
                       )

### LR Finder

In [None]:
lr = HpLrFinder(finder_type="fastai", which="steep", kwargs=...)

In [220]:
learn.fit_flat_cos(3, lr=lr)

epoch,train_loss,valid_loss,roc_auc_score,time
0,0.365408,0.357305,0.679694,00:02
1,0.359781,0.342277,0.746188,00:02
2,0.353049,0.334768,0.758574,00:02
