- Add use cases for hyperparameters (HPs) with fastai
- Make the learner work generically
- Make LRFinder an HP
- Make ToggleHP work

- Make the optimization work with n_jobs > 1
- Work on the HPO phase (callbacks, etc.)
- Work on recipes
- Work on constraints

In [1]:
!pip install --upgrade transformers

Collecting transformers
  Downloading transformers-2.9.1-py3-none-any.whl (641 kB)
Collecting tokenizers==0.7.0
  Downloading tokenizers-0.7.0-cp37-cp37m-win_amd64.whl (1.1 MB)

ERROR: sentence-transformers 0.2.5 has requirement transformers==2.3.0, but you'll have transformers 2.9.1 which is incompatible.
ERROR: flair 0.4.4 has requirement ipython==7.6.1, but you'll have ipython 7.13.0 which is incompatible.
ERROR: flair 0.4.4 has requirement urllib3<1.25,>=1.20, but you'll have urllib3 1.25.9 which is incompatible.



Installing collected packages: tokenizers, transformers
  Attempting uninstall: tokenizers
    Found existing installation: tokenizers 0.1.1
    Uninstalling tokenizers-0.1.1:
      Successfully uninstalled tokenizers-0.1.1
  Attempting uninstall: transformers
    Found existing installation: transformers 2.3.0
    Uninstalling transformers-2.3.0:
      Successfully uninstalled transformers-2.3.0
Successfully installed tokenizers-0.7.0 transformers-2.9.1


In [1]:
#default_exp sklearn

In [2]:
# Reload the autoreload extension and select mode 2, which re-imports
# modules before each cell executes so edits to the library are picked up.
%reload_ext autoreload
%autoreload 2

In [3]:
#export
from hypster.oo_hp import *
from hypster.hypster_prepare import *

import fastai2
from fastai2.tabular.all import *
from fastai2.metrics import *

from sklearn.model_selection import train_test_split

from copy import deepcopy

import optuna

In [4]:
#export
# Shared random seed; passed as `random_state` to train_test_split further down.
SEED = 42

# Read Data

In [5]:
#export
# Fetch the Adult sample dataset with fastai's `untar_data` and list the
# contents of the returned folder (adult.csv, export.pkl, models).
path = untar_data(URLs.ADULT_SAMPLE)
path.ls()

(#3) [Path('C:/Users/user/.fastai/data/adult_sample/adult.csv'),Path('C:/Users/user/.fastai/data/adult_sample/export.pkl'),Path('C:/Users/user/.fastai/data/adult_sample/models')]

In [6]:
#export
# Load the raw table from adult.csv and preview its first rows.
df = pd.read_csv(path/'adult.csv')
df.head()

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,salary
0,49,Private,101320,Assoc-acdm,12.0,Married-civ-spouse,,Wife,White,Female,0,1902,40,United-States,>=50k
1,44,Private,236746,Masters,14.0,Divorced,Exec-managerial,Not-in-family,White,Male,10520,0,45,United-States,>=50k
2,38,Private,96185,HS-grad,,Divorced,,Unmarried,Black,Female,0,0,32,United-States,<50k
3,38,Self-emp-inc,112847,Prof-school,15.0,Married-civ-spouse,Prof-specialty,Husband,Asian-Pac-Islander,Male,0,0,40,United-States,>=50k
4,42,Self-emp-not-inc,82297,7th-8th,,Married-civ-spouse,Other-service,Wife,Black,Female,0,0,50,United-States,<50k


In [7]:
#export
# Keep a 10% subsample of the rows (presumably to keep the demo fast).
# Seeded with SEED so every run draws the same rows.
# NOTE: this rebinds `df`, so re-running this cell without re-reading the CSV shrinks the data again.
df = df.sample(frac=0.1, random_state=SEED)

In [8]:
#export
# Column roles used when building the tabular dataset below:
# categorical inputs, continuous inputs, and the target column.
cat_names = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race']
cont_names = ['age', 'fnlwgt', 'education-num']
dep_var = "salary"

In [9]:
#export
# Seeded hold-out split, stratified on the target column.
# 60% of the sampled rows go to test_df; the remaining 40% form train_df.
train_df, test_df = train_test_split(
    df,
    test_size=0.6,
    random_state=SEED,
    stratify=df[dep_var],
)

# Preprocessing

In [10]:
#export
# Exported because the exported `procs` cell depends on `imp`, which is built from this wrapper.
# NOTE(review): `prepare` is assumed to come from hypster (star import) and to let
# HP objects be passed as constructor arguments — confirm in hypster_prepare.
FillMissing = prepare(FillMissing)

In [11]:
#export
# Categorical search space for the imputation strategy given to FillMissing.
# Exported because the exported `procs` cell depends on `imp`, which uses this HP.
fill_strategy = HpCategorical("fill_strategy", 
                              [FillStrategy.mode, 
                               FillStrategy.median, 
                               #FillStrategy.constant(5, fill)
                              ])

In [12]:
#export
# Imputation step whose strategy is a hyperparameter; no indicator columns are added.
# Exported because the exported `procs` cell references `imp` directly.
imp = FillMissing(fill_strategy=fill_strategy, add_col=False)

In [13]:
#export
# Rebind Normalize to its `prepare`-wrapped version so an HP object can be passed as an argument.
# NOTE(review): assumes `prepare` defers the real construction until `.sample(trial)` — confirm in hypster.
Normalize = prepare(Normalize)
# "mean_norm" is a float hyperparameter declared with bounds 0.001 and 10.4.
norm = Normalize(mean=HpFloat("mean_norm", 0.001, 10.4))

In [14]:
#export
# Preprocessing steps handed to TabularPandas, in this order.
procs = [Categorify, imp, norm]

# DataBunch

In [15]:
#export
# Rebind TabularPandas to its `prepare`-wrapped version (same pattern as the other fastai wrappers in this notebook).
# NOTE(review): the original class is shadowed from here on — confirm nothing later needs it.
TabularPandas = prepare(TabularPandas)

In [16]:
#export
# Build the tabular dataset from the training frame using the (prepared) TabularPandas.
# The splitter is seeded with SEED so the train/validation split is identical on every run,
# matching the seeded train_test_split above.
to = TabularPandas(train_df,
                   y_block = CategoryBlock(), 
                   y_names = dep_var,
                   splits = RandomSplitter(seed=SEED)(range_of(train_df)),
                   cat_names = cat_names,
                   cont_names = cont_names,
                   procs = procs
                  )

In [17]:
#export
#dls = to.dataloaders(batch_size=2 ** HpInt("batch_size", 5, 9))
#dls = to.dataloaders(batch_size=HpInt("batch_size", 16, 128, 16))

In [18]:
#export
# Create the data loaders with a fixed batch size of 32
# (the commented-out cell above sketches HP-driven batch sizes).
dls = to.dataloaders(batch_size=32)

# Learner

In [19]:
#export
# Callbacks attached to the learner; both monitor the "roc_auc_score" metric.
cbs = [
    TrackerCallback(monitor="roc_auc_score"),
    ReduceLROnPlateau(monitor="roc_auc_score", patience=3),
]

In [20]:
#export
# Float hyperparameter declared with bounds 0.85 and 0.99; reused for the `moms` tuple of the learner.
start_mom = HpFloat("start_mom", 0.85, 0.99)

In [21]:
#export
# Rebind fastai's tabular_learner factory to its `prepare`-wrapped version so HP objects can be passed as arguments.
# NOTE(review): assumes the wrapped call is deferred until `.sample(trial)` — confirm in hypster.
tabular_learner = prepare(tabular_learner)

In [22]:
#export
# Declare the learner with hyperparameters in place of concrete values.
# Keep the argument order: the recorded trial params list the HPs in this same order.
learner = tabular_learner(dls,
                          metrics=RocAuc(),
                          # optimizer is chosen per trial among Adam, SGD and QHAdam
                          opt_func=HpCategorical("optimizer", [Adam, SGD, QHAdam]),
                          # variable-length list of layer sizes; the recorded trials show one
                          # "layer_size_<i>" parameter per sampled layer (same_value=False)
                          layers=HpVarLenList("layers", 1, 4, HpInt("layer_size", 50, 300, 50), same_value=False),
                          cbs=cbs,
                          # middle momentum is start_mom minus 0.1
                          # NOTE(review): relies on HpFloat supporting arithmetic — confirm in hypster
                          moms=(start_mom, start_mom-0.1, start_mom), 
                          wd_bn_bias=HpBool("wd_bn_bias"),
                          )

# Optuna

In [23]:
#export
# Learning-rate hyperparameter declared with bounds 1e-5 and 1e-1 and log=True; passed to fit_one_cycle below.
lr = HpFloat("learning_rate", 1e-5, 1e-1, log=True)

In [24]:
import datetime

In [25]:
#export
def run_learner(fit_method, get_metric, n_trials=5, timeout=600, direction="maximize"):
    """Run an Optuna study over a lazily-defined training run and return the study.

    Each trial calls ``fit_method.sample(trial)`` and then
    ``get_metric.sample(trial)``; the value returned by the latter is printed
    and reported to Optuna as the objective value of the trial.

    Args:
        fit_method: object exposing ``sample(trial)``; its return value is
            ignored (in this notebook: ``learner.fit_one_cycle(2, lr)``).
        get_metric: object exposing ``sample(trial)`` whose return value is
            the score of the trial (in this notebook: ``learner.tracker.best``).
        n_trials: number of trials passed to ``study.optimize``.
        timeout: time budget in seconds passed to ``study.optimize``.
        direction: optimisation direction passed to ``optuna.create_study``
            (``"maximize"`` or ``"minimize"``).

    Returns:
        The Optuna study after ``optimize`` has finished.
    """
    # Imported here because the notebook's `import datetime` cell is not tagged
    # `#export`, so the exported module would otherwise fail with a NameError.
    import datetime

    def objective(trial):
        # Order matters: run the training call first, then read the metric.
        fit_method.sample(trial)
        res = get_metric.sample(trial)
        print(res)
        return res

    # Level 0 equals `logging.NOTSET`.
    # NOTE(review): presumably meant to quiet Optuna's per-trial log lines — confirm.
    optuna.logging.set_verbosity(0)
    pruner = optuna.pruners.NopPruner()  # no pruning: every trial runs to completion
    now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    study = optuna.create_study(direction=direction, study_name=now, pruner=pruner)
    # n_jobs stays at 1: parallel trials are still an open item in the TODO list at the top of the notebook.
    study.optimize(objective, n_trials=n_trials, n_jobs=1, timeout=timeout)
    return study

In [33]:
#export
# Run 3 trials: each one trains for 2 one-cycle epochs with the sampled
# learning rate and is scored by the tracker callback's best value.
study = run_learner(
    fit_method=learner.fit_one_cycle(2, lr),
    get_metric=learner.tracker.best,
    n_trials=3,
)

epoch,train_loss,valid_loss,roc_auc_score,time
0,0.596456,0.385042,0.607875,00:00
1,0.498358,0.379385,0.712044,00:00


0.7120438256858062


epoch,train_loss,valid_loss,roc_auc_score,time
0,0.469326,0.481746,0.734245,00:00
1,0.421761,0.373316,0.703188,00:00


0.734244995469149


epoch,train_loss,valid_loss,roc_auc_score,time
0,0.888938,3.609398,0.64128,00:00
1,0.707554,0.381767,0.638273,00:00


0.6412801713485461


In [34]:
#export
# Report how many trials the study recorded.
print("Number of finished trials: {}".format(len(study.trials)))

Number of finished trials: 3


In [35]:
# Show the objective value of the best trial; `trial` is reused by the next cell.
print("Best trial:")
trial = study.best_trial
print("  Value: {}".format(trial.value))

Best trial:
  Value: 0.734244995469149


In [36]:
# List every hyperparameter value sampled for the best trial.
print("  Params: ")
for name in trial.params:
    print("    {}: {}".format(name, trial.params[name]))

  Params: 
    fill_strategy: median
    mean_norm: 5.389236192600328
    optimizer: QHAdam
    layers: 1
    layer_size_1: 50
    start_mom: 0.8780905302848517
    wd_bn_bias: False
    learning_rate: 0.02942944304692902


In [37]:
#export
# Tabulate all trials (value, timestamps, sampled params, state) as a DataFrame.
study.trials_dataframe()

Unnamed: 0,number,value,datetime_start,datetime_complete,params_fill_strategy,params_layer_size_1,params_layer_size_2,params_layer_size_3,params_layers,params_learning_rate,params_mean_norm,params_optimizer,params_start_mom,params_wd_bn_bias,state
0,0,0.712044,2020-05-09 23:43:49.586812,2020-05-09 23:43:50.439779,mode,200,,,1,0.071879,0.693551,Adam,0.986672,False,COMPLETE
1,1,0.734245,2020-05-09 23:43:50.440778,2020-05-09 23:43:51.370779,median,50,,,1,0.029429,5.389236,QHAdam,0.878091,False,COMPLETE
2,2,0.64128,2020-05-09 23:43:51.370779,2020-05-09 23:43:52.814780,mode,300,250.0,300.0,3,0.022867,10.211889,QHAdam,0.893017,True,COMPLETE


In [31]:
from nbdev.export import notebook2script

In [32]:
# Export the `#export`-tagged cells of the project's notebooks to Python modules via nbdev.
notebook2script()

Converted 00_core.ipynb.
Converted 01_api.ipynb.
Converted 02_oo_hp.ipynb.
Converted 03_hypster_prepare.ipynb.
Converted 04_tabular_api.ipynb.
Converted 05_learner_api.ipynb.
Converted fastai_adult_tutorial.ipynb.
Converted index.ipynb.
