In [None]:
#|hide
#| eval: false
! [ -e /content ] && pip install -Uqq fastai  # upgrade fastai on colab

In [None]:
#|export
from __future__ import annotations
from fastai.basics import *
from fastai.tabular.core import *
from fastai.tabular.model import *
from fastai.tabular.data import *

In [None]:
#|hide
from nbdev.showdoc import *

In [None]:
#|default_exp tabular.learner

# Tabular learner

> The function to immediately get a `Learner` ready to train for tabular data

The main function you probably want to use in this module is `tabular_learner`. It will automatically create a `TabularModel` suitable for your data and infer the right loss function. See the [tabular tutorial](http://docs.fast.ai/tutorial.tabular.html) for an example of use in context.

## Main functions

In [None]:
#|export
class TabularLearner(Learner):
    "`Learner` for tabular data"
    def predict(self,
        row:pd.Series, # Features to be predicted
    ):
        "Predict on a single sample"
        # Turn the `Series` into a one-row `DataFrame` and wrap it in a test `DataLoader`
        row_dl = self.dls.test_dl(row.to_frame().T)
        # Cast the continuous columns to float32 before batching
        # NOTE(review): presumably needed because the transposed one-row frame does not keep numeric dtypes - confirm
        row_dl.dataset.conts = row_dl.dataset.conts.astype(np.float32)
        inputs,raw_preds,_,decoded_preds = self.get_preds(dl=row_dl, with_input=True, with_decoded=True)
        # Rebuild a full (inputs + decoded targets) batch so `dls.decode` can decode it
        batch = (*tuplify(inputs),*tuplify(decoded_preds))
        decoded_row = self.dls.decode(batch)
        # Only one row was fed in, so index 0 selects its decoded prediction and raw prediction
        return decoded_row,decoded_preds[0],raw_preds[0]

In [None]:
show_doc(TabularLearner, title_level=3)

---

[source](https://github.com/fastai/fastai/blob/main/fastai/tabular/learner.py#L16){target="_blank" style="float:right; font-size:smaller"}

### TabularLearner

>      TabularLearner (dls:fastai.data.core.DataLoaders, model:Callable,
>                      loss_func:Optional[Callable]=None, opt_func:fastai.optimi
>                      zer.Optimizer|fastai.optimizer.OptimWrapper=<function
>                      Adam>, lr:float|slice=0.001, splitter:Callable=<function
>                      trainable_params>, cbs:fastai.callback.core.Callback|coll
>                      ections.abc.MutableSequence|None=None, metrics:Union[Call
>                      able,collections.abc.MutableSequence,NoneType]=None,
>                      path:str|pathlib.Path|None=None,
>                      model_dir:str|pathlib.Path='models',
>                      wd:float|int|None=None, wd_bn_bias:bool=False,
>                      train_bn:bool=True, moms:tuple=(0.95, 0.85, 0.95),
>                      default_cbs:bool=True)

*`Learner` for tabular data*

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| dls | DataLoaders |  | `DataLoaders` containing fastai or PyTorch `DataLoader`s |
| model | Callable |  | PyTorch model for training or inference |
| loss_func | Optional | None | Loss function. Defaults to `dls` loss |
| opt_func | fastai.optimizer.Optimizer \| fastai.optimizer.OptimWrapper | Adam | Optimization function for training |
| lr | float \| slice | 0.001 | Default learning rate |
| splitter | Callable | trainable_params | Split model into parameter groups. Defaults to one parameter group |
| cbs | fastai.callback.core.Callback \| collections.abc.MutableSequence \| None | None | `Callback`s to add to `Learner` |
| metrics | Union | None | `Metric`s to calculate on validation set |
| path | str \| pathlib.Path \| None | None | Parent directory to save, load, and export models. Defaults to `dls` `path` |
| model_dir | str \| pathlib.Path | models | Subdirectory to save and load models |
| wd | float \| int \| None | None | Default weight decay |
| wd_bn_bias | bool | False | Apply weight decay to normalization and bias parameters |
| train_bn | bool | True | Train frozen normalization layers |
| moms | tuple | (0.95, 0.85, 0.95) | Default momentum for schedulers |
| default_cbs | bool | True | Include default `Callback`s |

It works exactly like a normal `Learner`; the only difference is that it implements a `predict` method designed to work on a single row of data.

In [None]:
#|export
@delegates(Learner.__init__)
def tabular_learner(
        dls:TabularDataLoaders,
        layers:list=None, # Size of the layers generated by `LinBnDrop`
        emb_szs:list=None, # Tuples of `n_unique, embedding_size` for all categorical features
        config:dict=None, # Config params for TabularModel from `tabular_config`
        n_out:int=None, # Final output size of the model
        y_range:Tuple=None, # Low and high for the final sigmoid function
        **kwargs
):
    "Get a `Learner` using `dls`, with `metrics`, including a `TabularModel` created using the remaining params."
    # Work on a shallow copy so the caller's `config` dict is never mutated and can safely be reused
    config = tabular_config() if config is None else dict(config)
    if layers is None: layers = [200,100]
    # NOTE(review): `emb_szs` is forwarded to `get_emb_sz` with `{}` as the fallback, which suggests a
    # mapping is expected rather than the annotated `list` - confirm against `get_emb_sz`
    emb_szs = get_emb_sz(dls.train_ds, {} if emb_szs is None else emb_szs)
    if n_out is None: n_out = get_c(dls)
    assert n_out, "`n_out` is not defined, and could not be inferred from data, set `dls.c` or pass `n_out`"
    # Always take `y_range` out of the config so it is never handed to `TabularModel` twice
    # (once as a keyword, once through `**config`); an explicit `y_range` argument wins over the config value
    config_y_range = config.pop('y_range', None)
    if y_range is None: y_range = config_y_range
    model = TabularModel(emb_szs, len(dls.cont_names), n_out, layers, y_range=y_range, **config)
    return TabularLearner(dls, model, **kwargs)

If your data was built with fastai, you probably won't need to pass anything to `emb_szs` unless you want to change the library's defaults (produced by `get_emb_sz`). The same goes for `n_out`, which should be inferred automatically. `layers` defaults to `[200,100]` and is passed to `TabularModel` along with the `config`.

Use `tabular_config` to create a `config` and customize the model used. `y_range` is also exposed directly as an argument of `tabular_learner` because it is used so often.

All the other arguments are passed to `Learner`.

In [None]:
# Fetch the Adult sample dataset via `untar_data` and read its CSV into a DataFrame
path = untar_data(URLs.ADULT_SAMPLE)
df = pd.read_csv(path/'adult.csv')
# Columns handed to the data loaders as categorical and continuous features respectively
cat_names = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race']
cont_names = ['age', 'fnlwgt', 'education-num']
# Preprocessing steps passed to `TabularDataLoaders.from_df`
procs = [Categorify, FillMissing, Normalize]
# Target column is `salary`; rows 800-999 are used as the validation set, with a batch size of 64
dls = TabularDataLoaders.from_df(df, path, procs=procs, cat_names=cat_names, cont_names=cont_names, 
                                 y_names="salary", valid_idx=list(range(800,1000)), bs=64)
# Build a learner with every model setting left at its default; later cells reuse `learn` and `df`
learn = tabular_learner(dls)

In [None]:
show_doc(TabularLearner.predict)

---

[source](https://github.com/fastai/fastai/blob/main/fastai/tabular/learner.py#L18){target="_blank" style="float:right; font-size:smaller"}

### TabularLearner.predict

>      TabularLearner.predict (row:pandas.core.series.Series)

*Predict on a single sample*

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| row | Series | Features to be predicted |

We can pass an individual row of data to our `TabularLearner`'s `predict` method. Its output is slightly different from the other `predict` methods, as this one will always return the input as well:

In [None]:
row, clas, probs = learn.predict(df.iloc[0])

In [None]:
row.show()

Unnamed: 0,workclass,education,marital-status,occupation,relationship,race,education-num_na,age,fnlwgt,education-num,salary
0,Private,Assoc-acdm,Married-civ-spouse,#na#,Wife,White,False,49.0,101320.001685,12.0,<50k


In [None]:
clas, probs

(tensor(0), tensor([0.5264, 0.4736]))

In [None]:
#|hide
#test y_range is passed, either directly or through `config`
# Thunks keep construction lazy so each learner is built and checked in turn
learner_builders = (
    lambda: tabular_learner(dls, y_range=(0,32)),
    lambda: tabular_learner(dls, config = tabular_config(y_range=(0,32))),
)
for build_learner in learner_builders:
    learn = build_learner()
    # The model must end with a `SigmoidRange` carrying the requested bounds
    assert isinstance(learn.model.layers[-1], SigmoidRange)
    test_eq(learn.model.layers[-1].low, 0)
    test_eq(learn.model.layers[-1].high, 32)

In [None]:
#|export
@dispatch
def show_results(x:Tabular, y:Tabular, samples, outs, ctxs=None, max_n=10, **kwargs):
    "Display the first `max_n` rows of `x` with a `<name>_pred` column added for each name in `x.y_names`, filled from `y`"
    shown = x.all_cols[:max_n]
    # NOTE(review): `y` presumably holds the decoded predictions - confirm against the caller
    for target in x.y_names:
        pred_col = target+'_pred'
        shown[pred_col] = y[target][:max_n].values
    display_df(shown)

## Export -

In [None]:
#|hide
from nbdev import nbdev_export
nbdev_export()