In [None]:
#export
from fastai2.basics import *
from fastai2.tabular.core import *
from fastai2.tabular.model import *

In [None]:
from nbdev.showdoc import *
from fastai2.tabular.data import *

In [None]:
#default_exp tabular.learner

# Tabular learner

> The function to immediately get a `Learner` ready to train for tabular data

## Main functions

In [None]:
#export
class TabularLearner(Learner):
    "`Learner` for tabular data"
    def predict(self, row):
        "Predict on a single `row`; returns the decoded batch, the first decoded prediction and the first raw prediction"
        # Wrap `row` in a one-row DataFrame and build a new dataset from the validation
        # set around it, then run the processing step on that dataset.
        # NOTE(review): `pd.DataFrame(row).T` on a mixed-type row presumably yields object-dtype
        # columns, which may be why the `learn.predict` example in this notebook is disabled - confirm.
        tst_to = self.dls.valid_ds.new(pd.DataFrame(row).T)
        tst_to.process()
        dl = self.dls.valid.new(tst_to)
        inp,preds,_,dec_preds = self.get_preds(dl=dl, with_input=True, with_decoded=True)
        # Inputs followed by the decoded predictions form the batch handed to `dls.decode`
        b = (*tuplify(inp),*tuplify(dec_preds))
        full_dec = self.dls.decode(b)
        return full_dec,dec_preds[0],preds[0]

In [None]:
#export
@delegates(Learner.__init__)
def tabular_learner(dls, layers=None, emb_szs=None, config=None, n_out=None, y_range=None,
                    ps=None, embed_p=0., use_bn=True, bn_final=False, bn_cont=True, **kwargs):
    "Get a `TabularLearner` using `dls`, with a `TabularModel` created from the model params; `kwargs` (e.g. `metrics`) go to the learner."
    if config is None: config = tabular_config()
    if layers is None: layers = [200,100]
    # Embedding sizes are computed from the training set; `emb_szs` (or `{}` when omitted) is forwarded to `get_emb_sz`
    emb_szs = get_emb_sz(dls.train_ds, {} if emb_szs is None else emb_szs)
    # Fall back to `get_c(dls)` when the number of outputs is not passed explicitly
    if n_out is None: n_out = get_c(dls)
    assert n_out, "`n_out` is not defined, and could not be infered from data, set `dls.c` or pass `n_out`"
    # The explicit keyword arguments and the entries of `config` are both passed to `TabularModel`
    model = TabularModel(emb_szs, len(dls.cont_names), n_out, layers, ps=ps, embed_p=embed_p,
                        y_range=y_range, use_bn=use_bn, bn_final=bn_final, bn_cont=bn_cont, **config)
    return TabularLearner(dls, model, **kwargs)

In [None]:
#export
@typedispatch
def show_results(x:Tabular, y:Tabular, samples, outs, ctxs=None, max_n=10, **kwargs):
    "Display the first `max_n` rows of `x` with a `<name>_pred` column taken from `y` for each name in `x.y_names`"
    # Take an explicit copy of the row slice: adding columns to a slice of a DataFrame is
    # the chained-assignment pattern that pandas flags with `SettingWithCopyWarning`.
    df = x.all_cols[:max_n].copy()
    for n in x.y_names: df[n+'_pred'] = y[n][:max_n].values
    display_df(df)

## Integration example with training

In [None]:
from fastai2.callback.all import *

In [None]:
# Fetch the ADULT_SAMPLE dataset with `untar_data` and read its `adult.csv` file into a DataFrame
path = untar_data(URLs.ADULT_SAMPLE)
df = pd.read_csv(path/'adult.csv')

In [None]:
# Columns used as categorical and continuous inputs, and the processing steps to apply to them
cat_names = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race']
cont_names = ['age', 'fnlwgt', 'education-num']
procs = [Categorify, FillMissing, Normalize]
# Split indices from `RandomSplitter` with its default arguments.
# NOTE(review): no seed is passed, so the split presumably differs between runs - confirm.
splits = RandomSplitter()(range_of(df))

In [None]:
# Bundle the DataFrame with its processing steps, input columns, target column ("salary") and splits
to = TabularPandas(df, procs, cat_names, cont_names, y_names="salary", splits=splits)

In [None]:
# Create the DataLoaders with a batch size of 64 and preview one batch
dls = to.dataloaders(bs=64)
dls.show_batch()

Unnamed: 0,workclass,education,marital-status,occupation,relationship,race,education-num_na,age,fnlwgt,education-num,salary
0,Private,HS-grad,Never-married,Other-service,Own-child,White,False,20.000001,174684.999658,9.0,<50k
1,Local-gov,Bachelors,Never-married,Craft-repair,Not-in-family,White,False,31.0,158290.999241,13.0,>=50k
2,Local-gov,Bachelors,Never-married,Prof-specialty,Unmarried,White,False,40.0,321186.996924,13.0,<50k
3,Private,9th,Divorced,Handlers-cleaners,Not-in-family,White,False,49.0,166857.000251,5.0,<50k
4,Private,Some-college,Divorced,Machine-op-inspct,Unmarried,White,False,48.0,50402.001607,10.0,<50k
5,Private,HS-grad,Never-married,Other-service,Own-child,White,False,33.0,80058.004994,9.0,<50k
6,Self-emp-not-inc,Some-college,Never-married,Craft-repair,Own-child,White,False,28.0,155621.001214,10.0,<50k
7,Private,Some-college,Divorced,Adm-clerical,Not-in-family,White,False,39.0,235258.999197,10.0,<50k
8,Local-gov,Bachelors,Married-civ-spouse,#na#,Husband,White,True,58.999999,196012.999938,10.0,>=50k
9,Private,Assoc-voc,Married-civ-spouse,Sales,Husband,White,False,48.0,160220.000426,11.0,>=50k


In [None]:
# Concrete type of the DataLoaders object created above
type(dls)

fastai2.tabular.data.TabularDataLoaders

In [None]:
# Type of the training dataset held by the DataLoaders
type(dls.train_ds)

fastai2.tabular.core.TabularPandas

In [None]:
# Classes recorded for each categorical column of the training dataset
dls.train_ds.classes

{'workclass': (#10) ['#na#',' ?',' Federal-gov',' Local-gov',' Never-worked',' Private',' Self-emp-inc',' Self-emp-not-inc',' State-gov',' Without-pay'],
 'education': (#17) ['#na#',' 10th',' 11th',' 12th',' 1st-4th',' 5th-6th',' 7th-8th',' 9th',' Assoc-acdm',' Assoc-voc'...],
 'marital-status': (#8) ['#na#',' Divorced',' Married-AF-spouse',' Married-civ-spouse',' Married-spouse-absent',' Never-married',' Separated',' Widowed'],
 'occupation': (#16) ['#na#',' ?',' Adm-clerical',' Armed-Forces',' Craft-repair',' Exec-managerial',' Farming-fishing',' Handlers-cleaners',' Machine-op-inspct',' Other-service'...],
 'relationship': (#7) ['#na#',' Husband',' Not-in-family',' Other-relative',' Own-child',' Unmarried',' Wife'],
 'race': (#6) ['#na#',' Amer-Indian-Eskimo',' Asian-Pac-Islander',' Black',' Other',' White'],
 'education-num_na': (#3) ['#na#',False,True]}

In [None]:
# Build the learner with `layers=[200,100]`; `metrics=accuracy` is forwarded through `**kwargs`
learn = tabular_learner(dls, [200,100], metrics=accuracy)

In [None]:
# Train for a single epoch with `fit_one_cycle`
learn.fit_one_cycle(1)

epoch,train_loss,valid_loss,accuracy,time
0,0.352627,0.36337,0.827703,00:05


In [None]:
# Show sample rows together with the model's predictions (the added `salary_pred` column)
learn.show_results()

Unnamed: 0,workclass,education,marital-status,occupation,relationship,race,education-num_na,age,fnlwgt,education-num,salary,salary_pred
0,5.0,10.0,3.0,5.0,1.0,5.0,1.0,-0.111873,-0.927747,1.145388,1.0,1.0
1,3.0,10.0,1.0,12.0,5.0,5.0,1.0,-0.405421,-0.654687,1.145388,0.0,0.0
2,5.0,13.0,3.0,11.0,1.0,5.0,1.0,0.181674,-0.110434,1.537422,1.0,1.0
3,7.0,12.0,3.0,15.0,1.0,5.0,1.0,-0.332034,-0.355038,-0.422752,0.0,0.0
4,5.0,16.0,5.0,14.0,2.0,2.0,1.0,-0.625581,0.788141,-0.030717,0.0,0.0
5,5.0,16.0,3.0,7.0,1.0,5.0,1.0,-0.698968,0.863516,-0.030717,0.0,0.0
6,5.0,1.0,5.0,13.0,4.0,5.0,1.0,-1.57961,0.056453,-1.598856,0.0,0.0
7,2.0,12.0,5.0,3.0,4.0,5.0,1.0,-1.065902,0.014428,-0.422752,0.0,0.0
8,5.0,3.0,5.0,4.0,4.0,5.0,1.0,-1.506223,-0.574069,-0.814786,0.0,0.0


In [None]:
#TODO: find out why the `learn.predict` call below does not work, then re-enable it
#learn.predict(df.iloc[0])

In [None]:
# Build an unlabelled frame (the `salary` target removed) without mutating `df`,
# then wrap it in a test DataLoader
test_df = df.drop(columns=['salary'])
dl = learn.dls.test_dl(test_df)

In [None]:
# Predictions for the test DataLoader; the second element of the returned tuple is `None` in the output
learn.get_preds(dl=dl)

(tensor([[0.4798, 0.5202],
         [0.4611, 0.5389],
         [0.9882, 0.0118],
         ...,
         [0.5789, 0.4211],
         [0.6913, 0.3087],
         [0.5746, 0.4254]]), None)

## Export -

In [None]:
#hide
# Run nbdev's `notebook2script`; the output lists the notebooks it converted
from nbdev.export import notebook2script
notebook2script()

Converted 00_torch_core.ipynb.
Converted 01_layers.ipynb.
Converted 02_data.load.ipynb.
Converted 03_data.core.ipynb.
Converted 04_data.external.ipynb.
Converted 05_data.transforms.ipynb.
Converted 06_data.block.ipynb.
Converted 07_vision.core.ipynb.
Converted 08_vision.data.ipynb.
Converted 09_vision.augment.ipynb.
Converted 09b_vision.utils.ipynb.
Converted 09c_vision.widgets.ipynb.
Converted 10_tutorial.pets.ipynb.
Converted 11_vision.models.xresnet.ipynb.
Converted 12_optimizer.ipynb.
Converted 13_callback.core.ipynb.
Converted 13a_learner.ipynb.
Converted 13b_metrics.ipynb.
Converted 14_callback.schedule.ipynb.
Converted 14a_callback.data.ipynb.
Converted 15_callback.hook.ipynb.
Converted 15a_vision.models.unet.ipynb.
Converted 16_callback.progress.ipynb.
Converted 17_callback.tracker.ipynb.
Converted 18_callback.fp16.ipynb.
Converted 19_callback.mixup.ipynb.
Converted 20_interpret.ipynb.
Converted 20a_distributed.ipynb.
Converted 21_vision.learner.ipynb.
Converted 22_tutorial.ima