In [None]:
#export
from fastai2.basics import *
from fastai2.tabular.core import *
from fastai2.tabular.model import *

In [None]:
from nbdev.showdoc import *
from fastai2.tabular.data import *

In [None]:
#default_exp tabular.learner

# Tabular learner

> The function to immediately get a `Learner` ready to train for tabular data

## Main functions

In [None]:
#export
class TabularLearner(Learner):
    "`Learner` for tabular data"
    def predict(self, row):
        "Predict on a single `row`; returns `(full_dec, dec_preds[0], preds[0])`"
        # `pd.DataFrame(row).T` turns a single record (e.g. the `pd.Series` from `df.iloc[0]`)
        # into a one-row frame, which is wrapped in a new object built from the validation set.
        tst_to = self.dls.valid_ds.new(pd.DataFrame(row).T)
        tst_to.process()
        # Continuous columns are cast to float32 before being fed to the model
        tst_to.conts = tst_to.conts.astype(np.float32)
        dl = self.dls.valid.new(tst_to)
        inp,preds,_,dec_preds = self.get_preds(dl=dl, with_input=True, with_decoded=True)
        # Rebuild a full batch (inputs followed by decoded predictions) so `dls.decode` can process it
        b = (*tuplify(inp),*tuplify(dec_preds))
        full_dec = self.dls.decode(b)
        # Only one row was passed in, so index 0 selects its decoded and raw prediction
        return full_dec,dec_preds[0],preds[0]

In [None]:
#export
@delegates(Learner.__init__)
def tabular_learner(dls, layers=None, emb_szs=None, config=None, n_out=None, y_range=None,
                    ps=None, embed_p=0., use_bn=True, bn_final=False, bn_cont=True, **kwargs):
    "Get a `TabularLearner` using `dls`, with a `TabularModel` created from the model params; `kwargs` (e.g. `metrics`) go to the `Learner`."
    # Defaults:
    # - `config`: result of `tabular_config()`; its entries are passed to `TabularModel` as extra keywords
    # - `layers`: `[200,100]`
    # - `emb_szs`: an empty dict of overrides handed to `get_emb_sz`
    # - `n_out`: `get_c(dls)`
    # `y_range`, `ps`, `embed_p`, `use_bn`, `bn_final` and `bn_cont` are forwarded unchanged to `TabularModel`.
    # Raises `AssertionError` when `n_out` is still falsy after the `get_c` lookup.
    # NOTE(review): `config` is splatted next to the explicit keywords, so a `config` key that
    # duplicates one of them (e.g. `ps`) would raise `TypeError` for a repeated keyword argument.
    if config is None: config = tabular_config()
    if layers is None: layers = [200,100]
    emb_szs = get_emb_sz(dls.train_ds, {} if emb_szs is None else emb_szs)
    if n_out is None: n_out = get_c(dls)
    assert n_out, "`n_out` is not defined, and could not be infered from data, set `dls.c` or pass `n_out`"
    model = TabularModel(emb_szs, len(dls.cont_names), n_out, layers, ps=ps, embed_p=embed_p,
                        y_range=y_range, use_bn=use_bn, bn_final=bn_final, bn_cont=bn_cont, **config)
    return TabularLearner(dls, model, **kwargs)

In [None]:
#export
@typedispatch
def show_results(x:Tabular, y:Tabular, samples, outs, ctxs=None, max_n=10, **kwargs):
    "Display the first `max_n` rows of `x` with one `<name>_pred` column taken from `y` per target name"
    # `samples`, `outs`, `ctxs` and `kwargs` are not used here
    # (presumably kept so the signature matches the other `show_results` dispatches — confirm).
    # Copy the slice so the prediction columns are added to an independent frame rather than
    # to a slice of `x.all_cols` (avoids pandas' chained-assignment warning).
    df = x.all_cols[:max_n].copy()
    for n in x.y_names: df[n+'_pred'] = y[n][:max_n].values
    display_df(df)

## Integration example with training

In [None]:
from fastai2.callback.all import *

In [None]:
# Locate the ADULT_SAMPLE dataset via `untar_data` and read its `adult.csv` into a DataFrame
path = untar_data(URLs.ADULT_SAMPLE)
df = pd.read_csv(path/'adult.csv')

In [None]:
# Column roles and preprocessing steps used to build the `TabularPandas` in the next cell
cat_names = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race']
cont_names = ['age', 'fnlwgt', 'education-num']
procs = [Categorify, FillMissing, Normalize]
# NOTE(review): no seed is passed to `RandomSplitter`, so the split presumably differs between runs — confirm
splits = RandomSplitter()(range_of(df))

In [None]:
# Wrap `df` with the procs, the column roles and `splits`; `salary` is the target (`y_names`)
to = TabularPandas(df, procs, cat_names, cont_names, y_names="salary", splits=splits)

In [None]:
# Build the dataloaders with a batch size of 64 and display a sample batch
dls = to.dataloaders(bs=64)
dls.show_batch()

Unnamed: 0,workclass,education,marital-status,occupation,relationship,race,education-num_na,age,fnlwgt,education-num,salary
0,Private,Assoc-voc,Widowed,Adm-clerical,Not-in-family,White,False,75.0,314208.993873,11.0,<50k
1,Private,Some-college,Never-married,Handlers-cleaners,Own-child,White,False,23.000001,335570.004598,10.0,<50k
2,Private,Some-college,Never-married,Adm-clerical,Not-in-family,White,False,28.0,181775.999969,10.0,<50k
3,Private,HS-grad,Married-civ-spouse,Farming-fishing,Husband,White,False,44.0,75011.996556,9.0,<50k
4,Private,HS-grad,Never-married,Transport-moving,Not-in-family,White,False,33.0,361497.001084,9.0,<50k
5,Private,HS-grad,Married-civ-spouse,Machine-op-inspct,Husband,White,False,30.0,167832.000507,9.0,<50k
6,Private,Masters,Married-civ-spouse,Exec-managerial,Husband,White,False,49.0,187369.999917,14.0,>=50k
7,Private,HS-grad,Married-civ-spouse,Transport-moving,Husband,White,False,62.000001,29828.004881,9.0,<50k
8,Private,Bachelors,Never-married,Adm-clerical,Own-child,White,False,31.0,198451.99974,13.0,<50k
9,Private,7th-8th,Divorced,Exec-managerial,Not-in-family,White,False,46.0,182715.000069,4.0,<50k


In [None]:
# Per-column vocabularies of the categorical variables (each one starts with `#na#` in the output below)
dls.train_ds.classes

{'workclass': (#10) ['#na#',' ?',' Federal-gov',' Local-gov',' Never-worked',' Private',' Self-emp-inc',' Self-emp-not-inc',' State-gov',' Without-pay'],
 'education': (#17) ['#na#',' 10th',' 11th',' 12th',' 1st-4th',' 5th-6th',' 7th-8th',' 9th',' Assoc-acdm',' Assoc-voc'...],
 'marital-status': (#8) ['#na#',' Divorced',' Married-AF-spouse',' Married-civ-spouse',' Married-spouse-absent',' Never-married',' Separated',' Widowed'],
 'occupation': (#16) ['#na#',' ?',' Adm-clerical',' Armed-Forces',' Craft-repair',' Exec-managerial',' Farming-fishing',' Handlers-cleaners',' Machine-op-inspct',' Other-service'...],
 'relationship': (#7) ['#na#',' Husband',' Not-in-family',' Other-relative',' Own-child',' Unmarried',' Wife'],
 'race': (#6) ['#na#',' Amer-Indian-Eskimo',' Asian-Pac-Islander',' Black',' Other',' White'],
 'education-num_na': (#3) ['#na#',False,True]}

In [None]:
# `[200,100]` is passed positionally as `layers`; `metrics` reaches the `Learner` through `**kwargs`
learn = tabular_learner(dls, [200,100], metrics=accuracy)

In [None]:
# Train for a single epoch with `fit_one_cycle`
learn.fit_one_cycle(1)

epoch,train_loss,valid_loss,accuracy,time
0,0.3565,0.360542,0.834306,00:05


In [None]:
# Display sample rows with a `salary_pred` column alongside `salary`
learn.show_results()

Unnamed: 0,workclass,education,marital-status,occupation,relationship,race,education-num_na,age,fnlwgt,education-num,salary,salary_pred
0,5.0,16.0,1.0,2.0,5.0,5.0,1.0,-0.920449,0.700603,-0.031446,0.0,0.0
1,5.0,16.0,5.0,13.0,3.0,5.0,1.0,-1.360174,-0.391936,-0.031446,0.0,0.0
2,5.0,12.0,5.0,14.0,3.0,5.0,1.0,0.545303,0.046084,-0.424512,0.0,0.0
3,3.0,10.0,5.0,11.0,2.0,3.0,1.0,-0.407436,-1.008089,1.14775,0.0,0.0
4,2.0,13.0,3.0,11.0,1.0,5.0,1.0,0.105578,0.862088,1.540816,1.0,1.0
5,5.0,10.0,5.0,13.0,4.0,2.0,1.0,-0.554011,-0.194969,1.14775,0.0,0.0
6,7.0,10.0,3.0,5.0,6.0,5.0,1.0,0.911741,0.089618,1.14775,1.0,1.0
7,5.0,1.0,1.0,8.0,5.0,3.0,1.0,0.545303,2.970283,-1.603709,0.0,0.0
8,5.0,4.0,3.0,7.0,1.0,5.0,1.0,1.351467,-0.781836,-3.175972,0.0,0.0


In [None]:
# Single-row prediction: returns `(full_dec, dec_preds[0], preds[0])` as built in `TabularLearner.predict`
learn.predict(df.iloc[0])

(   workclass  education  marital-status  occupation  relationship  race  \
 0        5.0        8.0             3.0         0.0           6.0   5.0   
 
    education-num_na       age    fnlwgt  education-num  salary  
 0               1.0  0.765166 -0.840112       0.754685     0.0  ,
 tensor(0),
 tensor([0.5126, 0.4874]))

In [None]:
# Build an unlabelled frame by dropping the target column; `drop` returns a new DataFrame,
# so `df` itself is left untouched and no explicit copy or in-place mutation is needed.
test_df = df.drop(columns=['salary'])
# Create a test `DataLoader` from the learner's `dls` for the unlabelled frame
dl = learn.dls.test_dl(test_df)

In [None]:
# Predict on the test `DataLoader`; the second element of the result is `None` in the output below
# (presumably because `test_df` carries no `salary` column — confirm)
learn.get_preds(dl=dl)

(tensor([[0.4798, 0.5202],
         [0.4611, 0.5389],
         [0.9882, 0.0118],
         ...,
         [0.5789, 0.4211],
         [0.6913, 0.3087],
         [0.5746, 0.4254]]), None)

## Export -

In [None]:
#hide
# Run nbdev's `notebook2script` (presumably writes the `#export` cells out to the library modules — confirm)
from nbdev.export import notebook2script
notebook2script()