In [1]:
#export
from fastai2.basics import *
from fastai2.tabular.core import *
from fastai2.tabular.model import *

In [None]:
from nbdev.showdoc import *

In [None]:
#default_exp tabular.learner

# Tabular learner

> The function to quickly get a `Learner` ready to train on tabular data

## Main functions

In [2]:
#export
class TabularLearner(Learner):
    "`Learner` for tabular data"
    def predict(self, row):
        "Predict on a single `row`; returns `(full_dec, dec_preds[0], preds[0])`"
        # `pd.DataFrame(row).T` turns a Series `row` into a one-row DataFrame, which is used to
        # build a new tabular object from the validation set.
        tst_to = self.dls.valid_ds.new(pd.DataFrame(row).T)
        # NOTE(review): presumably applies the procs already attached to the dataset -- confirm in `tabular.core`
        tst_to.process()
        dl = self.dls.valid.new(tst_to)
        inp,preds,_,dec_preds = self.get_preds(dl=dl, with_input=True, with_decoded=True)
        # Inputs followed by the decoded predictions form the batch handed to `dls.decode`
        b = (*tuplify(inp),*tuplify(dec_preds))
        full_dec = self.dls.decode(b)
        return full_dec,dec_preds[0],preds[0]

In [3]:
#export
@delegates(Learner.__init__)
def tabular_learner(dls, layers=None, emb_szs=None, config=None, n_out=None, y_range=None,
                    ps=None, embed_p=0., use_bn=True, bn_final=False, **kwargs):
    "Get a `TabularLearner` using `dls`, with a `TabularModel` created from the remaining params; `kwargs` (e.g. `metrics`) are passed to the `Learner`."
    # `config` supplies extra keyword arguments for `TabularModel`; fall back to `tabular_config()`
    if config is None: config = tabular_config()
    # Default layer sizes when none are given
    if layers is None: layers = [200,100]
    # Embedding sizes come from the training set; `emb_szs` (if any) is handed to `get_emb_sz` alongside it
    emb_szs = get_emb_sz(dls.train_ds, {} if emb_szs is None else emb_szs)
    # Fall back to `get_c(dls)` for the number of outputs when `n_out` is not given
    if n_out is None: n_out = get_c(dls)
    assert n_out, "`n_out` is not defined, and could not be infered from data, set `dls.c` or pass `n_out`"
    model = TabularModel(emb_szs, len(dls.cont_names), n_out, layers, y_range=y_range,
                         ps=ps, embed_p=embed_p, use_bn=use_bn, bn_final=bn_final, **config)
    return TabularLearner(dls, model, **kwargs)

In [4]:
#export
@typedispatch
def show_results(x:Tabular, y:Tabular, samples, outs, ctxs=None, max_n=10, **kwargs):
    "Display up to `max_n` rows of `x` with a `<name>_pred` column taken from `y` for each name in `x.y_names`"
    # `samples`, `outs`, `ctxs` and `kwargs` are accepted but not used by this implementation.
    # Copy the row slice so the prediction columns are added to an independent frame instead of
    # a slice of `x`'s data (avoids pandas' SettingWithCopyWarning).
    df = x.all_cols[:max_n].copy()
    for n in x.y_names: df[n+'_pred'] = y[n][:max_n].values
    display_df(df)

## Integration example with training

In [5]:
from fastai2.callback.all import *

In [6]:
# Get the Adult sample dataset location via `untar_data` (presumably downloads/extracts on first use)
# and load its CSV into a DataFrame
path = untar_data(URLs.ADULT_SAMPLE)
df = pd.read_csv(path/'adult.csv')

In [7]:
# Names of the categorical and continuous feature columns
cat_names = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race']
cont_names = ['age', 'fnlwgt', 'education-num']
# Preprocessing steps handed to `TabularPandas` in the next cell
procs = [Categorify, FillMissing, Normalize]
# Split the row indices of `df` with a `RandomSplitter` using its default settings
splits = RandomSplitter()(range_of(df))

In [8]:
# Build the tabular dataset from `df`: apply `procs`, use `salary` as the target and `splits` for the split
to = TabularPandas(df, procs, cat_names, cont_names, y_names="salary", splits=splits)

In [9]:
# Create the dataloaders with a batch size of 64 and preview a batch
dls = to.dataloaders(bs=64)
dls.show_batch()

Unnamed: 0,workclass,education,marital-status,occupation,relationship,race,age_na,fnlwgt_na,education-num_na,age,fnlwgt,education-num,salary
0,Private,HS-grad,Married-civ-spouse,Craft-repair,Husband,White,False,False,False,54.999999,117298.99989,9.0,>=50k
1,Private,Some-college,Married-civ-spouse,Adm-clerical,Wife,White,False,False,False,51.0,245873.00117,10.0,<50k
2,?,HS-grad,Married-civ-spouse,?,Husband,Black,False,False,False,50.0,260579.001577,9.0,<50k
3,Private,HS-grad,Never-married,Sales,Own-child,Black,False,False,False,32.0,48458.001949,9.0,<50k
4,Private,9th,Married-civ-spouse,Craft-repair,Husband,White,False,False,False,37.0,203827.999672,5.0,<50k
5,Local-gov,HS-grad,Separated,Adm-clerical,Unmarried,Black,False,False,False,47.0,543161.994897,9.0,<50k
6,Private,Some-college,Never-married,Adm-clerical,Own-child,White,False,False,False,25.0,197727.999871,10.0,<50k
7,Private,Some-college,Married-civ-spouse,Machine-op-inspct,Husband,White,False,False,False,25.0,570820.989423,10.0,<50k
8,Private,Some-college,Married-civ-spouse,Sales,Husband,White,False,False,False,41.0,118853.001011,10.0,<50k
9,Private,Some-college,Never-married,Handlers-cleaners,Not-in-family,Black,False,False,False,34.0,591710.990432,10.0,<50k


In [10]:
# Check the type of the training dataset held by the dataloaders
type(dls.train_ds)

fastai2.tabular.core.TabularPandas

In [11]:
# Inspect the classes recorded for each categorical column (the output also lists `*_na` columns)
dls.train_ds.classes

{'workclass': (#10) ['#na#',' ?',' Federal-gov',' Local-gov',' Never-worked',' Private',' Self-emp-inc',' Self-emp-not-inc',' State-gov',' Without-pay'],
 'education': (#17) ['#na#',' 10th',' 11th',' 12th',' 1st-4th',' 5th-6th',' 7th-8th',' 9th',' Assoc-acdm',' Assoc-voc'...],
 'marital-status': (#8) ['#na#',' Divorced',' Married-AF-spouse',' Married-civ-spouse',' Married-spouse-absent',' Never-married',' Separated',' Widowed'],
 'occupation': (#16) ['#na#',' ?',' Adm-clerical',' Armed-Forces',' Craft-repair',' Exec-managerial',' Farming-fishing',' Handlers-cleaners',' Machine-op-inspct',' Other-service'...],
 'relationship': (#7) ['#na#',' Husband',' Not-in-family',' Other-relative',' Own-child',' Unmarried',' Wife'],
 'race': (#6) ['#na#',' Amer-Indian-Eskimo',' Asian-Pac-Islander',' Black',' Other',' White'],
 'age_na': (#2) ['#na#',False],
 'fnlwgt_na': (#2) ['#na#',False],
 'education-num_na': (#3) ['#na#',False,True]}

In [12]:
# Build the learner with layer sizes [200,100]; `metrics` is forwarded to the `Learner` through `**kwargs`
learn = tabular_learner(dls, [200,100], metrics=accuracy)

In [13]:
# Train for a single epoch with `fit_one_cycle`
learn.fit_one_cycle(1)

epoch,train_loss,valid_loss,accuracy,time
0,0.357797,0.356309,0.833077,00:11


In [14]:
# Show a few rows next to the model's predictions (the `salary_pred` column in the output)
learn.show_results()

Unnamed: 0,workclass,education,marital-status,occupation,relationship,race,age_na,fnlwgt_na,education-num_na,age,fnlwgt,education-num,salary,salary_pred
0,2.0,16.0,3.0,2.0,1.0,5.0,1.0,1.0,1.0,1.566953,-0.662321,-0.031748,1.0,1.0
1,5.0,13.0,3.0,11.0,6.0,5.0,1.0,1.0,1.0,0.834567,0.291341,1.528647,0.0,1.0
2,5.0,3.0,5.0,2.0,4.0,5.0,1.0,1.0,1.0,-1.582307,1.353004,-0.811946,0.0,0.0
3,5.0,11.0,5.0,11.0,2.0,5.0,1.0,1.0,1.0,0.468374,-0.384724,2.308845,1.0,1.0
4,3.0,10.0,1.0,2.0,5.0,5.0,1.0,1.0,1.0,0.028942,-1.256372,1.138548,0.0,0.0
5,7.0,12.0,5.0,4.0,2.0,5.0,1.0,1.0,1.0,-0.410489,0.320155,-0.421847,0.0,0.0
6,5.0,9.0,3.0,5.0,1.0,5.0,1.0,1.0,1.0,-0.630205,-0.162849,0.358351,0.0,1.0
7,5.0,3.0,6.0,9.0,2.0,5.0,1.0,1.0,1.0,-0.264012,-0.782944,-0.811946,0.0,0.0
8,5.0,13.0,7.0,11.0,5.0,5.0,1.0,1.0,1.0,3.104964,0.114091,1.528647,0.0,0.0


In [15]:
# Predict on the first row of `df`; returns the decoded row with its prediction,
# the decoded prediction, and the raw prediction tensor
learn.predict(df.iloc[0])

(   workclass  education  marital-status  occupation  relationship  race  \
 0        5.0        8.0             3.0         0.0           6.0   5.0   
 
    age_na  fnlwgt_na  education-num_na       age    fnlwgt  education-num  \
 0     1.0        1.0               1.0  0.761329 -0.838534        0.74845   
 
    salary  
 0     1.0  ,
 tensor(1),
 tensor([0.4309, 0.5691]))

In [None]:
#TODO: Fix prod for tabular
#learn.export()
#learn1 = torch.load('export.pkl')
#learn1.predict(df.iloc[0])

## Export -

In [None]:
#hide
# Run nbdev's `notebook2script`; it prints one "Converted <notebook>" line per notebook processed
from nbdev.export import notebook2script
notebook2script()

Converted 00_torch_core.ipynb.
Converted 01_layers.ipynb.
Converted 02_data.load.ipynb.
Converted 03_data.core.ipynb.
Converted 04_data.external.ipynb.
Converted 05_data.transforms.ipynb.
Converted 06_data.block.ipynb.
Converted 07_vision.core.ipynb.
Converted 08_vision.data.ipynb.
Converted 09_vision.augment.ipynb.
Converted 09b_vision.utils.ipynb.
Converted 09c_vision.widgets.ipynb.
Converted 10_tutorial.pets.ipynb.
Converted 11_vision.models.xresnet.ipynb.
Converted 12_optimizer.ipynb.
Converted 13_learner.ipynb.
Converted 13a_metrics.ipynb.
Converted 14_callback.schedule.ipynb.
Converted 14a_callback.data.ipynb.
Converted 15_callback.hook.ipynb.
Converted 15a_vision.models.unet.ipynb.
Converted 16_callback.progress.ipynb.
Converted 17_callback.tracker.ipynb.
Converted 18_callback.fp16.ipynb.
Converted 19_callback.mixup.ipynb.
Converted 20_interpret.ipynb.
Converted 20a_distributed.ipynb.
Converted 21_vision.learner.ipynb.
Converted 22_tutorial.imagenette.ipynb.
Converted 23_tutoria