# Tabular models

In [1]:
from fastai import *
from fastai.tabular import *

Tabular data should be in a Pandas `DataFrame`.

In [2]:
# Fetch the Adult sample dataset via fastai's `untar_data`; `path` is the directory that holds `adult.csv`.
path = untar_data(URLs.ADULT_SAMPLE)
# Read the full table into a pandas DataFrame; every later cell works from `df`.
df = pd.read_csv(path/'adult.csv')

In [3]:
# Target column: passed as the label column when the dataset is built.
dep_var = '>=50k'

# Feature columns handed to the TabularList as categorical variables.
cat_names = [
    'workclass',
    'education',
    'marital-status',
    'occupation',
    'relationship',
    'race',
]

# Feature columns handed to the TabularList as continuous variables.
cont_names = [
    'age',
    'fnlwgt',
    'education-num',
]
# Preprocessing steps passed to the training TabularList through `procs=`.
# Their effect is visible in the batch preview further down: an extra `education-num_na` flag column
# and standardized continuous values (presumably from FillMissing and Normalize respectively).
procs = [FillMissing, Categorify, Normalize]

In [4]:
# Test items: a copy of rows 800-999 of `df` (the same index range that is held out when `data` is built).
# No `procs` are passed here, unlike the training list.
# NOTE(review): presumably `add_test` applies the training processors to these items; confirm in the fastai data block docs.
test = TabularList.from_df(df.iloc[800:1000].copy(), path=path, cat_names=cat_names, cont_names=cont_names)

In [5]:
# Assemble the DataBunch with the data block API: items -> split -> label -> test set -> databunch.
data = (TabularList.from_df(df, path=path, cat_names=cat_names, cont_names=cont_names, procs=procs)
                           # Hold out rows 800-999 as the validation set (the same rows that `test` was built from).
                           .split_by_idx(list(range(800,1000)))
                           # Labels are read from the `dep_var` column ('>=50k').
                           .label_from_df(cols=dep_var)
                           # Attach the test items; `label=0` is only a placeholder label for them.
                           .add_test(test, label=0)
                           .databunch())

In [17]:
# Preview 10 rows of a processed batch as a sanity check; the output shows the `education-num_na`
# column and the rescaled continuous features next to the target.
data.show_batch(rows=10)

workclass,education,marital-status,occupation,relationship,race,education-num_na,age,fnlwgt,education-num,target
Private,HS-grad,Never-married,Handlers-cleaners,Other-relative,White,False,-0.9959,2.5897,-0.4224,0
Private,HS-grad,Married-civ-spouse,Sales,Husband,White,False,0.5434,-0.5386,-0.4224,1
Private,HS-grad,Married-civ-spouse,Craft-repair,Husband,White,False,1.0564,-1.5862,-0.4224,0
Private,HS-grad,Married-civ-spouse,Craft-repair,Husband,White,False,0.6166,0.1419,-0.4224,0
Self-emp-not-inc,Assoc-voc,Married-civ-spouse,Farming-fishing,Husband,White,False,1.7894,-0.6830,0.3599,0
Private,Some-college,Married-civ-spouse,Machine-op-inspct,Husband,White,False,0.0303,-0.0388,-0.0312,0
?,Assoc-voc,Divorced,?,Own-child,White,False,-0.1896,-0.0239,0.3599,0
Private,HS-grad,Divorced,Farming-fishing,Not-in-family,White,False,-0.2629,-0.7507,-0.4224,0
?,Some-college,Married-civ-spouse,?,Husband,White,False,2.1559,0.6215,-0.0312,1
Private,Some-college,Never-married,Machine-op-inspct,Own-child,White,False,-1.1425,0.3341,-0.0312,0


In [23]:
# IPython `??` introspection: displays the source of `tabular_learner`.
# Purely exploratory: it defines nothing, so it can be dropped from a finished notebook.
??tabular_learner

In [18]:
# Create the tabular learner for `data` with layer sizes [200, 100]; accuracy is reported as the metric during training.
learn = tabular_learner(data, layers=[200,100], metrics=accuracy)

In [22]:
# Train for 2 epochs at learning rate 1e-3; the table below logs train/valid loss and accuracy per epoch.
learn.fit(2, 1e-3)

epoch,train_loss,valid_loss,accuracy
1,0.347746,0.371575,0.840000
2,0.351751,0.368382,0.845000
,,,


## Inference

In [27]:
# One example to run inference on: the first row of `df` (a single pandas Series, despite the plural name).
rows = df.iloc[0]

In [28]:
# Predict for the single row; the result shown below is (predicted category, class index, per-class probabilities).
learn.predict(rows)

(Category 1, tensor(1), tensor([0.4341, 0.5659]))

In [30]:
# Batch inference: returns a list of tensors, the first holding two-column class probabilities per row.
# NOTE(review): with no arguments this presumably runs on the validation set; confirm in the fastai docs.
# NOTE(review): the bare call prints every prediction row; consider binding the result and showing only a slice.
learn.get_preds()

[tensor([[0.9452, 0.0548],
         [0.1520, 0.8480],
         [0.9422, 0.0578],
         [0.9846, 0.0154],
         [0.5822, 0.4178],
         [0.5528, 0.4472],
         [0.7397, 0.2603],
         [0.9926, 0.0074],
         [0.9875, 0.0125],
         [0.5860, 0.4140],
         [0.3432, 0.6568],
         [0.2640, 0.7360],
         [0.8563, 0.1437],
         [0.8241, 0.1759],
         [0.9492, 0.0508],
         [0.7819, 0.2181],
         [0.6698, 0.3302],
         [0.3416, 0.6584],
         [0.6321, 0.3679],
         [0.6923, 0.3077],
         [0.9988, 0.0012],
         [0.9580, 0.0420],
         [0.9999, 0.0001],
         [0.6612, 0.3388],
         [0.8173, 0.1827],
         [0.6730, 0.3270],
         [0.9361, 0.0639],
         [0.5842, 0.4158],
         [0.7359, 0.2641],
         [0.8275, 0.1725],
         [1.0000, 0.0000],
         [0.9507, 0.0493],
         [0.7105, 0.2895],
         [0.8766, 0.1234],
         [0.9866, 0.0134],
         [0.9830, 0.0170],
         [0.4238, 0.5762],
 