<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Deep-Learning-for-Tabular-Dataset" data-toc-modified-id="Deep-Learning-for-Tabular-Dataset-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Deep Learning for Tabular Dataset</a></span><ul class="toc-item"><li><span><a href="#Adult" data-toc-modified-id="Adult-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>Adult</a></span></li></ul></li></ul></div>

In [1]:
import pandas as pd
from fastai.tabular import untar_data, URLs

# Deep Learning for Tabular Dataset

## Adult

In [2]:
# Fetch the Adult sample dataset through fastai's `untar_data` helper.
# `path` is the local dataset folder (displayed below as a PosixPath) and is
# reused by later cells to locate `adult.csv`.
path = untar_data(URLs.ADULT_SAMPLE)
path

PosixPath('/Users/mingyuliu/.fastai/data/adult_sample')

In [3]:
# Read the raw Adult census sample into a DataFrame, report its
# (rows, columns) shape, and preview the first rows.
df = pd.read_csv(path/'adult.csv')
print('dimension:', df.shape)
df.head()

dimension: (32561, 15)


Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,salary
0,49,Private,101320,Assoc-acdm,12.0,Married-civ-spouse,,Wife,White,Female,0,1902,40,United-States,>=50k
1,44,Private,236746,Masters,14.0,Divorced,Exec-managerial,Not-in-family,White,Male,10520,0,45,United-States,>=50k
2,38,Private,96185,HS-grad,,Divorced,,Unmarried,Black,Female,0,0,32,United-States,<50k
3,38,Self-emp-inc,112847,Prof-school,15.0,Married-civ-spouse,Prof-specialty,Husband,Asian-Pac-Islander,Male,0,0,40,United-States,>=50k
4,42,Self-emp-not-inc,82297,7th-8th,,Married-civ-spouse,Other-service,Wife,Black,Female,0,0,50,United-States,<50k


In [4]:
from fastai.tabular import FillMissing, Categorify, Normalize, TabularDataBunch

# fastai preprocessing steps, applied in this order: fill missing values,
# encode the categorical columns, then normalize the continuous columns.
procs = [FillMissing, Categorify, Normalize]

# Hold out the last 2,000 rows of `df` as the validation set.
# NOTE: this is a positional split, not a random one.
valid_idx = range(len(df)-2000, len(df))

dep_var = 'salary'
cat_names = ['workclass', 'education', 'marital-status', 'occupation',
             'relationship', 'race', 'sex', 'native-country']
# List the continuous columns explicitly. When `cont_names` is omitted, fastai
# derives it from a set difference of the column names, so the column order of
# the continuous tensor (and therefore the model's input layout) is arbitrary
# and can differ between kernel sessions.
cont_names = ['age', 'education-num', 'capital-gain', 'fnlwgt',
              'hours-per-week', 'capital-loss']

data = TabularDataBunch.from_df(path, df, dep_var, valid_idx=valid_idx, procs=procs,
                                cat_names=cat_names, cont_names=cont_names)
print(data.train_ds.cont_names)

['age', 'education-num', 'capital-gain', 'fnlwgt', 'hours-per-week', 'capital-loss']


In [5]:
# Pull a single mini-batch from the training loader and split it into the
# categorical inputs, the continuous inputs and the targets.
first_batch = next(iter(data.train_dl))
(cat_x, cont_x), y = first_batch

# Show the first five rows of each tensor.
for batch_part in (cat_x, cont_x, y):
    print(batch_part[:5])

tensor([[ 7, 12,  5,  4,  4,  5,  2, 40,  1],
        [ 5, 12,  5,  6,  4,  5,  2, 40,  1],
        [ 7, 10,  3,  5,  1,  5,  2, 40,  1],
        [ 5, 10,  3,  5,  1,  5,  2, 40,  1],
        [ 5, 10,  5,  9,  2,  5,  2, 27,  1]])
tensor([[-1.4370, -0.4216, -0.1459, -1.4554, -0.0358, -0.2168],
        [-1.4370, -0.4216, -0.1459,  0.4468, -0.0358, -0.2168],
        [ 1.3456,  1.1457,  0.4432,  0.9636, -2.2172, -0.2168],
        [ 0.4669,  1.1457, -0.1459, -0.2055, -0.6821, -0.2168],
        [-0.7780,  1.1457, -0.1459,  2.3844,  0.2066, -0.2168]])
tensor([0, 0, 1, 0, 0])


In [6]:
from fastai.tabular import tabular_learner, accuracy

# Build a tabular model on top of `data`, with layer sizes 200 and 100, a
# custom embedding size of 10 for the `native-country` column, and accuracy
# reported as the metric.
learn = tabular_learner(
    data,
    layers=[200, 100],
    emb_szs={'native-country': 10},
    metrics=accuracy,
)

# One-cycle training run: 3 epochs, with 1e-2 as the learning-rate argument.
learn.fit_one_cycle(3, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.350215,0.334032,0.844,00:04
1,0.332847,0.321462,0.845,00:04
2,0.312094,0.315243,0.8455,00:03


In [7]:
# Smoke-test inference on a single row. Row 0 is outside `valid_idx` (the last
# 2,000 rows), so it belongs to the training split and this is not an
# evaluation. The displayed result has three parts: the predicted category,
# its class index, and the per-class probabilities.
learn.predict(df.iloc[0])

(Category >=50k, tensor(1), tensor([0.3201, 0.6799]))

In [24]:
import os

# Export the trained learner to `export.pkl` in the current working directory
# so it can be reloaded later for inference.
model_dir = os.getcwd()
model_checkpoint = os.path.join(model_dir, 'export.pkl')
learn.export(model_checkpoint)

In [25]:
from fastai.basic_train import load_learner

# Reload the exported learner from `model_dir`/export.pkl into a separate
# object (`learn2`) so the save/load round trip can be checked.
# NOTE(review): the `.pkl` extension suggests a pickle-based format — only
# load exports from a trusted source; confirm against the fastai docs.
learn2 = load_learner(model_dir, 'export.pkl')

In [26]:
# Predict on the same row with the reloaded learner; the result should match
# the prediction made with the original `learn` object.
learn2.predict(df.iloc[0])

(Category >=50k, tensor(1), tensor([0.3201, 0.6799]))

Reference: fastai inference tutorial (exporting and reloading a learner) — https://docs.fast.ai/tutorial.inference.html