# Train a model to predict whether a person is a high-income earner based on their socioeconomic background

In [1]:
from fastai.tabular.all import *
# Data comes from the Adult dataset sample, introduced in the paper
# "Scaling Up the Accuracy of Naive-Bayes Classifiers: a Decision-Tree Hybrid".
# `path` is the dataset directory; it holds the adult.csv file read below.
path = untar_data(URLs.ADULT_SAMPLE)

# Categorical columns: each value is one of a discrete set of choices
# (e.g. occupation).
adult_cat_names = [
    'workclass',
    'education',
    'marital-status',
    'occupation',
    'relationship',
    'race',
]

# Continuous columns: each value is a numeric quantity (e.g. age).
adult_cont_names = ['age', 'fnlwgt', 'education-num']

# Build the data loaders for the model; "salary" is the column to predict.
dls = TabularDataLoaders.from_csv(
    path / 'adult.csv',
    path=path,
    y_names="salary",
    cat_names=adult_cat_names,
    cont_names=adult_cont_names,
    # Preprocessing transforms applied to the columns listed above.
    procs=[Categorify, FillMissing, Normalize],
)

In [2]:
# Create the tabular learner on top of the data loaders built above,
# reporting accuracy as the evaluation metric.
learn = tabular_learner(dls, metrics=accuracy)

In [3]:
# No pretrained model is available for this task (in general, pretrained
# models are not widely available for tabular modeling tasks), so we call
# fit_one_cycle instead of fine_tune.
learn.fit_one_cycle(n_epoch=3)

# Show a sample of rows with the actual and predicted salary side by side.
learn.show_results()

Unnamed: 0,workclass,education,marital-status,occupation,relationship,race,education-num_na,age,fnlwgt,education-num,salary,salary_pred
0,5.0,8.0,1.0,13.0,5.0,5.0,1.0,-0.554441,1.267947,0.753639,0.0,0.0
1,5.0,16.0,3.0,4.0,1.0,1.0,1.0,0.616247,-0.497615,-0.027918,0.0,0.0
2,2.0,2.0,5.0,2.0,2.0,3.0,1.0,-1.578794,-0.85375,-1.200254,0.0,0.0
3,5.0,12.0,5.0,15.0,5.0,5.0,1.0,-0.700777,0.95348,-0.418697,0.0,0.0
4,3.0,16.0,5.0,11.0,2.0,5.0,1.0,1.128424,1.667044,-0.027918,0.0,0.0
5,5.0,16.0,5.0,2.0,4.0,5.0,1.0,-1.139786,1.078305,-0.027918,0.0,0.0
6,5.0,6.0,7.0,9.0,2.0,5.0,1.0,2.445449,0.752601,-2.372589,0.0,0.0
7,5.0,16.0,5.0,4.0,2.0,5.0,1.0,-1.212954,0.194588,-0.027918,0.0,0.0
8,5.0,12.0,3.0,4.0,1.0,5.0,1.0,-0.188601,-0.259278,-0.418697,0.0,0.0


None