In [11]:
from fastai.tabular.all import *

# Fetch the Adult sample dataset through fastai's `untar_data` and keep the
# returned location in `path`; the data-loading cell reads `path / "adult.csv"`.
# NOTE(review): presumably this downloads/extracts only when no cached copy
# exists locally — confirm against the fastai docs.
path = untar_data(URLs.ADULT_SAMPLE)
# Bare last expression so the notebook displays the resolved dataset path.
path

Path('/Users/mohitsakhuja/.fastai/data/adult_sample')

In [12]:
# Seed Python, NumPy and PyTorch RNGs *before* building the loaders.
# `TabularDataLoaders.from_csv` has no `seed` parameter in its documented
# signature: a `seed=...` keyword is forwarded through **kwargs and never
# reaches the random train/validation splitter, so the split (and hence the
# reported metrics) would change on every run. `set_seed` comes from the
# fastai star import at the top of the notebook.
set_seed(42)

# Build train/validation DataLoaders straight from the CSV.
#   y_names    - target column to predict
#   cat_names  - columns treated as categorical
#   cont_names - columns treated as continuous
#   procs      - preprocessing applied to the table: Categorify, FillMissing,
#                Normalize
#   bs         - batch size
dls = TabularDataLoaders.from_csv(
    path / "adult.csv",
    path=path,
    y_names="salary",
    cat_names=[
        "workclass",
        "education",
        "marital-status",
        "occupation",
        "relationship",
        "race",
    ],
    cont_names=["age", "fnlwgt", "education-num"],
    procs=[Categorify, FillMissing, Normalize],
    bs=512,
)

# Preview a few decoded rows.
# NOTE(review): the recorded output shows 10 rows even though max_n=6 is
# requested — confirm whether the tabular `show_batch` honours `max_n`.
dls.show_batch(max_n=6)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  to[n].fillna(self.na_dict[n], inplace=True)


Unnamed: 0,workclass,education,marital-status,occupation,relationship,race,education-num_na,age,fnlwgt,education-num,salary
0,Self-emp-not-inc,HS-grad,Never-married,Farming-fishing,Not-in-family,White,False,40.0,34036.99539,9.0,<50k
1,Private,7th-8th,Married-civ-spouse,Other-service,Husband,Black,False,55.999999,104944.999084,4.0,<50k
2,Private,HS-grad,Never-married,Machine-op-inspct,Not-in-family,White,False,18.0,155751.999251,9.0,<50k
3,Self-emp-not-inc,HS-grad,Married-civ-spouse,Sales,Husband,White,False,37.0,203827.999345,9.0,<50k
4,Private,Some-college,Never-married,Sales,Own-child,White,False,21.0,188923.000004,10.0,<50k
5,Private,Bachelors,Married-civ-spouse,Prof-specialty,Husband,White,False,38.0,165471.999483,13.0,>=50k
6,Private,HS-grad,Divorced,#na#,Not-in-family,White,True,47.0,47496.000933,10.0,<50k
7,Private,11th,Never-married,Sales,Own-child,White,False,17.0,368700.001722,7.0,<50k
8,Private,HS-grad,Divorced,Exec-managerial,Not-in-family,White,False,46.0,159869.00142,9.0,<50k
9,Private,Some-college,Never-married,Sales,Not-in-family,White,False,26.0,135844.999656,10.0,<50k


In [13]:
# Pick the compute device for this machine: CUDA first, then Apple's MPS
# backend, otherwise fall back to the CPU.
import torch

# `torch.backends.mps` is only present from PyTorch 1.12 onwards; look it up
# defensively so older builds fall through to "cpu" instead of raising
# AttributeError.
_mps_backend = getattr(torch.backends, "mps", None)

if torch.cuda.is_available():
    device_type = "cuda"
elif _mps_backend is not None and _mps_backend.is_available():
    device_type = "mps"
else:
    device_type = "cpu"

# `device` is the torch.device object later cells pass to `.to(...)`.
device = torch.device(device_type)

# Bare last expression so the notebook displays which backend was selected.
device_type

'mps'

In [14]:
# Put the DataLoaders on the selected device rather than moving only the
# model. fastai moves the model to `dls.device` when fitting starts
# (TrainEvalCallback.before_fit), so `learner.model.to(device)` on its own
# does not decide where training runs; the DataLoaders' device does.
dls.to(device)

# Tabular model with fastai's default architecture, tracking accuracy on the
# validation set.
learner = tabular_learner(
    dls,
    metrics=accuracy,
)

# Train for 4 epochs with the one-cycle learning-rate policy.
learner.fit_one_cycle(4)

epoch,train_loss,valid_loss,accuracy,time
0,0.606004,0.52586,0.794226,00:01
1,0.495381,0.421484,0.820792,00:00
2,0.429912,0.401857,0.824171,00:00
3,0.399717,0.391583,0.829699,00:00


In [15]:
# Display a sample of rows together with the model's predictions; in the
# recorded output the prediction appears as `salary_pred` next to `salary`.
learner.show_results()

Unnamed: 0,workclass,education,marital-status,occupation,relationship,race,education-num_na,age,fnlwgt,education-num,salary,salary_pred
0,7.0,12.0,3.0,15.0,1.0,5.0,1.0,0.6942,0.184014,-0.419243,0.0,0.0
1,5.0,13.0,1.0,5.0,2.0,5.0,1.0,0.6942,-1.557821,1.53636,1.0,0.0
2,5.0,15.0,5.0,11.0,2.0,2.0,1.0,-0.260909,-0.86731,1.92748,1.0,1.0
3,1.0,6.0,5.0,1.0,2.0,5.0,1.0,-0.995608,0.276967,-2.374847,0.0,0.0
4,5.0,1.0,5.0,15.0,2.0,5.0,1.0,-1.509897,1.756529,-1.592605,0.0,0.0
5,5.0,10.0,5.0,5.0,2.0,5.0,1.0,-0.481318,-0.52992,1.145239,1.0,0.0
6,5.0,1.0,5.0,9.0,4.0,5.0,1.0,-1.583367,-1.297045,-1.592605,0.0,0.0
7,5.0,16.0,3.0,15.0,1.0,5.0,1.0,-0.628258,-1.23513,-0.028123,0.0,0.0
8,3.0,12.0,1.0,2.0,2.0,5.0,1.0,2.677888,-1.503511,-0.419243,0.0,0.0
