In [None]:
#|default_exp models.TabModel

# TabModel

This is an implementation created by Ignacio Oguiza (oguiza@timeseriesAI.co) based on fastai's TabularModel. 

We built it so that the head of the model is easy to replace, which is particularly useful when building hybrid models.

In [None]:
#|export
from tsai.imports import *
# from tsai.data.tabular import *
from tsai.models.layers import *
from tsai.models.utils import *

In [None]:
#|export
class TabModel(Sequential): # Sequential accepts multiple inputs
    """Tabular model assembled from a `TabBackbone` followed by a `TabHead`.

    The two stages are registered, in that order, under the names `backbone` and `head`.
    `emb_szs`, `n_cont`, `embed_p` and `bn_cont` configure the backbone; `emb_szs`, `n_cont`,
    `c_out` and every remaining argument are forwarded to the head.
    """
    def __init__(self, emb_szs, n_cont, c_out, layers=None, fc_dropout=None, embed_p=0., y_range=None, use_bn=True, bn_final=False, bn_cont=True,
                 lin_first=False, act=nn.ReLU(inplace=True), skip=False):

        # Options handed over to the head unchanged
        head_kwargs = dict(layers=layers, fc_dropout=fc_dropout, y_range=y_range, use_bn=use_bn, bn_final=bn_final,
                           lin_first=lin_first, act=act, skip=skip)

        # Instantiate the backbone first and the head second
        stages = OrderedDict()
        stages['backbone'] = TabBackbone(emb_szs, n_cont, embed_p=embed_p, bn_cont=bn_cont)
        stages['head'] = TabHead(emb_szs, n_cont, c_out, **head_kwargs)

        super().__init__(stages)


class TabBackbone(Module):
    """Embeds the categorical columns and concatenates them with the continuous columns.

    Args:
        emb_szs: iterable of `(ni, nf)` pairs; one `Embedding(ni, nf)` is created per pair.
        n_cont: number of continuous features expected in `x_cont`.
        embed_p: dropout probability applied to the concatenated embeddings.
        bn_cont: when True, `x_cont` goes through `nn.BatchNorm1d(n_cont)` before concatenation.
    """
    def __init__(self, emb_szs, n_cont, embed_p=0., bn_cont=True):
        self.embeds = nn.ModuleList([Embedding(ni, nf) for ni,nf in emb_szs])
        self.emb_drop = nn.Dropout(embed_p)
        self.bn_cont = nn.BatchNorm1d(n_cont) if bn_cont else None
        # Total width contributed by all embedding tables
        n_emb = sum(e.embedding_dim for e in self.embeds)
        self.n_emb,self.n_cont = n_emb,n_cont

    def forward(self, x_cat, x_cont=None):
        """Return the embedded `x_cat` columns and `x_cont` concatenated along dim 1.

        Raises:
            ValueError: if the backbone has neither embeddings nor continuous features, or if
                continuous features are expected (`n_cont != 0`) but `x_cont` is None.
        """
        if self.n_emb == 0 and self.n_cont == 0:
            # Without this guard the method would fail with an UnboundLocalError on `return x`
            raise ValueError("TabBackbone has no input features: both n_emb and n_cont are 0")
        x = None
        if self.n_emb != 0:
            # Column i of x_cat is looked up in the i-th embedding table
            x = [e(x_cat[:,i]) for i,e in enumerate(self.embeds)]
            x = torch.cat(x, 1)
            x = self.emb_drop(x)
        if self.n_cont != 0:
            if x_cont is None:
                # Previously this either crashed deep inside torch or silently returned None
                raise ValueError(f"x_cont is required because the backbone expects {self.n_cont} continuous features")
            if self.bn_cont is not None: x_cont = self.bn_cont(x_cont)
            x = x_cont if x is None else torch.cat([x, x_cont], 1)
        return x


class TabHead(Module):
    """Basic head for tabular data: a stack of `LinBnDrop` layers with an optional linear shortcut.

    Args:
        emb_szs: iterable of pairs whose second item is an embedding width; the widths are summed.
        n_cont: number of continuous features; input width is the summed embedding width plus `n_cont`.
        c_out: number of output features.
        layers: hidden layer sizes (list or tuple). Defaults to `[200, 100]`.
        fc_dropout: a single dropout value reused for every hidden layer, or one value per
            hidden layer. Defaults to 0 for every hidden layer.
        y_range: when given, `SigmoidRange(*y_range)` is appended after the last layer.
        use_bn: enables batch norm in every layer except the last one.
        bn_final: together with `use_bn`, enables batch norm in the last layer as well.
        lin_first: forwarded to every `LinBnDrop`.
        act: activation module used by every layer except the last one (which has none).
        skip: when True, adds `nn.Linear(n_in, c_out)` applied to the input and summed with the output.

    Raises:
        ValueError: if `fc_dropout` is a sequence whose length differs from `len(layers)`.
    """
    def __init__(self, emb_szs, n_cont, c_out, layers=None, fc_dropout=None, y_range=None, use_bn=True, bn_final=False, lin_first=False,
                 act=nn.ReLU(inplace=True), skip=False):

        # Normalise to real lists so tuples can be concatenated with lists below
        layers = [200,100] if layers is None else list(layers)
        ps = ifnone(fc_dropout, [0]*len(layers))
        ps = list(ps) if is_listy(ps) else [ps]*len(layers)
        if len(ps) != len(layers):
            # A shorter list used to make zip() drop trailing layers, including the output layer
            raise ValueError(f"fc_dropout has {len(ps)} values but layers has {len(layers)} entries; they must match")

        n_in = int(sum(emb_dim for _, emb_dim in emb_szs)) + n_cont
        sizes = [n_in] + layers + [c_out]
        # Every layer gets `act` except the final projection
        actns = [act for _ in range(len(sizes)-2)] + [None]
        # The final layer never uses dropout, hence the extra 0.
        _layers = [LinBnDrop(sizes[i], sizes[i+1], bn=use_bn and (i!=len(actns)-1 or bn_final), p=p, act=a, lin_first=lin_first)
                   for i,(p,a) in enumerate(zip(ps+[0.],actns))]
        if y_range is not None: _layers.append(SigmoidRange(*y_range))
        self.head = nn.Sequential(*_layers)
        # Width fed into the last linear layer; equals layers[-1] whenever hidden layers exist
        self.head_nf = sizes[-2]
        self.shortcut = nn.Linear(n_in, c_out) if skip else None

    def forward(self, x):
        "Apply the head to `x`, adding the linear shortcut of `x` when `skip` was requested."
        out = self.head(x)
        if self.shortcut is not None: out = out + self.shortcut(x)
        return out

In [None]:
from fastai.tabular.core import *
from tsai.data.tabular import *

In [None]:
# Load the ADULT_SAMPLE csv
path = untar_data(URLs.ADULT_SAMPLE)
df = pd.read_csv(path/'adult.csv')
# df['salary'] = np.random.rand(len(df)) # uncomment to simulate a cont dependent variable

# Column roles and preprocessing steps
cat_names = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race']
cont_names = ['age', 'fnlwgt', 'education-num']
y_names = ['salary']
procs = [Categorify, FillMissing, Normalize]

# A float target selects a regression block, anything else a category block
if isinstance(df['salary'].values[0], float):
    y_block = RegressionBlock()
else:
    y_block = CategoryBlock()

splits = RandomSplitter()(range_of(df))
pd.options.mode.chained_assignment=None
to = TabularPandas(
    df,
    procs=procs,
    cat_names=cat_names,
    cont_names=cont_names,
    y_names=y_names,
    y_block=y_block,
    splits=splits,
    inplace=True,
    reduce_memory=False,
)
to.show(5)

# Build the dataloaders and check the shapes of the first training batch
tab_dls = to.dataloaders(bs=16, val_bs=32)
b = first(tab_dls.train)
test_eq(b[0].shape, torch.Size([16, 7]))
test_eq(b[1].shape, torch.Size([16, 3]))
test_eq(b[2].shape, torch.Size([16, 1]))

Unnamed: 0,workclass,education,marital-status,occupation,relationship,race,education-num_na,age,fnlwgt,education-num,salary
20505,Private,HS-grad,Married-civ-spouse,Sales,Husband,White,False,47.0,197836.0,9.0,<50k
28679,Private,HS-grad,Married-civ-spouse,Craft-repair,Husband,White,False,28.0,65078.0,9.0,>=50k
11669,Private,HS-grad,Never-married,Adm-clerical,Not-in-family,White,False,38.0,202683.0,9.0,<50k
29079,Self-emp-not-inc,Bachelors,Married-civ-spouse,Prof-specialty,Husband,White,False,41.0,168098.0,13.0,<50k
7061,Private,HS-grad,Married-civ-spouse,Adm-clerical,Husband,White,False,31.0,243442.0,9.0,<50k


In [None]:
# Build the model from the dataloaders and run one training batch through it
tab_model = build_tabular_model(TabModel, dls=tab_dls)
b = first(tab_dls.train)
preds = tab_model.to(b[0].device)(*b[:-1])  # every batch item except the last is a model input
test_eq(preds.shape, (tab_dls.bs, tab_dls.c))

# Parameter counts before freezing, while frozen and after unfreezing
learn = Learner(tab_dls, tab_model, splitter=ts_splitter)
p1 = count_parameters(learn.model)
learn.freeze()
p2 = count_parameters(learn.model)
learn.unfreeze()
p3 = count_parameters(learn.model)

# Unfreezing must restore the original count; freezing must lower it without reaching zero
assert p3 == p1
assert 0 < p2 < p1

In [None]:
#|eval: false
#|hide
# Resolve this notebook's name, then run the notebook-to-script conversion
from tsai.export import get_nb_name
nb_name = get_nb_name(locals())
from tsai.imports import create_scripts
create_scripts(nb_name)

<IPython.core.display.Javascript object>

/Users/nacho/notebooks/tsai/nbs/120_models.TabModel.ipynb saved at 2022-11-09 13:12:57
Correct notebook to script conversion! 😃
Wednesday 09/11/22 13:12:59 CET
