In [8]:
from fastai.tabular import *
from pathlib import Path
import pandas as pd
from fastai import *
from fastai.tabular import *
from fastai.callbacks import *
from fastai.text import *
from fastai.metrics import accuracy


In [2]:
## Creating a tab+ NLP model 

load previous models 


# NLP model 

In [None]:
learn_nlp = load_learner('.', 'final_nlp.pkl')

## Load tabular learner 

In [54]:
learn_tab = load_learner('.','final_tab.pkl')

In [55]:
learn_tab

Learner(data=TabularDataBunch;

Train: LabelList (0 items)
x: TabularList

y: CategoryList

Path: .;

Valid: LabelList (0 items)
x: TabularList

y: CategoryList

Path: .;

Test: None, model=TabularModel(
  (embeds): ModuleList(
    (0): Embedding(3, 3)
    (1): Embedding(1997, 113)
    (2): Embedding(279, 37)
    (3): Embedding(56, 15)
    (4): Embedding(13, 7)
    (5): Embedding(2, 2)
  )
  (emb_drop): Dropout(p=0.05, inplace=False)
  (bn_cont): BatchNorm1d(0, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layers): Sequential(
    (0): Linear(in_features=177, out_features=446, bias=True)
    (1): ReLU(inplace=True)
    (2): BatchNorm1d(446, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Dropout(p=0.5, inplace=False)
    (4): Linear(in_features=446, out_features=24, bias=True)
  )
), opt_func=functools.partial(<class 'torch.optim.adam.Adam'>, betas=(0.9, 0.99)), loss_func=LabelSmoothingCrossEntropy(), metrics=[<function accuracy at 0x7fcd30

# Grab up to ReLU

In [59]:
learn_tab.model.layers = learn_tab.model.layers[:-3]
learn_tab.model

TabularModel(
  (embeds): ModuleList(
    (0): Embedding(3, 3)
    (1): Embedding(1997, 113)
    (2): Embedding(279, 37)
    (3): Embedding(56, 15)
    (4): Embedding(13, 7)
    (5): Embedding(2, 2)
  )
  (emb_drop): Dropout(p=0.05, inplace=False)
  (bn_cont): BatchNorm1d(0, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layers): Sequential()
)

# Concat dataset

In [4]:
class ConcatDataset(Dataset):
    def __init__(self, x1, x2, y): 
        self.x1,self.x2,self.y = x1,x2,y
    def __len__(self): 
        return len(self.y)
    def __getitem__(self, i): 
        return (self.x1[i], self.x2[i]), self.y[i]


# Create ConcatDatasets for tab and NLP 

In [None]:
train_ds = ConcatDataset(data_tab.train_ds.x, data_nlp.train_ds.x, data_tab.train_ds.y)
valid_ds = ConcatDataset(data_tab.valid_ds.x, data_nlp.valid_ds.x, data_tab.valid_ds.y)

In [None]:
# 

In [5]:
def collate(batch):    
    x,y = list(zip(*batch))
    x1,x2 = list(zip(*x))
    x1 = to_data(x1)
    x1 = list(zip(*x1))
    x1 = torch.stack(x1[0]), torch.stack(x1[1])
    x2, y = pad_collate(list(zip(x2, y)), pad_idx=1, pad_first=True)
    return (x1, x2), y

In [None]:
bs = bs

#Go through the text data by order of length with a bit of randomness.
train_sampler = SortishSampler(data_nlp.train_ds.x, key=lambda t: len(data_nlp.train_ds[t][0].data), bs=bs//2)


# Go through the text data by order of length.
valid_sampler = SortSampler(data_nlp.valid_ds.x, key=lambda t: len(data_nlp.valid_ds[t][0].data))

In [None]:
train_dl = DataLoader(train_ds, bs//2, sampler=train_sampler)
valid_dl = DataLoader(valid_ds, bs, sampler=valid_sampler)
data = DataBunch(train_dl, valid_dl, device=defaults.device, collate_fn=my_collate, path='.')

In [6]:
class ConcatModel(nn.Module):
    def __init__(self, model_tab, model_nlp, layers, drops): 
        super().__init__()
        self.model_tab = model_tab
        self.model_nlp = model_nlp
        lst_layers = []
        activs = [nn.ReLU(inplace=True),] * (len(layers)-2) + [None]
        for n_in,n_out,p,actn in zip(layers[:-1], layers[1:], drops, activs): 
            lst_layers += bn_drop_lin(n_in, n_out, p=p, actn=actn) # https://docs.fast.ai/layers.html#bn_drop_lin
        self.layers = nn.Sequential(*lst_layers)

    def forward(self, *x):
        x_tab = self.model_tab(*x[0])
        x_nlp = self.model_nlp(x[1])[0]
        x = torch.cat([x_tab, x_nlp], dim=1)
        return self.layers(x)   

## Layer groups 

Layer groups allow us to use the fastai freeze layers approach to train some layers more or turn off training on some layers. 

```flatten_model``` allows you to acccess the model as an array making it easy to grab the bits you want

In [34]:
m = simple_cnn([3,6,12])
m

Sequential(
  (0): Sequential(
    (0): Conv2d(3, 6, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): ReLU(inplace=True)
  )
  (1): Sequential(
    (0): Conv2d(6, 12, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): ReLU(inplace=True)
  )
  (2): Sequential(
    (0): AdaptiveAvgPool2d(output_size=1)
    (1): Flatten()
  )
)

In [35]:
m[0]

Sequential(
  (0): Conv2d(3, 6, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (1): ReLU(inplace=True)
)

In [36]:
print(flatten_model(m)[0])
print('-*-')
print(flatten_model(m)[1])


Conv2d(3, 6, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
-*-
ReLU(inplace=True)


In [32]:

layer_groups = [nn.Sequential(*flatten_model(learn_nlp.layer_groups[0])), #https://docs.fast.ai/torch_core.html#%3Clambda%3E
                nn.Sequential(*flatten_model(learn_nlp.layer_groups[1])),
                nn.Sequential(*flatten_model(learn_nlp.layer_groups[2])),
                nn.Sequential(*flatten_model(learn_nlp.layer_groups[3])),
                nn.Sequential(*(flatten_model(learn_nlp.layer_groups[4]) + 
                                flatten_model(model.mod_tab) +
                                flatten_model(model.layers)))] 


NameError: name 'learn_nlp' is not defined

In [None]:
loss_func = LabelSmoothingCrossEntropy()

In [None]:
learn = Learner(data, model, loss_func=loss_func, metrics=accuracy, layer_groups=layer_groups)