In [1]:
## Importing required functions
import torch
from torch import nn
import numpy as np
import pandas as pd
from sklearn.datasets import make_regression
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

## Generate dataset with linear property
X, y, coef = make_regression(
    n_samples=1500,
    n_features=4,  ## Using four features
    n_informative=4,
    noise=0.3,
    coef=True,
    random_state=0,
    bias=2)


## Making a train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

## Converting to Dataframe
df = pd.DataFrame(np.concatenate([X_train, X_test]))
df['target'] = pd.Series(np.concatenate([y_train, y_test]))
df['isTest'] = True
df.loc[:X_train.shape[0],'isTest']=False

df.head()

Unnamed: 0,0,1,2,3,target,isTest
0,0.651781,0.338225,0.001471,-0.766705,50.822657,False
1,0.962342,0.45731,0.770837,0.243168,149.871146,False
2,-1.144693,1.252002,-0.091267,-0.401571,-26.860053,False
3,0.487973,0.831351,-0.919651,2.642936,15.77322,False
4,2.069475,-0.446991,-0.939424,-1.052124,23.428575,False


In [2]:
from fastai.tabular.all import *
from nbdev import show_doc

In [7]:
show_doc(TabularPandas)

---

### TabularPandas

>      TabularPandas (df, procs=None, cat_names=None, cont_names=None,
>                     y_names=None, y_block=None, splits=None, do_setup=True,
>                     device=None, inplace=False, reduce_memory=True)

A `Tabular` object with transforms

In [17]:
dblck = TabularPandas(
    df = df,
    cont_names=[0,1,2,3],
    procs = None,
    y_names="target",
    splits=ColSplitter('isTest')(df)
)
dls = dblck.dataloaders()

In [41]:
print(dls.train_ds.xs.shape, dls.train_ds.targ.shape, dls.valid_ds.xs.shape, dls.valid_ds.targ.shape)

(1006, 4) (1006, 1) (494, 4) (494, 1)


In [64]:
class Linear(nn.Module):

    def __init__(self, n_in, n_out):
        super().__init__()
        self.linear = nn.Linear(n_in, n_out)

    def forward(self, x_cont, x_cat=None):
        return self.linear(x_cont)

## Initializing model
model = Linear(dls.xs.shape[1], 1)

In [77]:
show_doc(TabularModel)

---

### TabularModel

>      TabularModel (emb_szs:list, n_cont:int, out_sz:int, layers:list,
>                    ps:float|list=None, embed_p:float=0.0, y_range=None,
>                    use_bn:bool=True, bn_final:bool=False, bn_cont:bool=True,
>                    act_cls=ReLU(inplace=True), lin_first:bool=True)

Basic model for tabular data.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| emb_szs | list |  | Sequence of (num_embeddings, embedding_dim) for each categorical variable |
| n_cont | int |  | Number of continuous variables |
| out_sz | int |  | Number of outputs for final `LinBnDrop` layer |
| layers | list |  | Sequence of ints used to specify the input and output size of each `LinBnDrop` layer |
| ps | float \| list | None | Sequence of dropout probabilities for `LinBnDrop` |
| embed_p | float | 0.0 | Dropout probability for `Embedding` layer |
| y_range | NoneType | None | Low and high for `SigmoidRange` activation |
| use_bn | bool | True | Use `BatchNorm1d` in `LinBnDrop` layers |
| bn_final | bool | False | Use `BatchNorm1d` on final layer |
| bn_cont | bool | True | Use `BatchNorm1d` on continuous variables |
| act_cls | ReLU | ReLU(inplace=True) | Activation type for `LinBnDrop` layers |
| lin_first | bool | True | Linear layer is first or last in `LinBnDrop` layers |

In [157]:
learn = tabular_learner(dls, layers=[], config = {"use_bn":False, "bn_cont":False, "act_cls":None})

In [160]:
learn.model, learn.loss_func

(TabularModel(
   (embeds): ModuleList()
   (emb_drop): Dropout(p=0.0, inplace=False)
   (layers): Sequential(
     (0): LinBnDrop(
       (0): Linear(in_features=4, out_features=1, bias=True)
     )
   )
 ),
 FlattenedLoss of MSELoss())

In [159]:
learn.fit(10, lr=1e-3)

epoch,train_loss,valid_loss,time
0,13188.729492,12490.383789,00:00
1,13109.12207,12484.867188,00:00
2,13082.974609,12479.466797,00:00
3,13133.792969,12473.90625,00:00
4,13210.063477,12468.494141,00:00
5,13174.821289,12462.963867,00:00
6,13106.573242,12457.494141,00:00
7,13045.175781,12452.149414,00:00
8,13076.649414,12446.740234,00:00
9,13065.931641,12441.267578,00:00


In [162]:
coef

array([63.00614902, 44.14519607, 84.36475203,  9.33783172])

In [161]:
learn.model.state_dict()

OrderedDict([('layers.0.0.weight',
              tensor([[ 0.3929,  0.2314, -0.0646, -0.1009]], device='cuda:0')),
             ('layers.0.0.bias', tensor([-0.0458], device='cuda:0'))])