This file shows how the torch_model works. We go through all the functions in torch_model file so that understand what is going on. And we could make some adjustment in the future while the actual scene.

#### Prepare the data.
We use the prepare data function in data_utils file, which is going through step by step in "pytorch_example_data_utils_sbs".

In [13]:
from __future__ import print_function
import numpy as np
import pandas as pd
from utils.data_utils import prepare_data

In [17]:
df = pd.read_csv('data/adult.csv')
df['income_label'] = (df["income"].apply(lambda x: ">50K" in x)).astype(int)

wide_cols = ['age','hours-per-week','education', 'relationship','workclass',
             'occupation','native-country','gender']
crossed_cols = (['education', 'occupation'], ['native-country', 'occupation'])
embeddings_cols = [('education',10), ('relationship',8), ('workclass',10),
                   ('occupation',10),('native-country',10)]
continuous_cols = ["age","hours-per-week"]
target = 'income_label'
method = 'logistic'

wd_dataset = prepare_data(df, wide_cols,crossed_cols,embeddings_cols,continuous_cols,target,scale=True)

In [18]:
wd_dataset

{'train_dataset': train_dataset(wide=array([[23, 40,  0, ...,  0,  0,  0],
        [23, 40,  0, ...,  0,  0,  0],
        [42, 50,  0, ...,  0,  0,  0],
        ...,
        [26, 10,  0, ...,  0,  0,  0],
        [42, 66,  0, ...,  0,  0,  0],
        [32, 15,  0, ...,  0,  0,  0]]), deep=array([[ 3.        ,  5.        ,  0.        , ...,  0.        ,
         -1.14100392, -0.03408696],
        [11.        ,  0.        ,  0.        , ...,  0.        ,
         -1.14100392, -0.03408696],
        [ 9.        ,  1.        ,  0.        , ..., 27.        ,
          0.24480847,  0.77292975],
        ...,
        [ 7.        ,  2.        ,  0.        , ...,  3.        ,
         -0.92219144, -2.45513709],
        [ 3.        ,  1.        ,  3.        , ...,  0.        ,
          0.24480847,  2.06415648],
        [ 3.        ,  0.        ,  0.        , ...,  0.        ,
         -0.48456647, -2.05162874]]), labels=array([0, 0, 1, ..., 0, 0, 0])),
 'test_dataset': test_dataset(wide=array([[2

#### Build the Wide part

In [19]:
import torch.nn as nn
import torch.nn.functional as F

wide_dim = wd_dataset['train_dataset'].wide.shape[1]
n_class  = 1
wide_part = nn.Linear(wide_dim, n_class)

print(wide_part)

Linear(in_features=798, out_features=1, bias=True)


In [43]:
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch.autograd import Variable
from torch.utils.data import DataLoader

class Wide(nn.Module):
    """
    Wide-side consists in simply in "pluging" the features into the output neuron(s)

    Parameters:
    ----------
    wide_dim: int. Number of features per observation
    method  : str. Regression, logistic or multiclass
    n_class : int. number of classes. Defaults to 1 if logistic or regression
    """
    def __init__(self, wide_dim, n_class):

        super(Wide, self).__init__()
        self.wide_dim = wide_dim
        self.n_class = n_class

        self.linear = nn.Linear(self.wide_dim, self.n_class)

    def forward(self,X):

        out = torch.sigmoid(self.linear(X))

        return out

In [23]:
wide_dim = wd_dataset['train_dataset'].wide.shape[1]
n_class  = 1
wide_model = Wide(wide_dim, n_class)
print(wide_model)

Wide(
  (linear): Linear(in_features=798, out_features=1, bias=True)
)


In [24]:
wd_dataset['train_dataset'].labels.reshape(-1, 1).shape

(34189, 1)

In [25]:
train_dataset = np.hstack([wd_dataset['train_dataset'].labels.reshape(-1, 1), wd_dataset['train_dataset'].wide])
train_dataset

array([[ 0, 23, 40, ...,  0,  0,  0],
       [ 0, 23, 40, ...,  0,  0,  0],
       [ 1, 42, 50, ...,  0,  0,  0],
       ...,
       [ 0, 26, 10, ...,  0,  0,  0],
       [ 0, 42, 66, ...,  0,  0,  0],
       [ 0, 32, 15, ...,  0,  0,  0]])

In [45]:
optimizer = torch.optim.Adam(wide_model.parameters())
batch_size = 64
n_epochs = 50
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)
# from http://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html
for epoch in range(n_epochs):
    total=0
    correct=0
    for i, batch in enumerate(train_loader):

        X_w = Variable(batch[:, 1:]).float()
        y = Variable(batch[:, 0]).float()

        optimizer.zero_grad()
        y_pred = wide_model(X_w)
        loss = F.binary_cross_entropy(y_pred, y.reshape(-1, 1))
        loss.backward()
        optimizer.step()

        total+= y.size(0)
        y_pred_cat = (y_pred > 0.5).squeeze(1).float()
        correct += float((y_pred_cat == y).sum().item())

    print ('Epoch {} of {}, Loss: {}, accuracy: {}'.format(epoch+1,
                                                           n_epochs, round(loss.item(),3), round(correct/total,4)))



Epoch 1 of 1000, Loss: 0.197, accuracy: 0.8372
Epoch 2 of 1000, Loss: 0.218, accuracy: 0.8364
Epoch 3 of 1000, Loss: 0.344, accuracy: 0.8375
Epoch 4 of 1000, Loss: 0.389, accuracy: 0.8369
Epoch 5 of 1000, Loss: 0.513, accuracy: 0.8368
Epoch 6 of 1000, Loss: 0.388, accuracy: 0.8357
Epoch 7 of 1000, Loss: 0.135, accuracy: 0.8382
Epoch 8 of 1000, Loss: 0.295, accuracy: 0.8373
Epoch 9 of 1000, Loss: 0.246, accuracy: 0.8374
Epoch 10 of 1000, Loss: 0.507, accuracy: 0.837
Epoch 11 of 1000, Loss: 0.308, accuracy: 0.8374
Epoch 12 of 1000, Loss: 0.15, accuracy: 0.838
Epoch 13 of 1000, Loss: 0.358, accuracy: 0.8373
Epoch 14 of 1000, Loss: 0.488, accuracy: 0.8366
Epoch 15 of 1000, Loss: 0.376, accuracy: 0.8372
Epoch 16 of 1000, Loss: 0.146, accuracy: 0.8378
Epoch 17 of 1000, Loss: 0.319, accuracy: 0.8381
Epoch 18 of 1000, Loss: 0.205, accuracy: 0.8382
Epoch 19 of 1000, Loss: 0.166, accuracy: 0.8371
Epoch 20 of 1000, Loss: 0.565, accuracy: 0.837
Epoch 21 of 1000, Loss: 0.323, accuracy: 0.8385
Epoch

#### Deep Part

In [49]:
print(wd_dataset['embeddings_input'])
print(wd_dataset['deep_column_idx'])

[('education', 16, 10), ('relationship', 6, 8), ('occupation', 15, 10), ('workclass', 9, 10), ('native-country', 42, 10)]
{'education': 0, 'relationship': 1, 'workclass': 2, 'occupation': 3, 'native-country': 4, 'age': 5, 'hours-per-week': 6}


In [50]:
col_name, unique_vals, n_emb = wd_dataset['embeddings_input'][0]
emb_layer = nn.Embedding(unique_vals, n_emb)
print(emb_layer)

Embedding(16, 10)


In [52]:
class Deep(nn.Module):
    """
    Deep-side, which consists in a series of embeddings and numerical
    features passed through a series of dense layers.

    Params:
    --------
    embeddings_input (tuple): 3-elements tuple with the embeddings "set-up" -
    (col_name, unique_values, embeddings dim)
    continuous_cols (list) : list with the name of the continuum columns
    deep_column_idx (dict) : dictionary where the keys are column names and the values
    their corresponding index in the deep-side input tensor
    hidden_layers (list) : list with the number of units per hidden layer
    n_class (int) : number of classes. Defaults to 1 if logistic or regression
    """
    def __init__(self,embeddings_input,continuous_cols,deep_column_idx,hidden_layers,n_class):

        super(Deep, self).__init__()
        self.deep_column_idx = deep_column_idx
        self.embeddings_input = embeddings_input
        self.continuous_cols = continuous_cols
        self.hidden_layers = hidden_layers
        self.n_class = n_class

        # build the embeddings that will be passed through the deep side
        for col,val,dim in self.embeddings_input:
            setattr(self, 'emb_layer_'+col, nn.Embedding(val, dim))

        # the input dimension to the 1st hidden layer will be the sum of the
        # embeddings dimensions plus the number of continuous features
        input_emb_dim = np.sum([emb[2] for emb in self.embeddings_input])
        self.linear_1 = nn.Linear(input_emb_dim+len(continuous_cols), self.hidden_layers[0])
        for i,h in enumerate(self.hidden_layers[1:],1):
            setattr(self, 'linear_'+str(i+1), nn.Linear( self.hidden_layers[i-1], self.hidden_layers[i] ))

        self.output = nn.Linear(self.hidden_layers[-1], n_class)

    def forward(self, X):

        emb = [getattr(self, 'emb_layer_'+col)(X[:,self.deep_column_idx[col]].long())
               for col,_,_ in self.embeddings_input]

        cont_idx = [self.deep_column_idx[col] for col in self.continuous_cols]
        cont = [X[:, cont_idx].float()]

        deep_inp = torch.cat(emb+cont, 1)

        x_deep = F.relu(self.linear_1(deep_inp))
        for i in range(1,len(self.hidden_layers)):
            x_deep = F.relu( getattr(self, 'linear_'+str(i+1))(x_deep) )

        out = torch.sigmoid(self.output(x_deep))

        return out

In [53]:
deep_column_idx = wd_dataset['deep_column_idx']
embeddings_input= wd_dataset['embeddings_input']
hidden_layers = [100,50]
deep_model = Deep(embeddings_input, continuous_cols, deep_column_idx, hidden_layers, n_class)

In [54]:
print(deep_model)

Deep(
  (emb_layer_education): Embedding(16, 10)
  (emb_layer_relationship): Embedding(6, 8)
  (emb_layer_occupation): Embedding(15, 10)
  (emb_layer_workclass): Embedding(9, 10)
  (emb_layer_native-country): Embedding(42, 10)
  (linear_1): Linear(in_features=50, out_features=100, bias=True)
  (linear_2): Linear(in_features=100, out_features=50, bias=True)
  (output): Linear(in_features=50, out_features=1, bias=True)
)


In [55]:
train_dataset = np.hstack([wd_dataset['train_dataset'].labels.reshape(-1, 1), wd_dataset['train_dataset'].deep])
train_dataset

array([[ 0.        ,  3.        ,  5.        , ...,  0.        ,
        -1.14100392, -0.03408696],
       [ 0.        , 11.        ,  0.        , ...,  0.        ,
        -1.14100392, -0.03408696],
       [ 1.        ,  9.        ,  1.        , ..., 27.        ,
         0.24480847,  0.77292975],
       ...,
       [ 0.        ,  7.        ,  2.        , ...,  3.        ,
        -0.92219144, -2.45513709],
       [ 0.        ,  3.        ,  1.        , ...,  0.        ,
         0.24480847,  2.06415648],
       [ 0.        ,  3.        ,  0.        , ...,  0.        ,
        -0.48456647, -2.05162874]])

In [57]:
optimizer = torch.optim.Adam(deep_model.parameters())
batch_size = 64
n_epochs = 10
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

for epoch in range(n_epochs):
    total=0
    correct=0
    for i, batch in enumerate(train_loader):

        X_d = Variable(batch[:, 1:])
        y = Variable(batch[:, 0]).float()

        optimizer.zero_grad()
        y_pred = deep_model(X_d)
        loss = F.binary_cross_entropy(y_pred, y.reshape(-1, 1))
        loss.backward()
        optimizer.step()

        total+= y.size(0)
        y_pred_cat = (y_pred > 0.5).squeeze(1).float()
        correct+= float((y_pred_cat == y).sum().item())

    print ('Epoch {} of {}, Loss: {}, accuracy: {}'.format(epoch+1,
                                                           n_epochs, round(loss.item(),3), round(correct/total,4)))



Epoch 1 of 10, Loss: 0.334, accuracy: 0.8242
Epoch 2 of 10, Loss: 0.148, accuracy: 0.839
Epoch 3 of 10, Loss: 0.177, accuracy: 0.8415
Epoch 4 of 10, Loss: 0.16, accuracy: 0.8433
Epoch 5 of 10, Loss: 0.137, accuracy: 0.844
Epoch 6 of 10, Loss: 0.286, accuracy: 0.844
Epoch 7 of 10, Loss: 0.439, accuracy: 0.8456
Epoch 8 of 10, Loss: 0.114, accuracy: 0.8462
Epoch 9 of 10, Loss: 0.237, accuracy: 0.8448
Epoch 10 of 10, Loss: 0.561, accuracy: 0.8456


#### Combine the two parts

In [58]:
class WideDeep(nn.Module):

    def __init__(self, wide_dim, embeddings_input, continuous_cols, deep_column_idx, hidden_layers, n_class):

        super(WideDeep, self).__init__()
        self.wide_dim = wide_dim
        self.deep_column_idx = deep_column_idx
        self.embeddings_input = embeddings_input
        self.continuous_cols = continuous_cols
        self.hidden_layers = hidden_layers
        self.n_class = n_class

        for col,val,dim in self.embeddings_input:
            setattr(self, 'emb_layer_'+col, nn.Embedding(val, dim))

        input_emb_dim = np.sum([emb[2] for emb in self.embeddings_input])
        self.linear_1 = nn.Linear(input_emb_dim+len(continuous_cols), self.hidden_layers[0])
        for i,h in enumerate(self.hidden_layers[1:],1):
            setattr(self, 'linear_'+str(i+1), nn.Linear( self.hidden_layers[i-1], self.hidden_layers[i] ))

        self.output = nn.Linear(self.hidden_layers[-1]+self.wide_dim, n_class)

    def forward(self, X_w, X_d):

        emb = [getattr(self, 'emb_layer_'+col)(X_d[:,self.deep_column_idx[col]].long())
               for col,_,_ in self.embeddings_input]

        cont_idx = [self.deep_column_idx[col] for col in self.continuous_cols]
        cont = [X_d[:, cont_idx].float()]

        deep_inp = torch.cat(emb+cont, 1)

        x_deep = F.relu(self.linear_1(deep_inp))
        for i in range(1,len(self.hidden_layers)):
            x_deep = F.relu( getattr(self, 'linear_'+str(i+1))(x_deep) )

        wide_deep_input = torch.cat([x_deep, X_w.float()], 1)

        out = torch.sigmoid(self.output(wide_deep_input))

        return out

In [59]:
wide_deep_model = WideDeep(wide_dim, embeddings_input, continuous_cols, deep_column_idx, hidden_layers, n_class)
wide_deep_model

WideDeep(
  (emb_layer_education): Embedding(16, 10)
  (emb_layer_relationship): Embedding(6, 8)
  (emb_layer_occupation): Embedding(15, 10)
  (emb_layer_workclass): Embedding(9, 10)
  (emb_layer_native-country): Embedding(42, 10)
  (linear_1): Linear(in_features=50, out_features=100, bias=True)
  (linear_2): Linear(in_features=100, out_features=50, bias=True)
  (output): Linear(in_features=848, out_features=1, bias=True)
)

In [60]:
class WideDeepLoader(Dataset):
    """Helper to facilitate loading the data to the pytorch models.

    Parameters:
    --------
    data: namedtuple with 3 elements - (wide_input_data, deep_inp_data, target)
    """
    def __init__(self, data):

        self.X_wide = data.wide
        self.X_deep = data.deep
        self.Y = data.labels

    def __getitem__(self, idx):

        xw = self.X_wide[idx]
        xd = self.X_deep[idx]
        y  = self.Y[idx]

        return xw, xd, y

    def __len__(self):
        return len(self.Y)


train_dataset = wd_dataset['train_dataset']
widedeep_dataset = WideDeepLoader(train_dataset)
train_loader = torch.utils.data.DataLoader(dataset=widedeep_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

In [63]:
optimizer = torch.optim.Adam(wide_deep_model.parameters())

batch_size = 64
n_epochs = 10
for epoch in range(n_epochs):
    total=0
    correct=0
    for i, (X_wide, X_deep, target) in enumerate(train_loader):
        X_d = Variable(X_deep)
        X_w = Variable(X_wide)
        y = Variable(target).float()

        optimizer.zero_grad()
        y_pred = wide_deep_model(X_w, X_d)
        loss = F.binary_cross_entropy(y_pred, y.reshape(-1, 1))
        loss.backward()
        optimizer.step()

        total+= y.size(0)
        y_pred_cat = (y_pred > 0.5).squeeze(1).float()
        correct+= float((y_pred_cat == y).sum().item())

    print ('Epoch {} of {}, Loss: {}, accuracy: {}'.format(epoch+1,
                                                           n_epochs, round(loss.item(),3), round(correct/total,4)))



Epoch 1 of 10, Loss: 0.184, accuracy: 0.8248
Epoch 2 of 10, Loss: 0.213, accuracy: 0.836
Epoch 3 of 10, Loss: 0.361, accuracy: 0.8395
Epoch 4 of 10, Loss: 0.269, accuracy: 0.8399
Epoch 5 of 10, Loss: 0.32, accuracy: 0.8419
Epoch 6 of 10, Loss: 0.232, accuracy: 0.843
Epoch 7 of 10, Loss: 0.264, accuracy: 0.8428
Epoch 8 of 10, Loss: 0.351, accuracy: 0.8417
Epoch 9 of 10, Loss: 0.3, accuracy: 0.845
Epoch 10 of 10, Loss: 0.183, accuracy: 0.8438
