# Wide&Deep

Wide&Deep 第一层是全连接层，把实值型变量和经过 Embedding 之后的类别型变量拼接在一起后输入到第一层，之后的几层都是使用 ReLU 激活函数的全连接层。同时 Embedding 后的向量还会输入到交叉积层，进行两两交叉，最终把交叉积层的输出和 ReLU 的输出拼接到一起，送入最后的全连接输出层。

In [None]:
# build field dict

import os

# The dataset path below is resolved against the current working directory.
BASEDIR = os.getcwd()

# fields_dict[field] -> {'field': {feature: index}, 'last_idx': highest index issued}
fields_dict = {}

with open(BASEDIR + '/assets/datasets/criteo_ctr/small_train.txt') as train_file:
    for raw_line in train_file:
        record = raw_line.strip('\n')
        if not record:
            # An empty line ends the scan, exactly like reaching end of file.
            break

        # Token 0 is skipped here; every other token is a "field:feature:value" triple.
        for token in record.split(' ')[1:]:
            field, feature, _value = token.split(':')
            entry = fields_dict.get(field)
            if entry is None:
                # First feature seen for this field gets index 0.
                fields_dict[field] = {'field': {feature: 0}, 'last_idx': 0}
                continue
            if feature in entry['field']:
                continue
            # Unseen feature of a known field: hand out the next free index.
            entry['last_idx'] += 1
            entry['field'][feature] = entry['last_idx']

# Give every field one extra index under the key 'none'.
for entry in fields_dict.values():
    entry['last_idx'] += 1
    entry['field']['none'] = entry['last_idx']

field_cnt = len(fields_dict)
field_cnt

In [None]:
# build train data

# X_train[sample][field_position] is a list of feature indices; y_train[sample] is the label.
X_train = []
y_train = []
idx = 0  # number of samples read so far
with open(BASEDIR + '/assets/datasets/criteo_ctr/small_train.txt') as train_file:
    for raw_line in train_file:
        record = raw_line.strip('\n')
        if not record:
            # An empty line ends the scan, exactly like reaching end of file.
            break

        # The first token is parsed as the integer label; the rest are
        # "field:feature:value" triples.
        label, *tokens = record.split(' ')
        y_train.append(int(label))

        # One (initially empty) bucket per field for this sample.
        sample = [[] for _ in range(field_cnt)]
        X_train.append(sample)

        for token in tokens:
            field, feature, _value = token.split(':')
            # The field id itself is used as the bucket position.
            sample[int(field)].append(fields_dict[field]['field'][feature])

        # Buckets that received nothing get that field's 'none' index.
        for slot, bucket in enumerate(sample):
            if not bucket:
                bucket.append(fields_dict[str(slot)]['field']['none'])

        idx += 1

In [None]:
# build Wide&Deep Network

import torch
import torch.nn as nn
import torch.nn.functional as F

import numpy as np


class CrossUnit(nn.Module):
    """Cross one pair of feature vectors and project the result linearly.

    Args:
        INPUT_DIMENSION: Size of the last axis of each input; it is also the
            input and output width of the linear projection.  ``torch.cross``
            only accepts a crossed axis of size 3, so any other value makes
            ``forward`` fail.

    The projection is cast to double precision, so inputs are expected to be
    ``torch.float64`` tensors.
    """

    def __init__(self, INPUT_DIMENSION):
        # Must be an nn.Module so the projection's weights are registered
        # as parameters of whatever module owns this unit.
        super(CrossUnit, self).__init__()
        self.unit = nn.Linear(INPUT_DIMENSION, INPUT_DIMENSION, bias=True).double()

    def forward(self, input_features1, input_features2):
        """Return the linear projection of ``input_features1 x input_features2``.

        The cross product is taken along the last axis, the same axis the
        linear projection operates on.
        """
        # dim is passed explicitly: relying on the implicit default is
        # deprecated and would pick the first axis of size 3 instead.
        x = torch.cross(input_features1, input_features2, dim=-1)
        return self.unit(x)


class CrossLayer(nn.Module):
    """Apply a dedicated ``CrossUnit`` to every unordered pair of inputs.

    Args:
        FEATURE_NUM: Number of tensors that will be passed to ``forward``.
        EMBEDDING_SIZE: Last-axis size of each tensor, forwarded to ``CrossUnit``.
    """

    def __init__(self, FEATURE_NUM, EMBEDDING_SIZE):
        super(CrossLayer, self).__init__()
        # Integer division: the pair count is used as a range() bound.
        unit_num = FEATURE_NUM * (FEATURE_NUM - 1) // 2
        # ModuleList (not a plain list) so each unit's parameters are registered.
        self.cross_layer = nn.ModuleList(
            [CrossUnit(EMBEDDING_SIZE) for _ in range(unit_num)]
        )

    def forward(self, embedding_inputs):
        """Cross every pair ``(i, j)`` with ``i < j`` and concatenate along dim 1.

        Args:
            embedding_inputs: Sequence of tensors, one per feature.

        Returns:
            The per-pair ``CrossUnit`` outputs concatenated along dimension 1,
            in the order (0, 1), (0, 2), ..., (1, 2), ...

        Raises:
            ValueError: If the number of pairs does not match the number of
                units created in ``__init__``.
        """
        step = len(embedding_inputs)
        pairs = [(i, j) for i in range(step - 1) for j in range(i + 1, step)]
        if len(pairs) != len(self.cross_layer):
            raise ValueError(
                "expected inputs forming %d pairs, got %d"
                % (len(self.cross_layer), len(pairs))
            )

        # Units are consumed in pair order; a running position replaces the
        # old `i * step + j` index, which overshoots the number of units.
        out = [
            unit(embedding_inputs[i], embedding_inputs[j])
            for unit, (i, j) in zip(self.cross_layer, pairs)
        ]
        return torch.cat(out, 1)


class WideAndDeepNetwork(nn.Module):

    def __init__(self, fields_dict, embedding_size):
        super(WideAndDeepNetwork, self).__init__()
        