# DeepFM

DeepFM 是在 Wide&Deep 模型的基础上，用 FM（Factorization Machine）替换原来的 Wide 部分得到的模型。

In [1]:
# build train data

import os
import numpy as np
import torch

BASEDIR = os.getcwd()

# Read the whole training file once; `lines` stays at module level so later
# cells can iterate over the same raw text again.
with open(os.path.join(BASEDIR, 'assets/datasets/criteo_ctr/small_train.txt')) as f:
    lines = f.readlines()

# fields_dict:  field name -> {'index':    position of the field (insertion order),
#                              'features': feature name -> id local to that field,
#                              'last_idx': largest local id handed out so far}
# feature_dict: '<field>_<feature>' -> global column id (insertion order)
fields_dict = {}
feature_dict = {}
for line in lines:
    # The first token of a line is skipped here; every other token is a
    # 'field:feature:x' triple whose third part is ignored.  split() without
    # arguments drops the trailing newline and tolerates repeated or trailing
    # spaces, which would otherwise produce empty tokens that cannot be
    # unpacked into three parts.
    for elem in line.split()[1:]:
        field, feature, _ = elem.split(':')

        feature_mix = field + '_' + feature
        if feature_mix not in feature_dict:
            feature_dict[feature_mix] = len(feature_dict)

        if field not in fields_dict:
            fields_dict[field] = {'index': len(fields_dict), 'features': {}, 'last_idx': -1}

        field_info = fields_dict[field]
        if feature not in field_info['features']:
            field_info['last_idx'] += 1
            field_info['features'][feature] = field_info['last_idx']

# Give every field one extra 'none' entry with the next free local id
# (presumably the placeholder used when a sample lacks that field -- confirm
# against the code that consumes 'last_idx').
for field_info in fields_dict.values():
    if 'none' not in field_info['features']:
        field_info['last_idx'] += 1
        field_info['features']['none'] = field_info['last_idx']

def init_field_tensor(fields_dict):
    """Return an integer column holding each field's ``last_idx``.

    Args:
        fields_dict: mapping of field name to a dict with at least the keys
            ``'index'`` (row to write to) and ``'last_idx'`` (value to write).

    Returns:
        ``numpy`` integer array of shape ``(len(fields_dict), 1)``; row
        ``fields_dict[f]['index']`` contains ``fields_dict[f]['last_idx']``.
        Rows that no field points at stay 0.
    """
    defaults = np.zeros((len(fields_dict), 1))
    for info in fields_dict.values():
        defaults[info['index']] = info['last_idx']
    return defaults.astype(int)


# Per-sample field-id columns and labels, collected as lists first.
X_train = []
y_train = []

# COO coordinates / values of the one-hot matrix (sample row, feature column).
row = []
col = []
val = []

# The default column is identical for every sample, so build it once and
# copy it per row instead of recomputing it inside the loop.
default_fields = init_field_tensor(fields_dict)

row_idx = 0
for line in lines:
    # split() without arguments drops the newline and tolerates extra spaces.
    elems = line.split()
    if not elems:
        # A blank line carries no label; skipping it avoids float('') failing.
        continue
    y_train.append(float(elems[0]))

    init_tensors = default_fields.copy()
    for elem in elems[1:]:
        field, feature, _ = elem.split(':')
        field_idx = fields_dict[field]['index']
        feature_idx = fields_dict[field]['features'][feature]
        # Overwrite the default id with the id of the feature actually present.
        init_tensors[field_idx] = feature_idx

        feature_mix = field + '_' + feature
        row.append(row_idx)
        col.append(feature_dict[feature_mix])
        val.append(1)
    row_idx += 1
    X_train.append(init_tensors)

# Stack the (n_fields, 1) columns side by side -> (n_fields, n_samples).
X_train = np.concatenate(X_train, 1)
y_train = np.array(y_train)

i = torch.tensor([row, col], dtype=torch.long)
v = torch.tensor(val, dtype=torch.double)
# Pass the size explicitly: when it is inferred from the largest index, a
# trailing sample without any feature would silently drop a column and the
# result would no longer line up with X_train / y_train.
# After the transpose the layout is (n_features, n_samples).
X_train_sparse = torch.sparse_coo_tensor(
    i, v, size=(row_idx, len(feature_dict))
).to_dense().T

torch.Size([539, 200]) (18, 200)


In [None]:
# build embedding layer

import torch
import torch.nn as nn
import torch.nn.functional as F

import numpy as np


class FMBlock(nn.Module):
    """FM part: a linear (first-order) term plus pairwise field interactions.

    Tensors use the column-per-sample layout: the batch is the last axis
    of both inputs.

    Args:
        fields_dict: mapping of field name to a dict with the keys ``'index'``
            (position of the field) and ``'features'`` (its feature table).
        embedding_size: number of embedding rows each field occupies in
            ``input_field_tensor``.

    Attributes:
        add_block: ``nn.Linear`` producing the first-order term; its input
            width is the total number of entries in all feature tables.
        out_dimension: width of the block's output, ``1 + field_cnt ** 2``.
    """

    def __init__(self, fields_dict, embedding_size):
        # Module.__init__ must actually be called before submodules are assigned.
        super(FMBlock, self).__init__()
        self.fields_dict = fields_dict
        self.field_cnt = len(self.fields_dict)
        self.embedding_size = embedding_size

        features = sum(len(info['features']) for info in self.fields_dict.values())

        self.add_block = nn.Linear(features, 1, bias=True).double()
        self.out_dimension = 1 + self.field_cnt**2

    def forward(self, input_field_tensor, input_sparse_tensor):
        """Compute the FM features for a batch.

        Args:
            input_field_tensor: stacked field embeddings of shape
                ``(n_fields * embedding_size, batch)``; the rows of the field
                with index ``k`` are ``k * embedding_size`` onwards.
            input_sparse_tensor: one-hot features of shape
                ``(n_columns, batch)``; any numeric dtype.

        Returns:
            Tensor of shape ``(batch, 1 + field_cnt ** 2)``: the linear term
            followed by the dot product of every ordered pair of field
            embeddings (pairs enumerated in ``fields_dict`` iteration order).

        Raises:
            ValueError: if ``input_sparse_tensor`` has more columns than the
                linear layer has inputs.
        """
        dtype = self.add_block.weight.dtype

        # First-order term.  nn.Linear wants (batch, features) in its own dtype.
        sparse = input_sparse_tensor.to(dtype).T
        missing = self.add_block.in_features - sparse.shape[1]
        if missing < 0:
            raise ValueError(
                'input_sparse_tensor has %d columns but the linear term only has %d inputs'
                % (sparse.shape[1], self.add_block.in_features)
            )
        if missing:
            # The layer is sized from the feature tables, which can hold more
            # entries than the one-hot input has columns; zero-padding leaves
            # the surplus weights unused.
            sparse = torch.cat([sparse, sparse.new_zeros(sparse.shape[0], missing)], 1)
        out1 = self.add_block(sparse)

        # Second-order term: <v_i, v_j> for every ordered pair of fields.
        batch = input_field_tensor.shape[1]
        order = [self.fields_dict[field]['index'] for field in self.fields_dict]
        stacked = input_field_tensor.to(dtype).reshape(-1, self.embedding_size, batch)
        per_sample = stacked[order].permute(2, 0, 1)  # (batch, n_fields, emb)
        pairwise = torch.matmul(per_sample, per_sample.transpose(1, 2))
        out2 = pairwise.reshape(batch, self.field_cnt**2)

        return torch.cat([out1, out2], 1)


class FieldEmbeddingBlock(nn.Module):
    """One embedding table per field, with the outputs stacked along the rows.

    Args:
        fields_dict: mapping of field name to a dict with the keys ``'index'``
            (row of the field in the input) and ``'features'`` (feature table
            whose length is the number of embedding rows).
        embedding_size: dimensionality of every embedding vector.

    Attributes:
        fields_embedding: ``nn.ModuleDict`` keyed by ``str(field index)``.
            A module container (not a plain dict) is required so the tables
            show up in ``parameters()`` and are updated by the optimizer.
        out_dimension: number of output rows, ``n_fields * embedding_size``.
    """

    def __init__(self, fields_dict, embedding_size):
        super(FieldEmbeddingBlock, self).__init__()
        self.fields_dict = fields_dict
        self.embedding_size = embedding_size

        # ModuleDict only accepts string keys, hence str(index).
        self.fields_embedding = nn.ModuleDict({
            str(info['index']): nn.Embedding(len(info['features']), self.embedding_size)
            for info in self.fields_dict.values()
        })

        self.out_dimension = len(self.fields_dict) * self.embedding_size

    def forward(self, input_field_tensor):
        """Look up the embedding of every field for a batch.

        Args:
            input_field_tensor: integer ids of shape ``(n_fields, batch)``;
                row ``fields_dict[f]['index']`` holds the ids of field ``f``.

        Returns:
            Double tensor of shape ``(n_fields * embedding_size, batch)``;
            fields are stacked in ``fields_dict`` iteration order.
        """
        ids = input_field_tensor.long()
        pieces = []
        for info in self.fields_dict.values():
            field_idx = info['index']
            table = self.fields_embedding[str(field_idx)]
            # The lookup yields (batch, emb); transpose to the (emb, batch)
            # layout used by the stacked output.
            pieces.append(table(ids[field_idx, :]).T)
        return torch.cat(pieces, 0).double()


class DeepFM(nn.Module):
    """DeepFM: an FM block and an MLP sharing the same field embeddings.

    Args:
        fields_dict: mapping of field name to its per-field metadata; passed
            on unchanged to ``FieldEmbeddingBlock`` and ``FMBlock``.
        embedding_size: dimensionality of every field embedding.
    """

    def __init__(self, fields_dict, embedding_size):
        super(DeepFM, self).__init__()
        self.fields_dict = fields_dict
        self.field_cnt = len(fields_dict)
        self.embedding_size = embedding_size

        self.field_embedding_block = FieldEmbeddingBlock(fields_dict, embedding_size)
        self.fm_block = FMBlock(fields_dict, embedding_size)

        # Every Linear is followed by a ReLU; without one between the first
        # two layers they would collapse into a single linear map.
        self.deep_block = nn.Sequential(
            nn.Linear(self.embedding_size * self.field_cnt, 1024),
            nn.ReLU(),
            nn.Linear(1024, 1024, bias=True),
            nn.ReLU(),
            nn.Linear(1024, 512, bias=True),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
        ).double()
        # Must be double as well: it consumes the double FM / deep outputs.
        self.out_layer = nn.Linear(256 + self.fm_block.out_dimension, 1).double()

    def forward(self, input_field_tensor, input_sparse_tensor):
        """Predict a probability per sample.

        Args:
            input_field_tensor: per-field feature ids, one column per sample
                (forwarded to the embedding block).
            input_sparse_tensor: one-hot features, one column per sample
                (forwarded to the FM block).

        Returns:
            Tensor of shape ``(batch, 1)`` with values in ``(0, 1)``, assuming
            the FM block returns ``(batch, fm_block.out_dimension)``.
        """
        # The embedding block emits (n_fields * emb, batch).
        embedding_out = self.field_embedding_block(input_field_tensor)
        fm_out = self.fm_block(embedding_out, input_sparse_tensor)
        # nn.Linear expects the batch on the first axis, so transpose here.
        deep_out = self.deep_block(embedding_out.T)
        out = torch.cat([fm_out, deep_out], 1)
        # Project the concatenated features to one logit before squashing.
        return torch.sigmoid(self.out_layer(out))


In [None]:
# PyTorch Version

import torch.optim as optim

device = torch.device('cpu')
LEARNING_RATE = 1e-3

# NOTE: each "epoch" below is a single optimisation step on one random
# mini-batch, not a full pass over the data.
EPOCH = 100
# Integer step, never 0, so the modulo below is well defined for small EPOCH.
PRINT_STEP = max(1, EPOCH // 10)
N = len(y_train)

HIDDEN_DIMENSION = 8  # not used in this cell
EMBEDDING_SIZE = 8
deep_fm = DeepFM(fields_dict, EMBEDDING_SIZE).to(device)

BATCH_SIZE = 8
loss_fn = nn.BCELoss()
optimizer = optim.Adam(deep_fm.parameters(), lr=LEARNING_RATE)

for epoch in range(EPOCH):

    # Samples are the COLUMNS of X_train / X_train_sparse, so draw the batch
    # from the N samples rather than from X_train.shape[0] (the field count).
    index = np.random.randint(0, N, size=BATCH_SIZE)
    X_batch = torch.from_numpy(X_train[:, index]).long().to(device)
    # Keep the one-hot batch in floating point: it feeds a Linear layer.
    X_batch_sparse = X_train_sparse[:, torch.from_numpy(index)].to(device)
    y_batch = torch.from_numpy(y_train[index]).reshape(-1, BATCH_SIZE).to(device)

    # Clear gradients left over from the previous step; backward() accumulates.
    optimizer.zero_grad()

    y_hat = deep_fm(X_batch, X_batch_sparse).reshape(-1, BATCH_SIZE)
    loss = loss_fn(y_hat, y_batch)

    loss.backward()
    optimizer.step()

    if epoch % PRINT_STEP == 0:
        print('EPOCH: %d, loss: %f' % (epoch, loss.item()))
