论文地址：https://arxiv.org/pdf/1708.05123.pdf  
<img src="./images/deep_cross.png" style="width:500px;height:400px;">  
DNN 部分的结构大家已经比较熟悉，这里的重点是 cross 层的结构：
$$\mathbf{x}_{l+1}=\mathbf{x}_{0} \mathbf{x}_{l}^{T} \mathbf{w}_{l}+\mathbf{b}_{l}+\mathbf{x}_{l}=f\left(\mathbf{x}_{l}, \mathbf{w}_{l}, \mathbf{b}_{l}\right)+\mathbf{x}_{l}$$

In [6]:
import tensorflow as tf
from tensorflow import keras

from tensorflow.keras import backend as K
from tensorflow.keras.layers import Layer
from tensorflow.keras.regularizers import l2

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [2]:
# Put ./util/ on the module search path; `utils` is expected to live there,
# so the import below has to come after this path tweak.
import sys
sys.path.append('./util/')
from utils import load_data_embdding 

# Load the three tables used below. The later cells read 'userid', 'movieid',
# 'timestamp' and 'rating' from `ratings`, 'genres' and 'title' from the merged
# movie data, and 'gender', 'age', 'occupationid' from the merged user data.
# NOTE(review): presumably MovieLens-style data — confirm against utils.
users, movies, ratings = load_data_embdding()

In [7]:
# Attach movie and user attributes to every rating; 'timestamp' is not used
# as a feature, so it is dropped before merging.
data1 = pd.merge(ratings.drop(columns=['timestamp']), movies, how='left', on='movieid')
data = pd.merge(data1, users, how='left', on='userid')

X = data.drop(columns=['userid', 'movieid', 'title', 'rating'])
Y = data['rating'].values

# genres, gender, age and occupationid are the four features that need an
# embedding. They could share one embedding table; here each gets its own.

# Genre vocabulary. Iterating the column directly avoids mixing index labels
# with positional .iloc access, and sorting makes the genre -> id mapping
# reproducible across runs (plain set ordering is not).
# NOTE(review): assumes each 'genres' entry is an iterable of strings — confirm.
set_genres = sorted({genre for genres in movies['genres'] for genre in genres})
dic_genres = {genre: idx for idx, genre in enumerate(set_genres)}
dic_genres['UNK'] = len(dic_genres)  # extra id used as the padding value
X['genres'] = X['genres'].apply(lambda x: [dic_genres[i] for i in x])
# Pad (or truncate) every genre list to exactly 6 ids; the model's "genres"
# input expects this fixed length.
x_genres = tf.keras.preprocessing.sequence.pad_sequences(list(X['genres'].values),
                                                        value=dic_genres['UNK'],
                                                        padding='post',
                                                        maxlen=6)

# Single-valued categorical features are encoded as one-element id lists so
# that each becomes a (n_samples, 1) array.
dic_gender = {'F': 0, 'M': 1}
# NOTE(review): this iterates over the gender value itself (one character for
# 'F'/'M'); left unchanged because the raw column type is not visible here.
X['gender'] = X['gender'].apply(lambda x: [dic_gender[i] for i in x])
dic_age = {1: 0, 56: 1, 25: 2, 45: 3, 50: 4, 35: 5, 18: 6}
X['age'] = X['age'].apply(lambda x: [dic_age[x]])
list_occ = list(pd.unique(data['occupationid']))
dic_occ = {occ: idx for idx, occ in enumerate(list_occ)}
X['occupationid'] = X['occupationid'].apply(lambda x: [dic_occ[x]])

x_gender = list(X['gender'].values)
x_age = list(X['age'].values)
x_occupationid = list(X['occupationid'].values)

# Split all feature arrays and the target in ONE call so the rows are
# guaranteed to stay aligned (instead of relying on four separate calls
# happening to share the same seed).
(train_x_genres, test_x_genres,
 train_x_gender, test_x_gender,
 train_x_age, test_x_age,
 train_x_occupationid, test_x_occupationid,
 train_y, test_y) = train_test_split(np.array(x_genres),
                                     np.array(x_gender),
                                     np.array(x_age),
                                     np.array(x_occupationid),
                                     Y,
                                     random_state=11)

In [135]:
class crossNet(Layer):
    """Cross network of Deep & Cross Network (DCN).

    Stacks ``layer_num`` cross layers. With ``x_0`` the layer input, each
    layer computes::

        x_{l+1} = x_0 * (x_l^T w_l) + b_l + x_l

    so the output has the same shape as the input, ``(batch, input_dim)``.

    Args:
        l2: L2 regularization factor applied to every cross kernel ``w_l``.
        layer_num: Number of stacked cross layers.
        **kwargs: Forwarded to ``keras.layers.Layer``.
    """

    def __init__(self, l2, layer_num, **kwargs):
        super().__init__(**kwargs)
        self.l2 = l2
        self.layer_num = layer_num

    def build(self, input_shape):
        """Create one kernel and one bias of shape ``(input_dim, 1)`` per cross layer.

        Raises:
            ValueError: If the input is not rank 2 or its last dimension is unknown.
        """
        if len(input_shape) != 2:
            raise ValueError(
                'crossNet expects a 2D input (batch, input_dim), got shape %s'
                % (input_shape,))
        input_dim = input_shape[-1]
        if input_dim is None:
            raise ValueError('The last dimension of the crossNet input must be defined.')
        input_dim = int(input_dim)

        # Names, shapes and creation order are kept stable (all kernels first,
        # then all biases) so previously saved weights still load.
        self.w = [self.add_weight(name='kernel' + str(i),
                                  shape=(input_dim, 1),
                                  initializer='glorot_uniform',
                                  regularizer=l2(self.l2),
                                  trainable=True) for i in range(self.layer_num)]
        self.b = [self.add_weight(name='bias' + str(i),
                                  shape=(input_dim, 1),
                                  initializer='Zeros',
                                  trainable=True) for i in range(self.layer_num)]

        super().build(input_shape)

    def call(self, inputs, **kwargs):
        """Apply the stacked cross layers to ``inputs`` of shape ``(batch, input_dim)``."""
        input_dim = inputs.shape[-1]
        # Work on column vectors (batch, input_dim, 1) so that the batch axis
        # is preserved through the repeated matrix products.
        x0 = tf.reshape(inputs, [-1, input_dim, 1])
        x_l = x0                                            # (batch, input_dim, 1)
        for w, b in zip(self.w, self.b):
            # (1, input_dim) @ (batch, input_dim, 1) -> (batch, 1, 1):
            # the per-sample scalar x_l^T w_l. The rank-2 kernel is broadcast
            # over the batch axis.
            xw_l = tf.matmul(tf.transpose(w), x_l)
            # (batch, input_dim, 1) @ (batch, 1, 1) -> (batch, input_dim, 1);
            # the (input_dim, 1) bias broadcasts over the batch axis, and
            # adding x_l is the residual connection.
            x_l = tf.matmul(x0, xw_l) + b + x_l
        return tf.reshape(x_l, (-1, input_dim))

    def compute_output_shape(self, input_shape):
        """Return the output shape, which equals the input shape."""
        return input_shape

    def get_config(self):
        """Return the constructor arguments so the layer can be serialized."""
        config = super().get_config()
        config.update({'l2': self.l2, 'layer_num': self.layer_num})
        return config

$$\mathbf{x}_{l+1}=\mathbf{x}_{0} \mathbf{x}_{l}^{T} \mathbf{w}_{l}+\mathbf{b}_{l}+\mathbf{x}_{l}=f\left(\mathbf{x}_{l}, \mathbf{w}_{l}, \mathbf{b}_{l}\right)+\mathbf{x}_{l}$$

In [69]:
# Scratch cell: check how tf.matmul and `+` broadcast a rank-2 operand
# against a rank-3 (batched) operand, as used inside crossNet.call.
# m1 / m2 are sample tensors; the expressions in this cell do not use them.
m1 = tf.constant([[[1.,2,3],[1,2,1],[1,2,1]],[[1,2,3],[1,2,1],[1,2,1]]])
m2 = tf.constant([[1.,3,3],[2,1,3]])

# (3, 1) @ (2, 1, 3): the rank-2 operand is broadcast over the batch axis;
# expected result shape is (2, 3, 3).
tf.matmul(tf.ones((3,1)),tf.ones((2,1,3))) 
# (2, 3, 3) @ (3, 1) -> expected (2, 3, 1); the added (3, 1) tensor is
# broadcast over the batch axis as well.
tf.matmul(tf.ones((2,3,3)),tf.ones((3,1))) + tf.ones((3,1))

In [115]:
# Scratch cell: contract axis 1 of a (2, 3, 1) tensor with axis 0 of a
# (3, 1) tensor. The recorded output has shape (2, 1, 1) with every entry 3.
# The commented-out lines are alternative experiments kept for reference.
#tf.tensordot(tf.ones((2,3,3)),tf.ones((2,3)),(1,0)) 
tf.tensordot(tf.expand_dims(tf.ones((2,3)), axis=2),tf.ones((3,1)),axes=(1, 0))
#tf.matmul(tf.ones((2,3,1)),tf.ones((2,1,1)))

<tf.Tensor: id=4590, shape=(2, 1, 1), dtype=float32, numpy=
array([[[3.]],

       [[3.]]], dtype=float32)>

In [84]:
# One integer-id input per categorical feature, each embedded into 16
# dimensions. Vocabulary sizes are taken from the lookup tables built during
# preprocessing rather than hard-coded, so they cannot drift from the ids
# that are actually fed to the model.
input_genres = keras.layers.Input(shape=(6,), name="genres")
embedding_genres = keras.layers.Embedding(output_dim=16, input_dim=len(dic_genres), input_length=6)(input_genres)

input_gender = keras.layers.Input(shape=(1,), name="gender")
embedding_gender = keras.layers.Embedding(output_dim=16, input_dim=len(dic_gender), input_length=1)(input_gender)

input_age = keras.layers.Input(shape=(1,), name="age")
embedding_age = keras.layers.Embedding(output_dim=16, input_dim=len(dic_age), input_length=1)(input_age)

input_occ = keras.layers.Input(shape=(1,), name="occupationid")
embedding_occ = keras.layers.Embedding(output_dim=16, input_dim=len(dic_occ), input_length=1)(input_occ)

# Stack the embeddings along the sequence axis (6 genre slots + 3 single
# slots), then average over that axis to get one 16-d vector per sample.
embedding_combine = keras.layers.concatenate(inputs=[embedding_genres, embedding_gender, embedding_age,
                                                     embedding_occ], axis=1)
embedding_combine = keras.layers.GlobalAveragePooling1D()(embedding_combine)

In [137]:
# Cross tower: 4 stacked cross layers (L2 factor 0.001) with a linear head.
cross_features = crossNet(0.001, 4)(embedding_combine)
cn_layer = keras.layers.Dense(1)(cross_features)

# Deep tower: two ReLU blocks (64 then 32 units), each followed by batch
# normalization, with a linear head.
dnn_layer = embedding_combine
for hidden_units in (64, 32):
    dnn_layer = keras.layers.Dense(hidden_units, activation='relu')(dnn_layer)
    dnn_layer = keras.layers.BatchNormalization()(dnn_layer)
dnn_layer = keras.layers.Dense(1)(dnn_layer)

# Deep & Cross output: this is a regression task, so the two tower outputs
# are simply averaged. For classification one would add them and apply an
# activation instead.
outputs = keras.layers.average([cn_layer, dnn_layer])

optimizer = keras.optimizers.RMSprop(learning_rate=0.001)
model = tf.keras.Model(
    inputs=[input_genres, input_gender, input_age, input_occ],
    outputs=[outputs],
)

model.compile(
    loss='mean_squared_error',
    optimizer=optimizer,
    metrics=['mean_absolute_error', 'mean_squared_error'],
)

In [139]:
# Keep only the best weights (lowest validation loss) on disk.
checkpoint_path = "./model/deepcross.h5"
cp_callback = keras.callbacks.ModelCheckpoint(
    checkpoint_path,
    save_weights_only=True,
    save_best_only=True,
    verbose=1,
)
# Stop once val_loss has not improved for 3 consecutive epochs.
early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

# Feature arrays are passed in the same order as the model inputs:
# genres, gender, age, occupationid.
train_inputs = [train_x_genres, train_x_gender, train_x_age, train_x_occupationid]
test_inputs = [test_x_genres, test_x_gender, test_x_age, test_x_occupationid]

# The held-out split doubles as the validation set during training.
model.fit(
    train_inputs,
    train_y,
    batch_size=256,
    epochs=100,
    shuffle=True,
    validation_data=(test_inputs, test_y),
    callbacks=[early_stopping, cp_callback],
)

Train on 750156 samples, validate on 250053 samples
Epoch 1/100
Epoch 00001: val_loss improved from inf to 1.17800, saving model to ./model/deepcross.h5
Epoch 2/100
Epoch 00002: val_loss improved from 1.17800 to 1.16190, saving model to ./model/deepcross.h5
Epoch 3/100
Epoch 00003: val_loss improved from 1.16190 to 1.16145, saving model to ./model/deepcross.h5
Epoch 4/100
Epoch 00004: val_loss improved from 1.16145 to 1.16042, saving model to ./model/deepcross.h5
Epoch 5/100
Epoch 00005: val_loss improved from 1.16042 to 1.15155, saving model to ./model/deepcross.h5
Epoch 6/100
Epoch 00006: val_loss improved from 1.15155 to 1.15091, saving model to ./model/deepcross.h5
Epoch 7/100
Epoch 00007: val_loss did not improve from 1.15091
Epoch 8/100
Epoch 00008: val_loss did not improve from 1.15091
Epoch 9/100
Epoch 00009: val_loss did not improve from 1.15091


<tensorflow.python.keras.callbacks.History at 0x7f69046f62e8>

In [140]:
# Print the per-layer table of the model (layer type, output shape, parameter count, connections).
model.summary()

Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
genres (InputLayer)             [(None, 6)]          0                                            
__________________________________________________________________________________________________
gender (InputLayer)             [(None, 1)]          0                                            
__________________________________________________________________________________________________
age (InputLayer)                [(None, 1)]          0                                            
__________________________________________________________________________________________________
occupationid (InputLayer)       [(None, 1)]          0                                            
____________________________________________________________________________________________