FFM 的原理可以参考[美团的这篇博客](https://tech.meituan.com/2016/03/03/deep-understanding-of-ffm-principles-and-practices.html)。由于引入了 field，每个特征针对不同的 field 使用不同的隐向量，二阶交叉项无法再使用 FM 的化简技巧，公式为：  
$$y(\mathbf{x})=w_{0}+\sum_{i=1}^{n} w_{i} x_{i}+\sum_{i=1}^{n} \sum_{j=i+1}^{n}\left\langle\mathbf{v}_{i f_{j}}, \mathbf{v}_{j f_{i}}\right\rangle x_{i} x_{j}$$ 
参考：
1. https://www.jianshu.com/p/8b57473e385a  
2. https://www.csie.ntu.edu.tw/~r01922136/slides/ffm.pdf

In [1]:
import tensorflow as tf
from tensorflow.keras import backend as K

from tensorflow.keras.layers import Layer
from tensorflow.keras.regularizers import l2

import pandas as pd
from sklearn.model_selection import train_test_split

  from ._conv import register_converters as _register_converters


In [2]:
import sys
# Make the local ./util/ directory importable so the `utils` module can be found.
sys.path.append('./util/')
from utils import load_data

# load_data() yields three objects, unpacked in this order.
# NOTE(review): presumably the users / movies / ratings tables as DataFrames
# (they are passed to pd.merge later) -- confirm against util/utils.py.
users, movies, ratings = load_data()

In [3]:
# Attach movie columns, then user columns, to every rating.
# Both joins are left joins, so each rating row is kept.
data1 = ratings.drop(columns=['timestamp']).merge(movies, how='left', on='movieid')
data = data1.merge(users, how='left', on='userid')

# Features: everything except identifiers, raw text columns and the target.
X = data.drop(columns=['userid', 'movieid', 'genres', 'title', 'rating'])
# Target: the rating as a plain array.
Y = data['rating'].values

from sklearn import preprocessing

# Standardise the feature columns over the whole dataset.
X_norm = preprocessing.scale(X)

# Random train/test split with the library's default proportions (no fixed seed).
train_x, test_x, train_y, test_y = train_test_split(X_norm, Y)

  


In [4]:
# Preview the first five rows of the (unscaled) feature frame.
X.head(5)

Unnamed: 0,Thriller,War,Romance,Animation,Musical,Mystery,Adventure,Action,Horror,Sci-Fi,...,11,12,13,14,15,16,17,18,19,20
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
1,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
2,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
4,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0


In [5]:
# FFM needs a preset feature -> field mapping: the 48 one-hot feature columns
# come from four original fields (genres, gender, age, occupationid).
# Number of one-hot columns per field, in column order:
#   genres -> 18, gender -> 2, age -> 7, occupationid -> 21
field_sizes = [18, 2, 7, 21]

feature2field = {}
for field_id, size in enumerate(field_sizes):
    # Each field owns the next `size` consecutive feature columns.
    start = len(feature2field)
    for feature_id in range(start, start + size):
        feature2field[feature_id] = field_id

In [6]:
class FFM(Layer):
    def __init__(self, field, feature2field, k, l2, **kwargs):
        #self.units  = units
        self.field = field
        self.feature2field = feature2field
        self.k = k
        self.l2 = l2   #ffm容易过拟合，所以加了正则项
        super(FFM, self).__init__(**kwargs)

    def build(self, input_shape):
        input_dim = input_shape[-1]
        self.w0 = self.add_weight(name = 'W0', 
                                 shape=(1,1),
                                 initializer='glorot_uniform',
                                 trainable=True,
                                 )
        self.w = self.add_weight(name = 'W', 
                                 shape=(input_dim, 1),
                                 initializer='glorot_uniform',
                                 trainable=True,
                                 regularizer=l2(self.l2)
                                )
        self.v = self.add_weight(name='V',
                                 shape=(input_dim, self.field, self.k),
                                 initializer='glorot_uniform',
                                 trainable=True,
                                 regularizer=l2(self.l2)
                                )

        super(FFM, self).build(input_shape)

    def call(self, inputs, **kwargs):
        input_dim = inputs.shape[-1]
        x = inputs
        linear_terms = tf.add(tf.matmul(x, self.w), self.w0) #(None, units)
        #tf.matmul(x, self.w) 刚好就是(wi*xi)的累加
        field_interactions = tf.constant(0, dtype='float32')
        for i in range(input_dim):
            for j in range(i+1, input_dim):
                vv = tf.reduce_sum(tf.multiply(self.v[i, self.feature2field[j]], self.v[j, self.feature2field[i]])) 
                #vv 的点积<v,v>
                xx =  tf.multiply(x[:,i], x[:,j]) 
                field_interaction = tf.multiply(vv, xx) #(None,1)
                field_interactions += field_interaction
                print (vv.shape,xx.shape, field_interaction.shape, field_interactions.shape)
        #field_interactions = tf.reshape(field_interactions, (inputs.shape[0], self.units))       
        output = tf.add(linear_terms, field_interactions) 
        print (output.shape)
        return output
    def compute_output_shape(self, input_shape):
        return (None,self.units)

In [7]:
input_shape = train_x.shape[1]
learning_rate = 0.01

linear_input = tf.keras.layers.Input(shape = (input_shape,), name = "linear")
ffm = FFM(4, feature2field, 2, 0.001)(linear_input)
logits = tf.keras.layers.Dense(1)(linear_input)

model = tf.keras.Model(inputs = [linear_input], outputs = [logits])

optimizer = tf.keras.optimizers.RMSprop(learning_rate = 0.001)
model.compile(loss='mean_squared_error',
            optimizer=optimizer,
            metrics=['mean_absolute_error', 'mean_squared_error'])

#model = model_linear(input_shape, learning_rate)
#model.summary()

EPOCHS = 10
model.fit(
    train_x, train_y,
    epochs=EPOCHS, 
    validation_data=(test_x, test_y,),
    batch_size=256, shuffle=True
)

() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
(

() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
(

() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
(

() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
() (None,) (None,) (None,)
(

<tensorflow.python.keras.callbacks.History at 0x7f60b68ed780>

这个 FFM 的 MSE 跟 LR 的效果差不多，但比 FM 的效果要差；参考别人写的代码改了几次，结果还是一样，暂时不知道是哪里出了问题……也可能是数据质量导致的。