In [1]:
# coding: utf-8
import numpy as np
import tensorflow as tf

In [2]:
C = 10 # classes num
N = 784 # features num
fm_k = 10
learning_rate = 0.01
epochs = 2
batch_size = 128
num_steps = 2000
display_step = 100

$$\hat{y(x)} = w_0 + \sum\limits_{i=1}^n w_i x_i + \sum\limits_{i=1}^{n} \sum\limits_{j=i+1}^{n} <v_i, v_j> x_i x_j$$

$$\sum\limits_{i=1}^{n} \sum\limits_{j=i+1}^{n} <v_i, v_j> x_i x_j = \frac{1}{2} \sum\limits_{f=1}^{k} ((\sum\limits_{i=1}^{n} v_{i,f} x_i)^2 - \sum\limits_{i=1}^{n} v_{i,f}^2 x_i^2)$$

In [4]:
def fm(X, n_classes, n_features, k):
    W0 = tf.Variable(tf.zeros(n_classes), name='W0')
    W1 = tf.Variable(tf.zeros([n_features, n_classes]), name='W1')
    W2 = tf.Variable(tf.zeros([k, n_features, n_classes]), name='W2')
    
    # TODO: 将term的for循环写成矩阵计算
    # TODO: tensorflow对稀疏数据如何处理
    term2 = None
    for i in range(0, k):
        elem = tf.square(tf.matmul(X, W2[i])) - tf.matmul(tf.pow(X, 2), tf.square(W2[i]))
        term2 = term2 + elem if term2 is not None else elem
    logits = W0 + tf.matmul(X, W1) + term2
    return logits

In [5]:
def log_loss_fn(logits, labels, l2_reg=0):
    logits_loss = tf.losses.sparse_softmax_cross_entropy(labels, logits=logits)
    W1 = tf.get_default_graph().get_tensor_by_name('W1:0')
    W2 = tf.get_default_graph().get_tensor_by_name('W2:0')
    l2_loss = l2_reg * tf.nn.l2_loss(W1) + l2_reg * tf.nn.l2_loss(W2)
    return logits_loss + l2_loss, logits_loss, l2_loss

In [6]:
# def hinge_loss_fn(logits, labels, l2_reg=0):
# #     logits_loss = tf.losses.hinge_loss(labels, logits=logits)
#     labels = (tf.one_hot(labels, C) - 0.5) * 2
#     logits_loss = tf.reduce_mean(tf.maximum(0.0, 1 - logits * labels))
#     W = tf.get_default_graph().get_tensor_by_name('W:0')
#     l2_loss = l2_reg * tf.nn.l2_loss(W)
#     return logits_loss + l2_loss, logits_loss, l2_loss

In [7]:
def accuracy_fn(logits, labels):
    predictions = tf.nn.softmax(logits)
    is_correct = tf.equal(tf.argmax(predictions, axis=1), labels)
    accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))
    return accuracy

In [8]:
def train_input_fn(features, labels, batch_size, shuffle_size=1000):
    dataset = tf.data.Dataset.from_tensor_slices((features, labels))
    dataset = dataset.shuffle(shuffle_size).repeat().batch(batch_size).prefetch(batch_size)
    return dataset.make_one_shot_iterator().get_next()

In [9]:
# TODO: 其实用图片进行FM测试不太好，后面找个好点的数据集
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=False)
x_train, y_train = mnist.train.images, mnist.train.labels

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting /tmp/data/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.


In [10]:
class CumulateAvg:
    """记录每个step的accuracy、loss等数值，每隔display_step求一次平均值并打印
    """
    def __init__(self, var_list, display_step=100):
        self.var_list = var_list
        self.reset()
        self.display_step = display_step
        self.step = 0
        
    def reset(self):
        self.var_dict = {k: 0 for k in self.var_list}
        
    def avg_var(self):
        if self.step > 0:
            self.var_dict = {k: v / self.display_step for k, v in self.var_dict.items()}
        
    def cumulate(self, var_values):
        for i, v in enumerate(var_values):
            self.var_dict[self.var_list[i]] += v
        if self.step == 0 or (self.step + 1) % self.display_step == 0:
            self.avg_var()
            print_str = 'step: %d' % (self.step + 1)
            print_str += ''.join([', %s: %.4f' % (k, self.var_dict[k]) for k in self.var_list])
            print(print_str)
        self.step += 1

In [11]:
with tf.Graph().as_default() as g:
    with tf.Session().as_default() as sess:
        dataset = train_input_fn(x_train, y_train, batch_size)
        X, y = dataset
        X = tf.cast(X, tf.float32)
        y = tf.cast(y, tf.int64)
        
        logits = fm(X, C, N, fm_k)
        
        loss, logits_loss, l2_loss = log_loss_fn(logits, y, l2_reg=0.05)
#         loss, logits_loss, l2_loss = hinge_loss_fn(logits, y, l2_reg=0.05)

        accuracy = accuracy_fn(logits, y)
        train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
        sess.run(tf.global_variables_initializer())

        var_list = ['loss', 'accuracy', 'logits_loss', 'l2_loss']
        cumavg_obj = CumulateAvg(var_list, 100)
        for step in range(num_steps):
            res_list = sess.run([train_op, *[eval(x) for x in var_list]])
            cumavg_obj.cumulate(res_list[1:])

step: 1, loss: 2.3026, accuracy: 0.1172, logits_loss: 2.3026, l2_loss: 0.0000
step: 100, loss: 1.9000, accuracy: 0.6724, logits_loss: 1.8934, l2_loss: 0.0066
step: 200, loss: 1.4318, accuracy: 0.7980, logits_loss: 1.3993, l2_loss: 0.0326
step: 300, loss: 1.2280, accuracy: 0.8206, logits_loss: 1.1646, l2_loss: 0.0634
step: 400, loss: 1.0887, accuracy: 0.8360, logits_loss: 0.9967, l2_loss: 0.0920
step: 500, loss: 1.0046, accuracy: 0.8509, logits_loss: 0.8871, l2_loss: 0.1175
step: 600, loss: 0.9925, accuracy: 0.8463, logits_loss: 0.8541, l2_loss: 0.1384
step: 700, loss: 0.9828, accuracy: 0.8514, logits_loss: 0.8263, l2_loss: 0.1565
step: 800, loss: 0.9623, accuracy: 0.8496, logits_loss: 0.7904, l2_loss: 0.1720
step: 900, loss: 0.8980, accuracy: 0.8750, logits_loss: 0.7126, l2_loss: 0.1855
step: 1000, loss: 0.9362, accuracy: 0.8555, logits_loss: 0.7396, l2_loss: 0.1967
step: 1100, loss: 0.9372, accuracy: 0.8645, logits_loss: 0.7309, l2_loss: 0.2062
step: 1200, loss: 0.9263, accuracy: 0.86