# Factorization Machines with tf


## 根据user/items的id，建立稀疏矩阵
参考：https://gist.github.com/babakx/7a3fc9739b7778f6673a458605e18963

In [12]:
from itertools import count
from collections import defaultdict
from scipy.sparse import csr
import numpy as np

def vectorize_dic(dic,ix=None,p=None,n=0,g=0):
    # dic -- dictionary of feature lists. Keys are the name of features
    # ix -- index generator (default None)
    # p -- dimension of feature space (number of columns in the sparse matrix) (default None)
    # n -- num sample
    # g -- num group: eg: uese/items---> g=2
    
    if ix==None:
        ix = dict()
    
    
    nz = n * g # number of non-zores

    col_ix = np.empty(nz,dtype = int)

    i = 0
    for k,lis in dic.items():
        for t in range(len(lis)):
            ix[str(lis[t]) + str(k)] = ix.get(str(lis[t]) + str(k),0) + 1
            # 附加索引'l'以防止将具有相同id的不同列映射到同一个索引
            col_ix[i+t*g] = ix[str(lis[t]) + str(k)]
        i += 1

    row_ix = np.repeat(np.arange(0,n),g)
    data = np.ones(nz)
    if p == None:
        p = len(ix)

    ixx = np.where(col_ix < p)
    return csr.csr_matrix((data[ixx],(row_ix[ixx],col_ix[ixx])),shape=(n,p))

# Loading data
使用MovieLens100k的数据，将数据转化成稀疏矩阵

In [14]:
import pandas as pd
from sklearn.feature_extraction import DictVectorizer

cols = ['user','item','rating','timestamp']
train = pd.read_csv('data/ua.base',delimiter='\t',names=cols)
test = pd.read_csv('data/ua.test', delimiter='\t', names=cols)

x_train = vectorize_dic({'users':train['user'].values, 'items':train['item'].values},n=len(train.index),g=2)
x_test= vectorize_dic({'users':test['user'].values,'items':test['item'].values},ix,x_train.shape[1],n=len(test.index),g=2)

y_train = train.rating.values
y_test = test.rating.values

# Input To Dense
把输入的x_train和x_test转化成dense格式，使其能被tf使用。

In [15]:
x_train = x_train.todense()
x_test = x_test.todense()

print(x_train.shape, x_test.shape)

(90570, 2623) (9430, 2623)


# 用tensorflow定义FM模型

In [18]:
# 初始化参数
import tensorflow as tf

n,p = x_train.shape
# number 0f latent factor
k = 10

x = tf.placeholder('float',[None,p])
y = tf.placeholder('float',[None,1])

# bias and weight
w0 = tf.Variable(tf.zeros([1]))
w = tf.Variable(tf.zeros([p]))

#interaction factors
v = tf.Variable(tf.random_normal([k,p],mean=0,stddev=0.01))

y_hat = tf.Variable(tf.zeros([n, 1]))

## 定义输出y的计算公式
$$ \hat{y}(\mathbf{x}) = w_0 + \sum_{j=1}^{p}w_jx_j + \frac{1}{2} \sum_{f=1}^{k} ((\sum_{j=1}^{p}v_{j,f}x_j)^2-\sum_{j=1}^{p}v_{j,f}^2 x_j^2)$$

In [19]:
# 计算FM公式的输出
linear_terms = tf.add(w0,tf.reduce_sum(tf.multiply(w,x),1,keep_dims=True))
pair_interactions = 0.5 * tf.reduce_sum(
                            tf.subtract(
                                tf.pow(tf.matmul(x,tf.transpose(v)),2),
                                        tf.matmul(tf.pow(x,2),tf.transpose(tf.pow(v,2)))),axis=1, keep_dims=True)
y_hat = tf.add(linear_terms, pair_interactions)

Instructions for updating:
keep_dims is deprecated, use keepdims instead


# Loss function
$$ L = \sum_{i=1}^{n} (y_i - \hat{y}_i)^2 + \lambda_w ||W||^2 + \lambda_v ||V||^2$$

In [21]:
# L2 reg sum of squares of loss function
lambda_w = tf.constant(0.001, name='lambda_w')
lambda_v = tf.constant(0.001, name='lambda_v')

l2_norm = tf.reduce_sum(
                    tf.add(
                        tf.multiply(lambda_w, tf.pow(w,2)),
                        tf.multiply(lambda_v, tf.pow(v,2))))
error = tf.reduce_mean(tf.square(tf.subtract(y,y_hat)))
loss = tf.add(error,l2_norm)

# Optimization
用SGD进行优化: $\Theta_{i+1} = \Theta_{i} - \eta \frac{\delta L}{\delta \Theta}$

In [22]:
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(loss)

# Mini-batcher

In [23]:
def batcher(X_,y_=None,batch_size=-1):
    n_samples = X_.shape[0]
    if batch_size == -1:
        batch_size = n_samples
    if batch_size < 1:
        raise ValueError('Parameter batch_size={} is unsupported'.format(batch_size))
        
    for i in range(0,n_samples,batch_size):
        upper_bound = min(i + batch_size,n_samples)
        ret_x = X_[i:upper_bound]
        ret_y = None
        if y_ is not None:
            ret_y = y_[i:i + batch_size]
            yield (ret_x,ret_y)

# Tensorflow graph and traing

In [28]:
from tqdm import tqdm_notebook as tqdm

epochs = 10
batch_size = 1000

# tf graph
init = tf.global_variables_initializer()
sess = tf.Session()

sess.run(init)

for epochs in tqdm(range(epochs),unit='epoch'):
    perm = np.random.permutation(x_train.shape[0])
    # iterate over batches
    for bX,bY in batcher(x_train[perm],y_train[perm],batch_size):
        sess.run(optimizer, feed_dict={x: bX.reshape(-1,p), y: bY.reshape(-1,1)})

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))




# 评价模型
用RMSE评价

In [30]:
errors = []
for bX,bY in batcher(x_test,y_test):
    errors.append(sess.run(error,feed_dict={x: bX.reshape(-1,p), y: bY.reshape(-1,1)}))

RMSE = np.sqrt(np.array(errors))
print(RMSE)

[1.1257708]
