/
base.py
94 lines (78 loc) · 3.28 KB
/
base.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
from . import logger
import tensorflow as tf
def l1_norm(V, W, lambda_=0.001):
    """L1 (lasso) regularization term for a factorization machine.

    :param V: interaction-factor tensor, shape [num_factors, p].
    :param W: linear-weight tensor, shape [p].
    :param lambda_: regularization strength applied to both tensors.
    :returns: scalar tensor with the summed, scaled absolute values.
    """
    w_term = tf.multiply(lambda_, tf.abs(W))
    v_term = tf.multiply(lambda_, tf.abs(V))
    # NOTE(review): w_term ([p]) broadcasts against v_term ([num_factors, p])
    # before the reduction, so W's contribution is repeated per factor row —
    # confirm this scaling is intended.
    return tf.reduce_sum(tf.add(w_term, v_term))
def l2_norm(V, W, lambda_=0.001):
    """L2 (ridge) regularization term for a factorization machine.

    :param V: interaction-factor tensor, shape [num_factors, p].
    :param W: linear-weight tensor, shape [p].
    :param lambda_: regularization strength applied to both tensors.
    :returns: scalar tensor with the summed, scaled squared values.
    """
    w_term = tf.multiply(lambda_, tf.pow(W, 2))
    v_term = tf.multiply(lambda_, tf.pow(V, 2))
    # NOTE(review): w_term ([p]) broadcasts against v_term ([num_factors, p])
    # before the reduction, so W's contribution is repeated per factor row —
    # confirm this scaling is intended.
    return tf.reduce_sum(tf.add(w_term, v_term))
def noop_norm(V, W, lambda_=None):
    """No-op regularizer: contributes nothing to the loss.

    Signature-compatible with ``l1_norm``/``l2_norm`` so it can be supplied
    as the ``penalty`` argument of ``train`` to disable regularization.
    """
    return 0
def fm(X, w0, W, V):
    """Degree-2 factorization machine prediction.

    :param X: feature tensor — a batch ([n, p]) or a single sample ([p]).
    :param w0: bias, shape [1].
    :param W: linear weights, shape [p].
    :param V: interaction factors, shape [num_factors, p].
    :returns: w0 + <X, W> + 0.5 * pairwise interaction term; shape [n, 1]
        for batched input, scalar-like for a single sample.
    """
    linear = tf.multiply(X, W)
    # O(k*p) pairwise-interaction identity (Rendle, 2010):
    # sum_{i<j} <v_i, v_j> x_i x_j = 0.5 * ((X V^T)^2 - X^2 (V^2)^T), summed
    # over the factor axis.
    squared_projection = tf.pow(tf.tensordot(X, tf.transpose(V), 1), 2)
    projected_squares = tf.tensordot(tf.pow(X, 2), tf.transpose(tf.pow(V, 2)), 1)
    pairwise = squared_projection - projected_squares
    if X.ndim > 1:
        # Batched input: reduce per row, keeping a column of predictions.
        linear = tf.reduce_sum(linear, 1, keepdims=True)
        pairwise = tf.reduce_sum(pairwise, 1, keepdims=True)
    else:
        # One dimensional data: e.g. passed when we call fm() for inference.
        linear = tf.reduce_sum(linear)
        pairwise = tf.reduce_sum(pairwise)
    return w0 + linear + 0.5 * pairwise
def train(
    train_dataset,
    num_factors=2,
    max_iter=10,
    penalty=None,
    C=1.0,
    loss=None,
    optimizer=None,
    random_state=None,
    dtype=tf.float32,
):
    """Fit a degree 2 polynomial factorization machine, implemented atop Tensorflow 2.

    This function contains the generic code to train a Factorization Machine.
    Regressors and classifiers can be learnt by minimizing appropriate loss
    functions (e.g. MSE or cross entropy).

    :param train_dataset: an instance of tensorflow.data.Dataset that contains training data.
    :param num_factors: number of latent factor vectors.
    :param max_iter: iterations to convergence.
    :param penalty: regularization callable (l1_norm, l2_norm or noop_norm).
        Default l2_norm.
    :param C: inverse of regularization strength; must be strictly positive.
    :param loss: a tensorflow.keras.losses object (e.g. MSE, binary_crossentropy).
        Default tf.keras.losses.MeanSquaredError.
    :param optimizer: a tensorflow.keras.optimizers object. Default Adam.
    :param random_state: int, random state.
    :param dtype: train_dataset types. Default float32.
    :returns w0, W, V: tensorflow.Variable instances for bias, weights and interaction factors.
    :raises ValueError: if C <= 0, max_iter < 1 or num_factors < 1.
    """
    tf.random.set_seed(random_state)
    # C == 0 would divide by zero in lambda_ = 1.0 / C below, so the guard
    # must be <= 0 (matching the "must be positive" error message), not < 0.
    if C <= 0:
        raise ValueError(f"Inverse regularization term must be positive; got (C={C})")
    if max_iter < 1:
        raise ValueError(f"max_iter must be > zero. Got {max_iter}")
    if num_factors < 1:
        raise ValueError(f"num_factors must be >= 1. Got {num_factors}")
    # Resolve the documented defaults. The previous code invoked penalty/loss/
    # optimizer unconditionally, so calling train() with the None defaults
    # raised a TypeError inside the training loop.
    if penalty is None:
        penalty = l2_norm  # "Default l2" per the docstring
    if loss is None:
        loss = tf.keras.losses.MeanSquaredError()
    if optimizer is None:
        optimizer = tf.keras.optimizers.Adam()
    # Get the number of feature columns from the dataset's element spec.
    p = train_dataset.element_spec[0].shape[1]
    # Bias and linear weights start at zero.
    w0 = tf.Variable(tf.zeros([1], dtype=dtype))
    W = tf.Variable(tf.zeros([p], dtype=dtype))
    # Interaction factors are randomly initialized: zero-init would make all
    # factor rows identical under gradient descent (symmetry breaking).
    V = tf.Variable(
        tf.random.normal(
            [num_factors, p], mean=0.0, stddev=0.01, dtype=dtype, seed=random_state
        )
    )
    for epoch_count in range(max_iter):
        for batch, (x, y) in enumerate(train_dataset):
            with tf.GradientTape() as tape:
                pred = fm(x, w0, W, V)
                loss_ = loss(y, pred) + penalty(V, W, lambda_=1.0 / C)
            grads = tape.gradient(loss_, [w0, W, V])
            optimizer.apply_gradients(zip(grads, [w0, W, V]))
            logger.debug(f"Epoch: {epoch_count}, batch: {batch} loss:, {loss_.numpy()}")
    return w0, W, V