In [44]:
import tensorflow as tf
from pathlib import Path
import numpy as np

batch_size = 960
hist_size = 30
data_dict = {}
feed_dict = {}
batch_idx = 0
feature_size = 1048573
epoch = 25

def data_set(data_dict, feature, string):
        if string not in data_dict:
             data_dict[string] =[[feature]]
        else:
             if(len(data_dict[string]) < batch_idx + 1):
                 data_dict[string].append([feature])
             else:
                 data_dict[string][batch_idx].append(feature)

def input_data_set(data_dict, features, prefix=""):
    for feature in features:
        feature = feature.split(":")
        feature = int(feature[0])
        group_id = feature >> 48
        feature = feature % feature_size 
        data_set(data_dict, feature, prefix+str(group_id))

def input_hist_data_set(data_dict, hist_features, hist_group_ids, pos_group_ids, hist_size, prefix=""):
    hist_len = len(hist_features)
    if hist_features[0] == '\n' or hist_features[0] == '' or hist_features[0] == ' ':
          hist_len = 0
    for i in range(0, hist_size):
        if i < hist_len:
            features = hist_features[i].split()
            for feature in features:
                 feature = feature.split(":")
                 feature = int(feature[0])
                 group_id = feature >> 48
                 feature = feature % feature_size
                 if group_id in pos_group_ids:
                       data_set(data_dict, feature, prefix+"position_"+str(i)+"_"+str(group_id))
                 else:
                       data_set(data_dict, feature, prefix+str(i)+"_"+str(group_id))
        else:
            for group_id in hist_group_ids:
                 data_set(data_dict, 0, prefix+str(i)+"_"+str(group_id))
            for group_id in pos_group_ids:
                 data_set(data_dict, 0, prefix+"position_"+str(i)+"_"+str(group_id))
             
    if prefix+"histLen" not in data_dict:
            data_dict[prefix+"histLen"] = [hist_len]
    else:
            data_dict[prefix+"histLen"].append(hist_len)

def data_dict_sparse_feature(data_dict, string):
    index, value = [], []
    for i in range(batch_size):
           for k in range(len(data_dict[string][i])):
                index.append(np.array([i, k], dtype = np.int64))
                value.append(data_dict[string][i][k])
    iv = tf.sparse.SparseTensor(index, value, [len(data_dict[string]), feature_size])
    data_dict[string] = iv


def train_data_process(data, main_group_ids, candidate_group_ids, clicked_group_ids, unclick_group_ids, feedback_group_ids, pos_group_ids):
    global data_dict, feed_dict, batch_idx, batch_size
    data = data.split('\t')
    label = float(data[0])
    weight = float(data[1])
    features = data[2].split('|')
    main_features = features[0].split()
    candidate_features = features[1].split()
    clicked_features = features[2].split(';')
    unclick_features = features[3].split(';')
    feedback_features = features[4].split(';')
    if "label" not in data_dict:
        data_dict["label"] = [label]
    else:
        data_dict["label"].append(label)
    
    if "weight" not in data_dict:
        data_dict["weight"] = [weight]
    else:
        data_dict["weight"].append(weight)
    
    input_data_set(data_dict, main_features, "main_")
    input_data_set(data_dict, candidate_features, "candidate_")
    input_hist_data_set(data_dict, clicked_features, clicked_group_ids, pos_group_ids, hist_size, "clicked_")
    input_hist_data_set(data_dict, unclick_features, unclick_group_ids, pos_group_ids, hist_size, "unclick_")
    input_hist_data_set(data_dict, feedback_features, feedback_group_ids, pos_group_ids, hist_size, "feedback_")


def data_gen(path):
    global batch_idx, batch_size, main_group_ids, candidate_group_ids, clicked_group_ids, unclick_group_ids, feedback_group_ids, pos_group_ids
    while True:
        f = path.open(mode='r')
        line = f.readline()
        while line:
            train_data_process(line, main_group_ids, candidate_group_ids, clicked_group_ids, unclick_group_ids, feedback_group_ids, pos_group_ids)
            if batch_idx < batch_size -1: 
                batch_idx += 1
            else:
                for group_id in main_group_ids:
                    data_name = "main_" + str(group_id)
                    data_dict_sparse_feature(data_dict, data_name)
                for group_id in candidate_group_ids:
                    data_name = "candidate_" + str(group_id)
                    data_dict_sparse_feature(data_dict, data_name)
                for i in range(hist_size):
                    for group_id in clicked_group_ids:
                        data_name = "clicked_" + str(i) + "_" + str(group_id)
                        data_dict_sparse_feature(data_dict, data_name) 
                    for group_id in unclick_group_ids:
                        data_name = "unclick_" + str(i) + "_" + str(group_id)
                        data_dict_sparse_feature(data_dict, data_name) 
                    for group_id in feedback_group_ids:
                        data_name = "feedback_" + str(i) + "_" + str(group_id)
                        data_dict_sparse_feature(data_dict, data_name)
                    for group_id in pos_group_ids:   
                        data_name = "clicked_position_" + str(i) + "_" + str(group_id)
                        data_dict_sparse_feature(data_dict, data_name)
                        data_name = "unclick_position_" + str(i) + "_" + str(group_id)
                        data_dict_sparse_feature(data_dict, data_name)
                        data_name = "feedback_position_" + str(i) + "_" + str(group_id)
                        data_dict_sparse_feature(data_dict, data_name)
                data_input = {k: v for k, v in data_dict.items() if k != "label" and k != "weight"}
                yield (data_input, data_dict["label"], data_dict["weight"])
            line = f.readline()
        f.close()

In [45]:
main_group_ids=[16,10001,10002,10003,21,10006,10019,10034,20147,20148,10035,20156,61,10047,10048,10049,10050,10055,10056,60, 46, 48, 50, 122]
candidate_group_ids=[3060,3061,3062,3063,3064]
clicked_group_ids=[3060,3061,3062,3063,3064]
unclick_group_ids=[3060,3061,3062,3063,3064]
feedback_group_ids=[3060,3061,3063,3064]
pos_group_ids=[3065]

path = Path("/Volumes/D/guohao/resys/dfn/example")
a = next(data_gen(path,))

FileNotFoundError: [Errno 2] No such file or directory: '\\Volumes\\D\\guohao\\resys\\dfn\\example'

In [16]:
list(a[0].keys())

['main_16',
 'main_21',
 'main_60',
 'main_61',
 'main_20147',
 'main_46',
 'main_48',
 'main_50',
 'main_122',
 'main_10002',
 'main_10001',
 'main_10003',
 'main_10006',
 'main_10055',
 'main_10056',
 'main_10048',
 'main_20156',
 'main_10050',
 'main_10034',
 'main_10035',
 'main_10019',
 'main_20148',
 'main_10033',
 'main_10065',
 'main_10049',
 'main_10047',
 'candidate_3060',
 'candidate_3061',
 'candidate_3062',
 'candidate_3063',
 'candidate_3064',
 'candidate_3065',
 'clicked_0_3060',
 'clicked_0_3061',
 'clicked_0_3062',
 'clicked_0_3063',
 'clicked_0_3064',
 'clicked_position_0_3065',
 'clicked_1_3060',
 'clicked_1_3061',
 'clicked_1_3062',
 'clicked_1_3063',
 'clicked_1_3064',
 'clicked_position_1_3065',
 'clicked_2_3060',
 'clicked_2_3061',
 'clicked_2_3062',
 'clicked_2_3063',
 'clicked_2_3064',
 'clicked_position_2_3065',
 'clicked_3_3060',
 'clicked_3_3061',
 'clicked_3_3062',
 'clicked_3_3063',
 'clicked_3_3064',
 'clicked_position_3_3065',
 'clicked_4_3060',
 'clicke

In [21]:
tf.sparse.to_dense(a[0]["clicked_position_17_3065"])

<tf.Tensor: shape=(256, 1048573), dtype=int32, numpy=
array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=int32)>

In [11]:
b = tf.sparse.SparseTensor(indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4])

In [25]:
attQ_w = tf.Variable(tf.keras.initializers.GlorotNormal()(shape=[100, 4]), name="a" + "attQ_w" + str(1), dtype=tf.float32)

In [110]:
import scipy as sp

In [46]:
import tensorflow as tf
from pathlib import Path
import numpy as np
import scipy as sp

batch_size = 960
hist_size = 30
data_dict = {}
feed_dict = {}
batch_idx = 0
feature_size = 1048573
epoch = 25

def data_set(data_dict, feature, string):
        if string not in data_dict:
             data_dict[string] =[[feature]]
        else:
             if(len(data_dict[string]) < batch_idx + 1):
                 data_dict[string].append([feature])
             else:
                 data_dict[string][batch_idx].append(feature)

def input_data_set(data_dict, features, prefix=""):
    global main_group_ids, candidate_group_ids
    for feature in features:
        feature = feature.split(":")
        feature = int(feature[0])
        group_id = feature >> 48
        feature = feature % feature_size
        if prefix == "main_":
            if group_id not in main_group_ids:
                continue
        elif prefix == "candidate_":
            if group_id not in candidate_group_ids:
                continue
        data_set(data_dict, feature, prefix+str(group_id))

def input_hist_data_set(data_dict, hist_features, hist_group_ids, pos_group_ids, hist_size, prefix=""):
    hist_len = len(hist_features)
    if hist_features[0] == '\n' or hist_features[0] == '' or hist_features[0] == ' ':
          hist_len = 0
    for i in range(0, hist_size):
        if i < hist_len:
            features = hist_features[i].split()
            for feature in features:
                 feature = feature.split(":")
                 feature = int(feature[0])
                 group_id = feature >> 48
                 feature = feature % feature_size
                 if group_id in pos_group_ids:
                       data_set(data_dict, feature, prefix+"position_"+str(i)+"_"+str(group_id))
                 else:
                       data_set(data_dict, feature, prefix+str(i)+"_"+str(group_id))
        else:
            for group_id in hist_group_ids:
                 data_set(data_dict, 0, prefix+str(i)+"_"+str(group_id))
            for group_id in pos_group_ids:
                 data_set(data_dict, 0, prefix+"position_"+str(i)+"_"+str(group_id))
             
    if prefix+"histLen" not in data_dict:
            data_dict[prefix+"histLen"] = [hist_len]
    else:
            data_dict[prefix+"histLen"].append(hist_len)

def data_dict_sparse_feature(data_dict, string, dtype):
    index, value = [], []
#     rows, cols, value = [], [], []
    for i in range(batch_size):
           for k in range(len(data_dict[string][i])):
#                 rows.append(i)
#                 cols.append(k)
                index.append(np.array([i, k], dtype = np.int32))
                value.append(data_dict[string][i][k])
#     iv = sp.sparse.coo_matrix((value, (rows, cols)), shape=[len(data_dict[string]), feature_size])
#     if dtype == tf.int32:
#         iv = iv.astype(np.int32)
#     elif dtype == tf.float32:
#         iv = iv.astype(np.float32)
    iv = tf.sparse.SparseTensor(index, value, [len(data_dict[string]), feature_size])
    iv = tf.cast(iv, dtype=dtype)
    data_dict[string] = iv


def train_data_process(data, data_dict, main_group_ids, candidate_group_ids, clicked_group_ids, unclick_group_ids, feedback_group_ids, pos_group_ids):
    data = data.split('\t')
    label = float(data[0])
    weight = float(data[1])
    features = data[2].split('|')
    main_features = features[0].split()
    candidate_features = features[1].split()
    clicked_features = features[2].split(';')
    unclick_features = features[3].split(';')
    feedback_features = features[4].split(';')
    if "label" not in data_dict:
        data_dict["label"] = [label]
    else:
        data_dict["label"].append(label)
    
    if "weight" not in data_dict:
        data_dict["weight"] = [weight]
    else:
        data_dict["weight"].append(weight)
    
    input_data_set(data_dict, main_features, "main_")
    input_data_set(data_dict, candidate_features, "candidate_")
    input_hist_data_set(data_dict, clicked_features, clicked_group_ids, pos_group_ids, hist_size, "clicked_")
    input_hist_data_set(data_dict, unclick_features, unclick_group_ids, pos_group_ids, hist_size, "unclick_")
    input_hist_data_set(data_dict, feedback_features, feedback_group_ids, pos_group_ids, hist_size, "feedback_")


def data_gen(path):
    global batch_idx, data_dict, batch_size, main_group_ids, candidate_group_ids, clicked_group_ids, unclick_group_ids, feedback_group_ids, pos_group_ids
    while True:
        f = path.open(mode='r')
        line = f.readline()
        while line:
            train_data_process(line, data_dict, main_group_ids, candidate_group_ids, clicked_group_ids, unclick_group_ids, feedback_group_ids, pos_group_ids)
            if batch_idx < batch_size -1: 
                batch_idx += 1
            else:
                for group_id in main_group_ids:
                    data_name = "main_" + str(group_id)
                    data_dict_sparse_feature(data_dict, data_name, tf.int32)
                for group_id in candidate_group_ids:
                    data_name = "candidate_" + str(group_id)
                    data_dict_sparse_feature(data_dict, data_name, tf.int32)
                for i in range(hist_size):
                    for group_id in clicked_group_ids:
                        data_name = "clicked_" + str(i) + "_" + str(group_id)
                        data_dict_sparse_feature(data_dict, data_name, tf.int32) 
                    for group_id in unclick_group_ids:
                        data_name = "unclick_" + str(i) + "_" + str(group_id)
                        data_dict_sparse_feature(data_dict, data_name, tf.int32) 
                    for group_id in feedback_group_ids:
                        data_name = "feedback_" + str(i) + "_" + str(group_id)
                        data_dict_sparse_feature(data_dict, data_name, tf.int32)
                    for group_id in pos_group_ids:   
                        data_name = "clicked_position_" + str(i) + "_" + str(group_id)
                        data_dict_sparse_feature(data_dict, data_name, tf.int32)
                        data_name = "unclick_position_" + str(i) + "_" + str(group_id)
                        data_dict_sparse_feature(data_dict, data_name, tf.int32)
                        data_name = "feedback_position_" + str(i) + "_" + str(group_id)
                        data_dict_sparse_feature(data_dict, data_name, tf.int32)
                data_dict["clicked_histLen"] = tf.convert_to_tensor(data_dict["clicked_histLen"], dtype=tf.float32)
                data_dict["unclick_histLen"] = tf.convert_to_tensor(data_dict["unclick_histLen"], dtype=tf.float32)
                data_dict["feedback_histLen"] = tf.convert_to_tensor(data_dict["feedback_histLen"], dtype=tf.float32)
                data_dict["label"] = tf.convert_to_tensor(data_dict["label"], dtype=tf.float32)
                data_dict["weight"] = tf.convert_to_tensor(data_dict["weight"], dtype=tf.float32)
                data_input = {k: v for k, v in data_dict.items() if k != "label" and k != "weight"}
                labels = data_dict["label"]
                weights = data_dict["weight"]
                batch_idx = 0
                data_dict = {}
                yield (data_input, labels, weights)
            line = f.readline()
        f.close()

In [47]:
main_group_ids=[16,10001,10002,10003,21,10006,10019,10034,20147,20148,10035,20156,61,10047,10048,10049,10050,10055,10056,60]
candidate_group_ids=[3060,3061,3062,3063,3064]
clicked_group_ids=[3060,3061,3062,3063,3064]
unclick_group_ids=[3060,3061,3062,3063,3064]
feedback_group_ids=[3060,3061,3063,3064]
pos_group_ids=[3065]
path = Path(r"E:\ML_study\deepctr\dfn_tf2\example")
# a = next(data_gen(path))

In [48]:
train_data, train_label, sample_weight = next(data_gen(path))

In [18]:
# build input
from collections import OrderedDict
group_feature = OrderedDict()
for group_id in candidate_group_ids:
    group_feature["candidate_" + str(group_id)] = tf.keras.layers.Input(shape=(feature_size, ), dtype=tf.int32, sparse=True, name=("candidate_" + str(group_id)))    

In [19]:
class Embedding_Lookup(tf.keras.layers.Layer):
    def __init__(self, feature_size, **kwargs):
        super().__init__(**kwargs)
        self.feature_size = feature_size
        
    def build(self, input_shape):
        super().build(input_shape)
        self.embedding_w = self.add_weight(name="embedding_w", shape=(self.feature_size, 16), 
                                           initializer=tf.keras.initializers.TruncatedNormal(mean=0., stddev=0.01),
                                          )
        
    def call(self, inputs, **kwargs):
        embedding = tf.nn.embedding_lookup_sparse(self.embedding_w, inputs, sp_weights=None, combiner='mean')
        return embedding

In [20]:
candidate_embeddings = []
embed_layer = Embedding_Lookup(feature_size)
for group_id in candidate_group_ids:
    embedding = embed_layer(group_feature["candidate_" + str(group_id)])
    candidate_embeddings.append(embedding)
candidate_embedding = tf.concat(candidate_embeddings, axis=1)

In [21]:
# def embedding_lookup(embedding_w, group_ids, prefix=""):
#     embeddings = []
#     for group_id in group_ids:
#         embedding = tf.nn.embedding_lookup_sparse(embedding_w, group_feature[prefix + str(group_id)], sp_weights=None, combiner='mean')
#         embeddings.append(embedding)
#     embedding_out = tf.concat(embeddings, axis=1)
#     return embedding_out

In [22]:
# init_w = tf.keras.initializers.TruncatedNormal(mean=0., stddev=0.01)
# embed_w = tf.Variable(init_w(shape=[feature_size, 16]), name='embedding_w', dtype=tf.float32)

# # batch_size, len(main_group_ids) * embed_dim, 相同field之间的特征求mean
# candidate_embedding = embedding_lookup(embed_w, candidate_group_ids, prefix="candidate_")

In [23]:
dense = tf.keras.layers.Dense(1, activation="sigmoid")
output = dense(candidate_embedding)

In [32]:
class CustomModel(tf.keras.Model):
    def train_step(self, data):
        # Unpack the data. Its structure depends on your model and
        # on what you pass to `fit()`.
        if len(data) == 3:
            x, y, sample_weight = data
        else:
            x, y = data

        y_pred = self(x, training=True)  # Forward pass
        # Compute the loss value.
        # The loss function is configured in `compile()`.
        loss = self.compiled_loss(
            y,
            y_pred,
            sample_weight=sample_weight,
            regularization_losses=self.losses,
        )

        # Compute gradients
        trainable_vars = self.trainable_variables
        gradients = tf.gradients(loss, trainable_vars)

        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))

        # Update the metrics.
        # Metrics are configured in `compile()`.
        self.compiled_metrics.update_state(y, y_pred, sample_weight=sample_weight)

        # Return a dict mapping metric names to current value.
        # Note that it will include the loss (tracked in self.metrics).
        return {m.name: m.result() for m in self.metrics}

In [33]:
model = CustomModel(group_feature, output)

In [34]:
# model = tf.keras.models.Model(group_feature, output)

In [35]:
model.compile(tf.keras.optimizers.Adagrad(), "binary_crossentropy",
              metrics=['binary_crossentropy'], )
history = model.fit(train_data, train_label, epochs=25, batch_size=16, shuffle=False, sample_weight=sample_weight)

Train on 256 samples
Epoch 1/25
 16/256 [>.............................] - ETA: 3s

SystemError: <built-in function len> returned a result with an error set

In [258]:
dataset = tf.data.TextLineDataset([r"E:\ML_study\deepctr\dfn_tf2\example"])

In [None]:
def input_data_set(data_dict, features, prefix=""):
    global main_group_ids
    for feature in features:
        feature = feature.split(":")
        feature = int(feature[0])
        group_id = feature >> 48
        feature = feature % feature_size
        if prefix == "main_":
            if group_id not in main_group_ids:
                continue             
        data_dict[prefix+str(group_id)] =[feature]
        index, value = [], []
        for k in range(len(data_dict[string][0])):
    #                 rows.append(i)
    #                 cols.append(k)
                    index.append(np.array([i, k], dtype = np.int32))
                    value.append(data_dict[string][i][k])
    #     iv = sp.sparse.coo_matrix((value, (rows, cols)), shape=[len(data_dict[string]), feature_size])
    #     if dtype == tf.int32:
    #         iv = iv.astype(np.int32)
    #     elif dtype == tf.float32:
    #         iv = iv.astype(np.float32)
        iv = tf.sparse.SparseTensor(index, value, [len(data_dict[string]), feature_size])
        iv = tf.cast(iv, dtype=dtype)
        data_dict[string] = iv
        
def train_data_process(data, data_dict, main_group_ids, candidate_group_ids, clicked_group_ids, unclick_group_ids, feedback_group_ids, pos_group_ids):
    data_dict = {}
    data = tf.strings.split(data, "\t")
    label = tf.cast(data[0], dtype=tf.float32)
    weight = tf.cast(data[1], dtype=tf.float32)
    features = tf.strings.split(data[2], "|")
    main_features = tf.strings.split(features[0], " ")
    candidate_features = tf.strings.split(features[1], " ")
    clicked_features = tf.strings.split(features[2], ";")
    unclick_features = tf.strings.split(features[3], ";")
    feedback_features = tf.strings.split(features[4], ";")
    data_dict["label"] = [label]
    
    data_dict["weight"] = [weight]
    
    input_data_set(data_dict, main_features, "main_")
    input_data_set(data_dict, candidate_features, "candidate_")
    input_hist_data_set(data_dict, clicked_features, clicked_group_ids, pos_group_ids, hist_size, "clicked_")
    input_hist_data_set(data_dict, unclick_features, unclick_group_ids, pos_group_ids, hist_size, "unclick_")
    input_hist_data_set(data_dict, feedback_features, feedback_group_ids, pos_group_ids, hist_size, "feedback_")

In [259]:
for line in dataset.take(5):
    print(line.numpy())

b'0\t1.0\t4597697815101895:1.0 6016574026494338:1.0 17027853178969981:1.0 17195487356851262:1.0 5671018667602395793:1.0 13070185260218823:1.0 13042712529216655:1.0 13003510089913431:1.0 13101372349892736:1.0 13102308708812472:1.0 13681236340273332:1.0 13720715262011759:1.0 13742524213838863:1.0 13573771682066531:1.0 13527563666807508:1.0 14335342827392411:1.0 14213538197159935:1.0 14230815525973305:1.0 14250523910490838:1.0 14309043682027648:1.0 34505104869198441:1.0 34606920073409687:1.0 34369972135808970:1.0 34432175036929139:1.0 34574397145101198:1.0 2815540589333415558:1.0 2815310558040711643:1.0 2815845193123668893:1.0 2816472454912085528:1.0 2830453274016811363:1.0 2830604277019819904:1.0 2828345537882128037:1.0 5673538572898786380:1.0 2829095378905735861:1.0 2824466694240443953:1.0 2824658891131706915:1.0 2820179654887781996:1.0 2820283407214145740:1.0 2820107523379246349:1.0 2820294557960302561:1.0 2820246555415114990:1.0 2820232643767827587:1.0 2820182182810824006:1.0 28203488

In [118]:
a.dtype

dtype('float32')

In [55]:
# -*- coding:utf-8 -*-
import tensorflow as tf
import numpy as np
from collections import OrderedDict

class Sequence_Embedding(tf.keras.layers.Layer):
    def __init__(self, clicked_item_dim, pos_item_dim, unclick_item_dim, feedback_item_dim, item_dim,
                 initializers=tf.keras.initializers.GlorotNormal(), **kwargs):
        super().__init__(**kwargs)
        self.clicked_item_dim = clicked_item_dim
        self.pos_item_dim = pos_item_dim
        self.unclick_item_dim = unclick_item_dim
        self.feedback_item_dim = feedback_item_dim
        self.item_dim = item_dim

    def build(self, input_shape):
        super().build(input_shape)
        self.pos_w_clicked = self.add_weight(name="pos_w_clicked", shape=(self.clicked_item_dim + self.pos_item_dim, self.item_dim), 
                                             initializer=tf.keras.initializers.GlorotNormal(),
                                             dtype=tf.float32)
        self.pos_w_unclick = self.add_weight(name="pos_w_unclick", shape=(self.unclick_item_dim + self.pos_item_dim, self.item_dim), 
                                             initializer=tf.keras.initializers.GlorotNormal(),
                                             dtype=tf.float32)
        self.pos_w_feedback = self.add_weight(name="pos_w_feedback", shape=(self.feedback_item_dim + self.pos_item_dim, self.item_dim), 
                                             initializer=tf.keras.initializers.GlorotNormal(),
                                             dtype=tf.float32)

    def call(self, inputs, **kwargs):
        clicked_z = tf.matmul(inputs[0], self.pos_w_clicked)
        unclick_z = tf.matmul(inputs[1], self.pos_w_unclick)
        feedback_z = tf.matmul(inputs[2], self.pos_w_feedback)
        return clicked_z, unclick_z, feedback_z

class Embedding_Lookup(tf.keras.layers.Layer):
    def __init__(self, feature_size, embed_dim, initializers=tf.keras.initializers.GlorotNormal(), **kwargs):
        super().__init__(**kwargs)
        self.feature_size = feature_size
        self.embed_dim = embed_dim
        self.initializers = initializers
        
    def build(self, input_shape):
        super().build(input_shape)
        self.embedding_w = self.add_weight(name="embedding_w", shape=(self.feature_size, self.embed_dim), 
                                           initializer=self.initializers,
                                          )
        
    def call(self, inputs, **kwargs):
        embedding = tf.nn.embedding_lookup_sparse(self.embedding_w, inputs, sp_weights=None, combiner='mean')
        return embedding

class Transformer(tf.keras.layers.Layer):
    def __init__(self, hist_size, hist_embedding_dim, initializers=tf.keras.initializers.GlorotNormal(), **kwargs):
        super().__init__(**kwargs)
        self.hist_size = hist_size
        self.hist_embedding_dim = hist_embedding_dim
        self.initializers = initializers

    def call(self, inputs, prefix="", **kwargs):
        candidate_embedding, hist_embeddings, hisLens = inputs
        hist_size = self.hist_size + 1
        hist_z = [candidate_embedding]
        for i in range(0,len(hist_embeddings)):
            hist_z.append(hist_embeddings[i])
        hist_z_all = tf.stack(hist_z, axis=1) #(batch, hist_size, hist_embedding_dim)
        
        headnum = 4
        mutil_head_att = []

        #attention
        for i in range(0, headnum):
            self.attQ_w = self.add_weight(name=prefix + "attQ_w" + str(i), shape=(self.hist_embedding_dim, int(self.hist_embedding_dim / headnum)), 
                                          initializer=self.initializers, dtype=tf.float32)
            self.attK_w = self.add_weight(name=prefix + "attK_w" + str(i), shape=(self.hist_embedding_dim, int(self.hist_embedding_dim / headnum)), 
                                          initializer=self.initializers, dtype=tf.float32)
            self.attV_w = self.add_weight(name=prefix + "attV_w" + str(i), shape=(self.hist_embedding_dim, int(self.hist_embedding_dim / headnum)), 
                                          initializer=self.initializers, dtype=tf.float32)
            
            attQ = tf.tensordot(hist_z_all, self.attQ_w, axes=1) #(batch, hist_size, hist_embedding_dim/headnum)
            attK = tf.tensordot(hist_z_all, self.attK_w, axes=1) #(batch, hist_size, hist_embedding_dim/headnum)
            attV = tf.tensordot(hist_z_all, self.attV_w, axes=1) #(batch, hist_size, hist_embedding_dim/headnum)
            
            attQK = tf.matmul(attQ, attK, transpose_b=True) #(batch, hist_size, hist_size)

            #scale
            attQK_scale = attQK / (hist_embedding_dim ** 0.5)
            padding = tf.ones_like(attQK_scale) * (-2 ** 32 + 1) #(batch, hist_size, hist_size)

            #mask
            key_masks = tf.sequence_mask(hisLens + 1, hist_size)  # (batch, hist_size)
            key_masks_new = tf.reshape(key_masks, [-1, 1, hist_size])
            key_masks_tile = tf.tile(key_masks_new, [1, hist_size, 1]) #(batch, hist_size, hist_size)
            key_masks_cast = tf.cast(key_masks_tile, dtype=tf.float32)
            outputs_QK = tf.where(key_masks_tile, attQK_scale, padding) #(batch, hist_size, hist_size)

            #norm
            outputs_QK_norm = tf.nn.softmax(outputs_QK) #(batch, hist_size, hist_size)

            #query mask
            outputs_QK_q = tf.multiply(outputs_QK_norm, key_masks_cast) #(batch, hist_size, hist_size)
            # weighted sum
            outputs_QKV_head = tf.matmul(outputs_QK_q, attV) #(batch, hist_size, hist_embedding_dim/headnum)
            mutil_head_att.append(outputs_QKV_head)

        outputs_QKV = tf.concat(mutil_head_att, axis=2) # (batch, hist_size, hist_embedding_dim)
        #FFN
        self.FFN_w0 = self.add_weight(name=prefix + 'FFN_w0', shape=(self.hist_embedding_dim, self.hist_embedding_dim * 4), 
                                      initializer=self.initializers, dtype=tf.float32)
        self.FFN_b0 = self.add_weight(name=prefix + 'FFN_b0', shape=(self.hist_embedding_dim * 4), 
                                      initializer=self.initializers, dtype=tf.float32)

        self.FFN_w1 = self.add_weight(name=prefix + 'FFN_w1', shape=(self.hist_embedding_dim, self.hist_embedding_dim * 4), 
                                      initializer=self.initializers, dtype=tf.float32)
        self.FFN_b1 = self.add_weight(name=prefix + 'FFN_b1', shape=(self.hist_embedding_dim * 4), 
                                      initializer=self.initializers, dtype=tf.float32)
        
        TH0 = tf.tensordot(outputs_QKV, self.FFN_w0, axes=1) + self.FFN_b0 #(batch, hist_size, hist_embedding_dim * 4)
        TZ0 = tf.nn.relu(TH0)
        TH1 = tf.tensordot(TZ0, self.FFN_w1, axes=1) + self.FFN_b1
        # average pool
        return tf.reduce_sum(TH1, axis=1) #(batch, hist_embedding_dim)

class Attention(tf.keras.layers.Layer):
    def __init__(self, hist_size, hist_embedding_dim, initializers=tf.keras.initializers.GlorotNormal(), **kwargs):
        super().__init__(**kwargs)
        self.hist_size = hist_size
        self.hist_embedding_dim = hist_embedding_dim
        self.initializers = initializers

    def call(self, inputs, prefix="", **kwargs):
        candidate_embedding, hist_embeddings, hisLens = inputs
        attention_hidden_ = 32
        self.attW1 = self.add_weight(name=prefix + "attention_hidden_w1", shape=(self.hist_embedding_dim * 4, attention_hidden_), 
                                     initializer=self.initializers, dtype=tf.float32)
        self.attB1 = self.add_weight(name=prefix + "attention_hidden_b1", shape=(attention_hidden_), 
                                     initializer=tf.keras.initializers.Zeros(), dtype=tf.float32)

        self.attW2 = self.add_weight(name=prefix + "attention_hidden_w2", shape=(attention_hidden_, 1), 
                                     initializer=self.initializers, dtype=tf.float32)
        self.attB2 = self.add_weight(name=prefix + "attention_hidden_b2", shape=(1), 
                                     initializer=tf.keras.initializers.Zeros(), dtype=tf.float32)
        
        hist_embedding_list=[]
        for i in range(0, self.hist_size):
            # batch, hist_embedding_dim * 4
            z1 = tf.concat([candidate_embedding, hist_embeddings[i], candidate_embedding * hist_embeddings[i], candidate_embedding-hist_embeddings[i]], axis=1)
            hist_embedding_list.append(z1)
        hist_z_all = tf.stack(hist_embeddings, axis=1) #(batch, hist_size, hist_embedding_dim)
        z2 = tf.concat(hist_embedding_list, axis=1)  #(batch, hist_size * hist_embedding_dim * 4)
        z3 = tf.reshape(z2, [-1, self.hist_size, 4 * self.hist_embedding_dim])
        z4 = tf.tensordot(z3, self.attW1, axes=1) + self.attB1 #(batch , hist_size, attention_hidden_)
        z5 = tf.nn.relu(z4)
        z6 = tf.tensordot(z5, self.attW2, axes=1) + self.attB2 #(batch, hist_size, 1)
        att_w_all = tf.reshape(z6, [-1, self.hist_size])

        #mask
        hist_masks = tf.sequence_mask(hisLens, self.hist_size) #(batch, hist_size)
        padding = tf.ones_like(att_w_all) * (-2**32 + 1)
        att_w_all_rep = tf.where(hist_masks, att_w_all, padding)

        #scale
        att_w_all_scale = att_w_all_rep / (self.hist_embedding_dim ** 0.5)

        #norm
        att_w_all_norm = tf.nn.softmax(att_w_all_scale)

        att_w_all_mul = tf.reshape(att_w_all_norm, [-1, 1, self.hist_size])
        weighted_hist_all = tf.matmul(att_w_all_mul, hist_z_all) #(batch, 1, hist_embedding_dim)
        return tf.reshape(weighted_hist_all, [-1, self.hist_embedding_dim])

class DNN(tf.keras.layers.Layer):
    """The Multi Layer Percetron

      Input shape
        - nD tensor with shape: ``(batch_size, ..., input_dim)``. The most common situation would be a 2D input with shape ``(batch_size, input_dim)``.

      Output shape
        - nD tensor with shape: ``(batch_size, ..., hidden_size[-1])``. For instance, for a 2D input with shape ``(batch_size, input_dim)``, the output would have shape ``(batch_size, hidden_size[-1])``.

      Arguments
        - **hidden_units**:list of positive integer, the layer number and units in each layer.

        - **activation**: Activation function to use.

        - **l2_reg**: float between 0 and 1. L2 regularizer strength applied to the kernel weights matrix.

        - **dropout_rate**: float in [0,1). Fraction of the units to dropout.

        - **use_bn**: bool. Whether use BatchNormalization before activation or not.

        - **seed**: A Python integer to use as random seed.
    """

    def __init__(self, hidden_units, activation='relu', l2_reg=0, dropout_rate=0, use_bn=False, seed=1024, **kwargs):
        super(DNN, self).__init__(**kwargs)
        self.hidden_units = hidden_units
        self.activation = activation
        self.dropout_rate = dropout_rate
        self.seed = seed
        self.l2_reg = l2_reg
        self.use_bn = use_bn
        self.activation_dict = {"relu": tf.nn.relu(),
                                "sigmoid": tf.nn.sigmoid(),
                                "tanh": tf.nn.tanh()}

    def build(self, input_shape):
        super(DNN, self).build(input_shape)  # Be sure to call this somewhere!
        # if len(self.hidden_units) == 0:
        #     raise ValueError("hidden_units is empty")
        input_size = input_shape[-1]
        hidden_units = [int(input_size)] + list(self.hidden_units)
        self.kernels = [self.add_weight(name='kernel' + str(i),
                                        shape=(
                                            hidden_units[i], hidden_units[i + 1]),
                                            initializer=tf.keras.initializers.GlorotNormal(
                                                seed=self.seed),
                                        regularizer=tf.keras.regularizers.L2(self.l2_reg),
                                        trainable=True) for i in range(len(self.hidden_units))]
        self.bias = [self.add_weight(name='bias' + str(i),
                                     shape=(self.hidden_units[i],),
                                     initializer=tf.keras.initializers.Zeros(),
                                     trainable=True) for i in range(len(self.hidden_units))]
        if self.use_bn:
            self.bn_layers = [tf.keras.layers.BatchNormalization() for _ in range(len(self.hidden_units))]

        self.dropout_layers = [tf.keras.layers.Dropout(self.dropout_rate, seed=self.seed + i) for i in
                               range(len(self.hidden_units))]

        self.activation_layers = [self.activation_dict[self.activation] for _ in range(len(self.hidden_units))]


    def call(self, inputs, training=None, **kwargs):

        deep_input = inputs

        for i in range(len(self.hidden_units)):
            fc = tf.nn.bias_add(tf.tensordot(
                deep_input, self.kernels[i], axes=(-1, 0)), self.bias[i])
            # fc = Dense(self.hidden_size[i], activation=None, \
            #           kernel_initializer=glorot_normal(seed=self.seed), \
            #           kernel_regularizer=l2(self.l2_reg))(deep_input)
            if self.use_bn:
                fc = self.bn_layers[i](fc, training=training)

            fc = self.activation_layers[i](fc)

            fc = self.dropout_layers[i](fc, training=training)
            deep_input = fc

        return deep_input

    def compute_output_shape(self, input_shape):
        if len(self.hidden_units) > 0:
            shape = input_shape[:-1] + (self.hidden_units[-1],)
        else:
            shape = input_shape

        return tuple(shape)

    def get_config(self, ):
        config = {'activation': self.activation, 'hidden_units': self.hidden_units,
                  'l2_reg': self.l2_reg, 'use_bn': self.use_bn, 'dropout_rate': self.dropout_rate, 'seed': self.seed}
        base_config = super(DNN, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))


class DFN():
    def __init__(self, main_group_ids, candidate_group_ids, clicked_group_ids, unclick_group_ids, feedback_group_ids, 
                 pos_group_ids, batch_size=256, embed_dim=16, feature_size=1048573, hist_size=30):
        self.main_group_ids = main_group_ids
        self.candidate_group_ids = candidate_group_ids
        self.clicked_group_ids = clicked_group_ids
        self.unclick_group_ids = unclick_group_ids
        self.feedback_group_ids = feedback_group_ids
        self.pos_group_ids = pos_group_ids
        self.batch_size = batch_size
        self.embed_dim = embed_dim
        self.feature_size = feature_size
        self.hist_size = hist_size
        self.group_feature = OrderedDict()
        self.clicked_item_dim = len(clicked_group_ids) * embed_dim
        self.unclick_item_dim = len(unclick_group_ids) * embed_dim
        self.feedback_item_dim = len(feedback_group_ids) * embed_dim
        self.item_dim = self.clicked_item_dim
        self.pos_item_dim = len(pos_group_ids) * embed_dim
        self._results = None
        # build input
        for group_id in main_group_ids:
          self.group_feature["main_" + str(group_id)] = tf.keras.layers.Input(shape=(self.feature_size, ), dtype=tf.int32, sparse=True, name=("main_" + str(group_id)))
        for group_id in candidate_group_ids:
          self.group_feature["candidate_" + str(group_id)] = tf.keras.layers.Input(shape=(self.feature_size, ), dtype=tf.int32, sparse=True, name=("candidate_" + str(group_id)))
 
        for i in range(0, hist_size):
            for group_id in clicked_group_ids:
                self.group_feature["clicked" + "_" + str(i) + "_" + str(group_id)] = tf.keras.layers.Input(shape=(self.feature_size, ), dtype=tf.int32, sparse=True, name=("clicked" + "_" + str(i) + "_" + str(group_id)))
            for group_id in unclick_group_ids:
                self.group_feature["unclick" + "_" + str(i) + "_" + str(group_id)] = tf.keras.layers.Input(shape=(self.feature_size, ), dtype=tf.int32, sparse=True, name=("unclick" + "_" + str(i) + "_" + str(group_id)))
            for group_id in feedback_group_ids:
                self.group_feature["feedback" + "_" + str(i) + "_" + str(group_id)] = tf.keras.layers.Input(shape=(self.feature_size, ), dtype=tf.int32, sparse=True, name=("feedback" + "_" + str(i)+"_"+str(group_id)))  
            for group_id in pos_group_ids:
                self.group_feature["clicked" + "_" + "position" + "_" + str(i) + "_" + str(group_id)] = tf.keras.layers.Input(shape=(self.feature_size, ), dtype=tf.int32, sparse=True, name=("clicked" + "_" + "position" + "_"+str(i) + "_"+str(group_id)))
                self.group_feature["unclick" + "_" + "position" + "_" + str(i) + "_" + str(group_id)] = tf.keras.layers.Input(shape=(self.feature_size, ), dtype=tf.int32, sparse=True, name=("unclick" + "_" + "position" + "_"+str(i) + "_" + str(group_id)))
                self.group_feature["feedback" + "_" + "position" + "_" + str(i) + "_" + str(group_id)] = tf.keras.layers.Input(shape=(self.feature_size, ), dtype=tf.int32, sparse=True, name=("feedback" + "_" + "position" + "_"+str(i) + "_" + str(group_id)))
        self.group_feature["clicked_histLen"] = tf.keras.layers.Input(shape=(1, ), dtype=tf.float32, name=("clicked_histLen"))
        self.group_feature["unclick_histLen"] = tf.keras.layers.Input(shape=(1, ), dtype=tf.float32, name=("unclick_histLen"))
        self.group_feature["feedback_histLen"] = tf.keras.layers.Input(shape=(1, ), dtype=tf.float32, name=("feedback_histLen"))

    def embedding_lookup(self, group_ids, prefix=""):
        embeddings = []
        for group_id in group_ids:
            embedding = self.embed_layer(self.group_feature[prefix + str(group_id)])
            embeddings.append(embedding)
        embedding_out = tf.concat(embeddings, axis=1)
        return embedding_out

    def __call__(self,):
        clicked_embeddings = []
        unclick_embeddings = []
        feedback_embeddings = []
        self.embed_layer = Embedding_Lookup(self.feature_size, self.embed_dim,
                                            tf.keras.initializers.TruncatedNormal(mean=0., stddev=0.01),
                                            name="embedding_w")
        # batch_size, len(main_group_ids) * embed_dim, 相同field之间的特征求mean
        main_embedding = self.embedding_lookup(self.main_group_ids, prefix="main_")

        # batch_size, len(candidate_group_ids) * embed_dim, 相同field之间的特征求mean
        candidate_embedding = self.embedding_lookup(self.candidate_group_ids, prefix="candidate_")

        seq_emb = Sequence_Embedding(self.clicked_item_dim, self.pos_item_dim, self.unclick_item_dim, self.feedback_item_dim, self.item_dim)
        for i in range(0, self.hist_size):
            # 一个用户不用field都有30长的序列，对于用户序列，每个序号求相同field的mean
            # batch_size, len(clicked_group_ids) * embed_dim
            self.embedding_lookup(self.main_group_ids, prefix="main_")
            clicked_embedding = self.embedding_lookup(self.clicked_group_ids, prefix = "clicked" + "_" + str(i) + "_")
            unclick_embedding = self.embedding_lookup(self.unclick_group_ids, prefix="unclick" + "_" + str(i) + "_")
            feedback_embedding = self.embedding_lookup(self.feedback_group_ids, prefix="feedback" + "_" + str(i) + "_")
            clicked_position_embedding = self.embedding_lookup(self.pos_group_ids, prefix="clicked" + "_" + "position" + "_" + str(i) + "_")
            unclick_position_embedding = self.embedding_lookup(self.pos_group_ids, prefix="unclick" + "_" + "position" + "_" + str(i) + "_")
            feedback_position_embedding = self.embedding_lookup(self.pos_group_ids, prefix="feedback" + "_" + "position" + "_" + str(i) + "_")
            # 位置信息concat
            clicked_pos = tf.concat([clicked_embedding, clicked_position_embedding], axis=1)
            unclick_pos = tf.concat([unclick_embedding, unclick_position_embedding], axis=1)
            feedback_pos = tf.concat([feedback_embedding, feedback_position_embedding], axis=1)
            # 特征和位置embedding
            # batch_size, len(clicked_group_ids) * embed_dim
            clicked_z, unclick_z, feedback_z = seq_emb([clicked_pos, unclick_pos, feedback_pos])
            clicked_embeddings.append(clicked_z)
            unclick_embeddings.append(unclick_z)
            feedback_embeddings.append(feedback_z)

        # wide embedding
        main_embeddings_wide = []
        candidate_embeddings_wide = []
        self.embed_wide = Embedding_Lookup(self.feature_size, 1, tf.keras.initializers.Zeros(), name="embedding_wide")
        for group_id in self.main_group_ids:
            # batch_size, len(main_group_ids) * 1
            embedding_wide = self.embed_wide(self.group_feature["main_" + str(group_id)])
            main_embeddings_wide.append(embedding_wide)
        main_embedding_wide = tf.concat(main_embeddings_wide, axis=1)

        for group_id in candidate_group_ids:
            # batch_size, len(candidate_group_ids) * 1
            embedding_wide = self.embed_wide(self.group_feature["candidate_" + str(group_id)])
            candidate_embeddings_wide.append(embedding_wide)
        candidate_embedding_wide = tf.concat(candidate_embeddings_wide, axis=1)

        # batch, hist_embedding_dim
        transformer = Transformer(self.hist_size, self.item_dim)
        output_clicked = transformer([candidate_embedding, clicked_embeddings, self.group_feature["clicked_histLen"]], prefix="clicked")
        output_unclick = transformer([candidate_embedding, unclick_embeddings, self.group_feature["unclick_histLen"]], prefix="unclick")
        output_feedback = transformer([candidate_embedding, feedback_embeddings, self.group_feature["feedback_histLen"]], prefix="feedback")
        
        attention = Attention(self.hist_size, self.item_dim)
        output_unclick_clicked = attention([output_clicked, unclick_embeddings, self.group_feature["unclick_histLen"]], prefix="unclick_clicked")
        output_unclick_feedback = attention([output_feedback, unclick_embeddings, self.group_feature["unclick_histLen"]], prefix="unclick_feedback")

        input_embedding = tf.concat([main_embedding, candidate_embedding, output_clicked, output_unclick, output_feedback, output_unclick_clicked, output_unclick_feedback],axis=1)

        #fm part 这个*6估计相当于multihead一样，切分成多个子空间进行两两交叉组合
        m = len(main_group_ids) + len(candidate_group_ids) * 6
        fm_in = tf.reshape(input_embedding, shape=[-1, m, self.embed_dim])
        sum1 = tf.reduce_sum(fm_in, axis=1)
        sum2 = tf.reduce_sum(fm_in * fm_in, axis=1)
        fm = (sum1 * sum1 - sum2) * 0.5

        #deep part
        deep = DNN([32, 16])(input_embedding)

        z = tf.concat([deep, fm, main_embedding_wide, candidate_embedding_wide], axis=1)
        results = DNN([1,], activation="sigmoid")
        return tf.reshape(results, [-1, 1])

In [56]:
main_group_ids=[16,10001,10002,10003,21,10006,10019,10034,20147,20148,10035,20156,
                    61,10047,10048,10049,10050,10055,10056,60]
candidate_group_ids=[3060,3061,3062,3063,3064]
clicked_group_ids=[3060,3061,3062,3063,3064]
unclick_group_ids=[3060,3061,3062,3063,3064]
feedback_group_ids=[3060,3061,3063,3064]
pos_group_ids=[3065]

path = Path("/Volumes/D/guohao/resys/dfn/example")
dfn = DFN(main_group_ids, candidate_group_ids, clicked_group_ids, unclick_group_ids, feedback_group_ids, pos_group_ids)

In [57]:
dfn()

NameError: in converted code:

    <ipython-input-55-ad838a5e6e42>:85 call  *
        attQK_scale = attQK / (hist_embedding_dim ** 0.5)

    NameError: name 'hist_embedding_dim' is not defined


In [63]:
a = tf.keras.layers.Input(shape=(1, 30))
b = tf.keras.layers.Input(shape=(30, 80))

In [65]:
tf.matmul(a, b).shape

TensorShape([256, 1, 80])