In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import log_loss
from sklearn.metrics import roc_auc_score
from tqdm import tqdm
import os, pickle

# specify the GPU device
# os.environ['CUDA_DEVICE_ORDER']="PCI_BUS_ID"
os.environ['CUDA_VISIBLE_DEVICES'] = '7'

import tensorflow as tf
from tensorflow.python.layers.core import Dense
import keras
from keras.preprocessing.sequence import pad_sequences
tf.reset_default_graph() 

Using TensorFlow backend.


In [2]:
'''
Config
'''
# batch size per iteration
BATCHSIZE = 400
# mini-batch size for few-shot learning
# MINIBATCHSIZE = 10
# learning rate
LR = 1e-3
# coefficient to balance `cold-start' and `warm-up'
ALPHA = 0.1
# length of embedding vectors
EMB_SIZE = 128
# model
MODEL = 'DisNet'
# log file
LOG = "logs/{}.csv".format(MODEL)
# path to save the model
saver_path ="saver/model-"+LOG.split("/")[-1][:-4]
# Size of latent vectors
LATENT_SIZE = 64
# ISO type
ISO = 'nn'
#Title length
title_len = 100
num_rel_items = 10

上方代码配置超参数和存放的路径，这里有个Alpha是损失函数里的权衡参数，EMB_SIZE是Embedding的维度，这里选择DeepFM

In [3]:
def read_pkl(path):
    with open(path, "rb") as f:
        t = pickle.load(f)
    return t

上方是读入数据的函数

In [4]:
# some pre-processing
num_words_dict = {
    'User-ID': 92108, 
    'ISBN': 270171, 
    'Location': 22449, 
    'Age': 142, 
    'Book-Author': 101582, 
    'Year-Of-Publication': 117, 
    'Publisher': 16729
}
# 'User-ID', 'ISBN', 'Location', 'Age', 'Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher', 'label'
ID_col = 'ISBN'
item_col = ['Book-Author', 'Year-Of-Publication', 'Publisher']
context_col = ['User-ID', 'Age', 'Location']
real_con_cols_num = 1
# context_col = ['Age', 'Gender', 'UserID', 'Occupation']

def get_data(file):
    data = read_pkl(file)
    y = np.array(list(data['label']))
    title = np.array(list(map(lambda x:  list(x), data['Book-Title'].values)))
    title_len = title.shape[1]
    feature = data[[ID_col]+item_col+context_col]
    return feature, title, y

rel_item_col = "rel_item_ids"
def get_rel_data(file):
    global num_rel_items
    data = read_pkl(file)
    y = np.array(list(data['label']))
    rel_item = np.array(list(data[rel_item_col]), dtype=np.int32)
    num_rel_items = rel_item.shape[1]
    title = np.array(list(map(lambda x:  list(x), data['Book-Title'].values)))
    title_len = title.shape[1]
    feature = data[[ID_col]+item_col+context_col]
    return feature, title, rel_item, y

big_train_main应该是用来训练base模型的

pad_sequences可以把序列数据进行截断或者补齐，Genres为什么要这么做呢？

训练数据基本就是ID+item_col+context_col，和文章对应

感觉train_t和train_g也是item的属性，属于item_col

In [5]:
pre_x, pre_t, pre_y = get_data("./disnet_data/data_pretrain.pkl")
pop_x_train, pop_t_train, pop_y_train = get_data("./disnet_data/train_pop.pkl")
pop_x_test, pop_t_test, pop_y_test = get_data("./disnet_data/test_pop.pkl")

这里是读入其他训练数据，a,b,c看起来是预热用来更新Embedding的

In [30]:
class Meta_Model(object):
    def __init__(self, ID_col, item_col, context_col, nb_words, model='FM',
                 emb_size=128, alpha=0.1,
                 warm_lr=1e-3, cold_lr=1e-4, ME_lr=1e-3, latent_size=16, is_pretrain=True, is_meta=False,
                REG=0.5):
        """
        ID_col: string, the column name of the item ID
        item_col: list, the columns of item features
        context_col: list, the columns of other features
        nb_words: dict, nb of words in each of these columns
        """
        tf.reset_default_graph() 
        self.dropout_keep = tf.placeholder(tf.float32)
        all_tables = []
        self.flag = True
        self.is_pretrain = is_pretrain
        self.is_meta = is_meta
        columns = [ID_col] + item_col + context_col
        inputs_b = None
        warm_loss = None
        if self.is_pretrain:
            columns = [ID_col] + item_col + context_col[:-real_con_cols_num]
        else:
            columns = [ID_col] + item_col + context_col
        def get_embeddings():
            inputs, tables = {}, []
            item_embs, other_embs = [], []
            with tf.variable_scope("embeddings", reuse=tf.AUTO_REUSE):
                # All the embeddings are reusable
                for col in columns:
                    inputs[col] = tf.placeholder(tf.int32, [None])
                    table = tf.get_variable(
                        "table_{}".format(col), [nb_words[col], emb_size],
                        initializer=tf.random_normal_initializer(stddev=0.01))
                    if self.flag:
                        all_tables.append(table)
                    emb = tf.nn.embedding_lookup(table, inputs[col])
                    if col==ID_col:
                        ID_emb = emb
                        ID_table = table
                    elif col in item_col:
                        item_embs.append(emb)
                    else:
                        other_embs.append(emb)
                inputs["title"] = tf.placeholder(tf.float32, [None, 100])
                inputs["rel_item_ids"] = tf.placeholder(tf.int32, [None, num_rel_items])
            self.flag = False
            
            return inputs, ID_emb, item_embs, other_embs, ID_table
        
        def generate_rel_meta_emb(item_embs, rel_id_embs, title):
            ''' 生成relational meta embedding
            '''
            with tf.variable_scope("meta_embeddings", reuse=tf.AUTO_REUSE):
                """
                This is the simplest architecture of the embedding generator,
                with only a dense layer.
                You can customize it if you want to get a stronger performance, 
                for example, you can add an l2 regularization term or alter 
                the pooling layer. 
                """
                embs = tf.stop_gradient(tf.stack(item_embs, 1))
                item_h = tf.layers.flatten(embs)
                
                # attention part, item_h: [None, M], rel_id_embs:[None, 10, emb_size]
                emb_size = 128
                hidden_size = 64
                attention_w = tf.get_variable("attention_w", [emb_size, hidden_size])
                attention_h = tf.get_variable("attention_h", [hidden_size])
                attention_b = tf.get_variable("attention_b", [hidden_size])
                attention_mul = tf.nn.relu(tf.matmul(rel_id_embs, attention_w) + attention_b)
                attention_relu = tf.reduce_sum(tf.multiply(attention_h, attention_mul), 2, keepdims=True)
                attention_score = tf.nn.softmax(attention_relu)
                # rel_id_embs: [None, 10]
                rel_id_embs = tf.reduce_sum(tf.multiply(attention_score, rel_id_embs), 2) / 10
                item_h = tf.concat([item_h, title, rel_id_embs], axis=-1)
                
                emb_pred_Dense1 = Dense(
                    emb_size, activation=tf.nn.relu, use_bias=True,
                    name='emb_predictor1')
                
                emb_pred_Dense2 = Dense(
                    emb_size, activation=tf.nn.tanh, use_bias=False,
                    name='emb_predictor2')
                
                emb_pred = emb_pred_Dense2(emb_pred_Dense1(item_h)) / 5.
                ME_vars = [attention_w, attention_h, attention_b]\
                        + emb_pred_Dense1.trainable_variables + emb_pred_Dense2.trainable_variables
#                 ME_vars.extend([attention_w, attention_h, attention_b])
                return emb_pred, ME_vars
        
        def generate_meta_emb(item_embs, title):
            with tf.variable_scope("meta_embeddings", reuse=tf.AUTO_REUSE):
                """
                This is the simplest architecture of the embedding generator,
                with only a dense layer.
                You can customize it if you want to get a stronger performance, 
                for example, you can add an l2 regularization term or alter 
                the pooling layer. 
                """
                embs = tf.stop_gradient(tf.stack(item_embs, 1))
                item_h = tf.layers.flatten(embs)
                item_h = tf.concat([item_h, title], axis=1)
                emb_pred_Dense = Dense(
                    emb_size, activation=tf.nn.tanh, use_bias=False,
                    name='emb_predictor') 
                emb_pred = emb_pred_Dense(item_h) / 5.
                ME_vars = emb_pred_Dense.trainable_variables
                return emb_pred, ME_vars

        def get_yhat_DisNet(ID_emb, item_embs, other_embs, title, **kwargs):
            item_input = tf.concat([ID_emb] + item_embs + [title], -1)
            if self.is_pretrain:
                user_input = tf.concat(other_embs, -1)
                # context has been removed in advance
            else:
                user_input = tf.concat(other_embs[:-real_con_cols_num], -1)
                # if not pretrain, remove the context
            
            with tf.variable_scope("DisNet_user_net", reuse=tf.AUTO_REUSE):
                user_interest = tf.nn.relu(tf.layers.dense(user_input, latent_size, name='user_net_1'))
                user_interest = tf.layers.dense(user_interest, latent_size, name='user_net_2')
            
            with tf.variable_scope("DisNet_item_net", reuse=tf.AUTO_REUSE):
                item_interest = tf.nn.relu(tf.layers.dense(item_input, latent_size, name='item_net_1'))
                item_interest = tf.layers.dense(item_interest, latent_size, name='item_net_2')
            
            if self.is_pretrain == False:
                with tf.variable_scope("context_net", reuse=tf.AUTO_REUSE):
                    context_input = tf.concat(other_embs[-real_con_cols_num:], -1)
                    print('context_input: ', context_input)
                    # load time_stamp
                    if ISO == 'add':
                        interest_shifting_vector = tf.nn.relu(tf.layers.dense(context_input, latent_size, name='con_net_1'))
                        interest_shifting_vector = tf.nn.relu(tf.layers.dense(context_input, latent_size, name='con_net_2'))
                        interest_shifting_vector = tf.layers.dense(interest_shifting_vector, latent_size, name='con_net_3')
                        user_interest = user_interest + interest_shifting_vector
                    elif ISO == 'nn':
                        interest_shifting_vector = tf.nn.relu(tf.layers.dense(context_input, latent_size, name='con_net_1'))
                        interest_shifting_vector = tf.layers.dense(interest_shifting_vector, latent_size, name='con_net_2')
                        concated_vectors = tf.concat([user_interest, interest_shifting_vector], 1)
                        user_interest = tf.nn.relu(concated_vectors)
                        user_interest = tf.layers.dense(user_interest, latent_size, name='iso_net_2')
                    elif ISO == 'cot':
                        interest_shifting_vector = tf.nn.relu(tf.layers.dense(context_input, latent_size * latent_size, name='con_net_1'))
                        interest_shifting_vector = tf.layers.dense(interest_shifting_vector, latent_size * latent_size, name='con_net_2')
                        interest_shifting_vector = tf.reshape(interest_shifting_vector, [-1, latent_size, latent_size])
                        user_interest = tf.matmul(interest_shifting_vector, tf.expand_dims(user_interest, 2))
                        user_interest = tf.reshape(user_interest, [-1, latent_size])
            
            with tf.variable_scope("merge_net", reuse=tf.AUTO_REUSE):
                h_deep = tf.nn.relu(tf.concat([item_interest, user_interest], axis=1))
                h_deep = tf.nn.relu(tf.layers.dense(h_deep, latent_size, name='merge_net_1'))
                y_deep = tf.layers.dense(h_deep, 1, name='merge_net_2')
            
            with tf.variable_scope("FM_part"):
                embeddings = [ID_emb] + item_embs + other_embs
                sum_of_emb = tf.add_n(embeddings)
                diff_of_emb = [sum_of_emb - x for x in embeddings]
                # n*t
                dot_of_emb = [tf.reduce_sum(embeddings[i]*diff_of_emb[i], 
                                            axis=1, keepdims=True) 
                              for i in range(len(columns))]
                h_fm = tf.concat(dot_of_emb, 1)
                y_fm = tf.reduce_sum(h_fm, axis=1, keepdims=True)
            
            with tf.variable_scope("output_part"):
                y = tf.nn.sigmoid(y_deep)
            
            return y
        
        def get_yhat_deepFM(ID_emb, item_embs, other_embs, title, **kwargs):
            with tf.variable_scope("context_models"):
                embeddings = [ID_emb] + item_embs + other_embs
                sum_of_emb = tf.add_n(embeddings)
                diff_of_emb = [sum_of_emb - x for x in embeddings]
                dot_of_emb = [tf.reduce_sum(embeddings[i]*diff_of_emb[i], 
                                            axis=1, keepdims=True) 
                              for i in range(len(columns))]
                y_fm = tf.reduce_sum(tf.concat(dot_of_emb, 1), axis=1, keepdims=True)
                
                h2 = tf.concat(embeddings + [title], 1)
                for i in range(3):
                    h2 = tf.nn.relu(tf.layers.dense(h2, emb_size, name='deep-{}'.format(i)))
                y_deep = tf.layers.dense(h2, 1, name='deep-out')
                
                y = tf.nn.sigmoid(y_deep + y_fm)
                return y
        # DeepFM实现
        

        '''
        *CHOOSE THE BASE MODEL HERE*
        '''
        get_yhat = {
            "deepFM": get_yhat_deepFM,
            'DisNet': get_yhat_DisNet,
        }[model]
        
        reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        reg_losses = tf.reduce_sum(reg_losses) * REG
    
        if self.is_meta == False:
            if self.is_pretrain:
                with tf.variable_scope("base_model", reuse=tf.AUTO_REUSE):
                    # build the base model
                    inputs, ID_emb, item_embs, other_embs, ID_table = get_embeddings()
                    label = tf.placeholder(tf.float32, [None, 1])
                    yhat = get_yhat(ID_emb, item_embs, other_embs, inputs['title'])
                    warm_loss = tf.losses.log_loss(label, yhat) + reg_losses
                    # Meta-Embedding: build the embedding generator
                    # build the optimizer and update op for the original model
                    warm_optimizer = tf.train.AdamOptimizer(warm_lr)
                    warm_update_op = warm_optimizer.minimize(warm_loss)
                    warm_update_emb_op = warm_optimizer.minimize(warm_loss, var_list=[ID_table])
            else:
                with tf.variable_scope("base_model", reuse=tf.AUTO_REUSE):
                    # build the base model
                    inputs, ID_emb, item_embs, other_embs, ID_table = get_embeddings()
                    label = tf.placeholder(tf.float32, [None, 1])
                    # outputs and losses of the base model
                    yhat = get_yhat(ID_emb, item_embs, other_embs, inputs['title'])
                    warm_loss = tf.losses.log_loss(label, yhat) + reg_losses
                    # Meta-Embedding: build the embedding generator
                    # build the optimizer and update op for the original model
                    warm_optimizer = tf.train.AdamOptimizer(warm_lr)
                    var_list = all_tables
                    for var in tf.global_variables():
                        if (str(var).find('Adam') < 0 and str(var).find('beta') < 0 \
                            and str(var).find('context_net') < 0 and str(var).find('merge_net') < 0) == False:
                            print('Collecting shifting vars: ', var)
                            var_list.append(var)
                    warm_update_op = warm_optimizer.minimize(warm_loss)
                    warm_update_emb_op = warm_optimizer.minimize(warm_loss,  var_list=[ID_table])
        else:
            with tf.variable_scope("base_model", reuse=tf.AUTO_REUSE):
                inputs, ID_emb, item_embs, other_embs, ID_table = get_embeddings()
                label = tf.placeholder(tf.float32, [None, 1])
                # outputs and losses of the base model
                yhat = get_yhat(ID_emb, item_embs, other_embs, inputs['title'])
                warm_loss = tf.losses.log_loss(label, yhat) + reg_losses
                warm_optimizer = tf.train.AdamOptimizer(warm_lr)
                warm_update_emb_op = warm_optimizer.minimize(warm_loss, var_list=[ID_table])

                rel_id_embs = tf.nn.embedding_lookup(ID_table, inputs["rel_item_ids"])
                meta_ID_emb, ME_vars = generate_rel_meta_emb(item_embs, rel_id_embs, inputs['title'])

                # Meta-Embedding: step 1, cold-start, 
                #     use the generated meta-embedding to make predictions
                #     and calculate the cold-start loss_a
                cold_yhat_a = get_yhat(meta_ID_emb, item_embs, other_embs, inputs['title'])
                cold_loss_a = tf.losses.log_loss(label, cold_yhat_a)

                # Meta-Embedding: step 2, apply gradient descent once
                #     get the adapted embedding
                cold_emb_grads = tf.gradients(cold_loss_a, meta_ID_emb)[0]
                meta_ID_emb_new = meta_ID_emb - cold_lr * cold_emb_grads

                # Meta-Embedding: step 3, 
                #     use the adapted embedding to make prediction on another mini-batch 
                #     and calculate the warm-up loss_b
                inputs_b, _, item_embs_b, other_embs_b, _ = get_embeddings()
                label_b = tf.placeholder(tf.float32, [None, 1])
                cold_yhat_b = get_yhat(meta_ID_emb_new, item_embs_b, other_embs_b, inputs_b['title'])
                cold_loss_b = tf.losses.log_loss(label_b, cold_yhat_b)

                # build the optimizer and update op for meta-embedding
                # Meta-Embedding: step 4, calculate the final meta-loss
                ME_loss = cold_loss_b * (1-alpha) + cold_loss_a * alpha + reg_losses
                # ME_loss = cold_loss_b + reg_losses
                ME_optimizer = tf.train.AdamOptimizer(ME_lr)
                ME_update_op = ME_optimizer.minimize(cold_loss_b, var_list=ME_vars)
         
        ID_table_new = tf.placeholder(tf.float32, ID_table.shape)
        ME_assign_op = tf.assign(ID_table, ID_table_new)
        
        def predict_warm(sess, X, Title):
            feed_dict = {inputs[col]: X[col] for col in columns}
            feed_dict = {inputs["title"]: Title,
                         self.dropout_keep: 1.0,
                         **feed_dict}
            return sess.run(yhat, feed_dict)
        
        def predict_ME(sess, X, Title):
            feed_dict = {inputs[col]: X[col] for col in columns}
            feed_dict = {inputs["title"]: Title,
                         self.dropout_keep: 1.0,
                         **feed_dict}
            return sess.run(cold_yhat_a, feed_dict)
        
        # log: Rel_Predict_ME
        def Predict_Rel_ME(sess, X, Title, rel_item_ids):
            feed_dict = {inputs[col]: X[col] for col in columns}
            feed_dict = {inputs["title"]: Title,
                         inputs["rel_item_ids"]: rel_item_ids,
                         self.dropout_keep: 1.0,
                         **feed_dict}
            return sess.run(cold_yhat_a, feed_dict)
        
        def get_rel_meta_embedding(sess, X, Title, rel_item_ids):
            feed_dict = {inputs[col]: np.reshape(X[col], (-1)) for col in columns}
            feed_dict = {inputs["title"]: np.reshape(Title, (-1, title_len)),
                         inputs["rel_item_ids"]: rel_item_ids,
                         self.dropout_keep: 1.0,
                         **feed_dict}
            return sess.run(meta_ID_emb, feed_dict)
        
        def get_meta_embedding(sess, X, Title):
            feed_dict = {inputs[col]: np.reshape(X[col], (-1)) for col in columns}
            feed_dict = {inputs["title"]: np.reshape(Title, (-1, title_len)),
                         self.dropout_keep: 1.0,
                         **feed_dict}
            return sess.run(meta_ID_emb, feed_dict)
        
        def assign_meta_embedding(sess, IDs, emb):
            # take the embedding matrix
            table = sess.run(ID_table)
            # replace the ID^th row by the new embedding
            for i in range(len(IDs)):
                table[IDs[i], :] = emb[i]
            return sess.run(ME_assign_op, feed_dict={ID_table_new: table})
        
#         def assign_meta_embedding(sess, ID, emb):
#             # take the embedding matrix
#             table = sess.run(ID_table)
#             # replace the ID^th row by the new embedding
#             table[ID, :] = emb
#             return sess.run(ME_assign_op, feed_dict={ID_table_new: table})
        
        def train_pretrain(sess, X, Title, y, embedding_only=False):
            # original training on batch
            feed_dict = {inputs[col]: X[col] for col in columns}
            feed_dict = {inputs["title"]: Title,
                         self.dropout_keep: 0.8,
                         **feed_dict}
            feed_dict[label] = y.reshape((-1,1))
            return sess.run([
                warm_loss, warm_update_emb_op if embedding_only else warm_update_op 
            ], feed_dict=feed_dict)
        
        def train_warm(sess, X, Title, y, embedding_only=False):
            # original training on batch
            feed_dict = {inputs[col]: X[col] for col in columns}
            feed_dict = {inputs["title"]: Title,
                         self.dropout_keep: 0.8,
                         **feed_dict}
            feed_dict[label] = y.reshape((-1,1))
            
            return sess.run([
                warm_loss, warm_update_emb_op if embedding_only else warm_update_op 
            ], feed_dict=feed_dict)
        
        def train_ME(sess, X, Title, y, 
                     X_b, Title_b, y_b):
            # train the embedding generator
            feed_dict = {inputs[col]: X[col] for col in columns}
            feed_dict = {inputs["title"]: Title,
                         self.dropout_keep: 0.8,
                         **feed_dict}
            feed_dict[label] = y.reshape((-1,1))
            feed_dict_b = {inputs_b[col]: X_b[col] for col in columns}
            feed_dict_b = {inputs_b["title"]: Title_b,
                           **feed_dict_b}
            feed_dict_b[label_b] = y_b.reshape((-1,1))
            return sess.run([
                cold_loss_a, cold_loss_b, ME_update_op
            ], feed_dict={**feed_dict, **feed_dict_b})
        
        # log: train_Rel_ME
        def train_Rel_ME(sess, X, Title, rel_item_id, y, 
                     X_b, Title_b, rel_item_id_b, y_b):
            # train the embedding generator
            feed_dict = {inputs[col]: X[col] for col in columns}
            feed_dict = {inputs["title"]: Title,
                         inputs["rel_item_ids"]: rel_item_id, # log: rel_item_id
                         self.dropout_keep: 0.8,
                         **feed_dict}
            feed_dict[label] = y.reshape((-1,1))
            feed_dict_b = {inputs_b[col]: X_b[col] for col in columns}
            feed_dict_b = {inputs_b["title"]: Title_b,
                           inputs["rel_item_ids"]: rel_item_id_b, # log: rel_item_id
                           **feed_dict_b}
            feed_dict_b[label_b] = y_b.reshape((-1,1))
            return sess.run([
                cold_loss_a, cold_loss_b, ME_update_op
            ], feed_dict={**feed_dict, **feed_dict_b})
        
        self.predict_warm = predict_warm
        self.predict_ME = predict_ME
        self.predict_Rel_ME = Predict_Rel_ME
        self.train_warm = train_warm
        self.train_ME = train_ME
        self.train_Rel_ME = train_Rel_ME
        self.get_meta_embedding = get_meta_embedding
        self.get_rel_meta_embedding = get_rel_meta_embedding
        self.assign_meta_embedding = assign_meta_embedding

    def construct_conv_layers(self, depth, embeddings, name):
        nc = self.nc
        layer_num = len(nc)
        iszs = [1] + nc[:-1]
        oszs = nc
        P = []
        for i in range(layer_num - 1):
            P.append(self._conv_weight(depth[i], iszs[i], oszs[i], name+'conv_layer_%d'%i))  # first 5 layers
        P.append(self._conv_weight(1, iszs[layer_num - 1], oszs[layer_num - 1], name+'conv_layer_output'))

        # Interaction Cube
        # self.positive_embeddings = tf.concat(self.individual_embeddings, axis=1)

        # self.split = tf.split(axis=1, num_or_size_splits=self.num_field, value=self.positive_embeddings)  # split as field

        split = embeddings

        num_field = len(split)
        print('Num fields: ', num_field)
        # build interaction cube
        for i in range(0, num_field):
            for j in range(i + 1, num_field):
                content = 'split[' + str(i) + ']'
                x = eval(content)
                content1 = 'split[' + str(j) + ']'
                y = eval(content1)
                relation = tf.matmul(tf.transpose(x, perm=[0, 2, 1]), y)
                net_input = tf.expand_dims(relation, 1)
                if i == 0 and j == 1:
                    cube = net_input
                else:
                    cube = tf.concat([cube, net_input], 1)
        positive_cube = tf.expand_dims(cube, -1)

        print(positive_cube.shape)

        # 3D Convolution Layers
        layer = []
        positive_input = positive_cube
        i = 0
        for p in P:
            # convolution
            layer.append(self._conv_layer(depth[i], positive_input, p))
            i = i + 1
            positive_input = layer[-1]
            print(positive_input.shape)
        return tf.nn.dropout(layer[-1], self.dropout_keep)
    # functions for CFM

    def weight_variable(self, shape, name):
        return tf.get_variable(name, shape=shape, initializer=tf.initializers.constant(0.))

    def bias_variable(self, shape, name):
        return tf.get_variable(name, shape=shape, initializer=tf.initializers.constant(0.))

    def _regular(self, params):
        res = 0
        for param in params:
            res += tf.reduce_sum(tf.square(param[0])) + tf.reduce_sum(tf.square(param[1]))
        return res

    def _conv_weight(self, deep, isz, osz, name):
        return (self.weight_variable([deep, 2, 2, isz, osz], name+'_weight'), self.bias_variable([osz], name+'_bias'))

    def _conv_layer(self, depth, input, P):
        '''
        Convolution layer of 3D CNN
        :param input:
        :param P: weights and bias
        :return: convolution result
        '''
        conv = tf.nn.conv3d(input, P[0], strides=[1, depth, 2, 2, 1], padding='VALID')
        return tf.nn.relu(conv + P[1])  # bias_add and activate

def predict_on_batch(sess, predict_func, test_x, test_t, batchsize=800):
    n_samples_test = test_x.shape[0]
    n_batch_test = n_samples_test//batchsize
    test_pred = np.zeros(n_samples_test)
    for i_batch in range(n_batch_test):
        batch_x = test_x.iloc[i_batch*batchsize:(i_batch+1)*batchsize]
        batch_t = test_t[i_batch*batchsize:(i_batch+1)*batchsize]
        _pred = predict_func(sess, batch_x, batch_t)
        test_pred[i_batch*batchsize:(i_batch+1)*batchsize] = _pred.reshape(-1)
    if n_batch_test*batchsize<n_samples_test:
        batch_x = test_x.iloc[n_batch_test*batchsize:]
        batch_t = test_t[n_batch_test*batchsize:]
        _pred = predict_func(sess, batch_x, batch_t)
        test_pred[n_batch_test*batchsize:] = _pred.reshape(-1)
    return test_pred

# log: rel_predict
def predict_rel_on_batch(sess, predict_func, test_x, test_t, test_r, batchsize=800):
    n_samples_test = test_x.shape[0]
    n_batch_test = n_samples_test//batchsize
    test_pred = np.zeros(n_samples_test)
    for i_batch in range(n_batch_test):
        batch_x = test_x.iloc[i_batch*batchsize:(i_batch+1)*batchsize]
        batch_t = test_t[i_batch*batchsize:(i_batch+1)*batchsize]
        batch_r = test_r[i_batch*batchsize:(i_batch+1)*batchsize]
        _pred = predict_func(sess, batch_x, batch_t, batch_r)
        test_pred[i_batch*batchsize:(i_batch+1)*batchsize] = _pred.reshape(-1)
    if n_batch_test*batchsize<n_samples_test:
        batch_x = test_x.iloc[n_batch_test*batchsize:]
        batch_t = test_t[n_batch_test*batchsize:]
        batch_r = test_r[n_batch_test*batchsize:]
        _pred = predict_func(sess, batch_x, batch_t, batch_r)
        test_pred[n_batch_test*batchsize:] = _pred.reshape(-1)
    return test_pred


In [7]:
tf.reset_default_graph()
model = Meta_Model(ID_col, item_col, context_col, num_words_dict, model=MODEL,
                   emb_size=EMB_SIZE, alpha=ALPHA,
                   warm_lr=LR, cold_lr=LR/10., ME_lr=LR, latent_size=LATENT_SIZE, is_pretrain=True, is_meta=False)

Instructions for updating:
Use keras.layers.Dense instead.
Instructions for updating:
Please use `layer.__call__` method instead.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [8]:
# ************* Evaluating DisNet *****************

"""
Pre-train the base model
"""
batchsize = BATCHSIZE

sess = tf.Session()
sess.run(tf.global_variables_initializer())
saver = tf.train.Saver()

n_samples = pre_x.shape[0]
print('Num samples: ', n_samples)
n_batch = n_samples//batchsize
for _ in range(1):
    for i_batch in tqdm(range(n_batch)):
        batch_x = pre_x.iloc[i_batch*batchsize:(i_batch+1)*batchsize]
        batch_t = pre_t[i_batch*batchsize:(i_batch+1)*batchsize]
        batch_y = pre_y[i_batch*batchsize:(i_batch+1)*batchsize]
        loss, _ = model.train_warm(sess, batch_x, batch_t, batch_y)

test_pred_test = predict_on_batch(sess, model.predict_warm, 
                                  pop_x_test, pop_t_test)
logloss_base_cold = test_loss_test = log_loss(pop_y_test, test_pred_test)
print("[pre-train]\n\ttest-test loss: {:.6f}".format(test_loss_test))
auc_base_cold = test_auc_test = roc_auc_score(pop_y_test, test_pred_test)
print("[pre-train]\n\ttest-test auc: {:.6f}".format(test_auc_test))
save_path = saver.save(sess, saver_path)
print("Model saved in path: %s" % save_path)

  0%|          | 0/1869 [00:00<?, ?it/s]

Num samples:  747782


100%|██████████| 1869/1869 [00:25<00:00, 71.95it/s]


[pre-train]
	test-test loss: 0.901675
[pre-train]
	test-test auc: 0.740235
Model saved in path: saver/model-DisNet


In [9]:
tf.reset_default_graph()
model = Meta_Model(ID_col, item_col, context_col, num_words_dict, model=MODEL,
                   emb_size=EMB_SIZE, alpha=ALPHA,
                   warm_lr=LR, cold_lr=LR/10., ME_lr=LR, latent_size=LATENT_SIZE, is_pretrain=False, is_meta=False)

context_input:  Tensor("base_model/context_net/concat/concat:0", shape=(?, 128), dtype=float32)
Collecting shifting vars:  <tf.Variable 'base_model/context_net/con_net_1/kernel:0' shape=(128, 64) dtype=float32_ref>
Collecting shifting vars:  <tf.Variable 'base_model/context_net/con_net_1/bias:0' shape=(64,) dtype=float32_ref>
Collecting shifting vars:  <tf.Variable 'base_model/context_net/con_net_2/kernel:0' shape=(64, 64) dtype=float32_ref>
Collecting shifting vars:  <tf.Variable 'base_model/context_net/con_net_2/bias:0' shape=(64,) dtype=float32_ref>
Collecting shifting vars:  <tf.Variable 'base_model/context_net/iso_net_2/kernel:0' shape=(128, 64) dtype=float32_ref>
Collecting shifting vars:  <tf.Variable 'base_model/context_net/iso_net_2/bias:0' shape=(64,) dtype=float32_ref>
Collecting shifting vars:  <tf.Variable 'base_model/merge_net/merge_net_1/kernel:0' shape=(128, 64) dtype=float32_ref>
Collecting shifting vars:  <tf.Variable 'base_model/merge_net/merge_net_1/bias:0' shape=(6

In [10]:
"""
Popular Data
"""
print(ISO, MODEL, model.is_pretrain)
batchsize = BATCHSIZE
save_path = 'saver/model-'+MODEL

sess = tf.Session()
sess.run(tf.global_variables_initializer())
var_list = []
for var in tf.global_variables():
    if MODEL != 'CFM' and str(var).find('Adam') < 0 and str(var).find('beta') < 0 \
        and str(var).find('table_Location') < 0 \
        and str(var).find('output_part') < 0\
        and str(var).find('context_net') < 0\
        and str(var).find('context_models') < 0:
        print('Define saver: ', var)
        var_list.append(var)
    
    if MODEL == 'CFM' and str(var).find('table_Location') < 0 and str(var).find('table') >= 0:
        var_list.append(var)
saver = tf.train.Saver(var_list=var_list)
saver.restore(sess, save_path)

n_samples = pop_x_train.shape[0]
print('Num samples: ', n_samples)
n_batch = n_samples//batchsize
for _ in range(1):
    for i_batch in tqdm(range(n_batch)):
        batch_x = pop_x_train.iloc[i_batch*batchsize:(i_batch+1)*batchsize]
        batch_t = pop_t_train[i_batch*batchsize:(i_batch+1)*batchsize]
        batch_y = pop_y_train[i_batch*batchsize:(i_batch+1)*batchsize]
        loss, _ = model.train_warm(sess, batch_x, batch_t, batch_y)

    test_pred_test = predict_on_batch(sess, model.predict_warm, 
                                      pop_x_test, pop_t_test)
    logloss_base_cold = test_loss_test = log_loss(pop_y_test, test_pred_test)
    print("[pre-train]\n\ttest-test loss: {:.6f}".format(test_loss_test))
    auc_base_cold = test_auc_test = roc_auc_score(pop_y_test, test_pred_test)
    print("[pre-train]\n\ttest-test auc: {:.6f}".format(test_auc_test))

saver2 = tf.train.Saver()
save_path = save_path + '_Pop'
save_path = saver2.save(sess, save_path)
print("Model saved in path: %s" % save_path)

nn DisNet False
Define saver:  <tf.Variable 'base_model/embeddings/table_ISBN:0' shape=(270171, 128) dtype=float32_ref>
Define saver:  <tf.Variable 'base_model/embeddings/table_Book-Author:0' shape=(101582, 128) dtype=float32_ref>
Define saver:  <tf.Variable 'base_model/embeddings/table_Year-Of-Publication:0' shape=(117, 128) dtype=float32_ref>
Define saver:  <tf.Variable 'base_model/embeddings/table_Publisher:0' shape=(16729, 128) dtype=float32_ref>
Define saver:  <tf.Variable 'base_model/embeddings/table_User-ID:0' shape=(92108, 128) dtype=float32_ref>
Define saver:  <tf.Variable 'base_model/embeddings/table_Age:0' shape=(142, 128) dtype=float32_ref>
Define saver:  <tf.Variable 'base_model/DisNet_user_net/user_net_1/kernel:0' shape=(256, 64) dtype=float32_ref>
Define saver:  <tf.Variable 'base_model/DisNet_user_net/user_net_1/bias:0' shape=(64,) dtype=float32_ref>
Define saver:  <tf.Variable 'base_model/DisNet_user_net/user_net_2/kernel:0' shape=(64, 64) dtype=float32_ref>
Define sav

  0%|          | 0/373 [00:00<?, ?it/s]

Num samples:  149221


100%|██████████| 373/373 [00:05<00:00, 65.12it/s]


[pre-train]
	test-test loss: 0.525629
[pre-train]
	test-test auc: 0.785843
Model saved in path: saver/model-DisNet_Pop


In [71]:
# ************* Evaluating RM-IdEG*************
'''
log: Train the Relational Meta-Embedding generator
'''
new_x_test_a, new_t_test_a, new_r_test_a, new_y_test_a = get_rel_data("./disnet_data/test_one_shot_a.pkl")
new_x_test_b, new_t_test_b, new_r_test_b, new_y_test_b = get_rel_data("./disnet_data/test_one_shot_b.pkl")

MINIBATCHSIZE = 50
minibatchsize = MINIBATCHSIZE
batch_n_ID = 1
batchsize = MINIBATCHSIZE * batch_n_ID
n_epoch = 1
tf.reset_default_graph()
model = Meta_Model(ID_col, item_col, context_col, num_words_dict, model=MODEL,
                   emb_size=EMB_SIZE, alpha=ALPHA,
                   warm_lr=LR, cold_lr=LR, ME_lr=LR, latent_size=LATENT_SIZE, is_pretrain=False, is_meta=True,
                  REG=0.5)
save_path = 'saver/model-'+MODEL + '_Pop'

sess = tf.Session()
sess.run(tf.global_variables_initializer())
var_list = []
for var in tf.global_variables():
    if MODEL != 'CFM' and str(var).find('Adam') < 0 and str(var).find('beta') < 0 \
        and str(var).find('meta_embeddings') < 0:
        print('Define saver: ', var)
        var_list.append(var)
    
    if MODEL == 'CFM' and str(var).find('table_Location') < 0 and str(var).find('table') >= 0:
        var_list.append(var)
saver = tf.train.Saver(var_list=var_list)
saver.restore(sess, save_path)

best_auc = 0
best_loss = 10
for i_epoch in range(n_epoch):
    # Read the few-shot training data of big ads
    if i_epoch==0:
        # log: get_rel_data
        train_x_a, train_t_a, train_r_a, train_y_a = get_rel_data("./disnet_data/train_one_shot_a.pkl")
        train_x_b, train_t_b, train_r_b, train_y_b = get_rel_data("./disnet_data/train_one_shot_b.pkl")
    
    n_samples = train_x_a.shape[0]
    n_batch = n_samples//batchsize
    # Start training
    for _ in range(3):
        # log: add rel feature
        for i_batch in tqdm(range(n_batch)):
            batch_x_a = train_x_a.iloc[i_batch*batchsize:(i_batch+1)*batchsize]
            batch_t_a = train_t_a[i_batch*batchsize:(i_batch+1)*batchsize]
            batch_r_a = train_r_a[i_batch*batchsize:(i_batch+1)*batchsize]
            batch_y_a = train_y_a[i_batch*batchsize:(i_batch+1)*batchsize]
            batch_x_b = train_x_b.iloc[i_batch*batchsize:(i_batch+1)*batchsize]
            batch_t_b = train_t_b[i_batch*batchsize:(i_batch+1)*batchsize]
            batch_r_b = train_r_b[i_batch*batchsize:(i_batch+1)*batchsize]
            batch_y_b = train_y_b[i_batch*batchsize:(i_batch+1)*batchsize]
            loss_a, loss_b, _ = model.train_Rel_ME(sess, 
                                               batch_x_a, batch_t_a, batch_r_a, batch_y_a, 
                                               batch_x_b, batch_t_b, batch_r_b, batch_y_b, )
        # on epoch end
        # log: add rel feature
        test_pred_test = predict_rel_on_batch(sess, model.predict_Rel_ME, 
                                          train_x_b, train_t_b, train_r_b)
        logloss_ME_cold = test_loss_test = log_loss(train_y_b, test_pred_test)
        print("[Meta-Embedding]\n\t Training loss: {:.6f}".format(test_loss_test))
        auc_ME_cold = test_auc_test = roc_auc_score(train_y_b, test_pred_test)
        print("[Meta-Embedding]\n\t Training auc: {:.6f}".format(test_auc_test))
        
        test_pred_test = predict_rel_on_batch(sess, model.predict_Rel_ME, 
                                          new_x_test_b, new_t_test_b, new_r_test_b)
        logloss_ME_cold = test_loss_test = log_loss(new_y_test_b, test_pred_test)
        print("[Meta-Embedding]\n\t Test loss: {:.6f}".format(test_loss_test))
        auc_ME_cold = test_auc_test = roc_auc_score(new_y_test_b, test_pred_test)
        print("[Meta-Embedding]\n\t Test auc: {:.6f}".format(test_auc_test))

saver2 = tf.train.Saver()
save_path = 'saver/model-'+MODEL + '_Rel_Meta'
save_path = saver2.save(sess, save_path)
print("Model saved in path: %s" % save_path)

context_input:  Tensor("base_model/context_net/concat/concat:0", shape=(?, 128), dtype=float32)
context_input:  Tensor("base_model/context_net_1/concat/concat:0", shape=(?, 128), dtype=float32)
context_input:  Tensor("base_model/context_net_2/concat/concat:0", shape=(?, 128), dtype=float32)
Define saver:  <tf.Variable 'base_model/embeddings/table_ISBN:0' shape=(270171, 128) dtype=float32_ref>
Define saver:  <tf.Variable 'base_model/embeddings/table_Book-Author:0' shape=(101582, 128) dtype=float32_ref>
Define saver:  <tf.Variable 'base_model/embeddings/table_Year-Of-Publication:0' shape=(117, 128) dtype=float32_ref>
Define saver:  <tf.Variable 'base_model/embeddings/table_Publisher:0' shape=(16729, 128) dtype=float32_ref>
Define saver:  <tf.Variable 'base_model/embeddings/table_User-ID:0' shape=(92108, 128) dtype=float32_ref>
Define saver:  <tf.Variable 'base_model/embeddings/table_Age:0' shape=(142, 128) dtype=float32_ref>
Define saver:  <tf.Variable 'base_model/embeddings/table_Locati

100%|██████████| 773/773 [00:07<00:00, 101.60it/s]
  2%|▏         | 12/773 [00:00<00:06, 111.02it/s]

[Meta-Embedding]
	 Training loss: 0.494136
[Meta-Embedding]
	 Training auc: 0.797340
[Meta-Embedding]
	 Test loss: 0.495405
[Meta-Embedding]
	 Test auc: 0.794670


100%|██████████| 773/773 [00:07<00:00, 103.01it/s]
  1%|▏         | 11/773 [00:00<00:07, 104.71it/s]

[Meta-Embedding]
	 Training loss: 0.492623
[Meta-Embedding]
	 Training auc: 0.798889
[Meta-Embedding]
	 Test loss: 0.495321
[Meta-Embedding]
	 Test auc: 0.794854


100%|██████████| 773/773 [00:07<00:00, 103.25it/s]


[Meta-Embedding]
	 Training loss: 0.490863
[Meta-Embedding]
	 Training auc: 0.800784
[Meta-Embedding]
	 Test loss: 0.495347
[Meta-Embedding]
	 Test auc: 0.794853
Model saved in path: saver/model-DisNet_Rel_Meta


In [72]:
# batchsize = 50
# '''
# Testing
# '''
# tf.reset_default_graph()
# model = Meta_Model(ID_col, item_col, context_col, num_words_dict, model=MODEL,
#                    emb_size=EMB_SIZE, alpha=ALPHA,
#                    warm_lr=LR, cold_lr=LR/10., ME_lr=LR, latent_size=LATENT_SIZE, is_pretrain=False, is_meta=False)

# save_path = 'saver/model-'+MODEL + '_Pop'
# sess = tf.Session()
# sess.run(tf.global_variables_initializer())
# var_list = []
# for var in tf.global_variables():
#     if MODEL != 'CFM' and str(var).find('Adam') < 0 and str(var).find('beta') < 0:
#         print('Define saver: ', var)
#         var_list.append(var)
    
#     if MODEL == 'CFM' and str(var).find('table_Location') < 0 and str(var).find('table') >= 0:
#         var_list.append(var)
# saver = tf.train.Saver(var_list=var_list)
# saver.restore(sess, save_path)

# minibatchsize = MINIBATCHSIZE
# print(batchsize)
# i = 1
# test_n_ID = len(new_x_test_a[ID_col].drop_duplicates())
# print(test_n_ID)

# test_pred_test = predict_on_batch(sess, model.predict_warm, 
#                                   new_x_test_b, new_t_test_b)
# logloss_base_cold = test_loss_test = log_loss(new_y_test_b, test_pred_test)
# auc_base_cold = test_auc_test = roc_auc_score(new_y_test_b, test_pred_test)

# print("COLD-START BASELINE:")
# print("\t Loss: {:.4f}".format(logloss_base_cold))
# print("\t AUC: {:.4f}".format(auc_base_cold))

# for i in tqdm(range(int(np.ceil(test_n_ID//batchsize)))):
#     batch_x = new_x_test_a[i*batchsize:(i+1)*batchsize]
#     batch_t = new_t_test_a[i*batchsize:(i+1)*batchsize]
#     batch_y = new_y_test_a[i*batchsize:(i+1)*batchsize]
#     for j in range(1):
#         model.train_warm(sess, batch_x, batch_t, batch_y, 
#                          embedding_only=True)
# test_pred_test = predict_on_batch(sess, model.predict_warm, 
#                                   new_x_test_b, new_t_test_b)
# logloss_base_batcha = test_loss_test = log_loss(new_y_test_b, test_pred_test)
# print("[baseline]\n\ttest-test loss:\t{:.4f}, improvement: {:.2%}".format(
#     test_loss_test, 1-test_loss_test/logloss_base_cold))
# auc_base_batcha = test_auc_test = roc_auc_score(new_y_test_b, test_pred_test)
# print("[baseline]\n\ttest-test auc:\t{:.4f}, improvement: {:.2%}".format(
#     test_auc_test, test_auc_test/auc_base_cold-1))
# print("="*60)

In [73]:
tf.reset_default_graph()
model = Meta_Model(ID_col, item_col, context_col, num_words_dict, model=MODEL,
                   emb_size=EMB_SIZE, alpha=ALPHA,
                   warm_lr=LR, cold_lr=LR/10., ME_lr=LR, latent_size=LATENT_SIZE, is_pretrain=False, is_meta=True)

save_path = 'saver/model-'+MODEL + '_Rel_Meta'
sess = tf.Session()
sess.run(tf.global_variables_initializer())
var_list = []
for var in tf.global_variables():
    if str(var).find('Adam') < 0 and str(var).find('beta') < 0:
        print('Define saver: ', var)
        var_list.append(var)
saver = tf.train.Saver(var_list=var_list)
saver.restore(sess, save_path)

test_pred_test = predict_rel_on_batch(sess, model.predict_Rel_ME, 
                                  new_x_test_b, new_t_test_b, new_r_test_b)
logloss_ME_batcha = test_loss_test = log_loss(new_y_test_b, test_pred_test)
print("[Meta-Embedding]\n\ttest-test loss:\t{:.4f}, improvement: {:.2%}".format(
    test_loss_test, 1-test_loss_test/logloss_base_cold))
auc_ME_batcha = test_auc_test = roc_auc_score(new_y_test_b, test_pred_test)
print("[Meta-Embedding]\n\ttest-test auc:\t{:.4f}, improvement: {:.2%}".format(
    test_auc_test, test_auc_test/auc_base_cold-1))
print(auc_ME_batcha)

batchsize = 20
for i in tqdm(range(int(np.ceil(test_n_ID/batchsize)))):
    batch_x = new_x_test_a[i*batchsize:(i+1)*batchsize]
    batch_t = new_t_test_a[i*batchsize:(i+1)*batchsize]
    batch_r = new_r_test_a[i*batchsize:(i+1)*batchsize]
    batch_y = new_y_test_a[i*batchsize:(i+1)*batchsize]
    IDs = batch_x[ID_col].to_numpy()
    embeddings = model.get_rel_meta_embedding(
        sess, batch_x, batch_t, batch_r
    )
    model.assign_meta_embedding(sess, IDs, embeddings)
    for j in range(6):
        model.train_warm(sess, batch_x, batch_t, batch_y, 
                         embedding_only=True)
    
test_pred_test = predict_on_batch(sess, model.predict_warm, 
                                  new_x_test_b, new_t_test_b)
logloss_ME_batcha = test_loss_test = log_loss(new_y_test_b, test_pred_test)
print("[Meta-Embedding]\n\ttest-test loss:\t{:.4f}, improvement: {:.2%}".format(
    test_loss_test, 1-test_loss_test/logloss_base_cold))
auc_ME_batcha = test_auc_test = roc_auc_score(new_y_test_b, test_pred_test)
print("[Meta-Embedding]\n\ttest-test auc:\t{:.4f}, improvement: {:.2%}".format(
    test_auc_test, test_auc_test/auc_base_cold-1))
print(auc_ME_batcha)

context_input:  Tensor("base_model/context_net/concat/concat:0", shape=(?, 128), dtype=float32)
context_input:  Tensor("base_model/context_net_1/concat/concat:0", shape=(?, 128), dtype=float32)
context_input:  Tensor("base_model/context_net_2/concat/concat:0", shape=(?, 128), dtype=float32)
Define saver:  <tf.Variable 'base_model/embeddings/table_ISBN:0' shape=(270171, 128) dtype=float32_ref>
Define saver:  <tf.Variable 'base_model/embeddings/table_Book-Author:0' shape=(101582, 128) dtype=float32_ref>
Define saver:  <tf.Variable 'base_model/embeddings/table_Year-Of-Publication:0' shape=(117, 128) dtype=float32_ref>
Define saver:  <tf.Variable 'base_model/embeddings/table_Publisher:0' shape=(16729, 128) dtype=float32_ref>
Define saver:  <tf.Variable 'base_model/embeddings/table_User-ID:0' shape=(92108, 128) dtype=float32_ref>
Define saver:  <tf.Variable 'base_model/embeddings/table_Age:0' shape=(142, 128) dtype=float32_ref>
Define saver:  <tf.Variable 'base_model/embeddings/table_Locati

  0%|          | 0/488 [00:00<?, ?it/s]

[Meta-Embedding]
	test-test loss:	0.4953, improvement: 0.27%
[Meta-Embedding]
	test-test auc:	0.7949, improvement: 0.11%
0.7948531520003079


100%|██████████| 488/488 [00:42<00:00, 11.50it/s]


[Meta-Embedding]
	test-test loss:	0.4951, improvement: 0.32%
[Meta-Embedding]
	test-test auc:	0.7952, improvement: 0.15%
0.7951982267592896
