In [5]:
class dotdict(dict):
    """dot.notation access to dictionary attributes"""
    __getattr__ = dict.get
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__
    
arg_dict = {'weights_path': '',
            'data_path': '../Data/',
            'proj_path': '',
            'dataset': 'amazon-book-dense',
            'pretrain': 1,
            'verbose': 50,
            'epoch': 1,
            'current_epoch': 1001,
            'save_model': 5,
            'embed_size': 64,
            'kge_size': 64,
            'layer_size': '[64, 32, 16]',
            'batch_size': 1024,
            'batch_size_kg': 2048,
            'regs': '[1e-5,1e-5]',
            'lr': 0.0001,
            'model_type': 'kgat',
            'adj_type': 'si',
            'alg_type': 'bi',
            'adj_uni_type': 'sum',
            'gpu_id': 0,
            'node_dropout': '[0.1]',
            'mess_dropout': '[0.1,0.1,0.1]',
            'Ks': '[20, 40, 60, 80, 100]',
            'save_flag': 1,
            'test_flag': 'part',
            'report': 0,
            'use_att': True,
            'use_kge': True,
            'l1_flag': True}

In [6]:
args = dotdict(arg_dict)

In [8]:
'''
Created on Dec 18, 2018
Tensorflow Implementation of Knowledge Graph Attention Network (KGAT) model in:
Wang Xiang et al. KGAT: Knowledge Graph Attention Network for Recommendation. In KDD 2019.
@author: Xiang Wang (xiangwang@u.nus.edu)
'''
import utility.metrics as metrics
import multiprocessing
import heapq
import numpy as np

from utility.loader_bprmf import BPRMF_loader

from utility.loader_cke import CKE_loader
from utility.loader_nfm import NFM_loader
from utility.loader_kgat import KGAT_loader
from utility.loader_cfkg import CFKG_loader

cores = multiprocessing.cpu_count() // 2

Ks = eval(args.Ks)

if args.model_type == 'bprmf':
    data_generator = BPRMF_loader(args=args, path=args.data_path + args.dataset)
    batch_test_flag = False

elif args.model_type == 'cke':
    data_generator = CKE_loader(args=args, path=args.data_path + args.dataset)
    batch_test_flag = False

elif args.model_type in ['cfkg']:
    data_generator = CFKG_loader(args=args, path=args.data_path + args.dataset)
    batch_test_flag = True

elif args.model_type in ['fm','nfm']:
    data_generator = NFM_loader(args=args, path=args.data_path + args.dataset)
    batch_test_flag = True

elif args.model_type in ['kgat']:
    data_generator = KGAT_loader(args=args, path=args.data_path + args.dataset)
    batch_test_flag = False
    
elif args.model_type in ['kgatv2']:
    data_generator = KGAT_loader(args=args, path=args.data_path + args.dataset)
    batch_test_flag = False

USR_NUM, ITEM_NUM = data_generator.n_users, data_generator.n_items
N_TRAIN, N_TEST = data_generator.n_train, data_generator.n_test
BATCH_SIZE = args.batch_size

def ranklist_by_heapq(user_pos_test, test_items, rating, Ks):
    item_score = {}
    for i in test_items:
        item_score[i] = rating[i]

    K_max = max(Ks)
    K_max_item_score = heapq.nlargest(K_max, item_score, key=item_score.get)

    r = []
    for i in K_max_item_score:
        if i in user_pos_test:
            r.append(1)
        else:
            r.append(0)
    auc = 0.
    return r, auc

def get_auc(item_score, user_pos_test):
    item_score = sorted(item_score.items(), key=lambda kv: kv[1])
    item_score.reverse()
    item_sort = [x[0] for x in item_score]
    posterior = [x[1] for x in item_score]

    r = []
    for i in item_sort:
        if i in user_pos_test:
            r.append(1)
        else:
            r.append(0)
    auc = metrics.auc(ground_truth=r, prediction=posterior)
    return auc

def ranklist_by_sorted(user_pos_test, test_items, rating, Ks):
    item_score = {}
    for i in test_items:
        item_score[i] = rating[i]

    K_max = max(Ks)
    K_max_item_score = heapq.nlargest(K_max, item_score, key=item_score.get)

    r = []
    for i in K_max_item_score:
        if i in user_pos_test:
            r.append(1)
        else:
            r.append(0)
    auc = get_auc(item_score, user_pos_test)
    return r, auc


def get_performance(user_pos_test, r, auc, Ks):
    precision, recall, ndcg, hit_ratio = [], [], [], []

    for K in Ks:
        precision.append(metrics.precision_at_k(r, K))
        recall.append(metrics.recall_at_k(r, K, len(user_pos_test)))
        ndcg.append(metrics.ndcg_at_k(r, K))
        hit_ratio.append(metrics.hit_at_k(r, K))

    return {'recall': np.array(recall), 'precision': np.array(precision),
            'ndcg': np.array(ndcg), 'hit_ratio': np.array(hit_ratio), 'auc': auc}


def test_one_user(x):
    # user u's ratings for user u
    rating = x[0]
    #uid
    u = x[1]
    #user u's items in the training set
    try:
        training_items = data_generator.train_user_dict[u]
    except Exception:
        training_items = []
    #user u's items in the test set
    user_pos_test = data_generator.test_user_dict[u]

    all_items = set(range(ITEM_NUM))

    test_items = list(all_items - set(training_items))

    if args.test_flag == 'part':
        r, auc = ranklist_by_heapq(user_pos_test, test_items, rating, Ks)
    else:
        r, auc = ranklist_by_sorted(user_pos_test, test_items, rating, Ks)

    # # .......checking.......
    # try:
    #     assert len(user_pos_test) != 0
    # except Exception:
    #     print(u)
    #     print(training_items)
    #     print(user_pos_test)
    #     exit()
    # # .......checking.......

    return get_performance(user_pos_test, r, auc, Ks)


def test(sess, model, users_to_test, drop_flag=False, batch_test_flag=False):
    result = {'precision': np.zeros(len(Ks)), 'recall': np.zeros(len(Ks)), 'ndcg': np.zeros(len(Ks)),
              'hit_ratio': np.zeros(len(Ks)), 'auc': 0.}

    pool = multiprocessing.Pool(cores)

    if args.model_type in ['ripple']:

        u_batch_size = BATCH_SIZE
        i_batch_size = BATCH_SIZE // 20
    elif args.model_type in ['fm', 'nfm']:
        u_batch_size = BATCH_SIZE
        i_batch_size = BATCH_SIZE
    else:
        u_batch_size = BATCH_SIZE * 2
        i_batch_size = BATCH_SIZE

    test_users = users_to_test
    n_test_users = len(test_users)
    n_user_batchs = n_test_users // u_batch_size + 1

    count = 0

    for u_batch_id in range(n_user_batchs):
        start = u_batch_id * u_batch_size
        end = (u_batch_id + 1) * u_batch_size

        user_batch = test_users[start: end]

        if batch_test_flag:

            n_item_batchs = ITEM_NUM // i_batch_size + 1
            rate_batch = np.zeros(shape=(len(user_batch), ITEM_NUM))

            i_count = 0
            for i_batch_id in range(n_item_batchs):
                i_start = i_batch_id * i_batch_size
                i_end = min((i_batch_id + 1) * i_batch_size, ITEM_NUM)

                item_batch = range(i_start, i_end)

                feed_dict = data_generator.generate_test_feed_dict(model=model,
                                                                   user_batch=user_batch,
                                                                   item_batch=item_batch,
                                                                   drop_flag=drop_flag)
                i_rate_batch = model.eval(sess, feed_dict=feed_dict)
                i_rate_batch = i_rate_batch.reshape((-1, len(item_batch)))

                rate_batch[:, i_start: i_end] = i_rate_batch
                i_count += i_rate_batch.shape[1]

            assert i_count == ITEM_NUM

        else:
            item_batch = range(ITEM_NUM)
            feed_dict = data_generator.generate_test_feed_dict(model=model,
                                                               user_batch=user_batch,
                                                               item_batch=item_batch,
                                                               drop_flag=drop_flag)
            rate_batch = model.eval(sess, feed_dict=feed_dict)
            rate_batch = rate_batch.reshape((-1, len(item_batch)))

        user_batch_rating_uid = zip(rate_batch, user_batch)
        batch_result = pool.map(test_one_user, user_batch_rating_uid)
        count += len(batch_result)

        for re in batch_result:
            result['precision'] += re['precision']/n_test_users
            result['recall'] += re['recall']/n_test_users
            result['ndcg'] += re['ndcg']/n_test_users
            result['hit_ratio'] += re['hit_ratio']/n_test_users
            result['auc'] += re['auc']/n_test_users


    assert count == n_test_users
    pool.close()
    return result

[n_users, n_items]=[70679, 24915]
[n_train, n_test]=[652514, 193920]
[n_entities, n_relations, n_triples]=[113486, 39, 804829]
[batch_size, batch_size_kg]=[1024, 1263]
	convert ratings into adj mat done.
	convert 34 relational triples into adj mat done. @0.6497s


  d_inv = np.power(rowsum, -1).flatten()


	generate si-normalized adjacency matrix.
	reordering indices...
	reorganize all kg data done.
	sort meta-data done.
	sort all data done.


In [14]:
count = 0
for m in data_generator.adj_list:
    count += m.nnz
print(count)

2914686


In [5]:
'''
Created on Dec 18, 2018
Tensorflow Implementation of Knowledge Graph Attention Network (KGAT) model in:
Wang Xiang et al. KGAT: Knowledge Graph Attention Network for Recommendation. In KDD 2019.
@author: Xiang Wang (xiangwang@u.nus.edu)
'''
import tensorflow as tf
from utility.helper import *
from time import time

from BPRMF import BPRMF
from CKE import CKE
from CFKG import CFKG
from NFM import NFM
from KGAT import KGAT
from KGATv2 import KGATv2

from tqdm import tqdm

import os
import sys
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'

def load_pretrained_data(args):
    pre_model = 'mf'
    if args.pretrain == -2:
        pre_model = 'kgat'
    pretrain_path = '%spretrain/%s/%s.npz' % (args.proj_path, args.dataset, pre_model)
    try:
        pretrain_data = np.load(pretrain_path)
        print('load the pretrained bprmf model parameters.')
    except Exception:
        pretrain_data = None
    return pretrain_data


if __name__ == '__main__':
    # get argument settings.
    tf.set_random_seed(2019)
    np.random.seed(2019)

    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu_id)

    """
    *********************************************************
    Load Data from data_generator function.
    """
    config = dict()
    config['n_users'] = data_generator.n_users
    config['n_items'] = data_generator.n_items
    config['n_relations'] = data_generator.n_relations
    config['n_entities'] = data_generator.n_entities

    if args.model_type in ['kgat', 'kgatv2', 'cfkg']:
        "Load the laplacian matrix."
        config['A_in'] = sum(data_generator.lap_list)

        "Load the KG triplets."
        config['all_h_list'] = data_generator.all_h_list
        config['all_r_list'] = data_generator.all_r_list
        config['all_t_list'] = data_generator.all_t_list
        config['all_v_list'] = data_generator.all_v_list

    t0 = time()

    """
    *********************************************************
    Use the pretrained data to initialize the embeddings.
    """
    if args.pretrain in [-1, -2]:
        pretrain_data = load_pretrained_data(args)
    else:
        pretrain_data = None

    """
    *********************************************************
    Select one of the models.
    """
    if args.model_type == 'bprmf':
        model = BPRMF(data_config=config, pretrain_data=pretrain_data, args=args)

    elif args.model_type == 'cke':
        model = CKE(data_config=config, pretrain_data=pretrain_data, args=args)

    elif args.model_type in ['cfkg']:
        model = CFKG(data_config=config, pretrain_data=pretrain_data, args=args)

    elif args.model_type in ['nfm', 'fm']:
        model = NFM(data_config=config, pretrain_data=pretrain_data, args=args)

    elif args.model_type in ['kgat']:
        model = KGAT(data_config=config, pretrain_data=pretrain_data, args=args)
    elif args.model_type in ['kgatv2']:
        model = KGATv2(data_config=config, pretrain_data=pretrain_data, args=args)
        
    saver = tf.train.Saver()

    """
    *********************************************************
    Save the model parameters.
    """
    if args.save_flag == 1:
        if args.model_type in ['bprmf', 'cke', 'fm', 'cfkg']:
            weights_save_path = '%sweights/%s/%s/l%s_r%s' % (args.weights_path, args.dataset, args.model_type,
                                                             str(args.lr), '-'.join([str(r) for r in eval(args.regs)]))

        elif args.model_type in ['ncf', 'nfm', 'kgat', 'kgatv2']:
            layer = '-'.join([str(l) for l in eval(args.layer_size)])
            weights_save_path = '%sweights/%s/%s/%s/l%s_r%s' % (
                args.weights_path, args.dataset, args.model_type, layer, str(args.lr), '-'.join([str(r) for r in eval(args.regs)]))

        ensureDir(weights_save_path)
        save_saver = tf.train.Saver(max_to_keep=2)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    """
    *********************************************************
    Reload the model parameters to fine tune.
    """
    if args.pretrain == 1:
        if args.model_type in ['bprmf', 'cke', 'fm', 'cfkg']:
            pretrain_path = '%sweights/%s/%s/l%s_r%s' % (args.weights_path, args.dataset, args.model_type, str(args.lr),
                                                             '-'.join([str(r) for r in eval(args.regs)]))

        elif args.model_type in ['ncf', 'nfm', 'kgat', 'kgatv2']:
            layer = '-'.join([str(l) for l in eval(args.layer_size)])
            pretrain_path = '%sweights/%s/%s/%s/l%s_r%s' % (
                args.weights_path, args.dataset, args.model_type, layer, str(args.lr), '-'.join([str(r) for r in eval(args.regs)]))
        pretrain_path = 'weights/amazon-book-dense/kgat/64-32-16/l0.0001_r1e-05-1e-05'
        ckpt = tf.train.get_checkpoint_state(os.path.dirname(pretrain_path + '/checkpoint'))
        if ckpt and ckpt.model_checkpoint_path:
            print('a', ckpt)
            print('b', ckpt.model_checkpoint_path)
            sess.run(tf.global_variables_initializer())
            saver.restore(sess, ckpt.model_checkpoint_path)
            print('load the pretrained model parameters from: ', pretrain_path)

            # *********************************************************
            # get the performance from the model to fine tune.
            if args.report != 1:
                users_to_test = list(data_generator.test_user_dict.keys())

                ret = test(sess, model, users_to_test, drop_flag=False, batch_test_flag=batch_test_flag)
                cur_best_pre_0 = ret['recall'][0]

                pretrain_ret = 'pretrained model recall=[%.5f, %.5f], precision=[%.5f, %.5f], hit=[%.5f, %.5f],' \
                               'ndcg=[%.5f, %.5f], auc=[%.5f]' % \
                               (ret['recall'][0], ret['recall'][-1],
                                ret['precision'][0], ret['precision'][-1],
                                ret['hit_ratio'][0], ret['hit_ratio'][-1],
                                ret['ndcg'][0], ret['ndcg'][-1], ret['auc'])
                print(pretrain_ret)

                # *********************************************************
                # save the pretrained model parameters of mf (i.e., only user & item embeddings) for pretraining other models.
                if args.save_flag == -1:
                    user_embed, item_embed = sess.run(
                        [model.weights['user_embedding'], model.weights['item_embedding']],
                        feed_dict={})
                    # temp_save_path = '%spretrain/%s/%s/%s_%s.npz' % (args.proj_path, args.dataset, args.model_type, str(args.lr),
                    #                                                  '-'.join([str(r) for r in eval(args.regs)]))
                    temp_save_path = '%spretrain/%s/%s.npz' % (args.proj_path, args.dataset, args.model_type)
                    ensureDir(temp_save_path)
                    np.savez(temp_save_path, user_embed=user_embed, item_embed=item_embed)
                    print('save the weights of fm in path: ', temp_save_path)
                    exit()

                # *********************************************************
                # save the pretrained model parameters of kgat (i.e., user & item & kg embeddings) for pretraining other models.
                if args.save_flag == -2:
                    user_embed, entity_embed, relation_embed = sess.run(
                        [model.weights['user_embed'], model.weights['entity_embed'], model.weights['relation_embed']],
                        feed_dict={})

                    temp_save_path = '%spretrain/%s/%s.npz' % (args.proj_path, args.dataset, args.model_type)
                    ensureDir(temp_save_path)
                    np.savez(temp_save_path, user_embed=user_embed, entity_embed=entity_embed, relation_embed=relation_embed)
                    print('save the weights of kgat in path: ', temp_save_path)
                    exit()

        else:
            sess.run(tf.global_variables_initializer())
            cur_best_pre_0 = 0.
            print('without pretraining.')
    else:
        sess.run(tf.global_variables_initializer())
        cur_best_pre_0 = 0.
        print('without pretraining.')

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


using xavier initialization
#params: 11954960
a model_checkpoint_path: "weights/amazon-book-dense/kgat/64-32-16/l0.0001_r1e-05-1e-05/weights-600"
all_model_checkpoint_paths: "weights/amazon-book-dense/kgat/64-32-16/l0.0001_r1e-05-1e-05/weights-580"
all_model_checkpoint_paths: "weights/amazon-book-dense/kgat/64-32-16/l0.0001_r1e-05-1e-05/weights-600"

b weights/amazon-book-dense/kgat/64-32-16/l0.0001_r1e-05-1e-05/weights-600
INFO:tensorflow:Restoring parameters from weights/amazon-book-dense/kgat/64-32-16/l0.0001_r1e-05-1e-05/weights-600
load the pretrained model parameters from:  weights/amazon-book-dense/kgat/64-32-16/l0.0001_r1e-05-1e-05
pretrained model recall=[0.13414, 0.30470], precision=[0.01167, 0.00543], hit=[0.20664, 0.42788],ndcg=[0.08223, 0.12895], auc=[0.00000]


In [5]:
model.weights

{'user_embed': <tf.Variable 'user_embed:0' shape=(70679, 64) dtype=float32_ref>,
 'entity_embed': <tf.Variable 'entity_embed:0' shape=(113487, 64) dtype=float32_ref>,
 'relation_embed': <tf.Variable 'relation_embed:0' shape=(80, 64) dtype=float32_ref>,
 'trans_W': <tf.Variable 'Variable:0' shape=(80, 64, 64) dtype=float32_ref>,
 'W_gc_0': <tf.Variable 'W_gc_0:0' shape=(64, 64) dtype=float32_ref>,
 'b_gc_0': <tf.Variable 'b_gc_0:0' shape=(1, 64) dtype=float32_ref>,
 'W_bi_0': <tf.Variable 'W_bi_0:0' shape=(64, 64) dtype=float32_ref>,
 'b_bi_0': <tf.Variable 'b_bi_0:0' shape=(1, 64) dtype=float32_ref>,
 'W_mlp_0': <tf.Variable 'W_mlp_0:0' shape=(128, 64) dtype=float32_ref>,
 'b_mlp_0': <tf.Variable 'b_mlp_0:0' shape=(1, 64) dtype=float32_ref>,
 'W_gc_1': <tf.Variable 'W_gc_1:0' shape=(64, 32) dtype=float32_ref>,
 'b_gc_1': <tf.Variable 'b_gc_1:0' shape=(1, 32) dtype=float32_ref>,
 'W_bi_1': <tf.Variable 'W_bi_1:0' shape=(64, 32) dtype=float32_ref>,
 'b_bi_1': <tf.Variable 'b_bi_1:0' shap

In [6]:
for i in range(80000, 80010):
    print(i)
    print(model.A_in[i, :])

80000
  (0, 1079)	0.038461538461538464
  (0, 2360)	0.038461538461538464
  (0, 2665)	0.038461538461538464
  (0, 2964)	0.038461538461538464
  (0, 3812)	0.038461538461538464
  (0, 6344)	0.038461538461538464
  (0, 6651)	0.038461538461538464
  (0, 7593)	0.038461538461538464
  (0, 10610)	0.038461538461538464
  (0, 13316)	0.038461538461538464
  (0, 13767)	0.038461538461538464
  (0, 14177)	0.038461538461538464
  (0, 14716)	0.038461538461538464
  (0, 25057)	0.038461538461538464
  (0, 28392)	0.038461538461538464
  (0, 29414)	0.038461538461538464
  (0, 33344)	0.038461538461538464
  (0, 38633)	0.038461538461538464
  (0, 39919)	0.038461538461538464
  (0, 44436)	0.038461538461538464
  (0, 46086)	0.038461538461538464
  (0, 49499)	0.038461538461538464
  (0, 50925)	0.038461538461538464
  (0, 54458)	0.038461538461538464
  (0, 58504)	0.038461538461538464
  (0, 62901)	0.038461538461538464
  (0, 78631)	2.0
  (0, 95595)	0.5
  (0, 95596)	0.5
  (0, 95598)	0.5
  (0, 95602)	0.5
  (0, 95604)	1.5
  (0, 95621)	0.5

In [7]:
data_generator.all_kg_dict[80009]

[(68133, 40),
 (67462, 40),
 (66852, 40),
 (61508, 40),
 (56370, 40),
 (45360, 40),
 (41103, 40),
 (40755, 40),
 (40019, 40),
 (39645, 40),
 (39494, 40),
 (38649, 40),
 (37229, 40),
 (35683, 40),
 (33344, 40),
 (33051, 40),
 (32167, 40),
 (28304, 40),
 (25855, 40),
 (24892, 40),
 (24269, 40),
 (24240, 40),
 (24222, 40),
 (24139, 40),
 (21073, 40),
 (20003, 40),
 (19748, 40),
 (19732, 40),
 (19664, 40),
 (19508, 40),
 (18805, 40),
 (18653, 40),
 (17547, 40),
 (17300, 40),
 (15577, 40),
 (15476, 40),
 (15397, 40),
 (14716, 40),
 (14403, 40),
 (14278, 40),
 (14053, 40),
 (13775, 40),
 (13539, 40),
 (13214, 40),
 (12910, 40),
 (12503, 40),
 (11809, 40),
 (11637, 40),
 (11529, 40),
 (11299, 40),
 (11267, 40),
 (10960, 40),
 (10951, 40),
 (9603, 40),
 (9115, 40),
 (8412, 40),
 (6349, 40),
 (6206, 40),
 (5374, 40),
 (4859, 40),
 (4530, 40),
 (4097, 40),
 (3339, 40),
 (2497, 40),
 (2033, 40),
 (1886, 40),
 (1079, 40),
 (95621, 1),
 (95604, 1),
 (95602, 1),
 (95598, 1),
 (95596, 1),
 (95595, 1)