In [2]:
"""
imports
"""
import pandas as pd
import numpy as np
import random
import os
from collections import defaultdict
from time import time
from tqdm import tqdm
import time
import argparse
import scipy.sparse as sp
import os
import mindspore as ms
from mindspore import nn
from mindspore.common.initializer import initializer
from mindspore.common.parameter import ParameterTuple
from mindspore.ops import composite as C
from mindspore.ops import functional as F
from mindspore.ops import operations as P
from mindspore import Tensor
from mindspore.nn.layer.activation import get_activation
import mindspore.context as context





In [None]:
"""
load data
"""
class Data(object):
    """Interaction dataset loaded from an ``.npz`` archive, plus batch/graph helpers.

    The archive must contain three entries:

    * ``train_data`` -- array of ``(user, item)`` index pairs (two columns);
    * ``test_data`` / ``vali_data`` -- pickled dicts mapping a user to
      ``[first_item, other_items]``; they are flattened to
      ``[first_item, *other_items]`` in ``user_test_item`` / ``user_vali_item``.

    Attributes set by ``__init__``:

    * ``user_item`` / ``item_user`` -- training adjacency as dicts of sets;
    * ``n_users`` / ``n_items`` -- column-wise ``max + 1`` of ``train_data``;
    * ``R`` -- ``n_users x n_items`` float32 DOK matrix with 1.0 on training pairs;
    * ``train_number`` -- number of training rows;
    * ``path`` -- directory of the archive; adjacency caches are written there.
    """

    def __init__(self,npzpath="./data/viedo10/video10.npz"):
        """Read the archive at ``npzpath`` and build the lookup structures.

        NOTE(review): the default path spells ``viedo10`` while the loading cell
        further down uses ``video10``; the default is kept as-is so existing
        callers are unaffected -- confirm which spelling is intended.

        :param npzpath: path to the ``.npz`` archive described on the class.
        """
        self.user_item = defaultdict(set)
        self.item_user = defaultdict(set)

        self.user_vali_item = dict()
        self.user_test_item = dict()

        # allow_pickle=True is needed for the dict entries; pickles can execute
        # arbitrary code, so only load archives from a trusted source.
        # The context manager closes the underlying zip handle once we are done.
        with np.load(npzpath, allow_pickle=True) as _data:
            self.train_data = _data['train_data']
            self.test_data = _data['test_data'].tolist()
            vali_data = _data['vali_data'].tolist()

        # Cache files live next to the archive, whatever the nesting depth.
        self.path = os.path.dirname(npzpath) or '.'

        self.n_users, self.n_items = self.train_data.max(axis=0) + 1
        self.R = sp.dok_matrix((self.n_users, self.n_items), dtype=np.float32)

        for u, i in self.train_data:
            self.user_item[u].add(i)
            self.item_user[i].add(u)

            self.R[u, i] = 1.

        self.train_number = np.shape(self.train_data)[0]
        print(self.n_users, self.n_items,self.train_number, self.train_number/(self.n_users*self.n_items))

        for u, entry in self.test_data.items():
            self.user_test_item[u] = [entry[0], *entry[1]]

        for u, entry in vali_data.items():
            self.user_vali_item[u] = [entry[0], *entry[1]]

        # self.nodesum = self.get_nodesum(depth)

    def gen_batch_train_data(self, neg_number, batch_size):
        """Yield ``uint32`` arrays of ``(user, item, negative_item)`` rows.

        ``train_data`` is shuffled in place first. For every training pair,
        ``neg_number`` items the user has not interacted with are drawn
        uniformly. Every yielded array owns its memory, so batches stay valid
        after the generator advances.

        :param neg_number: negatives sampled per training pair.
        :param batch_size: rows per batch; the final batch may be shorter.
        :raises ValueError: if a user has interacted with every item, because no
            negative can be sampled (the rejection loop would never end).
        """
        np.random.shuffle(self.train_data)
        batch = np.zeros((batch_size, 3), dtype=np.uint32)
        idx = 0
        for u, i in self.train_data:
            seen = self.user_item[u]
            if neg_number > 0 and len(seen) >= self.n_items:
                raise ValueError("user %r has interacted with every item; cannot sample a negative" % (u,))
            for _ in range(neg_number):
                neg_item = random.randint(0, self.n_items - 1)
                while neg_item in seen:
                    neg_item = random.randint(0, self.n_items - 1)
                batch[idx, :] = [u, i, neg_item]
                idx += 1

                if idx == batch_size:
                    yield batch
                    # Start a fresh buffer: reusing the yielded one would
                    # overwrite data the consumer may still hold.
                    batch = np.zeros((batch_size, 3), dtype=np.uint32)
                    idx = 0

        if idx > 0:
            yield batch[:idx]

    def gen_batch_test_data(self, test_neg_number, data='test'):
        """Yield ``(first_item, batch)`` per user of the chosen evaluation split.

        ``batch`` is a freshly allocated ``(test_neg_number + 1, 2)`` ``uint32``
        array whose leading rows are ``(user, item)`` for each item stored for
        that user; rows beyond the user's item count stay zero.

        :param test_neg_number: number of items per user besides the first one.
        :param data: ``'test'`` or ``'vali'``.
        :raises ValueError: for any other ``data`` value (raised on first iteration).
        """
        if data == 'test':
            user_items = self.user_test_item
        elif data == 'vali':
            user_items = self.user_vali_item
        else:
            raise ValueError("data type error: expected 'test' or 'vali', got %r" % (data,))

        size = test_neg_number + 1
        for user, items in user_items.items():
            batch = np.zeros((size, 2), dtype=np.uint32)
            for row, item in enumerate(items):
                batch[row, :] = [user, item]
            yield items[0], batch

    def _bipartite_adjacency(self):
        """Return the ``[[0, R], [R^T, 0]]`` user-item adjacency as float32 CSR."""
        R = self.R.tocsr()
        return sp.bmat([[None, R], [R.T, None]], format='csr', dtype=np.float32)

    def get_adj_mat(self):
        """Return the row-normalized adjacency matrix, using an on-disk cache.

        The cache is ``<self.path>/s_mean_adj_mat.npz``. If it is missing or
        cannot be read, the matrix is rebuilt with :meth:`create_adj_mat` and
        written back.
        """
        # Imported locally: at module level ``time`` is rebound by ``import time``,
        # so a bare ``time()`` call would hit the module, not the function.
        from time import time as _now

        cache_file = os.path.join(self.path, 's_mean_adj_mat.npz')
        if os.path.isfile(cache_file):
            t1 = _now()
            try:
                mean_adj_mat = sp.load_npz(cache_file)
            except Exception as exc:
                # Best effort: an unreadable cache is rebuilt, but say why.
                print('failed to load cached adj matrix, rebuilding:', exc)
            else:
                print('already load adj matrix', mean_adj_mat.shape, _now() - t1)
                return mean_adj_mat

        mean_adj_mat = self.create_adj_mat()
        sp.save_npz(cache_file, mean_adj_mat)
        return mean_adj_mat

    def get_adj_mat_nonorm(self):
        """Return ``A + D`` as CSR and save it to ``<self.path>/adj_mat.npz``.

        ``A`` is the bipartite user-item adjacency and ``D`` the diagonal matrix
        of its row sums. The file is rewritten on every call.
        """
        adj_mat = self._bipartite_adjacency()

        rowsum = np.array(adj_mat.sum(1)).flatten()
        degree_mat = sp.diags(rowsum)

        adj_mat = (adj_mat + degree_mat).tocsr()
        sp.save_npz(os.path.join(self.path, 'adj_mat.npz'), adj_mat)

        return adj_mat

    def get_nodesum(self,depth):
        """Return the row sums of ``(A + D) ** max(2, depth)`` as a ``1 x N`` matrix.

        :param depth: exponent; values below 2 behave like 2.
        """
        adj_mat = self.get_adj_mat_nonorm()
        edge_mat = adj_mat.dot(adj_mat)
        # Already at power 2; multiply up to the requested power.
        for _ in range(depth - 2):
            edge_mat = edge_mat.dot(adj_mat)
        nodesum = edge_mat.sum(1).flatten()
        return nodesum

    def create_adj_mat(self):
        """Build the row-normalized adjacency ``D^-1 A`` as a float32 CSR matrix.

        Rows without any edge are left all-zero.
        """
        # See get_adj_mat for why the timer is imported locally.
        from time import time as _now

        t1 = _now()
        adj_mat = self._bipartite_adjacency()
        print('already create adjacency matrix', adj_mat.shape, _now() - t1)

        t2 = _now()
        rowsum = np.array(adj_mat.sum(1)).flatten()
        # Isolated nodes have degree 0; their reciprocal is reset to 0 below.
        with np.errstate(divide='ignore'):
            d_inv = np.power(rowsum, -1)
        d_inv[np.isinf(d_inv)] = 0.

        mean_adj_mat = sp.diags(d_inv).dot(adj_mat)
        print('generate single-normalized adjacency matrix.')

        print('already normalize adjacency matrix', _now() - t2)
        return mean_adj_mat.tocsr()

In [None]:
def leave_one_out(purchased_item, recommend_list, top_k_recommand_number):
    """Score a single held-out item against the head of a ranked list.

    :param purchased_item: the held-out item to look for.
    :param recommend_list: ranked list of recommended items (best first).
    :param top_k_recommand_number: how many leading entries are considered.
    :return: ``(1, 1 / log2(rank + 2))`` when the item is among the leading
        entries (``rank`` is its 0-based position), otherwise ``(0, 0)``.
    """
    shortlist = recommend_list[:top_k_recommand_number]
    if purchased_item not in shortlist:
        return 0, 0
    rank = shortlist.index(purchased_item)
    return 1, np.log2(2.0) / np.log2(rank + 2.0)

def NDCG_k(recommend_list, purchased_list):
    """Normalized discounted cumulative gain of a ranked list.

    The gain at 0-based position ``p`` is ``1 / log2(p + 2)`` when the item at
    that position is in ``purchased_list``. The normalizer sums the same
    discount over the first ``min(len(recommend_list), len(purchased_list))``
    positions.

    :return: the ratio of the two sums, or ``0`` when the normalizer is zero
        (either list is empty).
    """
    ideal_len = min(len(recommend_list), len(purchased_list))
    ideal_dcg = sum(1 / np.log2(pos + 2) for pos in range(ideal_len))
    if ideal_dcg == 0:
        return 0

    dcg = sum(
        1 / np.log2(pos + 2)
        for pos, item in enumerate(recommend_list)
        if item in purchased_list
    )
    return dcg / ideal_dcg

def top_k(recommend_list, purchased_list):
    """Precision, recall, F1 and hit flag of a recommendation list.

    :param recommend_list: recommended items.
    :param purchased_list: items the user actually interacted with.
    :return: ``(precision, recall, f1, HR)``. ``HR`` is 1 when at least one
        recommended item is in ``purchased_list`` and 0 otherwise. Precision and
        recall are 0.0 when their denominator (list length) is zero, and F1 is
        0.0 when both are zero.
    """
    hits = [item for item in recommend_list if item in purchased_list]
    n_hits = len(hits)
    n_recommended = len(recommend_list)
    n_purchased = len(purchased_list)

    HR = 1 if n_hits else 0
    precision = n_hits / float(n_recommended) if n_recommended != 0 else 0.0
    recall = n_hits / float(n_purchased) if n_purchased != 0 else 0.0

    if recall == 0 and precision == 0:
        f_score = 0.0
    else:
        f_score = 2.0 * precision * recall / (precision + recall)
    return precision, recall, f_score, HR

In [2]:
class params:
    """Notebook-wide hyper-parameters, used as a plain namespace (never instantiated).

    Fields initialized to 0 under "dataset-dependent" are placeholders meant to
    be overwritten once the dataset is loaded.
    """

    # --- dataset-dependent placeholders ---------------------------------
    number_users = 0            # rows of the user embedding table in LECF
    number_items = 0            # rows of the item embedding table in LECF
    test_user_number = 0        # overwritten in the data-loading cell

    # --- sampling / batching --------------------------------------------
    neg_number = 1              # multiplied into train_number in the data-loading cell
    test_neg_number = 100
    batch_size = 1024

    # --- optimisation ---------------------------------------------------
    learner = "adam"
    learning_rate = 1e-4
    l2_regeularization = 1e-4   # referenced only by the disabled loss code in LECF.construct
    epochs = 500
    pretrain = 10
    loss = 0

    # --- model ----------------------------------------------------------
    global_dimention = 50       # embedding width used by LECF
    depth = 10                  # number of propagation iterations in LECF.__init__
    n_fold = 1                  # row-slices the adjacency matrix is split into
    edge = 'add'                # LECF checks for 'add' and 'concat'
    outward = 0.5               # weight of the adjacency vs. identity in LECF.__get_fold_hat__
    alpha = 0.5
    mess_dropout = 0.0
    node_dropout = 0.1

    # --- bookkeeping ----------------------------------------------------
    verbose = 1
    save = 1
    note = 'edge-simi'

''

In [1]:

"""
define the model
"""
class LECF(nn.Cell):
    """Graph-propagation recommender cell (unfinished).

    NOTE(review): this class does not run as written. It appears to be a
    partial port from TensorFlow: ``__get_fold_hat__`` still calls ``tf.*``
    (``tf`` is not imported in the notebook's import cell), it reads
    ``self.para`` which is never assigned in this class, ``construct`` uses an
    unbound loop variable and returns nothing, and the loss computation is
    disabled inside a string literal. The comments below mark each spot.
    """
    def __init__(self,data):
        """Create parameters and ops, then run the propagation loop once.

        :param data: dataset object; ``__get_fold_hat__`` calls its
            ``get_adj_mat()`` and reads ``n_users`` / ``n_items``.
        """
        super(LECF,self).__init__()
        # initial parameters
        self.data = data
        # NOTE(review): the three Xavier-initialized weights below are declared with
        # dtype ms.int32, while the test_* embeddings further down use ms.float32 --
        # confirm the intended dtype.
        self.user_embedding_weight = ms.Parameter(default_input=initializer('XavierUniform',[params.number_users,params.global_dimention] \
            ,ms.int32),name="user_embedding_matrix",requires_grad=True,layerwise_parallel=False)
        self.item_embedding_weight = ms.Parameter(default_input=initializer('XavierUniform',[params.number_items,params.global_dimention] \
            ,ms.int32),name="item_embedding_matrix",requires_grad=True,layerwise_parallel=False)
        self.edge_weight = ms.Parameter(default_input=initializer('XavierUniform',[2 * params.global_dimention,params.global_dimention] \
            ,ms.int32),name="edge_weight",requires_grad=True,layerwise_parallel=False)
        # dl widens the test embeddings: 2 when params.edge == 'concat', else 1.
        self.dl = 1
        if (params.edge == 'concat'): self.dl = 2
        self.test_user_g_embeddings = ms.Parameter(default_input=initializer('ones',shape=[params.number_users,params.global_dimention * self.dl] \
            , dtype=ms.float32),name='test_user_g_embeddings',requires_grad=True,layerwise_parallel=False)
        self.test_item_g_embeddings = ms.Parameter(default_input=initializer('ones',shape=[params.number_items,params.global_dimention * self.dl] \
            , dtype=ms.float32),name='test_item_g_embeddings',requires_grad=True,layerwise_parallel=False)
        # Scalar placeholders; the three id fields are overwritten in construct().
        self.node_dropout = Tensor(0,ms.float32)
        self.mess_dropout = Tensor(0,ms.float32)
        self.user_id = Tensor(0,ms.int32)
        self.item_id = Tensor(0,ms.int32)
        self.neg_item_id = Tensor(0,ms.int32)

        # build the model (front pass)
        # NOTE(review): everything below executes once at construction time, not per
        # call of construct() -- confirm this is intended.
        # A_fold_hat_c mixes the adjacency with the identity using params.outward;
        # A_fold_hat_e (outward == -1) is 0.5 * adjacency (see __get_fold_hat__).
        self.A_fold_hat_c = self.__get_fold_hat__(params.outward)
        self.A_fold_hat_e = self.__get_fold_hat__(-1)
        self.concat0 = P.Concat(axis=0)
        self.concat1 = P.Concat(axis=0)
        self.concat2 = P.Concat(axis=0)
        # NOTE(review): the Concat ops are called with two positional arguments here
        # and below (two tensors / a list plus 0) -- verify the call convention
        # against the MindSpore Concat API.
        self.ego_embeddings = self.concat0(self.user_embedding_weight,self.item_embedding_weight)
        self.matmul = P.MatMul(transpose_a=False,transpose_b=False)
        for K in range(params.depth):
            # The first iteration uses the "e" folds, later iterations the "c" folds.
            if (K == 0):
                self.A_fold_hat = self.A_fold_hat_e
            else : 
                self.A_fold_hat = self.A_fold_hat_c

            temp_embed = []
            for G in range(params.n_fold):
                temp_embed.append(self.matmul(self.A_fold_hat[G],self.ego_embeddings))

            if (K == 0):
                # For any params.edge other than "add" the first iteration leaves
                # ego_embeddings unchanged (the 'concat' case is still a todo).
                if (params.edge == "add"):
                    self.ego_embeddings += self.concat1(temp_embed,0)
                else:
                    pass
                # todo
            else: 
                self.ego_embeddings = self.concat2(temp_embed,0)
            # NOTE(review): keep_prob is derived from self.mess_dropout (a Tensor set
            # to 0 above) while the guard below tests params.mess_dropout; the layer
            # is also re-created on every iteration.
            self.dropout = nn.Dropout(keep_prob=1-self.mess_dropout)
            if (params.mess_dropout != 0):
                self.ego_embeddings = self.dropout(self.ego_embeddings)

        # initialize LOSS
        self.reduce_sum_in_loss = P.ReduceSum()
        self.sigmoid_in_loss = nn.Sigmoid()
        # TODO: find another loss function.
        self.loss_first_item = P.L2Loss()
        self.loss_first_user = P.L2Loss()
        self.loss_first_neg_item_embedding = P.L2Loss()
        self.log = P.Log()
        """
        # may be used in creat_loss
        self.div = P.RealDiv()
        self.add = P.TensorAdd()
        """

    def __get_fold_hat__(self, outward):
        """Split the (re-weighted) normalized adjacency into row folds.

        :param outward: ``-1`` selects ``0.5 * A``; any other value selects
            ``outward * A + (1 - outward) * I``, where ``A`` is
            ``self.data.get_adj_mat()``.
        :return: list with one entry per fold (``params.n_fold`` entries).

        NOTE(review): not runnable as written -- see the notes in the body.
        """
        mean_adj_mat = self.data.get_adj_mat()

        A_fold_hat = []

        if(outward==-1):
            mat = 0.5*mean_adj_mat
        else:
            mat= outward*mean_adj_mat + (1-outward)*sp.eye(mean_adj_mat.shape[0])

        # Equal-sized row ranges; the last fold absorbs the remainder.
        fold_len = (self.data.n_users + self.data.n_items) // params.n_fold
        for i_fold in range(params.n_fold):
            start = i_fold * fold_len
            # NOTE(review): self.para is not assigned anywhere in this class; the rest
            # of the method reads the module-level `params` instead.
            if (i_fold == self.para.n_fold - 1):
                end = self.data.n_users + self.data.n_items
            else:
                end = (i_fold + 1) * fold_len
            # TODO: figure out what is happening here.
            # NOTE(review): the `tf.*` calls below reference a name that the notebook's
            # import cell never imports; this part still needs a MindSpore equivalent.
            coo = mat[start:end].tocoo().astype(np.float32)
            indices = np.mat([coo.row, coo.col]).transpose()
            temp = tf.SparseTensor(indices, coo.data, coo.shape)

            # NOTE(review): same self.para issue as above.
            if (self.para.node_dropout != 0):
                random_tensor = 1 - self.node_dropout
                random_tensor += tf.random_uniform([mat[start:end].count_nonzero()])
                dropout_mask = tf.cast(tf.floor(random_tensor), dtype=tf.bool)
                temp = tf.sparse_retain(temp, dropout_mask) * tf.div(1., 1 - self.node_dropout)

            A_fold_hat.append(temp)
        return A_fold_hat

    def construct(self, ID):
        """Forward pass (incomplete).

        :param ID: indexable with three entries, stored as ``user_id``,
            ``item_id`` and ``neg_item_id`` respectively.
        :return: nothing yet -- there is no ``return`` statement.
        """
        self.user_id = ID[0]
        self.item_id = ID[1]
        self.neg_item_id = ID[2]

        self.first_user_embedding = []
        # NOTE(review): `i` is read in `self.user_id[i]` before the loop binds it, so
        # this line raises on first use; presumably the intent is to look up the
        # embedding row of every id in user_id -- confirm.
        for i in range(self.user_id[i]):
            self.first_user_embedding.append(self.user_embedding_weight[i])
        # The string below is disabled loss code; it is not executed. It refers to
        # self.y, self.neg_y and first_item/neg_item embeddings that are not
        # assigned anywhere in this class.
        """
        # create_loss
        self.mf_loss = - self.reduce_sum_in_loss(self.log(self.sigmoid_in_loss(self.y - self.neg_y) + 1e-6))
        self.first_user_loss = self.loss_first_user(self.first_user_embedding)
        self.first_item_loss = self.loss_first_item(self.first_item_embedding)
        self.first_neg_item_embedding_loss = self.loss_first_neg_item_embedding(self.first_neg_item_embedding)
        self.reg_loss = params.l2_regeularization * (self.first_neg_item_embedding_loss + self.first_item_loss + \
            self.first_user_loss) / params.batch_size
        self.loss = self.mf_loss + self.reg_loss
        """

SyntaxError: EOF while scanning triple-quoted string literal (<ipython-input-1-074928840ed5>, line 31)

In [None]:
# Load the dataset and fill in the data-dependent fields of ``params``.
dataset_dir = "./data/video10/video10.npz"
data = Data(npzpath=dataset_dir)
params.test_user_number = len(data.user_test_item)
params.train_number = data.train_number * params.neg_number
# LECF sizes its embedding tables from these two fields; they default to 0 in
# ``params``, which would give zero-row embedding matrices.
params.number_users = int(data.n_users)
params.number_items = int(data.n_items)

In [3]:
# Configure MindSpore: PyNative mode, CPU as the device target, graph saving off.
# NOTE(review): this cell sits below the data/model cells; presumably it should be
# executed before any MindSpore object is created -- confirm the intended order.
context.set_context(mode=context.PYNATIVE_MODE,device_target="CPU",save_graphs=False)
current