# 定义超参数类

In [1]:
import torch
import multiprocessing
import sys
import os
from os.path import join
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
"""定义超参数"""
class params:
  def __init__(self,bpr_batch_size=2048,latent_dim_rec=64,lightGCN_n_layers=3,
               dropout=0,keep_prob=0.6,A_n_fold=100,seed=2024,
               test_u_batch_size=100,epochs=1000,dataset="gowalla",
               load=1,path="./checkpoints",topks='[20]',
               multicore=0,lr=0.001,decay=1e-4,pretrain=0,
               A_split=False,bigdata=False,comment="lgn"):
    self.config={}
    self.config['bpr_batch_size'] = bpr_batch_size
    self.config['latent_dim_rec'] = latent_dim_rec
    self.config['lightGCN_n_layers']= lightGCN_n_layers
    self.config['dropout'] = dropout
    self.config['keep_prob']  = keep_prob
    self.config['A_n_fold'] = A_n_fold
    self.config['test_u_batch_size'] = test_u_batch_size
    self.config['multicore'] = multicore
    self.config['lr'] = lr
    self.config['decay'] = decay
    self.config['pretrain'] = pretrain
    self.config['A_split'] = A_split
    self.config['bigdata'] = bigdata
    GPU = torch.cuda.is_available()
    self.device = torch.device('cuda' if GPU else "cpu")
    self.CORES = multiprocessing.cpu_count() // 2
    self.seed = seed
    self.ROOT_PATH = os.getcwd()
    self.CODE_PATH = join(self.ROOT_PATH, 'code')
    self.DATA_PATH = join(self.ROOT_PATH, 'data')
    self.BOARD_PATH = join(self.CODE_PATH, 'runs')
    self.FILE_PATH = join(self.CODE_PATH, 'checkpoints')
    sys.path.append(join(self.CODE_PATH, 'sources'))
    if not os.path.exists(self.FILE_PATH):
        os.makedirs(self.FILE_PATH, exist_ok=True)
    self.dataset = dataset
    self.TRAIN_epochs = epochs
    self.LOAD = load
    self.PATH = path
    self.topks = eval(topks)
    self.comment = comment
    self.logo = r"""
        ███╗   ███╗  ██████╗ ██╗
        ████╗ ████║ ██╔════╝ ██║
        ██╔████╔██║ ██║      ██║
        ██║╚██╔╝██║ ██║      ██║
        ██║ ╚═╝ ██║ ╚██████╗ ███████╗
        ╚═╝     ╚═╝  ╚═════╝ ╚══════╝
      """
  def cprint(self,words : str):
    print(f"\033[0;30;43m{words}\033[0m")

检验代码能跑嘛

In [2]:
world = params()
print(world.logo)


        ███╗   ███╗  ██████╗ ██╗
        ████╗ ████║ ██╔════╝ ██║
        ██╔████╔██║ ██║      ██║
        ██║╚██╔╝██║ ██║      ██║
        ██║ ╚═╝ ██║ ╚██████╗ ███████╗
        ╚═╝     ╚═╝  ╚═════╝ ╚══════╝
      


# 定义数据加载器类

In [3]:
import os
from os.path import join
import sys
import torch
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from scipy.sparse import csr_matrix
import scipy.sparse as sp
from time import time
world = params()
class Loader():
    """
    Dataset type for pytorch \n
    Incldue graph information
    gowalla dataset
    """
    def __init__(self,config = world.config,path="../data/gowalla"):
        # train or test
        world.cprint(f'loading [{path}]')
        self.split = config['A_split']
        self.folds = config['A_n_fold']
        self.mode_dict = {'train': 0, "test": 1}
        self.mode = self.mode_dict['train']
        self.n_user = 0
        self.m_item = 0
        train_file = path + '/content/train.txt'
        test_file = path + '/content/test.txt'
        self.path = path
        trainUniqueUsers, trainItem, trainUser = [], [], []
        testUniqueUsers, testItem, testUser = [], [], []
        self.traindataSize = 0
        self.testDataSize = 0

        with open(train_file) as f:
            for l in f.readlines():
                if len(l) > 0:
                    l = l.strip('\n').split(' ')
                    items = [int(i) for i in l[1:]]
                    uid = int(l[0])
                    trainUniqueUsers.append(uid)
                    trainUser.extend([uid] * len(items))
                    trainItem.extend(items)
                    self.m_item = max(self.m_item, max(items))
                    self.n_user = max(self.n_user, uid)
                    self.traindataSize += len(items)
        self.trainUniqueUsers = np.array(trainUniqueUsers)
        self.trainUser = np.array(trainUser)
        self.trainItem = np.array(trainItem)

        with open(test_file) as f:
            for l in f.readlines():
                if len(l) > 0:
                    l = l.strip('\n').split(' ')
                    items = [int(i) for i in l[1:]]
                    uid = int(l[0])
                    testUniqueUsers.append(uid)
                    testUser.extend([uid] * len(items))
                    testItem.extend(items)
                    self.m_item = max(self.m_item, max(items))
                    self.n_user = max(self.n_user, uid)
                    self.testDataSize += len(items)
        self.m_item += 1
        self.n_user += 1
        self.testUniqueUsers = np.array(testUniqueUsers)
        self.testUser = np.array(testUser)
        self.testItem = np.array(testItem)

        self.Graph = None
        print(f"{self.trainDataSize} interactions for training")
        print(f"{self.testDataSize} interactions for testing")
        print(f"{world.dataset} Sparsity : {(self.trainDataSize + self.testDataSize) / self.n_users / self.m_items}")

        # (users,items), bipartite graph
        self.UserItemNet = csr_matrix((np.ones(len(self.trainUser)), (self.trainUser, self.trainItem)),
                                      shape=(self.n_user, self.m_item))
        self.users_D = np.array(self.UserItemNet.sum(axis=1)).squeeze()
        self.users_D[self.users_D == 0.] = 1
        self.items_D = np.array(self.UserItemNet.sum(axis=0)).squeeze()
        self.items_D[self.items_D == 0.] = 1.
        # pre-calculate
        self._allPos = self.getUserPosItems(list(range(self.n_user)))
        self.__testDict = self.__build_test()
        print(f"{world.dataset} is ready to go")

    @property
    def n_users(self):
        return self.n_user

    @property
    def m_items(self):
        return self.m_item

    @property
    def trainDataSize(self):
        return self.traindataSize

    @property
    def testDict(self):
        return self.__testDict

    @property
    def allPos(self):
        return self._allPos

    def _split_A_hat(self,A):
        A_fold = []
        fold_len = (self.n_users + self.m_items) // self.folds
        for i_fold in range(self.folds):
            start = i_fold*fold_len
            if i_fold == self.folds - 1:
                end = self.n_users + self.m_items
            else:
                end = (i_fold + 1) * fold_len
            A_fold.append(self._convert_sp_mat_to_sp_tensor(A[start:end]).coalesce().to(world.device))
        return A_fold

    def _convert_sp_mat_to_sp_tensor(self, X):
        coo = X.tocoo().astype(np.float32)
        row = torch.Tensor(coo.row).long()
        col = torch.Tensor(coo.col).long()
        index = torch.stack([row, col])
        data = torch.FloatTensor(coo.data)
        return torch.sparse.FloatTensor(index, data, torch.Size(coo.shape))

    def getSparseGraph(self):
        print("loading adjacency matrix")
        if self.Graph is None:
            try:
                pre_adj_mat = sp.load_npz(self.path + '/s_pre_adj_mat.npz')
                print("successfully loaded...")
                norm_adj = pre_adj_mat
            except :
                print("generating adjacency matrix")
                s = time()
                adj_mat = sp.dok_matrix((self.n_users + self.m_items, self.n_users + self.m_items), dtype=np.float32)
                adj_mat = adj_mat.tolil()
                R = self.UserItemNet.tolil()
                adj_mat[:self.n_users, self.n_users:] = R
                adj_mat[self.n_users:, :self.n_users] = R.T
                adj_mat = adj_mat.todok()
                # adj_mat = adj_mat + sp.eye(adj_mat.shape[0])

                rowsum = np.array(adj_mat.sum(axis=1))
                d_inv = np.power(rowsum, -0.5).flatten()
                d_inv[np.isinf(d_inv)] = 0.
                d_mat = sp.diags(d_inv)

                norm_adj = d_mat.dot(adj_mat)
                norm_adj = norm_adj.dot(d_mat)
                norm_adj = norm_adj.tocsr()
                end = time()
                print(f"costing {end-s}s, saved norm_mat...")
                sp.save_npz(self.path + '/s_pre_adj_mat.npz', norm_adj)

            if self.split == True:
                self.Graph = self._split_A_hat(norm_adj)
                print("done split matrix")
            else:
                self.Graph = self._convert_sp_mat_to_sp_tensor(norm_adj)
                self.Graph = self.Graph.coalesce().to(world.device)
                print("don't split the matrix")
        return self.Graph

    def __build_test(self):
        """
        return:
            dict: {user: [items]}
        """
        test_data = {}
        for i, item in enumerate(self.testItem):
            user = self.testUser[i]
            if test_data.get(user):
                test_data[user].append(item)
            else:
                test_data[user] = [item]
        return test_data

    def getUserItemFeedback(self, users, items):
        """
        users:
            shape [-1]
        items:
            shape [-1]
        return:
            feedback [-1]
        """
        # print(self.UserItemNet[users, items])
        return np.array(self.UserItemNet[users, items]).astype('uint8').reshape((-1,))

    def getUserPosItems(self, users):
        posItems = []
        for user in users:
            posItems.append(self.UserItemNet[user].nonzero()[1])
        return posItems


# 定义light-gcn模型

In [4]:
import torch
from torch import nn
import numpy as np
import multiprocessing
CORES = multiprocessing.cpu_count() // 2

class LightGCN(nn.Module):
    def __init__(self,
                 config:dict,
                 dataset:Loader,
                 world:params):
        super(LightGCN, self).__init__()
        self.config = config
        self.dataset = dataset
        self.world = world
        self.__init_weight()

    def __init_weight(self):
        self.num_users  = self.dataset.n_users
        self.num_items  = self.dataset.m_items
        self.latent_dim = self.config['latent_dim_rec']
        self.n_layers = self.config['lightGCN_n_layers']
        self.keep_prob = self.config['keep_prob']
        self.A_split = self.config['A_split']
        self.embedding_user = torch.nn.Embedding(
            num_embeddings=self.num_users, embedding_dim=self.latent_dim)
        self.embedding_item = torch.nn.Embedding(
            num_embeddings=self.num_items, embedding_dim=self.latent_dim)
        if self.config['pretrain'] == 0:
            nn.init.normal_(self.embedding_user.weight, std=0.1)
            nn.init.normal_(self.embedding_item.weight, std=0.1)
            world.cprint('use NORMAL distribution initilizer')
        else:
            self.embedding_user.weight.data.copy_(torch.from_numpy(self.config['user_emb']))
            self.embedding_item.weight.data.copy_(torch.from_numpy(self.config['item_emb']))
            print('use pretarined data')
        self.f = nn.Sigmoid()
        self.Graph = self.dataset.getSparseGraph()
        print(f"lgn is already to go(dropout:{self.config['dropout']})")

    def __dropout_x(self, x, keep_prob):
        size = x.size()
        index = x.indices().t()
        values = x.values()
        random_index = torch.rand(len(values)) + keep_prob
        random_index = random_index.int().bool()
        index = index[random_index]
        values = values[random_index]/keep_prob
        g = torch.sparse.FloatTensor(index.t(), values, size)
        return g

    def __dropout(self, keep_prob):
        if self.A_split:
            graph = []
            for g in self.Graph:
                graph.append(self.__dropout_x(g, keep_prob))
        else:
            graph = self.__dropout_x(self.Graph, keep_prob)
        return graph

    def computer(self):
        """
        propagate methods for lightGCN
        """
        users_emb = self.embedding_user.weight
        items_emb = self.embedding_item.weight
        all_emb = torch.cat([users_emb, items_emb])
        embs = [all_emb]
        if self.config['dropout']:
            if self.training:
                print("droping")
                g_droped = self.__dropout(self.keep_prob)
            else:
                g_droped = self.Graph
        else:
            g_droped = self.Graph

        for layer in range(self.n_layers):
            if self.A_split:
                temp_emb = []
                for f in range(len(g_droped)):
                    temp_emb.append(torch.sparse.mm(g_droped[f], all_emb))
                side_emb = torch.cat(temp_emb, dim=0)
                all_emb = side_emb
            else:
                all_emb = torch.sparse.mm(g_droped, all_emb)
            embs.append(all_emb)
        embs = torch.stack(embs, dim=1)

        light_out = torch.mean(embs, dim=1)
        users, items = torch.split(light_out, [self.num_users, self.num_items])
        return users, items

    def getUsersRating(self, users):
        all_users, all_items = self.computer()
        users_emb = all_users[users.long()]
        items_emb = all_items
        rating = self.f(torch.matmul(users_emb, items_emb.t()))
        return rating

    def getEmbedding(self, users, pos_items, neg_items):
        all_users, all_items = self.computer()
        users_emb = all_users[users]
        pos_emb = all_items[pos_items]
        neg_emb = all_items[neg_items]
        users_emb_ego = self.embedding_user(users)
        pos_emb_ego = self.embedding_item(pos_items)
        neg_emb_ego = self.embedding_item(neg_items)
        return users_emb, pos_emb, neg_emb, users_emb_ego, pos_emb_ego, neg_emb_ego

    def shuffle(self,*arrays, **kwargs):

        require_indices = kwargs.get('indices', False)

        if len(set(len(x) for x in arrays)) != 1:
            raise ValueError('All inputs to shuffle must have '
                             'the same length.')

        shuffle_indices = np.arange(len(arrays[0]))
        np.random.shuffle(shuffle_indices)

        if len(arrays) == 1:
            result = arrays[0][shuffle_indices]
        else:
            result = tuple(x[shuffle_indices] for x in arrays)

        if require_indices:
            return result, shuffle_indices
        else:
            return result
    def minibatch(self,*tensors, **kwargs):

        batch_size = kwargs.get('batch_size', self.world.config['bpr_batch_size'])

        if len(tensors) == 1:
            tensor = tensors[0]
            for i in range(0, len(tensor), batch_size):
                yield tensor[i:i + batch_size]
        else:
            for i in range(0, len(tensors[0]), batch_size):
                yield tuple(x[i:i + batch_size] for x in tensors)

    def bpr_loss(self, users, pos, neg):
        (users_emb, pos_emb, neg_emb,
        userEmb0,  posEmb0, negEmb0) = self.getEmbedding(users.long(), pos.long(), neg.long())
        reg_loss = (1/2)*(userEmb0.norm(2).pow(2) +
                         posEmb0.norm(2).pow(2)  +
                         negEmb0.norm(2).pow(2))/float(len(users))
        pos_scores = torch.mul(users_emb, pos_emb)
        pos_scores = torch.sum(pos_scores, dim=1)
        neg_scores = torch.mul(users_emb, neg_emb)
        neg_scores = torch.sum(neg_scores, dim=1)

        loss = torch.mean(torch.nn.functional.softplus(neg_scores - pos_scores))

        return loss, reg_loss

    def UniformSample_original_python(self,dataset):
      """
      the original impliment of BPR Sampling in LightGCN
      :return:
          np.array
      """
      user_num = dataset.trainDataSize
      users = np.random.randint(0, dataset.n_users, user_num)
      allPos = dataset.allPos
      S = []
      sample_time1 = 0.
      sample_time2 = 0.
      for i, user in enumerate(users):
          start = time()
          posForUser = allPos[user]
          if len(posForUser) == 0:
              continue
          posindex = np.random.randint(0, len(posForUser))
          positem = posForUser[posindex]
          while True:
              negitem = np.random.randint(0, dataset.m_items)
              if negitem in posForUser:
                  continue
              else:
                  break
          S.append([user, positem, negitem])
      return np.array(S)

    def BPR_train_original(self,dataset, recommend_model, loss_class, epoch, neg_k=1, w=None):
        Recmodel = recommend_model
        Recmodel.train()
        bpr: BPRLoss = loss_class
        S = self.UniformSample_original_python(dataset)
        users = torch.Tensor(S[:, 0]).long()
        posItems = torch.Tensor(S[:, 1]).long()
        negItems = torch.Tensor(S[:, 2]).long()

        users = users.to(self.world.device)
        posItems = posItems.to(self.world.device)
        negItems = negItems.to(self.world.device)
        users, posItems, negItems = self.shuffle(users, posItems, negItems)
        total_batch = len(users) // self.world.config['bpr_batch_size'] + 1
        aver_loss = 0.
        for (batch_i,
             (batch_users,
              batch_pos,
              batch_neg)) in enumerate(self.minibatch(users,
                                   posItems,
                                   negItems,
                                   batch_size=self.world.config['bpr_batch_size'])):
            cri = bpr.stageOne(batch_users, batch_pos, batch_neg)
            aver_loss += cri
        aver_loss = aver_loss / total_batch
        return f"loss{aver_loss:.3f}"

    def getLabel(self,test_data, pred_data):
        r = []
        for i in range(len(test_data)):
            groundTrue = test_data[i]
            predictTopK = pred_data[i]
            pred = list(map(lambda x: x in groundTrue, predictTopK))
            pred = np.array(pred).astype("float")
            r.append(pred)
        return np.array(r).astype('float')

    def RecallPrecision_ATk(self,test_data, r, k):
        """
        test_data should be a list? cause users may have different amount of pos items. shape (test_batch, k)
        pred_data : shape (test_batch, k) NOTE: pred_data should be pre-sorted
        k : top-k
        """
        right_pred = r[:, :k].sum(1)
        precis_n = k
        recall_n = np.array([len(test_data[i]) for i in range(len(test_data))])
        recall = np.sum(right_pred/recall_n)
        precis = np.sum(right_pred)/precis_n
        return {'recall': recall, 'precision': precis}

    def NDCGatK_r(self,test_data,r,k):
        """
        Normalized Discounted Cumulative Gain
        rel_i = 1 or 0, so 2^{rel_i} - 1 = 1 or 0
        """
        assert len(r) == len(test_data)
        pred_data = r[:, :k]

        test_matrix = np.zeros((len(pred_data), k))
        for i, items in enumerate(test_data):
            length = k if k <= len(items) else len(items)
            test_matrix[i, :length] = 1
        test_matrix[0, :length] = 1
        max_r = test_matrix
        idcg = np.sum(max_r * 1./np.log2(np.arange(2, k + 2)), axis=1)
        dcg = pred_data*(1./np.log2(np.arange(2, k + 2)))
        dcg = np.sum(dcg, axis=1)
        idcg[idcg == 0.] = 1.
        ndcg = dcg/idcg
        ndcg[np.isnan(ndcg)] = 0.
        return np.sum(ndcg)

    def test_one_batch(self,X):
        sorted_items = X[0].numpy()
        #sorted_items = X[0]
        groundTrue = X[1]
        r = self.getLabel(groundTrue, sorted_items)
        pre, recall, ndcg = [], [], []
        for k in world.topks:
            ret = self.RecallPrecision_ATk(groundTrue, r, k)
            pre.append(ret['precision'])
            recall.append(ret['recall'])
            ndcg.append(self.NDCGatK_r(groundTrue,r,k))
        return {'recall':np.array(recall),
                'precision':np.array(pre),
                'ndcg':np.array(ndcg)}


    def Test(self,dataset, Recmodel, epoch):
        u_batch_size = self.world.config['test_u_batch_size']
        testDict: dict = dataset.testDict
        # eval mode with no dropout
        Recmodel = Recmodel.eval()
        max_K = max(self.world.topks)

        results = {'precision': np.zeros(len(world.topks)),
                   'recall': np.zeros(len(world.topks)),
                   'ndcg': np.zeros(len(world.topks))}
        with torch.no_grad():
            users = list(testDict.keys())
            try:
                assert u_batch_size <= len(users) / 10
            except AssertionError:
                print(f"test_u_batch_size is too big for this dataset, try a small one {len(users) // 10}")
            users_list = []
            rating_list = []
            groundTrue_list = []
            # auc_record = []
            # ratings = []
            total_batch = len(users) // u_batch_size + 1
            for batch_users in self.minibatch(users, batch_size=u_batch_size):
                allPos = dataset.getUserPosItems(batch_users)
                groundTrue = [testDict[u] for u in batch_users]
                batch_users_gpu = torch.Tensor(batch_users).long()
                batch_users_gpu = batch_users_gpu.to(self.world.device)

                rating = Recmodel.getUsersRating(batch_users_gpu)
                #rating = rating.cpu()
                exclude_index = []
                exclude_items = []
                for range_i, items in enumerate(allPos):
                    exclude_index.extend([range_i] * len(items))
                    exclude_items.extend(items)
                rating[exclude_index, exclude_items] = -(1<<10)
                _, rating_K = torch.topk(rating, k=max_K)
                rating = rating.cpu().numpy()
                # aucs = [
                #         utils.AUC(rating[i],
                #                   dataset,
                #                   test_data) for i, test_data in enumerate(groundTrue)
                #     ]
                # auc_record.extend(aucs)
                del rating
                users_list.append(batch_users)
                rating_list.append(rating_K.cpu())
                groundTrue_list.append(groundTrue)
            assert total_batch == len(users_list)
            X = zip(rating_list, groundTrue_list)
            pre_results = []
            for x in X:
                pre_results.append(self.test_one_batch(x))
            scale = float(u_batch_size/len(users))
            for result in pre_results:
                results['recall'] += result['recall']
                results['precision'] += result['precision']
                results['ndcg'] += result['ndcg']
            results['recall'] /= float(len(users))
            results['precision'] /= float(len(users))
            results['ndcg'] /= float(len(users))
            # results['auc'] = np.mean(auc_record)
            print(results)
            return results

    def pred(self,dataset,Recmodel):
        u_batch_size = self.world.config['test_u_batch_size']
        testDict: dict = dataset.testDict
        Recmodel = Recmodel.eval()
        max_K = max(self.world.topks)
        with torch.no_grad():
            users = list(testDict.keys())
            try:
                assert u_batch_size <= len(users) / 10
            except AssertionError:
                print(f"test_u_batch_size is too big for this dataset, try a small one {len(users) // 10}")
            users_list = []
            rating_list = []
            groundTrue_list = []
            # auc_record = []
            # ratings = []
            total_batch = len(users) // u_batch_size + 1
            for batch_users in self.minibatch(users, batch_size=u_batch_size):
                allPos = dataset.getUserPosItems(batch_users)
                batch_users_gpu = torch.Tensor(batch_users).long()
                batch_users_gpu = batch_users_gpu.to(self.world.device)

                rating = Recmodel.getUsersRating(batch_users_gpu)
                #rating = rating.cpu()
                exclude_index = []
                exclude_items = []
                for range_i, items in enumerate(allPos):
                    exclude_index.extend([range_i] * len(items))
                    exclude_items.extend(items)
                rating[exclude_index, exclude_items] = -(1<<10)
                _, rating_K = torch.topk(rating, k=max_K)
                del rating
                rating_list.append(rating_K.cpu())
            print("输出成功！")
            return rating_list

    def forward(self, users, items):
        all_users, all_items = self.computer()
        users_emb = all_users[users]
        items_emb = all_items[items]
        inner_pro = torch.mul(users_emb, items_emb)
        gamma     = torch.sum(inner_pro, dim=1)
        return gamma

# 定义Bpr_loss

In [5]:
import torch
from torch import nn, optim
import numpy as np
from torch import log
from time import time
from sklearn.metrics import roc_auc_score
import random
import os

class BPRLoss:
    def __init__(self,
                 recmodel : LightGCN,
                 config : dict):
        self.model = recmodel
        self.weight_decay = config['decay']
        self.lr = config['lr']
        self.opt = optim.Adam(recmodel.parameters(), lr=self.lr)

    def stageOne(self, users, pos, neg):
        loss, reg_loss = self.model.bpr_loss(users, pos, neg)
        reg_loss = reg_loss*self.weight_decay
        loss = loss + reg_loss

        self.opt.zero_grad()
        loss.backward()
        self.opt.step()

        return loss.cpu().item()

# 主函数

In [None]:
!pip install Procedure



In [6]:
import torch
import numpy as np
from pprint import pprint
world = params()
#定义随机数种子，保证结果的可复现性
np.random.seed(world.seed)
if torch.cuda.is_available():
  torch.cuda.manual_seed(world.seed)
  torch.cuda.manual_seed_all(world.seed)
torch.manual_seed(world.seed)
print(">>SEED:", world.seed)
dataset = Loader(path="")
print('===========config================')
pprint(world.config)
print("cores for test:", world.CORES)
print("comment:", world.comment)
print("LOAD:", world.LOAD)
print("Weight path:", world.PATH)
print("Test Topks:", world.topks)
print("using bpr loss")
print('===========end===================')
Recmodel = LightGCN(world.config, dataset, world)
Recmodel = Recmodel.to(world.device)
bpr = BPRLoss(Recmodel, world.config)
file = f"lgn-{world.dataset}-{world.config['lightGCN_n_layers']}-{world.config['latent_dim_rec']}.pth.tar"
weight_path = os.path.join(world.FILE_PATH, file)
print(f"load and save to {weight_path}")
if world.LOAD:
    try:
        Recmodel.load_state_dict(torch.load(weight_path, map_location=torch.device('cpu')))
        world.cprint(f"loaded model weights from {weight_path}")
    except FileNotFoundError:
        print(f"{weight_path} not exists, start from beginning")
Neg_k = 1
#Recmodel.Test(dataset, Recmodel, 1, world.config['multicore'])
for epoch in range(world.TRAIN_epochs):
    if epoch % 10 == 0:
        world.cprint("[TEST]")
        Recmodel.Test(dataset, Recmodel, epoch)
    output_information = Recmodel.BPR_train_original(dataset, Recmodel, bpr, epoch, neg_k=Neg_k)
    print(f'EPOCH[{epoch + 1}/{world.TRAIN_epochs}] {output_information}')
    torch.save(Recmodel.state_dict(), weight_path)

>>SEED: 2024
[0;30;43mloading [][0m
810128 interactions for training
217242 interactions for testing
gowalla Sparsity : 0.0008396216228570436
gowalla is ready to go
{'A_n_fold': 100,
 'A_split': False,
 'bigdata': False,
 'bpr_batch_size': 2048,
 'decay': 0.0001,
 'dropout': 0,
 'keep_prob': 0.6,
 'latent_dim_rec': 64,
 'lightGCN_n_layers': 3,
 'lr': 0.001,
 'multicore': 0,
 'pretrain': 0,
 'test_u_batch_size': 100}
cores for test: 1
comment: lgn
LOAD: 1
Weight path: ./checkpoints
Test Topks: [20]
using bpr loss
[0;30;43muse NORMAL distribution initilizer[0m
loading adjacency matrix
generating adjacency matrix
costing 116.80713701248169s, saved norm_mat...


  return torch.sparse.FloatTensor(index, data, torch.Size(coo.shape))


don't split the matrix
lgn is already to go(dropout:0)
load and save to /content/code/checkpoints/lgn-gowalla-3-64.pth.tar
/content/code/checkpoints/lgn-gowalla-3-64.pth.tar not exists, start from beginning
[0;30;43m[TEST][0m
29858
[13.072087]
{'precision': array([0.00022774]), 'recall': array([0.00067513]), 'ndcg': array([0.00043781])}
EPOCH[1/1000] loss0.548
EPOCH[2/1000] loss0.239
EPOCH[3/1000] loss0.160
EPOCH[4/1000] loss0.129


KeyboardInterrupt: 

# 生成最后的输出文件，请将该生成的文件提交到kaggle

In [7]:
import torch
import numpy as np
from pprint import pprint
import csv
world = params()
#定义随机数种子，保证结果的可复现性
np.random.seed(world.seed)
if torch.cuda.is_available():
  torch.cuda.manual_seed(world.seed)
  torch.cuda.manual_seed_all(world.seed)
torch.manual_seed(world.seed)
print(">>SEED:", world.seed)
dataset = Loader(path="")
print('===========config================')
pprint(world.config)
print("cores for test:", world.CORES)
print("comment:", world.comment)
print("LOAD:", world.LOAD)
print("Weight path:", world.PATH)
print("Test Topks:", world.topks)
print("using bpr loss")
print('===========end===================')
Recmodel = LightGCN(world.config, dataset, world)
Recmodel = Recmodel.to(world.device)
file = f"lgn-{world.dataset}-{world.config['lightGCN_n_layers']}-{world.config['latent_dim_rec']}.pth.tar"
weight_path = os.path.join(world.FILE_PATH, file)
print(f"load and save to {weight_path}")
if world.LOAD:
    try:
        Recmodel.load_state_dict(torch.load(weight_path, map_location=torch.device('cpu')))
        world.cprint(f"loaded model weights from {weight_path}")
    except FileNotFoundError:
        print(f"{weight_path} not exists, start from beginning")
Recmodel.Test(dataset, Recmodel, 1)
u_pred = Recmodel.pred(dataset,Recmodel)
output_file = 'submission.csv'
col_list = ['id']
col_list+= [f"col_{i}" for i in range(1,21)]
cnt = 0
with open(output_file, 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    tensor_list = []
    for u_pred_sub in u_pred:
        for i in range(len(u_pred_sub)):
            list_ = [cnt]
            cnt+=1
            list_ += u_pred_sub[i].tolist()
            tensor_list.append(list_)
    writer.writerow(col_list)
    writer.writerows(tensor_list)

print(f"张量已成功写入到 {output_file}")

>>SEED: 2024
[0;30;43mloading [][0m
810128 interactions for training
217242 interactions for testing
gowalla Sparsity : 0.0008396216228570436
gowalla is ready to go
{'A_n_fold': 100,
 'A_split': False,
 'bigdata': False,
 'bpr_batch_size': 2048,
 'decay': 0.0001,
 'dropout': 0,
 'keep_prob': 0.6,
 'latent_dim_rec': 64,
 'lightGCN_n_layers': 3,
 'lr': 0.001,
 'multicore': 0,
 'pretrain': 0,
 'test_u_batch_size': 100}
cores for test: 1
comment: lgn
LOAD: 1
Weight path: ./checkpoints
Test Topks: [20]
using bpr loss
[0;30;43muse NORMAL distribution initilizer[0m
loading adjacency matrix
successfully loaded...
don't split the matrix
lgn is already to go(dropout:0)
load and save to /content/code/checkpoints/lgn-gowalla-3-64.pth.tar
[0;30;43mloaded model weights from /content/code/checkpoints/lgn-gowalla-3-64.pth.tar[0m
29858
[2948.0498845]
{'precision': array([0.03607408]), 'recall': array([0.11810592]), 'ndcg': array([0.09873568])}
输出成功！
张量已成功写入到 submission.csv
