# Configuration
torch : 1.13.1<br>
cuda : 11.7<br>
torch_geometric : 2.3.0<br>


In [2]:
# %conda install pytorch==1.13.1 torchvision==0.14.1 torchaudio==0.13.1 pytorch-cuda=11.7 -c pytorch -c nvidia
# %pip install torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-1.13.0+cu117.html
# %pip install torch_geometric
# %pip install pandas

Collecting pandas
  Downloading pandas-1.3.5-cp37-cp37m-win_amd64.whl (10.0 MB)
     ---------------------------------------- 10.0/10.0 MB 9.4 MB/s eta 0:00:00
Collecting pytz>=2017.3
  Downloading pytz-2023.3-py2.py3-none-any.whl (502 kB)
     ------------------------------------- 502.3/502.3 kB 30.8 MB/s eta 0:00:00
Installing collected packages: pytz, pandas
Successfully installed pandas-1.3.5 pytz-2023.3
Note: you may need to restart the kernel to use updated packages.


In [1]:
import torch
import torch_geometric
# Show the installed torch, CUDA and torch_geometric versions, one per line.
print(torch.__version__, torch.version.cuda, torch_geometric.__version__, sep="\n")

  from .autonotebook import tqdm as notebook_tqdm


1.13.1
11.7
2.3.0


In [38]:
import os
from os.path import join
import torch
from enum import Enum
# from parse import parse_args
import multiprocessing

# os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
# args = parse_args()

# ROOT_PATH = os.path.dirname(os.path.dirname(__file__))
# CODE_PATH = join(ROOT_PATH, 'code')
# DATA_PATH = join(ROOT_PATH, 'data')
# BOARD_PATH = join(CODE_PATH, 'runs')
# FILE_PATH = join(CODE_PATH, 'checkpoints')
# import sys
# sys.path.append(join(CODE_PATH, 'sources'))


# if not os.path.exists(FILE_PATH):
#     os.makedirs(FILE_PATH, exist_ok=True)

# ---------------------------------------------------------------------------
# Experiment configuration.
# `config` holds the hyper-parameters; MMGCF reads 'emb_size' and 'n_layers'.
# The remaining module-level names describe the run (dataset, device, paths).
# ---------------------------------------------------------------------------
config = {}
all_dataset = ['lastfm', 'gowalla', 'yelp2018', 'amazon-book']
all_models  = ['MMGCF']
# config['batch_size'] = 4096
config['emb_size'] = 64  # embedding width
config['bpr_batch_size'] = 1024  # the batch size for the bpr loss training procedure
config['latent_dim_rec'] = 64  # the embedding size
config['n_layers'] = 2  # the layer num
config['dropout'] = 0  # using the dropout or not
# NOTE(review): the original comment here was copied from the batch-size line;
# presumably this is the keep probability applied when dropout is on - confirm.
config['keep_prob'] = 0.6
config['A_n_fold'] = 100  # the fold num used to split large adj matrix, like gowalla
config['test_u_batch_size'] = 100  # the batch size of users for testing
config['multicore'] = 0  # whether we use multiprocessing or not in test
config['lr'] = 0.001  # the learning rate
config['decay'] = 1e-4  # the weight decay for l2 normalization
config['pretrain'] = 0  # whether we use pretrained weight or not
config['A_split'] = False
config['bigdata'] = False

GPU = torch.cuda.is_available()
device = torch.device('cuda' if GPU else "cpu")
CORES = multiprocessing.cpu_count() // 2
seed = 2020

dataset = 'gowalla'
# Must be one of `all_models`. The previous value 'lgn' was not listed there,
# so the check below raised on every run and the rest of the cell never ran.
model_name = 'MMGCF'
if dataset not in all_dataset:
    raise NotImplementedError(f"Haven't supported {dataset} yet!, try {all_dataset}")
if model_name not in all_models:
    raise NotImplementedError(f"Haven't supported {model_name} yet!, try {all_models}")


TRAIN_epochs = 1000
LOAD = 0
PATH = "./checkpoints"  # path to save weights
topks = [20]  # @k test list (plain literal; no eval needed for a constant)
tensorboard = 1  # enable tensorboard
comment = "lgn"
# Silence FutureWarning messages (originally added to quiet pandas).
from warnings import simplefilter
simplefilter(action="ignore", category=FutureWarning)


# Data Loader

In [47]:
import os
import numpy as np
import pandas as pd
import scipy.sparse as sp
import torch
from torch.utils.data import Dataset, DataLoader, random_split


class Gowalla(object):
    """
    Loader for the Gowalla interaction data.

    The edge file is read as space-separated rows with the columns
    "user", "item", "timestamp". After ``load_data`` the instance exposes:

    edges          : pandas.DataFrame holding the rows of the edge file
    num_user_node  : number of distinct values in the "user" column
    num_item_node  : number of distinct values in the "item" column
    ui_matrix      : scipy COO matrix indexed as (user, item)
    iu_matrix      : transpose of ``ui_matrix``
    user_feature   : only set when ``User_feature_path`` is given
    item_feature   : only set when ``Item_feature_path`` is given
    """
    def __init__(self):
        super(Gowalla, self).__init__()

    def load_data(self, path, User_feature_path = None, Item_feature_path = None):
        """
        Load the edge list (and optional feature tables) from disk.

        Parameter:
        path : str => file path of the edge data
        User_feature_path : str , default = None => file path for user data
        Item_feature_path : str , default = None => file path for item data

        Returns:
        pandas.DataFrame => the loaded edges (also stored as ``self.edges``)

        Raises:
        FileNotFoundError => when ``path`` or a given feature path does not exist
        """
        if not os.path.exists(path):
            # Previously the exception was built but never raised, so a missing
            # file only surfaced later as an AttributeError on ``self.edges``.
            raise FileNotFoundError(f"There are No train data in {path}")

        self.edges = pd.read_table(
            path,
            names=["user", "item", "timestamp"],
            sep=" ",
            dtype={'user': np.int64, 'item': np.int64},
        )
        self.num_user_node = len(self.edges['user'].unique())
        self.num_item_node = len(self.edges['item'].unique())
        # The padding inside the literals is kept so the printed text stays
        # exactly what earlier runs of this notebook produced.
        print(
            f"User node : {self.num_user_node}\n "
            f"                  Item node : {self.num_item_node}\n "
            f"                  edges : {len(self.edges)}"
        )

        if User_feature_path is not None:
            if not os.path.exists(User_feature_path):
                raise FileNotFoundError(f"There are No user feature data in {User_feature_path}")
            self.user_feature = pd.read_table(User_feature_path)

        if Item_feature_path is not None:
            if not os.path.exists(Item_feature_path):
                raise FileNotFoundError(f"There are No item feature data in {Item_feature_path}")
            self.item_feature = pd.read_table(Item_feature_path)

        self.get_adj_matrix()
        return self.edges

    def get_adj_matrix(self):
        """
        Build the user-item matrix from ``self.edges``.

        Every edge contributes a 1.0 at (user, item). No explicit shape is
        passed, so scipy infers it from the largest user / item id present;
        it therefore need not equal (num_user_node, num_item_node) when the
        ids are not contiguous.
        """
        edge_weights = np.ones(len(self.edges))
        user_idx = self.edges['user'].to_numpy()
        item_idx = self.edges['item'].to_numpy()
        adj_M = sp.coo_matrix((edge_weights, (user_idx, item_idx)), dtype=np.float32)
        self.ui_matrix = adj_M
        self.iu_matrix = adj_M.transpose()


# class Gowalla(object):
#     """
#     To load the Gowalla data
#     Current Version could be used as abstrct class after
#     Gowalla only contains
#     edge : "user", "item", "timestamp"
#     """
#     def __init__(self):
#         super(Gowalla,self).__init__()
    

#     def load_data(self, train_path, test_path, User_feature_path = None, Item_feature_path = None):
#         """ 
#         Load the data from file path
#         Parameter:
#         train_path : str => file path for train data set
#         test_path : str => file path for test data set
#         User_feature_path : str , default = None => file path for user data 
#         Item_feature_path : str , default = None => file path for item data 
#         """
#         if os.path.exists(train_path):
#             self.train_edges = pd.read_table(train_path, names=["user","item","timestamp"], sep=" ",dtype={'user':np.int64,'item':np.int64})
#             self.train_num_user_node = len(self.train_edges['user'].unique())
#             self.train_num_item_node = len(self.train_edges['item'].unique())
#             print(f"User node : {self.train_num_user_node}\n \
#                   Item node : {self.train_num_item_node}\n \
#                   edges : {len(self.train_edges)}")
#         else:
#             FileNotFoundError(f"There are No train data in {train_path}")

#         if os.path.exists(test_path):
#             self.test_edges = pd.read_table(test_path, names=["user","item","timestamp"], sep=" ",dtype={'user':np.int64,'item':np.int64})
#             self.test_num_user_node = len(self.test_edges['user'].unique())
#             self.test_num_item_node = len(self.test_edges['item'].unique())
#             print(f"User node : {self.test_num_user_node}\n \
#                   Item node : {self.test_num_item_node}\n \
#                   edges : {len(self.test_edges)}")
#         else:
#             FileNotFoundError(f"There are No test data in {test_path}")
        
#         if User_feature_path != None:
#             if os.path.exists(User_feature_path):
#                 self.user_feature = pd.read_table(User_feature_path)
#             else:
#                 FileNotFoundError(f"There are No user feature data in {User_feature_path}")
#         else:
#             User_feature_path = None
#         if Item_feature_path != None:
#             if os.path.exists(Item_feature_path):
#                 self.item_feature = pd.read_table(Item_feature_path)
#             else:
#                 FileNotFoundError(f"There are No user feature data in {Item_feature_path}")
#         else:
#             Item_feature_path = None
#         self.get_adj_matrix()
#         return self.train_edges, self.test_edges 
    
#     def get_adj_matrix(self):
#         train_adj_M = sp.coo_matrix((np.ones(len(self.train_edges)),(self.train_edges['user'],self.train_edges['item'])), dtype=np.float32)
#         test_adj_M = sp.coo_matrix((np.ones(len(self.train_edges)),(self.train_edges['user'],self.train_edges['item'])), dtype=np.float32)
#         self.train_ui_matrix = train_adj_M
#         self.train_iu_matrix = train_adj_M.transpose()
#         self.test_ui_matrix = test_adj_M

In [48]:
# Read the Gowalla training edges; load_data prints the node/edge counts and
# returns the edge DataFrame.
# NOTE(review): the path below is an absolute, machine-specific location.
data = Gowalla()
train = data.load_data(
    path='C:\\Users\\PC\\Desktop\\MGCCF\\data\\Gowalla\\train.data',
)

User node : 29858
                   Item node : 40988
                   edges : 821971


In [50]:
import torch.nn as nn
class MMGCF(nn.Module):
    def __init__(self, data):
        """Store the graph sizes and the hyper-parameters the model needs.

        Parameter:
        data : object exposing ``num_user_node`` and ``num_item_node``

        The embedding size and the layer setting are read from the
        notebook-level ``config`` dict ('emb_size' and 'n_layers').
        """
        super().__init__()
        # Node counts come from the loaded dataset.
        self.n_user, self.n_item = data.num_user_node, data.num_item_node
        # Hyper-parameters come from the global configuration.
        self.emb_size, self.layers = config['emb_size'], config['n_layers']

    def init_weight(self):
        """Create the Xavier-initialised embeddings and per-layer weights.

        ``self.layers`` may be either
          * an int layer count (this is what ``config['n_layers']`` holds), or
          * a sequence giving the output width of every layer.
        The previous code only worked for a sequence and raised TypeError
        for the int that the configuration actually supplies.

        Returns:
        (embedding_dict, weight_dict) => two ``nn.ParameterDict`` objects.
            embedding_dict has 'user_emb' (n_user x emb_size) and
            'item_emb' (n_item x emb_size).
            weight_dict has, for every layer k, 'W_gc_k', 'b_gc_k',
            'W_ag_k' and 'b_ag_k'.
        """
        # xavier init
        initializer = nn.init.xavier_uniform_

        embedding_dict = nn.ParameterDict({
            'user_emb': nn.Parameter(initializer(torch.empty(self.n_user,
                                                 self.emb_size))),
            'item_emb': nn.Parameter(initializer(torch.empty(self.n_item,
                                                 self.emb_size)))
        })

        if isinstance(self.layers, int):
            # NOTE(review): with only a layer count available, every layer is
            # given the embedding width - confirm this is the intended size.
            layer_sizes = [self.emb_size] * self.layers
        else:
            layer_sizes = list(self.layers)
        # dims[k] -> dims[k + 1] is the shape of layer k's weight matrix.
        dims = [self.emb_size] + layer_sizes

        # init the parameters
        weight_dict = nn.ParameterDict()
        for k in range(len(layer_sizes)):
            in_dim, out_dim = dims[k], dims[k + 1]
            # Creation order (W_gc, b_gc, W_ag, b_ag) matches the original so
            # the random stream is consumed in the same sequence.
            for tag in ('gc', 'ag'):
                weight_dict['W_%s_%d' % (tag, k)] = nn.Parameter(
                    initializer(torch.empty(in_dim, out_dim)))
                weight_dict['b_%s_%d' % (tag, k)] = nn.Parameter(
                    initializer(torch.empty(1, out_dim)))

        return embedding_dict, weight_dict
    
    def 