In [None]:
# Part of the code adapted from https://github.com/CRIPAC-DIG/SR-GNN/blob/e21cfa431f74c25ae6e4ae9261deefe11d1cb488/pytorch_code/
# as well as https://github.com/userbehavioranalysis/SR-GNN_PyTorch-Geometric

In [None]:
# Mount Google Drive at /content/drive (Colab only); the dataset paths used
# later in this notebook all live underneath this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pickle

In [None]:
import numpy as np
import networkx as nx

In [None]:
!pip install torch_geometric

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torch_geometric
  Downloading torch_geometric-2.3.1.tar.gz (661 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m661.6/661.6 kB[0m [31m41.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: torch_geometric
  Building wheel for torch_geometric (pyproject.toml) ... [?25l[?25hdone
  Created wheel for torch_geometric: filename=torch_geometric-2.3.1-py3-none-any.whl size=910476 sha256=80d4e6c2022d9e1777480e94e12ab177b2bddb745d3c55405b7eff1eb8950d4a
  Stored in directory: /root/.cache/pip/wheels/ac/dc/30/e2874821ff308ee67dcd7a66dbde912411e19e35a1addda028
Successfully built torch_geometric
Installing collected packages: torch_geometric
Successfully installed torch_geomet

In [None]:
import torch_geometric

In [None]:
import numpy as np
import pandas as pd
import os
import torch_geometric.transforms as T

import matplotlib.pyplot as plt
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
import torch_geometric
from torch.nn import Parameter
from torch_geometric.utils.convert import to_networkx
import networkx as nx
import urllib.request
import tarfile
from torch_geometric.nn import GraphSAGE
from torch_geometric.nn import SAGEConv
from torch_geometric.nn import GATConv
from torch_geometric.loader import NeighborLoader
from torch_geometric.utils import to_networkx

In [None]:
from torch_geometric.data import InMemoryDataset, Data


class MultiSessionsGraph(InMemoryDataset):
    """Every session is a graph.

    The raw file ``<root>/raw/<phrase>.txt`` is a pickle whose element 0 holds
    the sessions (item-id sequences) and element 1 the matching targets.  Each
    session becomes one graph whose nodes are the distinct items of the session
    and whose edges link consecutive clicks.
    """
    def __init__(self, root, phrase, transform=None, pre_transform=None):
        """
        Args:
            root: 'sample', 'yoochoose1_4', 'yoochoose1_64' or 'diginetica'
            phrase: 'train' or 'test'
        """
        assert phrase in ['train', 'test']
        self.phrase = phrase
        super(MultiSessionsGraph, self).__init__(root, transform, pre_transform)
        self.data, self.slices = torch.load(self.processed_paths[0])

    @property
    def raw_file_names(self):
        """Name of the pickled raw split expected in the raw directory."""
        return [self.phrase + '.txt']

    @property
    def processed_file_names(self):
        """Name of the collated tensor file written by ``process``."""
        return [self.phrase + '.pt']

    def download(self):
        """Nothing to download: the raw files must already be in place."""
        pass

    def process(self):
        """Convert every pickled session into a graph and save the collated result."""
        # Imported locally under an alias because a later cell of this notebook
        # rebinds the global name `Data` to an unrelated batching helper; using
        # the global here would break if this method ran after that cell.
        from torch_geometric.data import Data as GraphData

        # NOTE: pickle can execute arbitrary code while loading; only point this
        # at raw files you generated yourself.
        raw_path = self.raw_dir + '/' + self.raw_file_names[0]
        with open(raw_path, 'rb') as raw_file:
            data = pickle.load(raw_file)

        data_list = []
        for sequences, y in zip(data[0], data[1]):
            nodes = {}    # item id -> local node index, e.g. {15: 0, 16: 1, 18: 2, ...}
            visited = []  # local node index of every click, in click order
            x = []        # one [item_id] row per distinct item, in first-seen order
            for node in sequences:
                if node not in nodes:
                    nodes[node] = len(nodes)
                    x.append([node])
                visited.append(nodes[node])

            # Consecutive clicks form the edges: the last click is never a
            # sender and the first click is never a receiver.  Slicing (rather
            # than `del`) also copes with an empty session.
            senders, receivers = visited[:-1], visited[1:]
            edge_index = torch.tensor([senders, receivers], dtype=torch.long)
            x = torch.tensor(x, dtype=torch.long)
            y = torch.tensor([y], dtype=torch.long)
            data_list.append(GraphData(x=x, edge_index=edge_index, y=y))

        data, slices = self.collate(data_list)
        torch.save((data, slices), self.processed_paths[0])

In [None]:
# Size passed as `n_node` to the graph models below (embedding rows / output classes).
# NOTE(review): hard-coded; presumably the item count of the preprocessed
# yoochoose1_64 split - confirm against the preprocessing output.
# This name is reassigned later in the hypergraph section of the notebook.
n_node = 37483

In [None]:
# To reproduce: run preprocess.py first, then place its output under the
# dataset root used below (the raw split is read from <root>/raw/train.txt).
cur_dir = os.getcwd()
train_dataset = MultiSessionsGraph(
    f"{cur_dir}/drive/MyDrive/AM220proj/yoochoose1_64",
    phrase='train',
)

In [None]:
from torch_geometric.data import DataLoader

In [None]:
# Mini-batches of 64 session graphs, reshuffled on every pass over the data.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)



In [None]:
test_dataset = MultiSessionsGraph(cur_dir + '/drive/MyDrive/AM220proj/yoochoose1_64', phrase='test')
# Evaluation does not benefit from shuffling; a fixed order keeps every
# evaluation pass over the test split identical from run to run.
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [None]:
import torch.nn as nn
import math
from torch_geometric.nn import GCNConv, GATConv, GatedGraphConv
from torch.nn import Linear
from torch_geometric.nn import TopKPooling
from torch_geometric.nn.glob import global_add_pool, global_mean_pool

In [None]:
class GNNModel(nn.Module):
    """Session scorer: item embedding -> two GCN layers -> mean pooling -> linear head.

    Args:
        hidden_size: the number of units in a hidden layer.
        n_node: the number of items in the whole item set for embedding layer.
    """
    def __init__(self, hidden_size, n_node):
        super(GNNModel, self).__init__()
        self.hidden_size, self.n_node = hidden_size, n_node
        self.embedding = nn.Embedding(self.n_node, self.hidden_size)
        self.conv2 = GCNConv(self.hidden_size, self.hidden_size)
        # pool1 / pool3 are never called in forward(); they are kept (with their
        # original size) so the parameter set and state_dict stay unchanged.
        self.pool1 = TopKPooling(64, ratio=0.8)
        self.conv3 = GCNConv(self.hidden_size, self.hidden_size)
        self.pool3 = TopKPooling(64, ratio=0.8)
        self.lin = Linear(self.hidden_size, self.n_node)
        self.loss_function = nn.CrossEntropyLoss()
        self.reset_parameters()

    def reset_parameters(self):
        """Re-draw every parameter uniformly from [-1/sqrt(hidden_size), 1/sqrt(hidden_size)]."""
        stdv = 1.0 / math.sqrt(self.hidden_size)
        for weight in self.parameters():
            weight.data.uniform_(-stdv, stdv)

    def forward(self, data):
        """Score every item for each session graph in the batch.

        Args:
            data: batched graph object exposing ``x``, ``edge_index`` and ``batch``.

        Returns:
            The output of the linear head applied to the mean-pooled node
            states (one row of ``n_node`` scores per graph).
        """
        # Item ids are shifted down by one before the embedding lookup.
        x, edge_index, batch = (data.x - 1), data.edge_index, data.batch

        # The dataset stores x as one [item_id] row per node, so the lookup
        # yields (num_nodes, 1, hidden).  Squeeze only that axis: a bare
        # squeeze() would also drop the node axis for a single-node batch
        # (or the feature axis when hidden_size == 1).
        x = self.embedding(x).squeeze(1)

        x = self.conv2(x, edge_index).relu()
        x = self.conv3(x, edge_index).relu()

        x = global_mean_pool(x, batch)
        return self.lin(x)

In [None]:
def forward(model, loader, device, epoch, top_k=20, optimizer=None, train_flag=True):
    """Run one full pass over ``loader``, either training or evaluating ``model``.

    Args:
        model: module called as ``model(batch)``; must expose ``loss_function``.
        loader: iterable of batches providing ``to(device)``, ``y`` and ``num_graphs``.
        device: device every batch is moved to before the forward pass.
        epoch: epoch number, used only in the printed progress lines.
        top_k: cut-off for the HIT / MRR metrics (clamped to the number of scores).
        optimizer: optimizer stepped after every batch; required when training.
        train_flag: ``True`` to train, ``False`` to evaluate.

    Returns:
        ``None`` when training; ``(hit, mrr)`` in percent when evaluating
        (``(0.0, 0.0)`` if the loader yields no samples).

    Raises:
        ValueError: if ``train_flag`` is true but no optimizer was given.
    """
    if train_flag and optimizer is None:
        raise ValueError("an optimizer is required when train_flag=True")

    if train_flag:
        model.train()
    else:
        model.eval()
    hit, mrr = [], []

    # Accumulate plain floats: summing the loss tensors themselves would keep
    # every batch's autograd graph alive for the whole epoch.
    total_loss = 0.0
    total_graphs = 0

    # Gradients are only needed while training.
    with torch.set_grad_enabled(train_flag):
        for i, batch in enumerate(loader):
            batch = batch.to(device)
            if train_flag:
                optimizer.zero_grad()
            scores = model(batch)
            targets = batch.y - 1    # targets are shifted down by one, like the item ids in the models
            loss = model.loss_function(scores.float(), targets)

            if train_flag:
                loss.backward()
                optimizer.step()
                if i % 1500 == 0:
                    print("Epoch: ", epoch, "Batch ", i, "loss: ", loss.item())
            else:
                k = min(top_k, scores.size(-1))
                ranked_items = scores.topk(k)[1].detach().cpu().numpy()    # batch * k
                for ranked, target in zip(ranked_items, targets.detach().cpu().numpy()):
                    positions = np.where(ranked == target)[0]
                    if len(positions) == 0:
                        hit.append(False)
                        mrr.append(0.0)
                    else:
                        hit.append(True)
                        mrr.append(1.0 / (positions[0] + 1))

            # Weight each batch by its size so the epoch figure is a true
            # per-sample mean (assumes loss_function averages over the batch,
            # as the default nn.CrossEntropyLoss does); the previous code
            # divided an already-averaged loss by the batch size again.
            total_loss += loss.item() * batch.num_graphs
            total_graphs += batch.num_graphs

    mean_loss = total_loss / total_graphs if total_graphs else float('nan')

    if train_flag:
        print("Epoch: ", epoch, "train loss: ", mean_loss)
        return None

    hit = np.mean(hit) * 100 if hit else 0.0
    mrr = np.mean(mrr) * 100 if mrr else 0.0
    print("Epoch: ", epoch, "test loss: ", mean_loss)
    print("Epoch: ", epoch, "Test hit", hit, "Test mrr", mrr)
    return hit, mrr

In [None]:
# Use the GPU when one is attached, otherwise fall back to the CPU so the
# cell still runs on a runtime without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = GNNModel(hidden_size=64, n_node=n_node).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [None]:
import time

In [None]:
# Train the GCN model for 10 epochs; evaluate on the test loader after each
# epoch and remember the best HIT@20 / MRR@20 seen so far.
besthit, bestmrr = 0, 0
start_time = time.time()
loss_gcn = []
for epoch in range(10):
    forward(model, train_loader, device, epoch, optimizer=optimizer, train_flag=True)
    with torch.no_grad():
        hit, mrr = forward(model, test_loader, device, epoch,  train_flag=False)
    besthit = hit if hit >= besthit else besthit
    bestmrr = mrr if mrr >= bestmrr else bestmrr
elapsed = time.time() - start_time
print("time elapsed:", elapsed)
print(f"Best HIT@20: {besthit:.4f}, Best MRR@20: {bestmrr:.4f}")

Epoch:  0 Batch  0 loss:  10.53860855102539
Epoch:  0 Batch  1500 loss:  7.0568108558654785
Epoch:  0 Batch  3000 loss:  6.565487861633301
Epoch:  0 Batch  4500 loss:  6.527947425842285
Epoch:  0 train loss:  0.10874190281006704
Epoch:  0 test loss:  0.09183677655881275
Epoch:  0 Test hit 42.590074779061865 Test mrr 14.37655000325036
Epoch:  1 Batch  0 loss:  5.779108047485352
Epoch:  1 Batch  1500 loss:  6.092808723449707
Epoch:  1 Batch  3000 loss:  6.104969024658203
Epoch:  1 Batch  4500 loss:  5.7630133628845215
Epoch:  1 train loss:  0.09222798594966479
Epoch:  1 test loss:  0.08756491794193363
Epoch:  1 Test hit 49.96779849010698 Test mrr 18.15954287113623
Epoch:  2 Batch  0 loss:  5.599277973175049
Epoch:  2 Batch  1500 loss:  5.22727632522583
Epoch:  2 Batch  3000 loss:  5.076815128326416
Epoch:  2 Batch  4500 loss:  6.171772003173828
Epoch:  2 train loss:  0.08493181908419388
Epoch:  2 test loss:  0.08800305818256579
Epoch:  2 Test hit 52.17002397223515 Test mrr 19.43556364782

In [None]:
# Next we evaluate GAT

In [None]:
class AttentionModel(nn.Module):
    """Session scorer: item embedding -> two GAT layers -> mean pooling -> linear head.

    Args:
        hidden_size: the number of units in a hidden layer.
        n_node: the number of items in the whole item set for embedding layer.
    """
    def __init__(self, hidden_size, n_node):
        super(AttentionModel, self).__init__()
        self.hidden_size, self.n_node = hidden_size, n_node
        self.embedding = nn.Embedding(self.n_node, self.hidden_size)
        self.conv2 = GATConv(self.hidden_size, self.hidden_size)
        # pool1 / pool3 are never called in forward(); they are kept (with their
        # original size) so the parameter set and state_dict stay unchanged.
        self.pool1 = TopKPooling(64, ratio=0.8)
        self.conv3 = GATConv(self.hidden_size, self.hidden_size)
        self.pool3 = TopKPooling(64, ratio=0.8)
        self.lin = Linear(self.hidden_size, self.n_node)
        self.loss_function = nn.CrossEntropyLoss()
        self.reset_parameters()

    def reset_parameters(self):
        """Re-draw every parameter uniformly from [-1/sqrt(hidden_size), 1/sqrt(hidden_size)]."""
        stdv = 1.0 / math.sqrt(self.hidden_size)
        for weight in self.parameters():
            weight.data.uniform_(-stdv, stdv)

    def forward(self, data):
        """Score every item for each session graph in the batch.

        Args:
            data: batched graph object exposing ``x``, ``edge_index`` and ``batch``.

        Returns:
            The output of the linear head applied to the mean-pooled node
            states (one row of ``n_node`` scores per graph).
        """
        # Item ids are shifted down by one before the embedding lookup.
        x, edge_index, batch = (data.x - 1), data.edge_index, data.batch

        # The dataset stores x as one [item_id] row per node, so the lookup
        # yields (num_nodes, 1, hidden).  Squeeze only that axis: a bare
        # squeeze() would also drop the node axis for a single-node batch
        # (or the feature axis when hidden_size == 1).
        x = self.embedding(x).squeeze(1)

        x = self.conv2(x, edge_index).relu()
        x = self.conv3(x, edge_index).relu()

        x = global_mean_pool(x, batch)
        return self.lin(x)

In [None]:
# Use the GPU when one is attached, otherwise fall back to the CPU so the
# cell still runs on a runtime without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = AttentionModel(hidden_size=64, n_node=n_node).to(device)

In [None]:
# Train the GAT model for 10 epochs; evaluate on the test loader after each
# epoch and remember the best HIT@20 / MRR@20 seen so far.
besthit, bestmrr = 0, 0
start_time = time.time()
loss_gat = []
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

for epoch in range(10):
    forward(model, train_loader, device, epoch, optimizer=optimizer, train_flag=True)
    with torch.no_grad():
        hit, mrr = forward(model, test_loader, device, epoch,  train_flag=False)
    besthit = hit if hit >= besthit else besthit
    bestmrr = mrr if mrr >= bestmrr else bestmrr
elapsed = time.time() - start_time
print("time elapsed:", elapsed)
print(f"Best HIT@20: {besthit:.4f}, Best MRR@20: {bestmrr:.4f}")

Epoch:  0 Batch  0 loss:  10.54663372039795
Epoch:  0 Batch  1500 loss:  6.990447521209717
Epoch:  0 Batch  3000 loss:  6.3286213874816895
Epoch:  0 Batch  4500 loss:  6.841845512390137
Epoch:  0 train loss:  0.10735925588640788
Epoch:  0 test loss:  0.09043144906958399
Epoch:  0 Test hit 45.037389530931335 Test mrr 15.12693112960938
Epoch:  1 Batch  0 loss:  6.191983222961426
Epoch:  1 Batch  1500 loss:  5.18905782699585
Epoch:  1 Batch  3000 loss:  5.009959697723389
Epoch:  1 Batch  4500 loss:  5.54498815536499
Epoch:  1 train loss:  0.0905238379244161
Epoch:  1 test loss:  0.08621592925396881
Epoch:  1 Test hit 51.33994060610397 Test mrr 19.072901641539
Epoch:  2 Batch  0 loss:  6.023955821990967
Epoch:  2 Batch  1500 loss:  5.096771240234375
Epoch:  2 Batch  3000 loss:  4.853274822235107
Epoch:  2 Batch  4500 loss:  5.2791666984558105
Epoch:  2 train loss:  0.0829603201790252
Epoch:  2 test loss:  0.08664252065139177
Epoch:  2 Test hit 53.34001216501485 Test mrr 20.280114655164784


In [None]:
# Next we try GraphSAGE

In [None]:
class SageModel(nn.Module):
    """Session scorer: item embedding -> two GraphSAGE layers -> mean pooling -> linear head.

    Args:
        hidden_size: the number of units in a hidden layer.
        n_node: the number of items in the whole item set for embedding layer.
    """
    def __init__(self, hidden_size, n_node):
        super(SageModel, self).__init__()
        self.hidden_size, self.n_node = hidden_size, n_node
        self.embedding = nn.Embedding(self.n_node, self.hidden_size)
        self.conv2 = SAGEConv(self.hidden_size, self.hidden_size)
        # pool1 / pool3 are never called in forward(); they are kept (with their
        # original size) so the parameter set and state_dict stay unchanged.
        self.pool1 = TopKPooling(64, ratio=0.8)
        self.conv3 = SAGEConv(self.hidden_size, self.hidden_size)
        self.pool3 = TopKPooling(64, ratio=0.8)
        self.lin = Linear(self.hidden_size, self.n_node)
        self.loss_function = nn.CrossEntropyLoss()
        self.reset_parameters()

    def reset_parameters(self):
        """Re-draw every parameter uniformly from [-1/sqrt(hidden_size), 1/sqrt(hidden_size)]."""
        stdv = 1.0 / math.sqrt(self.hidden_size)
        for weight in self.parameters():
            weight.data.uniform_(-stdv, stdv)

    def forward(self, data):
        """Score every item for each session graph in the batch.

        Args:
            data: batched graph object exposing ``x``, ``edge_index`` and ``batch``.

        Returns:
            The output of the linear head applied to the mean-pooled node
            states (one row of ``n_node`` scores per graph).
        """
        # Item ids are shifted down by one before the embedding lookup.
        x, edge_index, batch = (data.x - 1), data.edge_index, data.batch

        # The dataset stores x as one [item_id] row per node, so the lookup
        # yields (num_nodes, 1, hidden).  Squeeze only that axis: a bare
        # squeeze() would also drop the node axis for a single-node batch
        # (or the feature axis when hidden_size == 1).
        x = self.embedding(x).squeeze(1)

        x = self.conv2(x, edge_index).relu()
        x = self.conv3(x, edge_index).relu()

        x = global_mean_pool(x, batch)
        return self.lin(x)

In [None]:
# Use the GPU when one is attached, otherwise fall back to the CPU so the
# cell still runs on a runtime without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = SageModel(hidden_size=64, n_node=n_node).to(device)

In [None]:
# Train the GraphSAGE model for 10 epochs; evaluate on the test loader after
# each epoch and remember the best HIT@20 / MRR@20 seen so far.
besthit, bestmrr = 0, 0
start_time = time.time()
loss_sage = []
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

for epoch in range(10):
    forward(model, train_loader, device, epoch, optimizer=optimizer, train_flag=True)
    with torch.no_grad():
        hit, mrr = forward(model, test_loader, device, epoch,  train_flag=False)
    besthit = hit if hit >= besthit else besthit
    bestmrr = mrr if mrr >= bestmrr else bestmrr
elapsed = time.time() - start_time
print("time elapsed:", elapsed)
print(f"Best HIT@20: {besthit:.4f}, Best MRR@20: {bestmrr:.4f}")

Epoch:  0 Batch  0 loss:  10.532249450683594
Epoch:  0 Batch  1500 loss:  7.17373514175415
Epoch:  0 Batch  3000 loss:  6.62449836730957
Epoch:  0 Batch  4500 loss:  6.669823169708252
Epoch:  0 train loss:  0.10436256699083586
Epoch:  0 test loss:  0.08676075880805742
Epoch:  0 Test hit 50.187842141042616 Test mrr 17.5845922381155
Epoch:  1 Batch  0 loss:  5.990737438201904
Epoch:  1 Batch  1500 loss:  6.345929145812988
Epoch:  1 Batch  3000 loss:  5.12469482421875
Epoch:  1 Batch  4500 loss:  5.570568084716797
Epoch:  1 train loss:  0.08691341835734753
Epoch:  1 test loss:  0.08318619542591087
Epoch:  1 Test hit 56.282872374682455 Test mrr 21.576320700820137
Epoch:  2 Batch  0 loss:  4.448413372039795
Epoch:  2 Batch  1500 loss:  5.19327974319458
Epoch:  2 Batch  3000 loss:  5.017430782318115
Epoch:  2 Batch  4500 loss:  4.6934332847595215
Epoch:  2 train loss:  0.07972135972811689
Epoch:  2 test loss:  0.08349359717467283
Epoch:  2 Test hit 58.78743425525064 Test mrr 22.9399315434294

In [None]:
# SR-GNN as proposed in https://arxiv.org/abs/1811.00855

In [None]:
# The SR-GNN implementation below is adapted from https://github.com/userbehavioranalysis/SR-GNN_PyTorch-Geometric

In [None]:
class Embedding2Score(nn.Module):
    """Turn per-node session states into scores over all items (SR-GNN Eq. 6-8).

    Args:
        hidden_size: width of the node states and of the item embeddings.
    """
    def __init__(self, hidden_size):
        super(Embedding2Score, self).__init__()
        self.hidden_size = hidden_size
        self.W_1 = nn.Linear(self.hidden_size, self.hidden_size)
        self.W_2 = nn.Linear(self.hidden_size, self.hidden_size)
        self.q = nn.Linear(self.hidden_size, 1)
        self.W_3 = nn.Linear(2 * self.hidden_size, self.hidden_size)

    def forward(self, session_embedding, all_item_embedding, batch):
        """Score every item for every graph in the batch.

        Args:
            session_embedding: node states, one row per node; the rows of each
                graph must be contiguous and ordered by graph id (this is what
                the per-graph split below relies on).
            all_item_embedding: embedding module whose ``weight`` holds one row per item.
            batch: graph id of every node.

        Returns:
            Tensor with one row per graph and one column per item.

        Raises:
            IndexError: if some graph id in ``0..max(batch)`` owns no node.
        """
        sections = torch.bincount(batch)
        # Single device->host transfer, reused for the check and the split below.
        sizes = sections.tolist()
        if 0 in sizes:
            raise IndexError("every graph in the batch needs at least one node")

        # The last node of graph i sits at (cumulative node count - 1).
        last_idx = torch.cumsum(sections, dim=0) - 1
        v_n = session_embedding[last_idx]    # num_graphs * hidden_size
        v_n_repeat = torch.repeat_interleave(v_n, sections, dim=0)    # last-node state repeated |V|_i times

        # Eq(6)
        alpha = self.q(torch.sigmoid(self.W_1(v_n_repeat) + self.W_2(session_embedding)))    # |V|_i * 1
        s_g_whole = alpha * session_embedding    # |V|_i * hidden_size
        # Sum the attention-weighted node states of every graph G_i.
        s_g = torch.stack([part.sum(dim=0) for part in torch.split(s_g_whole, sizes)])

        # Eq(7)
        s_h = self.W_3(torch.cat((v_n, s_g), dim=1))

        # Eq(8)
        z_i_hat = torch.mm(s_h, all_item_embedding.weight.transpose(1, 0))

        return z_i_hat


In [None]:
class SRGNNModel(nn.Module):
    """SR-GNN: item embedding -> gated graph layer -> attention read-out over all items.

    Args:
        hidden_size: the number of units in a hidden layer.
        n_node: the number of items in the whole item set for embedding layer.
    """
    def __init__(self, hidden_size, n_node):
        super(SRGNNModel, self).__init__()
        self.hidden_size, self.n_node = hidden_size, n_node
        self.embedding = nn.Embedding(self.n_node, self.hidden_size)
        self.gated = GatedGraphConv(self.hidden_size, num_layers=1)
        self.e2s = Embedding2Score(self.hidden_size)
        self.loss_function = nn.CrossEntropyLoss()
        self.reset_parameters()

    def reset_parameters(self):
        """Re-draw every parameter uniformly from [-1/sqrt(hidden_size), 1/sqrt(hidden_size)]."""
        stdv = 1.0 / math.sqrt(self.hidden_size)
        for weight in self.parameters():
            weight.data.uniform_(-stdv, stdv)

    def forward(self, data):
        """Score every item for each session graph in the batch.

        Args:
            data: batched graph object exposing ``x``, ``edge_index`` and ``batch``.

        Returns:
            The scores produced by ``Embedding2Score`` from the node states
            and the shared item embedding table.
        """
        # Item ids are shifted down by one before the embedding lookup.
        x, edge_index, batch = data.x - 1, data.edge_index, data.batch

        # The dataset stores x as one [item_id] row per node, so the lookup
        # yields (num_nodes, 1, hidden).  Squeeze only that axis: a bare
        # squeeze() would also drop the node axis for a single-node batch
        # (or the feature axis when hidden_size == 1).
        embedding = self.embedding(x).squeeze(1)
        hidden = F.relu(self.gated(embedding, edge_index))

        return self.e2s(hidden, self.embedding, batch)

In [None]:
# Use the GPU when one is attached, otherwise fall back to the CPU so the
# cell still runs on a runtime without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = SRGNNModel(hidden_size=64, n_node=n_node).to(device)

In [None]:
# Train SR-GNN for 10 epochs; evaluate on the test loader after each epoch
# and remember the best HIT@20 / MRR@20 seen so far.
besthit, bestmrr = 0, 0
start_time = time.time()
loss_srgnn = []
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

for epoch in range(10):
    forward(model, train_loader, device, epoch, optimizer=optimizer, train_flag=True)
    with torch.no_grad():
        hit, mrr = forward(model, test_loader, device, epoch,  train_flag=False)
    besthit = hit if hit >= besthit else besthit
    bestmrr = mrr if mrr >= bestmrr else bestmrr
elapsed = time.time() - start_time
print("time elapsed:", elapsed)
print(f"Best HIT@20: {besthit:.4f}, Best MRR@20: {bestmrr:.4f}")

Epoch:  0 Batch  0 loss:  10.540619850158691
Epoch:  0 Batch  1500 loss:  6.941524982452393
Epoch:  0 Batch  3000 loss:  4.65255880355835
Epoch:  0 Batch  4500 loss:  5.3174943923950195
Epoch:  0 train loss:  0.09347086447745459
Epoch:  0 test loss:  0.0752538598102072
Epoch:  0 Test hit 63.47275394468495 Test mrr 25.50715651893541
Epoch:  1 Batch  0 loss:  4.603010654449463
Epoch:  1 Batch  1500 loss:  5.073220729827881
Epoch:  1 Batch  3000 loss:  4.496406555175781
Epoch:  1 Batch  4500 loss:  3.6636247634887695
Epoch:  1 train loss:  0.07315508040589858
Epoch:  1 test loss:  0.07183805155808648
Epoch:  1 Test hit 66.81634405524348 Test mrr 27.525610926601583
Epoch:  2 Batch  0 loss:  4.192098140716553
Epoch:  2 Batch  1500 loss:  4.29026985168457
Epoch:  2 Batch  3000 loss:  3.8466243743896484
Epoch:  2 Batch  4500 loss:  4.131102561950684
Epoch:  2 train loss:  0.06829721523403709
Epoch:  2 test loss:  0.07032028652164975
Epoch:  2 Test hit 68.10440445096425 Test mrr 28.32048773246

In [None]:
# import pickle
# data = {
#     "loss_sage": loss_sage,
#     "loss_gcn": loss_gcn,
#     "loss_gat": loss_gat,
#     "loss_srgnn": loss_srgnn
# }
# !touch data.pkl
# with open("data.pkl", "wb") as f:
#     pickle.dump(data, f)

In [None]:
# Next we consider framing this as a hypergraph problem

In [None]:
# Reload the raw (sessions, targets) pickles for the hypergraph experiments.
# Context managers close the files instead of leaking the handles.
# NOTE: pickle can execute arbitrary code while loading; only load files you generated yourself.
with open('/content/drive/MyDrive/AM220proj/yoochoose1_64/raw/train.txt', 'rb') as train_file:
    train_data = pickle.load(train_file)
with open('/content/drive/MyDrive/AM220proj/yoochoose1_64/raw/test.txt', 'rb') as test_file:
    test_data = pickle.load(test_file)

In [None]:
# code adapted from https://github.com/wangjlgz/Hypergraph-Session-Recommendation/blob/main/main.py

In [None]:
def translation(data, item_dic):
    """Re-index a (sessions, targets) pair through ``item_dic``.

    Args:
        data: indexable pair; ``data[0]`` is a sequence of sessions (each a
            sequence of item ids) and ``data[1]`` the matching target ids.
        item_dic: mapping from original item id to new id.

    Returns:
        Tuple ``(sessions, targets)`` with every id replaced by its mapping.
    """
    sessions, targets = data[0], data[1]
    mapped_sessions = [
        [item_dic[item] for item in sessions[pos]]
        for pos in range(len(sessions))
    ]
    mapped_targets = [item_dic[item] for item in targets]
    return (mapped_sessions, mapped_targets)

class Data():
    """Batching helper that turns sessions into hypergraph incidence data.

    NOTE: this class reuses the name ``Data`` that was imported from
    ``torch_geometric.data`` earlier in the notebook; once this cell has run,
    the global ``Data`` refers to this helper.

    Args:
        data: pair ``(sessions, targets)``; sessions are sequences of item ids.
        window: largest click gap for which window hyperedges are generated.
    """
    def __init__(self, data, window):
        inputs = data[0]
        # Sessions have different lengths.  Letting np.asarray infer the array
        # from ragged lists is deprecated (and an error on recent NumPy), so an
        # explicit 1-D object array with one list per session is built instead.
        sessions = np.empty(len(inputs), dtype=object)
        for pos, session in enumerate(inputs):
            sessions[pos] = list(session)
        self.inputs = sessions
        self.targets = np.asarray(data[1])
        self.length = len(inputs)
        self.window = window

    def generate_batch(self, batch_size, shuffle = False):
        """Return the index arrays of consecutive batches (the last may be shorter).

        With ``shuffle=True`` the stored sessions and targets are permuted in
        place first.  An empty dataset yields an empty list.
        """
        if shuffle:
            shuffled_arg = np.arange(self.length)
            np.random.shuffle(shuffled_arg)
            self.inputs = self.inputs[shuffled_arg]
            self.targets = self.targets[shuffled_arg]
        if self.length == 0:
            # np.split cannot cut into zero sections.
            return []
        n_batch = int(self.length / batch_size)
        if self.length % batch_size != 0:
            n_batch += 1
        slices = np.split(np.arange(n_batch * batch_size), n_batch)
        # Drop the indices of the last batch that point past the data.
        slices[-1] = slices[-1][:(self.length - batch_size * (n_batch - 1))]
        return slices

    def get_slice(self, iList):
        """Build the padded inputs for the sessions selected by ``iList``.

        Returns:
            ``(alias_inputs, H, HT, G, EG, items, targets, node_masks,
            edge_mask, edge_inputs)``; ``H``, ``G`` and ``EG`` are always
            empty lists.  ``HT`` holds, per session, the dense transposed
            incidence matrix of shape ``(max_n_edge, max_n_node)``.
        """
        # Imported here so the method does not depend on an `sp` alias that
        # another cell may or may not have defined.
        import scipy.sparse as sp

        inputs, targets = self.inputs[iList], self.targets[iList]
        items, n_node, H, HT, G, EG, alias_inputs, node_masks = [], [], [], [], [], [], [], []
        num_edge, edge_inputs = [], []

        for u_input in inputs:
            temp_l = list(set(u_input))    # distinct items of the session
            temp_dic = {item: pos for pos, item in enumerate(temp_l)}
            n_node.append(temp_l)
            # Session rewritten as positions into its own distinct-item list.
            alias_inputs.append([temp_dic[item] for item in u_input])

            min_s = min(self.window, len(u_input))
            num_edge.append(int((1 + min_s) * len(u_input) - (1 + min_s) * min_s / 2))

        max_n_node = max([len(nodes) for nodes in n_node])
        max_n_edge = max(num_edge)
        max_se_len = max([len(alias) for alias in alias_inputs])

        # Computed before alias_inputs is left-padded below, so len(le) is the
        # original session length here.
        edge_mask = [[1] * len(le) + [0] * (max_n_edge - len(le)) for le in alias_inputs]

        for idx in range(len(inputs)):
            effect_list = alias_inputs[idx]
            effect_len = len(effect_list)
            node = n_node[idx]
            items.append(node + (max_n_node - len(node)) * [0])

            cols = []
            rows = []
            edg = []
            e_idx = 0

            # For gap w, edge number e_idx + p joins the w + 1 consecutive
            # clicks starting at position p.
            for w in range(1 + min(self.window, effect_len - 1)):
                edge_idx = list(range(e_idx, e_idx + effect_len - w))
                edg += edge_idx
                for ww in range(w + 1):
                    rows += effect_list[ww:ww + effect_len - w]
                    cols += edge_idx

                e_idx += len(edge_idx)

            # Duplicate (node, edge) pairs are summed when densified.
            u_H = sp.coo_matrix(([1.0] * len(rows), (rows, cols)), shape=(max_n_node, max_n_edge))
            HT.append(np.asarray(u_H.T.todense()))

            # Sessions are left-padded to the longest session in the slice.
            pad = max_se_len - effect_len
            node_masks.append(pad * [0] + [1] * effect_len)
            alias_inputs[idx] = pad * [0] + effect_list

            edge_inputs.append(edg + (max_n_edge - len(edg)) * [0])

        return alias_inputs, H, HT, G, EG, items, targets, node_masks, edge_mask, edge_inputs

In [None]:
# Session-length statistics over both splits (train sessions first, then test).
llen = [len(session) for split in (train_data, test_data) for session in split[0]]
print(max(llen),sum(llen)*1.0/len(llen))

# Gather every item id that appears as a click or as a target, in the order
# train sessions, train targets, test sessions, test targets.
all_items = []
for split in (train_data, test_data):
    for session in split[0]:
        all_items.extend(session)
    all_items.extend(split[1])
unique_items = set(all_items)
print('total number of items', len(unique_items))

# Dense re-indexing of the items; ids start from 1.
item_dic = {item: new_id for new_id, item in enumerate(unique_items, start=1)}

del all_items, unique_items
train_data = translation(train_data, item_dic)
test_data = translation(test_data, item_dic)

# One more than the largest new id.
n_node = len(item_dic) + 1

145 5.158024412986751
total number of items 17745


In [None]:
# Wrap the re-indexed (sessions, targets) tuples in the batching helper defined
# above; the second argument is its `window` setting.
train_data = Data(train_data, 1)
test_data = Data(test_data, 1)

  self.inputs = np.asarray(inputs)


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np


class EncoderLayer(nn.Module):
    """Encoder block: multi-head self-attention followed by a position-wise feed-forward net."""

    def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.1):
        super(EncoderLayer, self).__init__()
        self.slf_attn = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout)
        self.pos_ffn = PositionwiseFeedForward(d_model, d_inner, dropout=dropout)

    def forward(self, enc_input, slf_attn_mask=None):
        """Return ``(encoded output, self-attention weights)`` for ``enc_input``.

        The input is used as query, key and value; ``slf_attn_mask`` is passed
        through to the attention module unchanged.
        """
        attended, attn_weights = self.slf_attn(
            enc_input, enc_input, enc_input, mask=slf_attn_mask)
        return self.pos_ffn(attended), attn_weights

class MultiHeadAttention(nn.Module):
    """Multi-head attention with layer norm on the query and a residual connection."""

    def __init__(self, n_head, d_model, d_k, d_v, dropout=0.1):
        super().__init__()

        self.dropout = dropout

        self.n_head = n_head
        self.d_k = d_k
        self.d_v = d_v

        # Bias-free projections into n_head sub-spaces, plus the output projection.
        self.w_qs = nn.Linear(d_model, n_head * d_k, bias=False)
        self.w_ks = nn.Linear(d_model, n_head * d_k, bias=False)
        self.w_vs = nn.Linear(d_model, n_head * d_v, bias=False)
        self.fc = nn.Linear(n_head * d_v, d_model, bias=False)

        self.attention = ScaledDotProductAttention(temperature=d_k ** 0.5, attn_dropout = self.dropout)

        self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)

    def forward(self, q, k, v, mask=None):
        """Attend from ``q`` over ``k`` / ``v``.

        Args:
            q, k, v: tensors indexed as (batch, length, d_model).
            mask: optional mask; a head axis is inserted at dim 1 before use.

        Returns:
            ``(output, attn)``: the residual-added projection of the attended
            values and the attention weights returned by the inner module.
        """
        batch_size, query_len = q.size(0), q.size(1)
        heads = self.n_head

        skip = q
        normed_q = self.layer_norm(q)    # only the query is normalised

        def split_heads(projected, per_head):
            # b x len x (n*d) -> b x n x len x d
            return projected.view(batch_size, projected.size(1), heads, per_head).transpose(1, 2)

        q_heads = split_heads(self.w_qs(normed_q), self.d_k)
        k_heads = split_heads(self.w_ks(k), self.d_k)
        v_heads = split_heads(self.w_vs(v), self.d_v)

        # Extra axis so the mask broadcasts over the heads.
        head_mask = None if mask is None else mask.unsqueeze(1)

        context, attn = self.attention(q_heads, k_heads, v_heads, mask=head_mask)

        # b x n x lq x dv -> b x lq x (n*dv): heads concatenated again.
        merged = context.transpose(1, 2).contiguous().view(batch_size, query_len, -1)
        projected = F.dropout(self.fc(merged), self.dropout, training=self.training)

        return projected + skip, attn


class PositionwiseFeedForward(nn.Module):
    """Pre-norm two-layer feed-forward block with dropout and a residual connection."""

    def __init__(self, d_in, d_hid, dropout=0.1):
        super().__init__()
        self.w_1 = nn.Linear(d_in, d_hid) # expansion, applied per position
        self.w_2 = nn.Linear(d_hid, d_in) # projection back, applied per position
        self.layer_norm = nn.LayerNorm(d_in, eps=1e-6)
        self.dropout = dropout

    def forward(self, x):
        """Return ``x + dropout(w_2(relu(w_1(layer_norm(x)))))``."""
        hidden = F.relu(self.w_1(self.layer_norm(x)))
        update = F.dropout(self.w_2(hidden), self.dropout, training=self.training)
        return update + x


class PositionalEncoding(nn.Module):
    """Adds a fixed sinusoidal position table to its input."""

    def __init__(self, d_hid, n_position=200):
        super(PositionalEncoding, self).__init__()

        # Stored as a buffer, not as a trainable parameter.
        table = self._get_sinusoid_encoding_table(n_position, d_hid)
        self.register_buffer('pos_table', table)

    def _get_sinusoid_encoding_table(self, n_position, d_hid):
        """Return the ``(1, n_position, d_hid)`` float table of sinusoid encodings."""
        # TODO: make it with torch instead of numpy

        # One divisor per hidden index; dimensions 2i and 2i+1 share a frequency.
        divisors = [np.power(10000, 2 * (hid_j // 2) / d_hid) for hid_j in range(d_hid)]

        sinusoid_table = np.array(
            [[pos_i / divisor for divisor in divisors] for pos_i in range(n_position)]
        )
        sinusoid_table[:, 0::2] = np.sin(sinusoid_table[:, 0::2])  # even dims: sine
        sinusoid_table[:, 1::2] = np.cos(sinusoid_table[:, 1::2])  # odd dims: cosine

        return torch.FloatTensor(sinusoid_table).unsqueeze(0)

    def forward(self, x):
        """Add the first ``x.size(1)`` rows of the table to ``x``."""
        positions = self.pos_table[:, :x.size(1)].clone().detach()
        return x + positions


class ScaledDotProductAttention(nn.Module):
    """Scaled dot-product attention over 4-D (batch, head, length, dim) inputs."""

    def __init__(self, temperature, attn_dropout=0.1):
        super().__init__()
        self.temperature = temperature
        self.dropout = attn_dropout

    def forward(self, q, k, v, mask=None):
        """Return ``(attn @ v, attn)``.

        ``attn`` is the softmax over the last axis of ``(q / temperature) @ k^T``
        with dropout applied in training mode; positions where ``mask == 0``
        are filled with -1e9 before the softmax.
        """
        scores = torch.matmul(q / self.temperature, k.transpose(2, 3))

        if mask is not None:
            scores = scores.masked_fill(mask == 0, -1e9)

        weights = F.softmax(scores, dim=-1)
        weights = F.dropout(weights, self.dropout, training=self.training)

        return torch.matmul(weights, v), weights


In [None]:
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter



class ScaledDotProductAttention_hyper(nn.Module):
    """Scaled dot-product attention for the hypergraph layers (3-D inputs, no head axis)."""

    def __init__(self, temperature, attn_dropout=0.1):
        super().__init__()
        self.temperature = temperature
        self.dropout = attn_dropout

    def forward(self, q, k, v, mask=None):
        """Return ``(attn @ v, attn)``.

        ``attn`` is the softmax over the last axis of ``(q / temperature) @ k^T``
        (axes 1 and 2 of ``k`` swapped) with dropout applied in training mode;
        positions where ``mask == 0`` are filled with -1e9 before the softmax.
        """
        scores = torch.matmul(q / self.temperature, k.transpose(1, 2))

        if mask is not None:
            scores = scores.masked_fill(mask == 0, -1e9)

        weights = F.softmax(scores, dim=-1)
        weights = F.dropout(weights, self.dropout, training=self.training)

        return torch.matmul(weights, v), weights

class HyperGraphAttentionLayerSparse(nn.Module):
    """Two-stage hypergraph attention layer: nodes -> hyperedges -> nodes.

    Stage 1 lets a single learned context vector (``word_context``) attend over
    the nodes of every hyperedge to build hyperedge features. Stage 2 lets every
    node attend over the hyperedges to build new node features.

    Args:
        in_features: size of the last axis of the input node features.
        out_features: size of the projected attention keys and of the context vector.
        dropout: dropout rate handed to both attention modules.
        alpha: negative slope of the ``LeakyReLU`` stored on the layer
            (the activation is not used in ``forward``).
        transfer: when true, node values are projected by ``weight`` (plus
            ``bias`` if enabled) before aggregation; otherwise they are used as is.
        concat: when true, ReLU is applied to both outputs.
        bias: whether to create the additive bias used together with ``transfer``.
    """

    def __init__(self, in_features, out_features, dropout, alpha, transfer, concat=True, bias=False):
        super().__init__()
        self.dropout = dropout
        self.in_features = in_features
        self.out_features = out_features
        self.alpha = alpha
        self.concat = concat
        self.transfer = transfer

        # Value projection (optional).
        if self.transfer:
            self.weight = Parameter(torch.empty(self.in_features, self.out_features))
        else:
            self.register_parameter('weight', None)

        # Key projections for nodes (weight2) and hyperedges (weight3).
        self.weight2 = Parameter(torch.empty(self.in_features, self.out_features))
        self.weight3 = Parameter(torch.empty(self.out_features, self.out_features))

        if bias:
            self.bias = Parameter(torch.empty(self.out_features))
        else:
            self.register_parameter('bias', None)

        # Single learned query shared by all hyperedges.
        self.word_context = nn.Embedding(1, self.out_features)

        self.leakyrelu = nn.LeakyReLU(self.alpha)

        self.attention1 = ScaledDotProductAttention_hyper(temperature=self.out_features ** 0.5, attn_dropout = self.dropout)
        self.attention2 = ScaledDotProductAttention_hyper(temperature=self.out_features ** 0.5, attn_dropout = self.dropout)

        self.reset_parameters()

    def reset_parameters(self):
        """Re-draw every parameter uniformly from ``[-1/sqrt(out_features), 1/sqrt(out_features)]``."""
        stdv = 1. / math.sqrt(self.out_features)
        if self.weight is not None:
            self.weight.data.uniform_(-stdv, stdv)
        self.weight2.data.uniform_(-stdv, stdv)
        self.weight3.data.uniform_(-stdv, stdv)
        if self.bias is not None:
            self.bias.data.uniform_(-stdv, stdv)

        nn.init.uniform_(self.word_context.weight.data, -stdv, stdv)

    def forward(self, x, adj):
        """Run both attention stages and return ``(node, edge)`` features.

        Args:
            x: node features; assumed to be (batch, n_nodes, in_features) - TODO confirm.
            adj: incidence mask whose axis 1 indexes hyperedges and axis 2 indexes
                nodes; entries equal to 0 are masked out of the attention.
        """
        # Keys are always projected; values only when `transfer` is enabled.
        x_4att = x.matmul(self.weight2)

        if self.transfer:
            x = x.matmul(self.weight)
            if self.bias is not None:
                x = x + self.bias

        batch_size = x.shape[0]
        n_edges = adj.shape[1]

        # Broadcast the shared context vector to one query per (session, hyperedge)
        # without materialising the copies.
        q1 = self.word_context.weight.view(1, 1, -1).expand(batch_size, n_edges, self.out_features)
        edge, _ = self.attention1(q1, x_4att, x, mask=adj)

        edge_4att = edge.matmul(self.weight3)

        # Nodes query the hyperedges, hence the transposed incidence mask.
        node, _ = self.attention2(x_4att, edge_4att, edge, mask=adj.transpose(1, 2))

        if self.concat:
            node = F.relu(node)
            edge = F.relu(edge)

        return node, edge

    def __repr__(self):
        return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')'

In [None]:
import datetime
import math
import numpy as np
import torch
from torch import nn
from torch.nn import Module, Parameter
import torch.nn.functional as F
from tqdm import tqdm

def get_pad_mask(seq, pad_idx):
    """Return a boolean mask that is True where ``seq`` differs from ``pad_idx``.

    A singleton axis is inserted at position -2 of the result.
    """
    not_padding = seq.ne(pad_idx)
    return not_padding.unsqueeze(-2)



class HGNN_ATT(nn.Module):
    """Stack of one or two hypergraph attention layers with residual connections.

    ``step == 2`` enables the second layer; any other value runs only the first.
    """

    def __init__(self, dataset, input_size, n_hid, output_size, step, dropout=0.3):
        super().__init__()
        self.dropout = dropout
        self.step = step
        self.dataset = dataset
        alpha = 0.2  # LeakyReLU slope handed to both layers
        self.gat1 = HyperGraphAttentionLayerSparse(input_size, n_hid, self.dropout, alpha, transfer=False, concat=False)
        self.gat2 = HyperGraphAttentionLayerSparse(n_hid, output_size, self.dropout, alpha, transfer=True, concat=False)

    def _dropout_with_skip(self, h, skip):
        """Apply dropout to ``h`` and add the skip connection in place."""
        h = F.dropout(h, self.dropout, training=self.training)
        h += skip
        return h

    def forward(self, x, H, G, EG):
        """Return the refined node features twice, as ``(x, x)``.

        ``H`` is passed to the attention layers as their mask; ``G`` and ``EG``
        are accepted but not used here.
        """
        skip = x

        h, _ = self.gat1(x, H)

        if self.step == 2:
            h = self._dropout_with_skip(h, skip)
            h, _ = self.gat2(h, H)

        h = self._dropout_with_skip(h, skip)
        return h, h



class SessionGraph(Module):
    """Session recommender: item embeddings, hypergraph attention, self-attention encoder.

    The module also owns its loss function, optimizer and LR scheduler, which
    are read by the training helpers.
    """

    def __init__(self, opt, n_node):
        """Build the model from ``opt`` (hiddenSize, batchSize, nonhybrid, dropout,
        dataset, step, lr, l2, lr_dc_step, lr_dc) for ``n_node`` embedding rows."""
        super().__init__()
        self.hidden_size = opt.hiddenSize
        self.n_node = n_node
        self.batch_size = opt.batchSize
        self.nonhybrid = opt.nonhybrid
        self.embedding = nn.Embedding(self.n_node, self.hidden_size)
        self.embedding2 = nn.Embedding(self.n_node, self.hidden_size)
        self.dropout = opt.dropout
        self.dataset = opt.dataset

        self.layer_norm = nn.LayerNorm(self.hidden_size, eps=1e-6)
        self.layer_norm1 = nn.LayerNorm(self.hidden_size, eps=1e-6)

        # Self-attention encoder: a single layer with a single head.
        num_encoder_layers, num_heads = 1, 1
        encoder_layers = []
        for _ in range(num_encoder_layers):
            encoder_layers.append(
                EncoderLayer(self.hidden_size, self.hidden_size, num_heads,
                             self.hidden_size, self.hidden_size, dropout=opt.dropout)
            )
        self.layer_stack = nn.ModuleList(encoder_layers)

        # Re-initialises everything registered so far; `hgnn` is created afterwards
        # and therefore keeps the initialisation done by its own layers.
        self.reset_parameters()

        self.hgnn = HGNN_ATT(self.dataset, self.hidden_size, self.hidden_size, self.hidden_size, opt.step, dropout = self.dropout)

        self.loss_function = nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(self.parameters(), lr=opt.lr, weight_decay=opt.l2)
        self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=opt.lr_dc_step, gamma=opt.lr_dc)

    def reset_parameters(self):
        """Fill every registered parameter uniformly in ``[-1/sqrt(hidden), 1/sqrt(hidden)]``."""
        bound = 1.0 / math.sqrt(self.hidden_size)
        for param in self.parameters():
            param.data.uniform_(-bound, bound)

    def compute_scores(self, enc_output, enc_output2, mask, edge_mask, hidden):
        """Score every item (embedding rows 1..) for each session.

        The encoder output at the last position (index ``mask.shape[1] - 1``) is
        layer-normalised and multiplied with the item embeddings. ``enc_output2``,
        ``edge_mask`` and ``hidden`` are accepted but not used.
        """
        for encoder in self.layer_stack:
            enc_output, _ = encoder(enc_output, slf_attn_mask=get_pad_mask(mask, 0))

        batch_rows = torch.arange(mask.shape[0]).long()
        last_position = mask.shape[1] - 1
        session_repr = self.layer_norm(enc_output[batch_rows, last_position])  # batch_size x latent_size

        # Row 0 of the embedding table is skipped, so score column j refers to item j + 1.
        candidates = self.embedding.weight[1:]  # n_nodes x latent_size
        return torch.matmul(session_repr, candidates.transpose(1, 0))

    def forward(self, inputs, HT, G, EG):
        """Return ``(nodes, hidden, nodes2)``: hypergraph-refined embeddings (two
        outputs of ``hgnn``) and the plain second embedding lookup of ``inputs``."""
        nodes, hidden = self.hgnn(self.embedding(inputs), HT, G, EG)
        nodes2 = self.embedding2(inputs)
        return nodes, hidden, nodes2


def trans_to_cuda(variable):
    """Return ``variable.cuda()`` when CUDA is available, otherwise ``variable`` itself."""
    return variable.cuda() if torch.cuda.is_available() else variable


def trans_to_cpu(variable):
    """Return ``variable.cpu()`` when CUDA is available, otherwise ``variable`` itself."""
    return variable.cpu() if torch.cuda.is_available() else variable


def forward(model, alias_inputs, H, HT, G, EG, items, targets, node_masks, edge_mask, edge_inputs):
    """Run ``model`` on one batch and return ``(targets, scores)``.

    The batch pieces are converted to tensors (moved to the GPU when available),
    the model output is re-ordered per session through ``alias_inputs`` and the
    result is handed to ``model.compute_scores``. ``H`` and ``edge_inputs`` are
    accepted but not used; ``targets`` is returned untouched.
    """
    def as_long(values):
        return trans_to_cuda(torch.Tensor(values).long())

    def as_float(values):
        return trans_to_cuda(torch.Tensor(values).float())

    alias_inputs = as_long(alias_inputs)
    items = as_long(items)
    HT = as_float(HT)
    G = as_float(G)
    EG = as_float(EG)
    node_masks = as_long(node_masks)
    edge_mask = as_long(edge_mask)

    nodes, hidden, nodes2 = model(items, HT, G, EG)

    # For each session, gather its node vectors in the order given by alias_inputs.
    session_ids = range(len(alias_inputs))
    seq_hidden = torch.stack([nodes[s][alias_inputs[s]] for s in session_ids])
    seq_hidden2 = torch.stack([nodes2[s][alias_inputs[s]] for s in session_ids])

    scores = model.compute_scores(seq_hidden, seq_hidden2, node_masks, edge_mask, hidden)
    return targets, scores


def train_model(model, train_data, opt):
    """Train ``model`` for one epoch over ``train_data`` and print the summed loss.

    Args:
        model: module exposing ``optimizer``, ``scheduler`` and ``loss_function``.
        train_data: dataset exposing ``generate_batch(batch_size, shuffle)`` and
            ``get_slice(index)``.
        opt: options object; only ``opt.batchSize`` is read here.
    """
    print('start training: ', datetime.datetime.now())
    model.train()
    total_loss = 0.0
    slices = train_data.generate_batch(opt.batchSize, True)
    for step in tqdm(range(len(slices)), total=len(slices), ncols=70, leave=False, unit='b'):
        batch_index = slices[step]
        (alias_inputs, H, HT, G, EG, items, targets,
         node_masks, edge_mask, edge_inputs) = train_data.get_slice(batch_index)
        model.optimizer.zero_grad()
        targets, scores = forward(model, alias_inputs, H, HT, G, EG, items, targets, node_masks, edge_mask, edge_inputs)
        targets = trans_to_cuda(torch.Tensor(targets).long())
        # Targets are 1-based item ids; score column j corresponds to item j + 1.
        loss = model.loss_function(scores, targets - 1)
        loss.backward()
        model.optimizer.step()
        # .item() keeps a plain float so no autograd history is accumulated.
        total_loss += loss.item()
    # Step the LR schedule once per epoch, after the optimizer updates; stepping
    # before them skips the first learning-rate value of the schedule.
    model.scheduler.step()
    print('\tLoss:\t%.3f' % total_loss)

def test_model(model, test_data, opt):
    
    model.eval()
    hit20, mrr20, hit10, mrr10 = [], [], [], []
    slices = test_data.generate_batch(min(128,test_data.length), False)
    for step in tqdm(range(len(slices)), total=len(slices), ncols=70, leave=False, unit='b'):
        i = slices[step]
        alias_inputs, H, HT, G, EG, items, targets, node_masks, edge_mask, edge_inputs = test_data.get_slice(i)
        targets, scores = forward(model, alias_inputs, H, HT, G, EG, items, targets, node_masks, edge_mask, edge_inputs)
        sub_scores = scores.topk(20)[1]
        sub_scores = trans_to_cpu(sub_scores).detach().numpy()

        for score, target in zip(sub_scores, targets):
            hit20.append(np.isin(target - 1, score))
            if len(np.where(score == target - 1)[0]) == 0:
                mrr20.append(0)
            else:
                mrr20.append(1.0 / (np.where(score == target - 1)[0][0] + 1))

            hit10.append(np.isin(target - 1, score[:10]))
            if len(np.where(score[:10] == target - 1)[0]) == 0:
                mrr10.append(0)
            else:
                mrr10.append(1.0 / (np.where(score[:10] == target - 1)[0][0] + 1))
    hit20 = np.mean(hit20) * 100
    mrr20 = np.mean(mrr20) * 100
    hit10 = np.mean(hit10) * 100
    mrr10 = np.mean(mrr10) * 100
    return hit20, mrr20, hit10, mrr10

In [None]:
import time
import numpy as np
import pickle as pkl
import networkx as nx
import scipy.sparse as sp
from nltk.corpus import stopwords
import nltk
from nltk.wsd import lesk
from nltk.corpus import wordnet as wn
from scipy.sparse.linalg import eigs, eigsh
import sys
import re
import collections
from collections import Counter
import numpy as np
from multiprocessing import Process, Queue
import pandas as pd
import os
import random
from argparse import Namespace

# Hyper-parameters for the hypergraph-attention session model.
opt = Namespace(dataset='yoochoose1_64', batchSize=100, hiddenSize=100, epoch=5, worker=3, lr=0.001, lr_dc=0.1, lr_dc_step=3, l2=0.0, step=2, window=1, patience=3, nonhybrid=False, validation=False, valid_portion=0.1, dropout=0.3)
# NOTE(review): n_node, train_data and test_data are not defined in this cell;
# they are assumed to come from earlier cells - confirm before Restart & Run All.
model = trans_to_cuda(SessionGraph(opt, n_node))

start = time.time()
best_result = [0, 0, 0, 0]  # [Recall@20, MRR@20, HIT@10, MRR@10]
best_epoch = [0, 0]         # [epoch of best Recall@20, epoch of best MRR@20]
bad_counter = 0


for epoch in range(opt.epoch):
    print('-------------------------------------------------------')
    print('epoch: ', epoch)

    train_model(model, train_data, opt)

    hit, mrr, hit10, mrr10 = test_model(model, test_data, opt)

    # flag == 1 when at least one of the two @20 metrics matched or improved.
    flag = 0
    if hit >= best_result[0]:
        best_result[0] = hit
        best_result[2] = hit10
        best_epoch[0] = epoch
        flag = 1
    if mrr >= best_result[1]:
        best_result[1] = mrr
        best_result[3] = mrr10
        best_epoch[1] = epoch
        flag = 1

    print('Result:\n')
    print('\tRecall@20:\t%.4f\tMRR@20:\t%.4f\tHIT@10:\t%.4f\tMRR@10:\t%.4f\tEpoch:\t%d\n'% (hit, mrr, hit10, mrr10, epoch))

    print('Best Result:')
    print('\tRecall@20:\t%.4f\tMRR@20:\t%.4f\tHIT@10:\t%.4f\tMRR@10:\t%.4f\tEpoch:\t%d\n'% (best_result[0], best_result[1], best_result[2], best_result[3], best_epoch[0]))
    # Early stopping: give up after `patience` epochs without any improvement.
    bad_counter += 1 - flag
    if bad_counter >= opt.patience:
        break
print('-------------------------------------------------------')
end = time.time()
print("Run time: %f s" % (end - start))

-------------------------------------------------------
epoch:  0
start training:  2023-04-21 01:46:26.991403




	Loss:	20454.527




Result:

	Recall@20:	68.7037	MMR@20:	29.4960	HIT@10:	58.1863	MRR@10:	28.7538	Epoch:	0

Best Result:
	Recall@20:	68.7037	MMR@20:	29.4960	HIT@10:	58.1863	MRR@10:	28.7538	Epoch:	0

-------------------------------------------------------
epoch:  1
start training:  2023-04-21 02:06:00.046000




	Loss:	16202.825




Result:

	Recall@20:	70.1170	MMR@20:	30.2697	HIT@10:	59.4028	MRR@10:	29.5118	Epoch:	1

Best Result:
	Recall@20:	70.1170	MMR@20:	30.2697	HIT@10:	59.4028	MRR@10:	29.5118	Epoch:	1

-------------------------------------------------------
epoch:  2
start training:  2023-04-21 02:25:32.483958




	Loss:	14783.261




Result:

	Recall@20:	70.9471	MMR@20:	31.2187	HIT@10:	60.7660	MRR@10:	30.5006	Epoch:	2

Best Result:
	Recall@20:	70.9471	MMR@20:	31.2187	HIT@10:	60.7660	MRR@10:	30.5006	Epoch:	2

-------------------------------------------------------
epoch:  3
start training:  2023-04-21 02:44:59.147623




	Loss:	14542.952




Result:

	Recall@20:	70.9864	MMR@20:	31.4660	HIT@10:	60.6802	MRR@10:	30.7367	Epoch:	3

Best Result:
	Recall@20:	70.9864	MMR@20:	31.4660	HIT@10:	60.6802	MRR@10:	30.7367	Epoch:	3

-------------------------------------------------------
epoch:  4
start training:  2023-04-21 03:04:22.192918




	Loss:	14401.284




Result:

	Recall@20:	71.0866	MMR@20:	31.5683	HIT@10:	60.7786	MRR@10:	30.8395	Epoch:	4

Best Result:
	Recall@20:	71.0866	MMR@20:	31.5683	HIT@10:	60.7786	MRR@10:	30.8395	Epoch:	4

-------------------------------------------------------
Run time: 5849.861582 s


In [None]:
'''
Hypergraph network (DHCN) model, following the reference implementation at
https://github.com/xiaxin1998/DHCN/blob/main/main.py
'''

In [None]:
!pip uninstall torch --yes

Found existing installation: torch 2.0.0+cu118
Uninstalling torch-2.0.0+cu118:
  Successfully uninstalled torch-2.0.0+cu118


In [None]:
!pip install torch==1.7.1

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torch==1.7.1
  Downloading torch-1.7.1-cp39-cp39-manylinux1_x86_64.whl (776.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m776.8/776.8 MB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torch
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
torchvision 0.15.1+cu118 requires torch==2.0.0, but you have torch 1.7.1 which is incompatible.
torchtext 0.15.1 requires torch==2.0.0, but you have torch 1.7.1 which is incompatible.
torchdata 0.6.0 requires torch==2.0.0, but you have torch 1.7.1 which is incompatible.
torchaudio 2.0.1+cu118 requires torch==2.0.0, but you have torch 1.7.1 which is incompatible.[0m[31m
[0mSuccessfully installed torch-1.7.1


In [None]:
!pip uninstall numpy --yes
!pip install numpy==1.18.1

Found existing installation: numpy 1.22.4
Uninstalling numpy-1.22.4:
  Successfully uninstalled numpy-1.22.4
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting numpy==1.18.1
  Downloading numpy-1.18.1.zip (5.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.4/5.4 MB[0m [31m74.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: numpy
  Building wheel for numpy (pyproject.toml) ... [?25l[?25hdone
  Created wheel for numpy: filename=numpy-1.18.1-cp39-cp39-linux_x86_64.whl size=13113811 sha256=bdea0c82caf45d7f4cef62f3617dd31279d594c4fc54dda947a098c47b95b141
  Stored in directory: /root/.cache/pip/wheels/d7/8f/69/b233132b552877f7f613530ca919c4f566aec2b4c88119979f
Successfully built numpy
Installing collected packa

In [None]:
import numpy as np
from scipy.sparse import csr_matrix
from operator import itemgetter

def data_masks(all_sessions, n_node):
    """Build the session-by-item incidence matrix in CSR form.

    Row ``j`` has a 1 in column ``item - 1`` for every distinct item of
    ``all_sessions[j]`` (item ids are therefore treated as 1-based).

    Returns:
        ``scipy.sparse.csr_matrix`` of shape ``(len(all_sessions), n_node)``.
    """
    row_ptr = [0]
    col_idx, values = [], []
    for j in range(len(all_sessions)):
        unique_items = np.unique(all_sessions[j])
        row_ptr.append(row_ptr[-1] + len(unique_items))
        col_idx.extend(item - 1 for item in unique_items)
        values.extend([1] * len(unique_items))
    return csr_matrix((values, col_idx, row_ptr), shape=(len(all_sessions), n_node))

def split_validation(train_set, valid_portion):
    """Randomly split ``(inputs, labels)`` into a training and a validation part.

    The sample order is shuffled with NumPy's global RNG; the last
    ``valid_portion`` share (after rounding the training size) becomes the
    validation set.

    Returns:
        ``((train_x, train_y), (valid_x, valid_y))`` as lists.
    """
    inputs, labels = train_set
    n_samples = len(inputs)
    order = np.arange(n_samples, dtype='int32')
    np.random.shuffle(order)
    n_train = int(np.round(n_samples * (1. - valid_portion)))
    train_idx, valid_idx = order[:n_train], order[n_train:]

    def pick(sequence, positions):
        return [sequence[s] for s in positions]

    train_part = (pick(inputs, train_idx), pick(labels, train_idx))
    valid_part = (pick(inputs, valid_idx), pick(labels, valid_idx))
    return train_part, valid_part

class Data():
    """Session dataset wrapper used by the DHCN training loop.

    Args:
        data: pair ``(sessions, targets)``; ``sessions`` is a sequence of item-id
            sequences, ``targets`` the matching next-item ids.
        shuffle: whether ``generate_batch`` reshuffles the samples first.
        n_node: number of columns of the session-item incidence matrix.

    Attributes set here: ``raw`` (1-D object array, one list per session),
    ``targets``, ``length``, ``n_node``, ``shuffle`` and ``adjacency`` (COO
    item-by-item matrix built from the normalised incidence matrices).
    """

    def __init__(self, data, shuffle=False, n_node=None):
        # Sessions have different lengths, so store them in an explicit 1-D
        # object array; np.asarray on ragged input is deprecated and an error
        # on recent NumPy.
        sessions = data[0]
        self.raw = np.empty(len(sessions), dtype=object)
        for position, session in enumerate(sessions):
            self.raw[position] = list(session)

        H_T = data_masks(self.raw, n_node)
        # Each session row scaled by 1 / (number of distinct items in the session).
        BH_T = H_T.T.multiply(self._safe_reciprocal(H_T.sum(axis=1))).T
        H = H_T.T
        # Each item row scaled by 1 / (number of sessions containing the item).
        DH = H.T.multiply(self._safe_reciprocal(H.sum(axis=1))).T
        DHBH_T = DH.dot(BH_T)

        self.adjacency = DHBH_T.tocoo()
        self.n_node = n_node
        self.targets = np.asarray(data[1])
        self.length = len(self.raw)
        self.shuffle = shuffle

    @staticmethod
    def _safe_reciprocal(totals):
        """Return ``1 / totals`` as a ``(1, n)`` float array, with 0 where ``totals`` is 0."""
        totals = np.asarray(totals, dtype=float).reshape(1, -1)
        return np.divide(1.0, totals, out=np.zeros_like(totals), where=totals != 0)

    def get_overlap(self, sessions):
        """Return ``(matrix, degree)`` for a batch of (padded) sessions.

        ``matrix[i][j]`` is the Jaccard overlap of the non-zero items of sessions
        ``i`` and ``j`` (1.0 on the diagonal); ``degree`` is the diagonal matrix of
        inverse row sums of ``matrix``.
        """
        n_sessions = len(sessions)
        matrix = np.zeros((n_sessions, n_sessions))
        for i in range(n_sessions):
            seq_a = set(sessions[i])
            seq_a.discard(0)
            for j in range(i+1, n_sessions):
                seq_b = set(sessions[j])
                seq_b.discard(0)
                overlap = seq_a.intersection(seq_b)
                ab_set = seq_a | seq_b
                # Two sessions without any item share nothing; avoid dividing by zero.
                matrix[i][j] = float(len(overlap))/float(len(ab_set)) if ab_set else 0.0
                matrix[j][i] = matrix[i][j]
        matrix = matrix + np.diag([1.0]*n_sessions)
        degree = np.sum(np.array(matrix), 1)
        degree = np.diag(1.0/degree)
        return matrix, degree

    def generate_batch(self, batch_size):
        """Return a list of index arrays, one per batch.

        When ``shuffle`` is set, ``raw`` and ``targets`` are permuted in place
        first. The last batch is always the final ``batch_size`` samples, so it
        may overlap the previous batch when ``length`` is not a multiple of
        ``batch_size``.
        """
        if self.shuffle:
            shuffled_arg = np.arange(self.length)
            np.random.shuffle(shuffled_arg)
            self.raw = self.raw[shuffled_arg]
            self.targets = self.targets[shuffled_arg]
        n_batch = int(self.length / batch_size)
        if self.length % batch_size != 0:
            n_batch += 1
        slices = np.split(np.arange(n_batch * batch_size), n_batch)
        slices[-1] = np.arange(self.length-batch_size, self.length)
        return slices

    def get_slice(self, index):
        """Return the padded batch selected by ``index``.

        Returns:
            ``(targets - 1, session_len, items, reversed_sess_item, mask)`` where
            every session is right-padded with zeros to the largest non-zero
            item count of the batch.
        """
        inp = self.raw[index]
        lengths = [len(np.nonzero(session)[0]) for session in inp]
        max_n_node = int(np.max(lengths))

        items, reversed_sess_item = [], []
        session_len, mask = [], []
        for session, n_items in zip(inp, lengths):
            padding = (max_n_node - n_items) * [0]
            session_len.append([n_items])
            items.append(session + padding)
            mask.append([1] * n_items + padding)
            reversed_sess_item.append(list(reversed(session)) + padding)

        return self.targets[index]-1, session_len,items, reversed_sess_item, mask

In [None]:
import datetime
import math
import numpy as np
import torch
from torch import nn, backends
from torch.nn import Module, Parameter
import torch.nn.functional as F
import torch.sparse
from scipy.sparse import coo
import time
from numba import jit
import heapq

def trans_to_cuda(variable):
    """Move ``variable`` to the GPU if CUDA is available; otherwise hand it back as is."""
    if not torch.cuda.is_available():
        return variable
    return variable.cuda()
def trans_to_cpu(variable):
    """Move ``variable`` to the CPU if CUDA is available; otherwise hand it back as is."""
    if not torch.cuda.is_available():
        return variable
    return variable.cpu()

class HyperConv(Module):
    """Parameter-free propagation of embeddings over a sparse adjacency matrix.

    The output is the mean of the input embeddings and their ``layers``
    successive products with the adjacency matrix.
    """

    def __init__(self, layers,dataset,emb_size=100):
        super().__init__()
        self.emb_size = emb_size
        self.layers = layers
        self.dataset = dataset

    def forward(self, adjacency, embedding):
        """Return the layer-averaged embeddings (same shape as ``embedding``)."""
        propagated = embedding
        per_layer = [propagated]
        for _ in range(self.layers):
            propagated = torch.sparse.mm(trans_to_cuda(adjacency), propagated)
            per_layer.append(propagated)
        stacked = torch.stack(per_layer, dim=0)
        return torch.sum(stacked, dim=0) / (self.layers + 1)


class LineConv(Module):
    """Parameter-free propagation of session embeddings over the session graph.

    Session embeddings are the mean item embedding of each session; they are
    multiplied ``layers`` times by ``D @ A`` and the results are averaged.
    """

    def __init__(self, layers,batch_size,emb_size=100):
        super().__init__()
        self.emb_size = emb_size
        self.batch_size = batch_size
        self.layers = layers

    def forward(self, item_embedding, D, A, session_item, session_len):
        """Return the layer-averaged session embeddings.

        Args:
            item_embedding: item embedding table; a zero row is prepended so that
                index 0 (padding) maps to a zero vector.
            D, A: matrices whose product ``D @ A`` propagates session embeddings.
            session_item: per-session item indices (1-D index tensor per session).
            session_len: divisor applied to the summed item embeddings.
        """
        # Allocate on the embedding's own device/dtype so the layer also runs
        # without CUDA.
        zero_row = item_embedding.new_zeros(1, self.emb_size)
        item_embedding = torch.cat([zero_row, item_embedding], 0)

        gathered = [torch.index_select(item_embedding, 0, session_item[i])
                    for i in range(len(session_item))]
        # The gathered item embeddings are detached, so no gradient flows back to
        # the embedding table through this path (stack + detach avoids the
        # CPU/NumPy round trip).
        seq_h1 = torch.stack(gathered).detach()

        session_emb_lgcn = torch.div(torch.sum(seq_h1, 1), session_len)
        session = [session_emb_lgcn]
        DA = torch.mm(D, A).float()
        for _ in range(self.layers):
            session_emb_lgcn = torch.mm(DA, session_emb_lgcn)
            session.append(session_emb_lgcn)
        session = torch.stack(session, dim = 0)
        session_emb_lgcn = torch.sum(session, dim=0)/ (self.layers+1)
        return session_emb_lgcn


class DHCN(Module):
    """DHCN session recommender: hypergraph channel, line-graph channel and a
    contrastive (SSL) loss between the two session embeddings.

    Args:
        adjacency: SciPy COO matrix (``data``, ``row``, ``col``, ``shape`` are read).
        n_node: number of rows of the item embedding table.
        lr: learning rate of the Adam optimizer created here.
        layers: number of propagation layers in both channels.
        l2: stored as ``self.L2``. NOTE(review): it is not passed to the optimizer.
        beta: weight of the contrastive loss returned by ``forward``.
        dataset: dataset name; ``'Nowplaying'`` prunes small adjacency values and
            ``'Tmall'`` selects the position-free session embedding.
        emb_size: embedding width.
        batch_size: fixed batch size assumed by the session-embedding buffers.
    """

    def __init__(self, adjacency, n_node,lr, layers,l2, beta,dataset,emb_size=100, batch_size=100):
        super().__init__()
        self.emb_size = emb_size
        self.batch_size = batch_size
        self.n_node = n_node
        self.L2 = l2
        self.lr = lr
        self.layers = layers
        self.beta = beta
        self.dataset = dataset

        values = adjacency.data
        indices = np.vstack((adjacency.row, adjacency.col))
        if dataset == 'Nowplaying':
            # Drop adjacency entries whose value is below 0.05.
            keep = ~(values < 0.05)
            values = values[keep]
            indices = indices[:, keep]
        # torch.sparse_coo_tensor replaces the deprecated torch.sparse.FloatTensor.
        self.adjacency = torch.sparse_coo_tensor(
            torch.as_tensor(indices, dtype=torch.long),
            torch.as_tensor(values, dtype=torch.float32),
            torch.Size(adjacency.shape),
        )
        self.embedding = nn.Embedding(self.n_node, self.emb_size)
        self.pos_embedding = nn.Embedding(200, self.emb_size)
        self.HyperGraph = HyperConv(self.layers,dataset)
        self.LineGraph = LineConv(self.layers, self.batch_size)
        self.w_1 = nn.Linear(2 * self.emb_size, self.emb_size)
        self.w_2 = nn.Parameter(torch.empty(self.emb_size, 1))
        self.glu1 = nn.Linear(self.emb_size, self.emb_size)
        self.glu2 = nn.Linear(self.emb_size, self.emb_size, bias=False)
        self.loss_function = nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(self.parameters(), lr=self.lr)
        self.init_parameters()

    def init_parameters(self):
        """Fill every parameter uniformly in ``[-1/sqrt(emb_size), 1/sqrt(emb_size)]``."""
        stdv = 1.0 / math.sqrt(self.emb_size)
        for weight in self.parameters():
            weight.data.uniform_(-stdv, stdv)

    def _gather_reversed_items(self, item_embedding, session_item, reversed_sess_item):
        """Look up the embeddings of the reversed sessions.

        A zero row is prepended to ``item_embedding`` so index 0 (padding) maps to
        zeros. The result has ``self.batch_size`` rows; rows beyond
        ``session_item.shape[0]`` stay zero.
        """
        # new_zeros keeps the device/dtype of the embeddings, so this also runs on CPU.
        zero_row = item_embedding.new_zeros(1, self.emb_size)
        table = torch.cat([zero_row, item_embedding], 0)
        n_sessions = session_item.shape[0]
        seq_h = table.new_zeros(self.batch_size, reversed_sess_item.shape[1], self.emb_size)
        seq_h[:n_sessions] = table[reversed_sess_item[:n_sessions]]
        return seq_h

    def _soft_attention(self, seq_h, nh, session_len, mask):
        """Weight the items of every session and sum them into one vector.

        ``nh`` is the per-item input of the gate; the session mean (sum of
        ``seq_h`` divided by ``session_len``) is the second gate input. Masked
        positions get weight 0.
        """
        seq_len = seq_h.shape[1]
        hs = torch.div(torch.sum(seq_h, 1), session_len)
        hs = hs.unsqueeze(-2).repeat(1, seq_len, 1)
        mask = mask.float().unsqueeze(-1)
        gate = torch.sigmoid(self.glu1(nh) + self.glu2(hs))
        beta = torch.matmul(gate, self.w_2)
        beta = beta * mask
        return torch.sum(beta * seq_h, 1)

    def generate_sess_emb(self,item_embedding, session_item, session_len, reversed_sess_item, mask):
        """Session embedding with learned position embeddings mixed into the gate input."""
        seq_h = self._gather_reversed_items(item_embedding, session_item, reversed_sess_item)
        seq_len = seq_h.shape[1]
        pos_emb = self.pos_embedding.weight[:seq_len]
        pos_emb = pos_emb.unsqueeze(0).repeat(self.batch_size, 1, 1)
        nh = torch.tanh(self.w_1(torch.cat([pos_emb, seq_h], -1)))
        return self._soft_attention(seq_h, nh, session_len, mask)

    def generate_sess_emb_npos(self,item_embedding, session_item, session_len, reversed_sess_item, mask):
        """Session embedding without position embeddings (gate input is ``tanh(seq_h)``)."""
        seq_h = self._gather_reversed_items(item_embedding, session_item, reversed_sess_item)
        nh = torch.tanh(seq_h)
        return self._soft_attention(seq_h, nh, session_len, mask)

    def SSL(self, sess_emb_hgnn, sess_emb_lgcn):
        """Contrastive loss between the two session embeddings.

        Positive pairs are matching rows; negatives pair ``sess_emb_lgcn`` with a
        row- and column-shuffled copy of ``sess_emb_hgnn``.
        """
        def row_column_shuffle(embedding):
            corrupted_embedding = embedding[torch.randperm(embedding.size()[0])]
            corrupted_embedding = corrupted_embedding[:,torch.randperm(corrupted_embedding.size()[1])]
            return corrupted_embedding

        def score(x1, x2):
            return torch.sum(torch.mul(x1, x2), 1)

        pos = score(sess_emb_hgnn, sess_emb_lgcn)
        neg1 = score(sess_emb_lgcn, row_column_shuffle(sess_emb_hgnn))
        one = torch.ones_like(neg1)
        # 1e-8 keeps the logarithms finite when a sigmoid saturates.
        con_loss = torch.sum(-torch.log(1e-8 + torch.sigmoid(pos))-torch.log(1e-8 + (one - torch.sigmoid(neg1))))
        return con_loss

    def forward(self, session_item, session_len, D, A, reversed_sess_item, mask):
        """Return ``(item_embeddings_hg, sess_emb_hgnn, beta * con_loss)``."""
        item_embeddings_hg = self.HyperGraph(self.adjacency, self.embedding.weight)
        if self.dataset == 'Tmall':
            sess_emb_hgnn = self.generate_sess_emb_npos(item_embeddings_hg, session_item, session_len, reversed_sess_item, mask)
        else:
            sess_emb_hgnn = self.generate_sess_emb(item_embeddings_hg, session_item, session_len, reversed_sess_item, mask)
        session_emb_lg = self.LineGraph(self.embedding.weight, D, A, session_item, session_len)
        con_loss = self.SSL(sess_emb_hgnn, session_emb_lg)
        return item_embeddings_hg, sess_emb_hgnn, self.beta*con_loss


@jit(nopython=True)
def find_k_largest(K, candidates):
    """Return the indices of the K highest scores in ``candidates``, best first.

    A min-heap of ``(score, index)`` pairs seeded with the first K entries is
    kept; every later entry that beats the current minimum replaces it.
    """
    n_candidates = []
    # Seed the heap with the first K (score, index) pairs.
    for iid, score in enumerate(candidates[:K]):
        n_candidates.append((score, iid))
    heapq.heapify(n_candidates)
    # n_candidates[0] is the smallest score kept so far; iid restarts at 0 for the
    # tail slice, hence the "+ K" to recover the original index.
    for iid, score in enumerate(candidates[K:]):
        if score > n_candidates[0][0]:
            heapq.heapreplace(n_candidates, (score, iid + K))
    # Order the survivors by descending score.
    n_candidates.sort(key=lambda d: d[0], reverse=True)
    ids = [item[1] for item in n_candidates]
    # k_largest_scores = [item[0] for item in n_candidates]
    return ids#, k_largest_scores

def forward(model, i, data):
    """Run ``model`` on the batch ``data.get_slice(i)``.

    Returns:
        ``(tar, scores, con_loss)``: the target tensor, the product of the
        session embeddings with the transposed item embeddings, and the
        contrastive loss returned by the model.
    """
    tar, session_len, session_item, reversed_sess_item, mask = data.get_slice(i)
    A_hat, D_hat = data.get_overlap(session_item)

    def to_long(values):
        return trans_to_cuda(torch.Tensor(values).long())

    def to_float(values):
        return trans_to_cuda(torch.Tensor(values))

    session_item = to_long(session_item)
    session_len = to_long(session_len)
    A_hat = to_float(A_hat)
    D_hat = to_float(D_hat)
    tar = to_long(tar)
    mask = to_long(mask)
    reversed_sess_item = to_long(reversed_sess_item)

    item_emb_hg, sess_emb_hgnn, con_loss = model(
        session_item, session_len, D_hat, A_hat, reversed_sess_item, mask
    )
    scores = torch.mm(sess_emb_hgnn, item_emb_hg.transpose(1, 0))
    return tar, scores, con_loss


def train_test(model, train_data, test_data):
    """Train ``model`` for one epoch, then evaluate it on ``test_data``.

    Returns:
        ``(metrics, total_loss)`` where ``metrics`` maps ``'hit5'``, ``'mrr5'``,
        ``'hit10'``, ``'mrr10'``, ``'hit20'`` and ``'mrr20'`` to per-sample lists
        and ``total_loss`` is the summed training loss of the epoch.
    """
    print('start training: ', datetime.datetime.now())
    # NOTE(review): anomaly detection is a debugging aid that slows autograd down;
    # it is kept here to preserve the existing global setting.
    torch.autograd.set_detect_anomaly(True)
    # The previous call left the model in eval mode; switch back before training.
    model.train()
    total_loss = 0.0
    slices = train_data.generate_batch(model.batch_size)
    for i in slices:
        model.zero_grad()
        targets, scores, con_loss = forward(model, i, train_data)
        loss = model.loss_function(scores + 1e-8, targets)
        loss = loss + con_loss
        loss.backward()
        model.optimizer.step()
        # Accumulate a detached value so the running total carries no autograd history.
        total_loss += loss.detach()
    print('\tLoss:\t%.3f' % total_loss)
    top_K = [5, 10, 20]
    metrics = {}
    for K in top_K:
        metrics['hit%d' % K] = []
        metrics['mrr%d' % K] = []
    print('start predicting: ', datetime.datetime.now())

    model.eval()
    slices = test_data.generate_batch(model.batch_size)
    # Evaluation needs no gradients.
    with torch.no_grad():
        for i in slices:
            tar, scores, con_loss = forward(model, i, test_data)
            scores = trans_to_cpu(scores).detach().numpy()
            index = np.array([find_k_largest(20, scores[idd]) for idd in range(model.batch_size)])
            tar = trans_to_cpu(tar).detach().numpy()
            for K in top_K:
                for prediction, target in zip(index[:, :K], tar):
                    metrics['hit%d' %K].append(np.isin(target, prediction))
                    positions = np.where(prediction == target)[0]
                    if len(positions) == 0:
                        metrics['mrr%d' %K].append(0)
                    else:
                        metrics['mrr%d' %K].append(1 / (positions[0]+1))
    return metrics, total_loss



In [None]:
# NOTE: pickle.load can execute arbitrary code; only load files you created or trust.
# The context managers close the file handles once loading is done.
with open('/content/drive/MyDrive/AM220proj/yoochoose1_64/raw/train.txt', 'rb') as train_file:
    train_data = pickle.load(train_file)
with open('/content/drive/MyDrive/AM220proj/yoochoose1_64/raw/test.txt', 'rb') as test_file:
    test_data = pickle.load(test_file)
# NOTE(review): n_node is not defined in this cell; it is assumed to come from an
# earlier cell - confirm before Restart & Run All.
train_data = Data(train_data, shuffle=True, n_node=n_node)
test_data = Data(test_data, shuffle=True, n_node=n_node)

  self.raw = np.asarray(data[0])
  DH = H.T.multiply(1.0/H.sum(axis=1).reshape(1, -1))


In [None]:
from tqdm import tqdm
import heapq

In [None]:
# Prefer the GPU, but fall back to the CPU so the notebook still runs on a runtime
# without CUDA (the value is unchanged, 'cuda', whenever a GPU is available).
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
# Build the DHCN model from the training split's adjacency and move it through
# trans_to_cuda.
n_node = 37483
model = trans_to_cuda(
    DHCN(
        adjacency=train_data.adjacency,
        n_node=n_node,
        lr=0.001,
        l2=1e-5,
        beta=0.02,
        layers=3,
        emb_size=100,
        batch_size=100,
        dataset="yoochoose",
    )
)

top_K = [5, 10, 20]

# For every cutoff K:
#   best_results['metric<K>'] -> [best hit value, best mrr value] seen so far
#   best_results['epoch<K>']  -> [epoch of the best hit, epoch of the best mrr]
best_results = {}
for K in top_K:
    best_results.update({'epoch%d' % K: [0, 0], 'metric%d' % K: [0, 0]})

for epoch in range(5):
    print('-------------------------------------------------------')
    print('epoch: ', epoch)
    metrics, total_loss = train_test(model, train_data, test_data)

    for K in top_K:
        best_values = best_results['metric%d' % K]
        best_epochs = best_results['epoch%d' % K]
        # Slot 0 tracks the hit metric, slot 1 the mrr metric.
        for slot, key_format in enumerate(('hit%d', 'mrr%d')):
            key = key_format % K
            # Collapse the per-sample list into a single percentage.
            metrics[key] = np.mean(metrics[key]) * 100
            if best_values[slot] < metrics[key]:
                best_values[slot] = metrics[key]
                best_epochs[slot] = epoch

    print(metrics)

    for K in top_K:
        best_hit, best_mrr = best_results['metric%d' % K]
        hit_epoch, mrr_epoch = best_results['epoch%d' % K]
        print('train_loss:\t%.4f\tRecall@%d: %.4f\tMRR%d: %.4f\tEpoch: %d,  %d'
              % (total_loss, K, best_hit, K, best_mrr, hit_epoch, mrr_epoch))


-------------------------------------------------------
epoch:  0
start training:  2023-04-23 22:29:10.126486
	Loss:	24483.811
start predicting:  2023-04-23 22:55:14.917037
{'hit5': 44.28980322003578, 'mrr5': 26.108050089445438, 'hit10': 57.11091234347049, 'mrr10': 27.828169633983592, 'hit20': 67.89982110912344, 'mrr20': 28.589821653201152}
train_loss:	24483.8105	Recall@5: 44.2898	MRR5: 26.1081	Epoch: 0,  0
train_loss:	24483.8105	Recall@10: 57.1109	MRR10: 27.8282	Epoch: 0,  0
train_loss:	24483.8105	Recall@20: 67.8998	MRR20: 28.5898	Epoch: 0,  0
-------------------------------------------------------
epoch:  1
start training:  2023-04-23 22:58:37.934138
	Loss:	22489.121
start predicting:  2023-04-23 23:24:51.903041
{'hit5': 45.180679785330945, 'mrr5': 26.6468992248062, 'hit10': 58.0572450805009, 'mrr10': 28.37053695658347, 'hit20': 68.80500894454383, 'mrr20': 29.126848679693}
train_loss:	22489.1211	Recall@5: 45.1807	MRR5: 26.6469	Epoch: 1,  1
train_loss:	22489.1211	Recall@10: 58.0572	MR