In [1]:
import torch
import os
import shutil
from torch_geometric.data import Data
from torch_geometric.data import InMemoryDataset

In [2]:
x = torch.tensor([[[2],[1]], [[5],[6]]], dtype=torch.float)
y = torch.tensor([0, 1], dtype=torch.float)

edge_index = torch.tensor([[0, 1],
                           [1, 0]], dtype=torch.long)


data = Data(x=x, y=y, edge_index=edge_index)
data

Data(x=[2, 2, 1], edge_index=[2, 2], y=[2])

In [16]:
class YooChooseDataset(InMemoryDataset):
    def __init__(self, root, transform=None, pre_transform=None):
        super(YooChooseDataset, self).__init__(root, transform, pre_transform)
        self.data, self.slices = torch.load(self.processed_paths[0])

    @property
    def raw_file_names(self):
        return []
    @property
    def processed_file_names(self):
        return ['./AMH_binary_classification.dataset']

    def download(self):
        pass
    
    def process(self):
        
        data_list = []

        # process by session_id
        for i in range(0, 500):
            node_features = torch.tensor([[1,2,3], [5,3,1], [1,2,4], [3,4,5]], dtype=torch.long)

            node_features = torch.LongTensor(node_features).unsqueeze(1)
            

            edge_index = torch.tensor([[0, 1, 2, 3],
                                    [1, 0, 0, 1]], dtype=torch.long)
            x = node_features

                
            label = torch.tensor([0, 1], dtype=torch.long)

            y = torch.LongTensor(label)

            data = Data(x=x, edge_index=edge_index, y=y)

            data_list.append(data)
        
        data, slices = self.collate(data_list)
        torch.save((data, slices), self.processed_paths[0])

In [21]:
dataset.num_classes

2

In [17]:
if os.path.exists('./processed'):
            shutil.rmtree('./processed')
        # print(root)
dataset = YooChooseDataset('./')

Processing...
Done!


In [19]:
dataset = dataset.shuffle()
print(len(dataset))
one_tenth_length = int(len(dataset) * 0.1)
train_dataset = dataset[:one_tenth_length * 7]
# val_dataset = dataset[one_tenth_length*8:one_tenth_length * 9]
test_dataset = dataset[one_tenth_length*7:]
len(train_dataset), len(test_dataset)


500


(350, 150)

In [14]:
data = dataset[2]
print(data)

Data(x=[4, 1, 3], edge_index=[2, 4], y=[2])


In [114]:
from torch_geometric.data import DataLoader
batch_size= 32
train_loader = DataLoader(train_dataset, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

In [115]:
embed_dim = 128
from torch_geometric.nn import GraphConv, TopKPooling, GatedGraphConv, SAGEConv, SGConv
from torch_geometric.nn import global_mean_pool as gap, global_max_pool as gmp
import torch.nn.functional as F
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()

        self.conv1 = GraphConv(embed_dim * 2, 128)
        self.pool1 = TopKPooling(128, ratio=0.9)
        self.conv2 = GraphConv(128, 128)
        self.pool2 = TopKPooling(128, ratio=0.9)
        self.conv3 = GraphConv(128, 128)
        self.pool3 = TopKPooling(128, ratio=0.9)
        self.item_embedding = torch.nn.Embedding(num_embeddings=4, embedding_dim=embed_dim)
        self.category_embedding = torch.nn.Embedding(num_embeddings=2, embedding_dim=embed_dim)        
        self.lin1 = torch.nn.Linear(256, 256)
        self.lin2 = torch.nn.Linear(256, 128)
        self.bn1 = torch.nn.BatchNorm1d(128)
        self.bn2 = torch.nn.BatchNorm1d(64)
        self.act1 = torch.nn.ReLU()
        self.act2 = torch.nn.ReLU()        
  
    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch
        
        item_id = x[:,:,0]
        category = x[:,:,1]
        

        emb_item = self.item_embedding(item_id).squeeze(1)
        emb_category = self.category_embedding(category).squeeze(1)
        
#         emb_item = emb_item.squeeze(1)
#         emb_cat
        x = torch.cat([emb_item, emb_category], dim=1)  
#         print(x.shape)
        x = F.relu(self.conv1(x, edge_index))
#                 print(x.shape)
        x, edge_index, _, batch, _ = self.pool1(x, edge_index, None, batch)
        x1 = torch.cat([gmp(x, batch), gap(x, batch)], dim=1)

        x = F.relu(self.conv2(x, edge_index))
     
        x, edge_index, _, batch, _ = self.pool2(x, edge_index, None, batch)
        x2 = torch.cat([gmp(x, batch), gap(x, batch)], dim=1)

        x = F.relu(self.conv3(x, edge_index))

        x, edge_index, _, batch, _ = self.pool3(x, edge_index, None, batch)
        x3 = torch.cat([gmp(x, batch), gap(x, batch)], dim=1)

        x = x1 + x2 + x3

        x = self.lin1(x)
        x = self.act1(x)
        x = self.lin2(x)
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.act2(x)      
        
        outputs = []
        for i in range(x.size(0)):
            output = torch.matmul(emb_item[data.batch == i], x[i,:])

            outputs.append(output)
              
        x = torch.cat(outputs, dim=0)
        x = torch.sigmoid(x)
        
        return x

In [117]:
device = torch.device('cuda')
model = Net().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
crit = torch.nn.BCELoss()

In [116]:
def train():
    model.train()

    loss_all = 0
    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()
        output = model(data)

        label = data.y.to(device)
        loss = crit(output, label)
        loss.backward()
        loss_all += data.num_graphs * loss.item()
        optimizer.step()
    return loss_all / len(train_dataset)

In [123]:
from sklearn.metrics import roc_auc_score
import numpy as np
def evaluate(loader):
    model.eval()

    predictions = []
    labels = []

    with torch.no_grad():
        for data in loader:

            data = data.to(device)
            pred = model(data).detach().cpu().numpy()

            label = data.y.detach().cpu().numpy()
            predictions.append(pred)
            labels.append(label)

    predictions = np.hstack(predictions)
    labels = np.hstack(labels)
    
    return roc_auc_score(labels, predictions)

In [124]:
for epoch in range(1, 200):
    loss = train()
    train_acc = evaluate(train_loader)   
    test_acc = evaluate(test_loader)
    print('Epoch: {:03d}, Loss: {:.5f}, Train Auc: {:.5f}, Test Auc: {:.5f}'.
          format(epoch, loss, train_acc, test_acc))

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.