In [1]:
import os
import itertools
import gc

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F

print('PyTroch Version', torch.__version__)
print('GPU Available:', torch.cuda.is_available())

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

PyTroch Version 1.10.1
GPU Available: True


In [2]:
import pykeen
pykeen.env()

| Key             | Value                    |
|-----------------|--------------------------|
| OS              | posix                    |
| Platform        | Linux                    |
| Release         | 5.13.0-28-generic        |
| Time            | Tue Feb  1 19:37:12 2022 |
| Python          | 3.9.7                    |
| PyKEEN          | 1.7.0                    |
| PyKEEN Hash     | UNHASHED                 |
| PyKEEN Branch   |                          |
| PyTorch         | 1.10.1                   |
| CUDA Available? | true                     |
| CUDA Version    | 11.3                     |
| cuDNN Version   | 8200                     |


In [3]:
from torch_geometric.datasets import IMDB

dataset = IMDB(root='./data/imdb')
print('Dataset:', dataset)

print('Number of graphs:', len(dataset))

data = dataset[0]
print(data)

num_classes = len(data['movie'].y.unique())
print('Number of classes:', num_classes)
print('Classes:', data['movie'].y.unique())

Dataset: IMDB()
Number of graphs: 1
HeteroData(
  [1mmovie[0m={
    x=[4278, 3066],
    y=[4278],
    train_mask=[4278],
    val_mask=[4278],
    test_mask=[4278]
  },
  [1mdirector[0m={ x=[2081, 3066] },
  [1mactor[0m={ x=[5257, 3066] },
  [1m(movie, to, director)[0m={ edge_index=[2, 4278] },
  [1m(movie, to, actor)[0m={ edge_index=[2, 12828] },
  [1m(director, to, movie)[0m={ edge_index=[2, 4278] },
  [1m(actor, to, movie)[0m={ edge_index=[2, 12828] }
)
Number of classes: 3
Classes: tensor([0, 1, 2])


In [4]:
import itertools
from torch_geometric.nn import Linear, HeteroConv, GCNConv, SAGEConv

class SplitGCN(torch.nn.Module):
    def __init__(self, metadata, emb_size, dense_size, out_size, num_layers, device):
        # TODO: Implement a function that initializes self.convs, 
        # self.bns, and self.softmax.
        super(SplitGCN, self).__init__()

        self.num_relations = len(metadata[1])
        self.device = device
        self.edge_conv_dict = {}
        for edge_type in metadata[1]:
            self.convs = nn.ModuleList()
            for _ in range (num_layers):
                self.conv = HeteroConv({
                    edge_type: SAGEConv((-1,-1), emb_size)
                })
                self.convs.append(self.conv)
            self.edge_conv_dict[edge_type] = self.convs

        # TODO: TransR

        self.linears = nn.ModuleList()
        for _ in range (self.num_relations):
            linear = Linear(emb_size, dense_size)
            self.linears.append(linear)
        
        self.clflinear = Linear(self.num_relations*dense_size, out_size)


    def reset_parameters(self):
        for conv in self.convs:
            conv.reset_parameters()


    def forward(self, x_dict, edge_index_dict):
        # TODO: Implement a function that takes the feature tensor x and
        # edge_index tensor adj_t and returns the output tensor as
        # shown in the figure.
        
        concatenated_embs = []
        #print(len(self.convs))
        for relation, edge_tensor in edge_index_dict.items():
            convs = self.edge_conv_dict[relation]
            single_edge_index_dict = {relation: edge_tensor.to("cuda:0")}
            single_x_dict = {relation[0]: x_dict[relation[0]].to("cuda:0"), 
                            relation[2]: x_dict[relation[2]].to("cuda:0")}
            for conv in convs:
                #print(conv)
                single_x_dict = conv(single_x_dict, single_edge_index_dict)
                #x = x_dict['movie']
                #print('conv output', x.size())
                #concatenated_embs.append(x)
            concatenated_embs.append(single_x_dict['movie'])

        #print(x_dict.keys())
        #print(x_dict['movie'].size())

        print(len(concatenated_embs))
        linear_outputs = []
        for x, linear in zip(concatenated_embs, self.linears):
            x = linear(x)
            print('linear output', x.size())
            linear_outputs.append(x)
        
        print(len(linear_outputs))
        out = torch.cat(linear_outputs, dim=-1)
        print('clf layer input', out.size())
        out = self.clflinear(out)
        print('clf layer output', out.size())
        return out

model = SplitGCN(data.metadata(), 256, 256, num_classes, 2, device)
print(model)
del model
gc.collect()

SplitGCN(
  (convs): ModuleList(
    (0): HeteroConv(num_relations=1)
    (1): HeteroConv(num_relations=1)
  )
  (conv): HeteroConv(num_relations=1)
  (linears): ModuleList(
    (0): Linear(256, 256, bias=True)
    (1): Linear(256, 256, bias=True)
    (2): Linear(256, 256, bias=True)
    (3): Linear(256, 256, bias=True)
  )
  (clflinear): Linear(1024, 3, bias=True)
)


709

In [5]:
def train(model, data, optimizer, loss_fn):
    # TODO: Implement a function that trains the model by 
    # using the given optimizer and loss_fn.

    model.train()
    optimizer.zero_grad()
    outputs = model(data.x_dict, data.edge_index_dict)
    mask = data['movie'].train_mask
    labels = data['movie'].y[mask]
    loss = loss_fn(outputs[mask], labels)
    loss.backward()
    optimizer.step()

    return loss.item()

In [6]:
# Test function here
@torch.no_grad()
def test(model, data, save_model_results=False):
    # a function that tests the model by 
    # using the given split_idx and evaluator.
    model.eval()

    # The output of model on all data
    out = model(data.x_dict, data)
    pred = out.argmax(dim=-1, keepdim=True)

    accs = []
    for split in ['train_mask', 'val_mask', 'test_mask']:
        mask = data['author'][split]
        acc = (pred[mask] == data['author'].y[mask]).sum() / mask.sum()
        accs.append(float(acc))


    if save_model_results:
      print ("Saving Model Predictions")

      data = {}
      data['y_pred'] = pred.view(-1).cpu().detach().numpy()

      df = pd.DataFrame(data=data)
      # Save locally as csv
      df.to_csv('imdb.csv', sep=',', index=False)


    return accs

In [7]:
import copy

# Model Parameters
emb_dim = 256
hidden_dim = 256
output_dim = num_classes
num_layers = 3

model = SplitGCN(data.metadata(), emb_dim, hidden_dim, output_dim, num_layers, device)

model, data = model.to(device), data.to(device)

print(next(model.parameters()).device)

# Reset model parameters
# model.reset_parameters()

# Define hyperparameters
num_epochs = 20
optimizer = torch.optim.Adam(model.parameters())
loss_fn = F.cross_entropy

best_model = None
best_valid_acc = 0

for epoch in range(1, 1 + num_epochs):
    # train model
    loss = train(model, data, optimizer, loss_fn)
    
    # evaluate model 
    result = test(model, data)
    
    train_acc, valid_acc, test_acc = result
    if valid_acc > best_valid_acc:
        best_valid_acc = valid_acc
        best_model = copy.deepcopy(model)
    print(f'Epoch: {epoch:02d}, '
            f'Loss: {loss:.4f}, '
            f'Train: {100 * train_acc:.2f}%, '
            f'Valid: {100 * valid_acc:.2f}% '
            f'Test: {100 * test_acc:.2f}%')

cuda:0


RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument mat1 in method wrapper_addmm)