## Install libraries

In [5]:
# Print the torch version as seen by a separate Python subprocess (not the notebook kernel).
!python -c "import torch; print(torch.__version__)"

1.12.1+cu113


In [6]:
# Print torch.version.cuda for the installed torch build, again from a Python subprocess.
!python -c "import torch; print(torch.version.cuda)"

11.3


In [7]:
import os, sys
from google.colab import drive
# Mount Google Drive into the Colab filesystem.
drive.mount('/content/drive')
# Directory that is put on the import path below and used as the pip --target
# in the install cell.
nb_path = '/content/notebooks'
# Disabled: would make nb_path a link to a Drive folder. While this stays
# commented out, `os` is not used in this cell.
#os.symlink('/content/drive/My Drive/Colab Notebooks', nb_path)
# Put nb_path at the front of sys.path so it is searched before any other import location.
sys.path.insert(0,nb_path)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip install --target=$nb_path torch-scatter -f https://data.pyg.org/whl/torch-${TORCH}+${CUDA}.html
!pip install --target=$nb_path torch-sparse -f https://data.pyg.org/whl/torch-${TORCH}+${CUDA}.html
!pip install --target=$nb_path torch-geometric

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in links: https://data.pyg.org/whl/torch-+.html
Collecting torch-scatter
  Using cached torch_scatter-2.0.9.tar.gz (21 kB)
Building wheels for collected packages: torch-scatter
  Building wheel for torch-scatter (setup.py) ... [?25l[?25hdone
  Created wheel for torch-scatter: filename=torch_scatter-2.0.9-cp37-cp37m-linux_x86_64.whl size=274491 sha256=ea6b31226b2d62ba3720ee1eb1a4477dcde8ee92c1e1969350aa8480f1640067
  Stored in directory: /root/.cache/pip/wheels/dd/57/a3/42ea193b77378ce634eb9454c9bc1e3163f3b482a35cdee4d1
Successfully built torch-scatter
Installing collected packages: torch-scatter
Successfully installed torch-scatter-2.0.9
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in links: https://data.pyg.org/whl/torch-+.html
Collecting torch-sparse
  Downloading torch_sparse-0.6.15.tar.gz (2.1 MB)
[K     |███████

In [3]:
# Same call as in the Drive-mount cell: put nb_path first on the import path.
# Relies on `sys` and `nb_path` already being defined by that cell.
sys.path.insert(0,nb_path)

## Create baseline dataset

### Dataset parameters

In [None]:
# Size parameters of the synthetic dataset.
num_classes = 4

num_total_nodes = 100
num_labeled_nodes = 20
# Every node that is not labeled is treated as a test node.
num_unlabeled_nodes = num_total_nodes - num_labeled_nodes
# Labeled nodes are split evenly across classes; any remainder is dropped.
num_train_edges_per_class = num_labeled_nodes // num_classes

# Echo the configuration so the notebook output records it.
for caption, amount in (
    ('Total nodes:', num_total_nodes),
    ('Train nodes:', num_labeled_nodes),
    ('Test nodes:', num_unlabeled_nodes),
    ('Max number of train edges:', num_train_edges_per_class),
):
  print(caption, amount)

Total nodes: 100
Train nodes: 20
Test nodes: 80
Max number of train edges: 5


In [None]:
# Class ids run from 0 to num_classes - 1.
classes = range(num_classes)

# Node ids 0 .. num_labeled_nodes - 1 are the labeled (training) nodes; the
# remaining ids up to num_total_nodes - 1 are the unlabeled (test) nodes.
# Within each pair both names refer to the same list object, not to copies, so
# an in-place change made through one name is visible through the other.
train_nodes = labeled_nodes = [*range(num_labeled_nodes)]
test_nodes = unlabled_nodes = [*range(num_labeled_nodes, num_total_nodes)]

print(train_nodes, test_nodes, sep='\n')

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
[20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]


### Create trainset

#### Create meaningful relation

In [None]:
# randint is no longer needed here; it stays imported for any other cell that relies on it.
from random import Random, randint

# Randomly partition the labeled nodes into `num_classes` groups of
# `num_train_edges_per_class` nodes each.
#
# The partition is taken from a shuffled copy instead of removing nodes from
# `labeled_nodes` in place: `train_nodes` is the same list object, so in-place
# removal would empty both, and a second run of the cell would fail with
# ValueError from randint(0, -1).
CLASS_SPLIT_SEED = 0  # fixed so the split, and the edges/labels derived from it, are reproducible
shuffled_nodes = Random(CLASS_SPLIT_SEED).sample(labeled_nodes, len(labeled_nodes))

# Consecutive slices of a uniformly shuffled list give the same distribution as
# drawing nodes one by one without replacement.
classes_list = [
    shuffled_nodes[i * num_train_edges_per_class:(i + 1) * num_train_edges_per_class]
    for i in range(num_classes)
]

classes_list

[[5, 14, 1, 2, 11], [0, 13, 3, 8, 4], [18, 12, 19, 15, 9], [6, 17, 10, 7, 16]]

In [None]:
from random import choice

# Build the "meaningful" relation: every node proposes one edge to a randomly
# chosen other node of its own class, so no edge crosses a class boundary.
# The result is one list of (head, tail) tuples per class.
edge_list = []
for node_list in classes_list:
  edge_per_class_list = []
  for node in node_list:
    candidates = [x for x in node_list if x != node]
    if not candidates:
      # A class with a single node has no possible partner; choice([]) would
      # raise IndexError, so such a class simply contributes no edge.
      continue
    tail = choice(candidates)
    # Skip the proposal when the reverse edge is already present. Because the
    # proposal is dropped rather than redrawn, a class can end up with fewer
    # edges than nodes.
    if (tail, node) not in edge_per_class_list:
      edge_per_class_list.append((node, tail))
  edge_list.append(edge_per_class_list)

edge_list

[[(5, 2), (14, 2), (1, 14), (11, 1)],
 [(0, 3), (13, 0), (3, 4), (8, 3), (4, 8)],
 [(18, 15), (12, 19), (19, 18), (15, 12), (9, 15)],
 [(6, 17), (17, 16), (10, 7), (7, 6), (16, 6)]]

In [None]:
import itertools

# Collapse the per-class edge lists into a single flat list of (head, tail) tuples.
# Running this on an already flat list would unpack the tuples themselves.
edge_list = [*itertools.chain(*edge_list)]

In [None]:
# Rebuild the full list of training node ids, 0 .. num_labeled_nodes - 1.
train_nodes = [*range(num_labeled_nodes)]
train_nodes

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]

In [None]:
# The flattened meaningful relation is the set of training edges.
# This binds a second name to the same list object; no copy is made.
training_edges = edge_list 
training_edges

[(5, 2),
 (14, 2),
 (1, 14),
 (11, 1),
 (0, 3),
 (13, 0),
 (3, 4),
 (8, 3),
 (4, 8),
 (18, 15),
 (12, 19),
 (19, 18),
 (15, 12),
 (9, 15),
 (6, 17),
 (17, 16),
 (10, 7),
 (7, 6),
 (16, 6)]

In [None]:
# Order the edges by head node. Sorting a fresh copy leaves the list that
# `training_edges` previously referred to untouched; the sort is stable, so
# edges with the same head keep their relative order.
training_edges = list(training_edges)
training_edges.sort(key=lambda edge: edge[0])
training_edges

[(0, 3),
 (1, 14),
 (3, 4),
 (4, 8),
 (5, 2),
 (6, 17),
 (7, 6),
 (8, 3),
 (9, 15),
 (10, 7),
 (11, 1),
 (12, 19),
 (13, 0),
 (14, 2),
 (15, 12),
 (16, 6),
 (17, 16),
 (18, 15),
 (19, 18)]

#### Create noise relation

In [None]:
# Number of edges in the meaningful relation.
len(training_edges)

19

We want to create a noisy relation with a similar number of edges to the meaningful relation.

#### Create labels

In [None]:
# Show the class partition that the labels below are derived from.
classes_list

[[17, 6, 18, 1, 12], [13, 10, 7, 15, 8], [0, 14, 2, 11, 19], [5, 4, 3, 9, 16]]

In [None]:
import numpy as np

# One label slot per labeled node, indexed by node id. Slots start at 0.0, so a
# node that appears in no class keeps label 0.
training_labels = np.zeros(num_labeled_nodes)

# The position of a group inside classes_list is the class id of its members.
for class_id, members in enumerate(classes_list):
  training_labels[np.asarray(members, dtype=int)] = class_id

training_labels

array([2., 0., 2., 3., 3., 3., 0., 1., 1., 3., 1., 2., 0., 1., 2., 1., 3.,
       0., 0., 2.])

### Create initial node embeddings

We use all-zero vectors as the initial node embeddings so that the embeddings themselves carry no information and everything the model learns has to be extracted from the relations.

In [None]:
# Length of each initial node embedding vector.
emb_size = 1

In [None]:
# One all-zero row of length emb_size for every node, labeled and unlabeled alike.
node_embs = np.zeros((num_total_nodes, emb_size))

In [None]:
# Sanity check: expected to be (num_total_nodes, emb_size).
node_embs.shape

(100, 1)

## Create dataset class

In [8]:
import torch
from torch_geometric.data import InMemoryDataset, download_url

ModuleNotFoundError: ignored

In [None]:
class BaseNoiseDataset(InMemoryDataset):


## Store dataset