In [1]:
import rootutils

rootutils.setup_root("./", indicator=".project-root", pythonpath=True)

%load_ext autoreload
%autoreload 2

import torch
import torch_geometric
from topobenchmarkx.data.datasets import CustomDataset
import hydra
from hydra import initialize, compose
from topobenchmarkx.data.dataloader_fullbatch import FullBatchDataModule
from topobenchmarkx.io.load.loaders import (
    GraphLoader,
    SimplicialLoader,
    HypergraphLoader,
)
from omegaconf import DictConfig, OmegaConf
from topobenchmarkx.utils.config_resolvers import (
    get_default_transform,
    get_monitor_metric,
    get_monitor_mode,
    infer_in_channels,
)


OmegaConf.register_new_resolver("get_default_transform", get_default_transform)
OmegaConf.register_new_resolver("get_monitor_metric", get_monitor_metric)
OmegaConf.register_new_resolver("get_monitor_mode", get_monitor_mode)
OmegaConf.register_new_resolver("infer_in_channels", infer_in_channels)
OmegaConf.register_new_resolver(
    "parameter_multiplication", lambda x, y: int(int(x) * int(y))
)

initialize(config_path="../configs", job_name="job")
cfg = compose(config_name="train.yaml", return_hydra_config=True)

The version_base parameter is not specified.
Please specify a compatability version level, or None.
Will assume defaults for version 1.1
  initialize(config_path="../configs", job_name="job")


In [3]:
cfg.dataset.transforms.graph2simplicial_lifting.complex_dim = 3

In [4]:
# Instantiate and load dataset
dataset = hydra.utils.instantiate(cfg.dataset, _recursive_=False)
dataset = dataset.load()

Processing...
  self._set_arrayXarray(i, j, x)
Done!


In [7]:
data = dataset[0]

In [19]:
import torch
import torch_geometric
import numpy as np

nci1 = torch_geometric.datasets.TUDataset(
    root=".",
    name="NCI1",
    use_node_attr=False,
)
node_features = [data.x.shape[1] for data in nci1][0]
print("Node features:", node_features)
n_labels = torch.unique(torch.tensor([data.y for data in nci1]))
n_labels = len(n_labels)
print("Number of labels:", n_labels)

Node features: 37
Number of labels: 2


In [14]:
nci1 = torch_geometric.datasets.TUDataset(
    root=".",
    name="NCI109",
    use_node_attr=False,
)
node_features = [data.x.shape[1] for data in nci1][0]
print("Node features:", node_features)
n_labels = torch.unique(torch.tensor([data.y for data in nci1]))
n_labels = len(n_labels)
print("Number of labels:", n_labels)

Downloading https://www.chrsmrrs.com/graphkerneldatasets/NCI109.zip
Processing...
Done!


Node features: 38
Number of labels: 2


In [15]:
nci1

NCI109(4127)

In [22]:
imdb = torch_geometric.datasets.TUDataset(
    root=".",
    name="IMDB-MULTI",
    use_node_attr=False,
)
max_degree = int(
    max(
        [torch.max(torch_geometric.utils.degree(data.edge_index[0])) for data in imdb]
    ).numpy()
)

In [2]:
max_degree

3062

In [8]:
x = torch.normal(0, 1, (1, 10))

In [16]:
x.expand(2, 4, -1)

tensor([[[-1.2375,  1.0727,  0.3450,  1.0833,  0.4681, -0.1657,  0.5076,
          -0.5245, -0.2753, -1.0917],
         [-1.2375,  1.0727,  0.3450,  1.0833,  0.4681, -0.1657,  0.5076,
          -0.5245, -0.2753, -1.0917],
         [-1.2375,  1.0727,  0.3450,  1.0833,  0.4681, -0.1657,  0.5076,
          -0.5245, -0.2753, -1.0917],
         [-1.2375,  1.0727,  0.3450,  1.0833,  0.4681, -0.1657,  0.5076,
          -0.5245, -0.2753, -1.0917]],

        [[-1.2375,  1.0727,  0.3450,  1.0833,  0.4681, -0.1657,  0.5076,
          -0.5245, -0.2753, -1.0917],
         [-1.2375,  1.0727,  0.3450,  1.0833,  0.4681, -0.1657,  0.5076,
          -0.5245, -0.2753, -1.0917],
         [-1.2375,  1.0727,  0.3450,  1.0833,  0.4681, -0.1657,  0.5076,
          -0.5245, -0.2753, -1.0917],
         [-1.2375,  1.0727,  0.3450,  1.0833,  0.4681, -0.1657,  0.5076,
          -0.5245, -0.2753, -1.0917]]])

In [13]:
# Check if torch_geometric.data has nonempty attribute 'x'

False

In [11]:
imdb[0].x

In [97]:
transform = torch_geometric.transforms.OneHotDegree(max_degree)

In [79]:
torch_geometric.utils.one_hot(torch.tensor([1]), max_degree + 1)

TypeError: zeros(): argument 'size' must be tuple of ints, but found element of type Tensor at pos 2

1

Data(edge_index=[2, 146], y=[1], num_nodes=20)

In [51]:
transform(imdb[0])

TypeError: zeros(): argument 'size' must be tuple of ints, but found element of type Tensor at pos 2

tensor(135.)

In [24]:
torch_geometric.utils.degree(imdb[0].edge_index)

RuntimeError: Index tensor must have the same number of dimensions as self tensor

In [10]:
import torch_geometric

torch_geometric.datasets.TUDataset

TypeError: TUDataset.__init__() missing 2 required positional arguments: 'root' and 'name'

In [3]:
config.trainer

{'_target_': 'lightning.pytorch.trainer.Trainer', 'default_root_dir': '${paths.output_dir}', 'min_epochs': 1, 'max_epochs': 30, 'accelerator': 'gpu', 'devices': [0], 'check_val_every_n_epoch': 1, 'deterministic': False, 'inference_mode': False}

In [4]:
callbacks: List[Callback] = instantiate_callbacks(cfg.get("callbacks"))
logger: List[Logger] = instantiate_loggers(cfg.get("logger"))

log.info(f"Instantiating trainer <{cfg.trainer._target_}>")
trainer: Trainer = hydra.utils.instantiate(
    cfg.trainer, callbacks=callbacks, logger=logger
)
trainer = hydra.utils.instantiate(config.trainer)

InterpolationResolutionError: ValueError raised while resolving interpolation: HydraConfig was not set

# Load data

In [3]:
graph_loader = hydra.utils.instantiate(config.dataset, _recursive_=False)
data = graph_loader.load()

Downloading https://www.chrsmrrs.com/graphkerneldatasets/MUTAG.zip
Processing...
  self._set_arrayXarray(i, j, x)
Done!


In [4]:
d = data[0].data_lst

In [5]:
d[0].shape

(13, 14, 2)

In [6]:
d[0]

Data(x=[13, 7], y=[1], up_laplacian_0=[13, 13], adjacency_0=[13, 13], hodge_laplacian_0=[13, 13], incidence_1=[13, 14], down_laplacian_1=[14, 14], up_laplacian_1=[14, 14], adjacency_1=[14, 14], hodge_laplacian_1=[14, 14], incidence_2=[14, 2], down_laplacian_2=[2, 2], hodge_laplacian_2=[2, 2], shape=[3], x_0=[13, 7], x_1=[14, 7], x_2=[2, 7], train_mask=[1], val_mask=[1], test_mask=[1])

In [2]:
simplicial_loader = hydra.utils.instantiate(config.dataset)
data = simplicial_loader.load()

In [4]:
from lightning import Callback, LightningDataModule, LightningModule, Trainer
from lightning.pytorch.loggers import Logger

model: LightningModule = hydra.utils.instantiate(config.model)

/Users/gbg141/Documents/TopoProjectX/TopoBenchmarkX/venv_topox/lib/python3.11/site-packages/lightning/pytorch/utilities/parsing.py:198: Attribute 'backbone' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['backbone'])`.
/Users/gbg141/Documents/TopoProjectX/TopoBenchmarkX/venv_topox/lib/python3.11/site-packages/lightning/pytorch/utilities/parsing.py:198: Attribute 'readout_workaround' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['readout_workaround'])`.
/Users/gbg141/Documents/TopoProjectX/TopoBenchmarkX/venv_topox/lib/python3.11/site-packages/lightning/pytorch/utilities/parsing.py:198: Attribute 'readout' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['readout'])`.


In [5]:
model

NetworkModule(
  (backbone): SANWrapper(
    (backbone): SAN()
  )
  (readout_workaround): ReadOutWorkaround()
  (readout): NodeLevelReadOut(
    (linear): Linear(in_features=16, out_features=2, bias=True)
  )
  (val_acc_best): MaxMetric()
)

In [3]:
data.num_nodes

34

In [6]:
data.incidence_1.T

tensor(indices=tensor([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13,
                        14, 15,  0, 16, 17, 18, 19, 20, 21, 22, 23,  1, 16, 24,
                        25, 26, 27, 28, 29, 30, 31,  2, 17, 24, 32, 33, 34,  3,
                        35, 36,  4, 37, 38, 39,  5, 35, 37, 40,  6, 18, 25, 32,
                         7, 26, 41, 42, 43, 27, 44,  8, 36, 38,  9, 10, 33, 11,
                        19, 28, 34, 45, 46, 47, 48, 49, 39, 40, 12, 20, 50, 51,
                        13, 21, 52, 53, 54, 14, 22, 55, 56, 57, 58, 59, 60, 61,
                        62, 63, 64, 57, 62, 65, 66, 67, 29, 58, 63, 68, 30, 69,
                        70, 59, 66, 71, 72, 23, 41, 73, 74, 15, 64, 65, 69, 75,
                        76, 31, 42, 46, 48, 50, 53, 55, 60, 71, 73, 75, 77, 43,
                        44, 45, 47, 49, 51, 52, 54, 56, 61, 67, 68, 70, 72, 74,
                        76, 77],
                       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
       

### Lifting

In [5]:
# # Load data
# from topobenchmarkx.data.load.loaders import HypergraphLoader

# data_loader = HypergraphLoader(config)
# data = data_loader.load()

In [4]:
config.keys()

dict_keys(['hydra', 'task_name', 'tags', 'train', 'test', 'ckpt_path', 'seed', 'dataset', 'transforms', 'model', 'evaluator', 'callbacks', 'trainer', 'paths', 'extras'])

In [5]:
config["transforms"]

{'_target_': 'topobenchmarkx.transforms.lifting.DataLiftingTransform', 'lifting': 'HypergraphKHopLifting', 'k_value': 1, 'complex_dim': 'None', 'max_triangles': 'None', 'aggregation_method': 'None'}

In [6]:
lifting = hydra.utils.instantiate(config.transforms)

In [7]:
lifting.parameters

{'k_value': 1,
 'complex_dim': 'None',
 'max_triangles': 'None',
 'aggregation_method': 'None',
 'lifting': 'HypergraphKHopLifting'}

In [8]:
data = torch_geometric.data.Data()
data.x = torch.zeros([6, 1])
data.edge_index = torch.tensor([[0, 0, 0, 1, 1, 1, 2, 4], [1, 2, 3, 2, 3, 4, 3, 5]])

In [9]:
lifted_data = lifting(data)

In [10]:
lifted_data

Data(x=[6, 1], incidence_1=[6, 6], num_hyperedges=6, x_0=[6, 1], x_hyperedges=[6, 1])

In [None]:
cora = torch_geometric.datasets.Planetoid(
    root="../datasets/graph/", name="Cora", pre_transform=lifting
)



In [None]:
cora.data



Data(x=[2708, 1433], num_cells_0=2708, num_cells_1=10556, num_cells_2=2648, incidence_1=[2708, 10556], incidence_2=[10556, 2648], up_laplacian_1=[10556, 10556], up_laplacian_2=[2648, 2648], down_laplacian_2=[10556, 10556], down_laplacian_1=[2708, 2708], x_0=[2708, 1433], x_1=[10556, 1433], x_2=[2648, 1433])

In [None]:
from topobenchmarkx.data.datasets import CustomDataset

In [None]:
from torch.utils.data import DataLoader
from torch_geometric.data import Data


def collate_fn(batch):
    """
    args:
        batch - list of (tensor, label)

    reutrn:
        xs - a tensor of all examples in 'batch' after padding
        ys - a LongTensor of all labels in batch
    """

    for b in batch:
        values, keys = b[0], b[1]
        data = Data()
        for key, value in zip(keys, values):
            data[key] = value

    return data


d = DataLoader(dataset=CustomDataset([cora.data]), batch_size=1, collate_fn=collate_fn)
next(iter(d))

Data(num_hyperedges=2708, x_0=[2708, 1433], incidence_1=[2708, 2708], x=[2708, 1433], x_hyperedges=[2708, 1433])

In [None]:
databatch = torch_geometric.data.Batch.from_data_list(data_lst)

NameError: name 'data_lst' is not defined

In [None]:
databatch.edge_index[:, :4905]

tensor([[   0,    1,    2,  ..., 2706, 2706, 2707],
        [ 978,  736,  399,  ...,  362,  419,  921]])

In [None]:
databatch.edge_index[:, 4905:]

tensor([[2708, 2709, 2710,  ..., 5414, 5414, 5415],
        [3686, 3444, 3107,  ..., 3070, 3127, 3629]])

In [None]:
databatch.edge_index = databatch.edge_index.to_sparse()
databatch.edge_index

AllSetTransformer(
  (layers): ModuleList(
    (0): AllSetTransformerLayer(
      (vertex2edge): AllSetTransformerBlock(
        (multihead_att): MultiHeadAttention()
        (mlp): MLP(
          (0): Linear(in_features=64, out_features=64, bias=False)
          (1): Dropout(p=0.2, inplace=False)
        )
        (ln0): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
        (ln1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
      )
      (edge2vertex): AllSetTransformerBlock(
        (multihead_att): MultiHeadAttention()
        (mlp): MLP(
          (0): Linear(in_features=64, out_features=64, bias=False)
          (1): Dropout(p=0.2, inplace=False)
        )
        (ln0): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
        (ln1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
      )
    )
  )
)

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader


class TextDataset(Dataset):
    def __init__(self, text_list, labels):
        self.text_list = text_list
        self.labels = labels

    def __len__(self):
        return len(self.text_list)

    def __getitem__(self, idx):
        text = self.text_list[idx]
        label = self.labels[idx]

        # You can perform any text preprocessing here if needed
        # For example, tokenization, numerical encoding, etc.

        return {"text": text, "label": label}


# Example usage
text_data = [
    "This is an example.",
    "Another text sample.",
    "PyTorch DataLoader with text.",
]
labels = [0, 1, 0]  # Example labels

# Create an instance of your custom dataset
custom_dataset = TextDataset(text_data, labels)

# Use DataLoader to load batches of data
batch_size = 2
data_loader = DataLoader(custom_dataset, batch_size=batch_size, shuffle=True)

# Iterate through batches
for batch in data_loader:
    texts = batch["text"]
    labels = batch["label"]

    # Perform your training/validation/test operations here
    print("Texts:", texts)
    print("Labels:", labels)

In [None]:
data_lst[0].keys()

In [None]:
from torch.utils.data import DataLoader

dataloader = DataLoader(dataset, batch_size=1, shuffle=True)
next(iter(dataloader))

In [None]:
from torch_geometric.loader import DataLoader

dataloader = DataLoader(dataset, batch_size=1, shuffle=False)

next(iter(dataloader))

In [None]:
config.data

### Loss

In [None]:
loss = hydra.utils.instantiate(config.model.loss)

### Backbone

In [None]:
hydra.utils.instantiate(config.model.backbone)

### ReadOut

In [None]:
readout = hydra.utils.instantiate(config.model.readout)

In [None]:
readout.parameters()

<generator object Module.parameters at 0x19ca5db60>

### Evaluator

In [None]:
evaluator = hydra.utils.instantiate(config.evaluator)

In [None]:
import torch

d = {
    "labels": torch.tensor([0, 1, 2, 1, 2, 2]),
    "logits": torch.tensor(
        [[1, 11, 11], [5, 11, 3], [2, 3, 4], [5, 16, 7], [8, 9, 10], [11, 12, 13]]
    ),
}


evaluator.eval(d)

  _warn_prf(average, modifier, msg_start, len(result))


{'labels': tensor([0, 1, 2, 1, 2, 2]),
 'logits': tensor([[ 1, 11, 11],
         [ 5, 11,  3],
         [ 2,  3,  4],
         [ 5, 16,  7],
         [ 8,  9, 10],
         [11, 12, 13]]),
 'metrics': {'acc': 0.8333333333333334,
  'pre_micro': 0.8333333333333334,
  'pre_macro': 0.5555555555555555,
  'rec_micro': 0.8333333333333334,
  'rec_macro': 0.6666666666666666,
  'f1_micro': 0.8333333333333334,
  'f1_macro': 0.6}}

In [None]:
d["logits"].argmax(dim=1)

tensor([1, 1, 2, 1, 2, 2])

### Dataloader

In [None]:
# Load data
from topobenchmarkx.data.load.loaders import HypergraphLoader
from topobenchmarkx.data.dataloader_fullbatch import FullBatchDataModule

data_loader = HypergraphLoader(config)
data = data_loader.load()
dataloader = FullBatchDataModule(data=data)

Loading hypergraph dataset name: cora
number of nodes:2708, feature dimension: 1433
number of hyperedges: 1072
Final num_hyperedges 1392
Final num_nodes 2708
Final num_class 7


FileNotFoundError: [Errno 2] No such file or directory: '/Users/gbg141/Documents/TopoProjectX/TopoBenchmarkX/data//data_splits/cora/train_prop=0.5/split_0.npz'

In [None]:
batch = next(iter(dataloader.train_dataloader()))

In [None]:
batch.x[batch.train_mask]

In [None]:
a = hydra.utils.instantiate(config.model)

In [None]:
a.hparams.backbone is a.backbone

In [None]:
id(a.hparams.backbone) == id(a.backbone)

In [None]:
b = a.backbone.__class__

In [None]:
b in []topomodelx.nn.hypergraph.unigcnii.UniGCNII

In [None]:
import topomodelx