# Imports

In [1]:
import sys

sys.path.append("../src")

from pipeline import GCLPipeline

import os
import os.path as osp
import logging
import json
import pandas as pd

import torch
import torch_geometric.transforms as T

from torch.optim import Adam

from tqdm import tqdm

import logging
import warnings

# Notebook-wide setup: silence all warnings and route log records
# (DEBUG and above) to the cell output with a short timestamp.
warnings.filterwarnings(action="ignore")

logging.basicConfig(
    level=logging.DEBUG,
    datefmt="%I:%M:%S",
    format="%(asctime)s %(levelname)s:%(message)s",
)
logger = logging.getLogger(__name__)


# Project directories, given relative to the notebook's working directory.
DATA_FOLDER = "../data"            # passed to GCLPipeline.init_dataset
BENCHMARK_FOLDER = "../benchmark"  # holds the benchmark strategy JSON files
OUTPUT_FOLDER = "../output"        # destination of the result CSV files

# Strategies

In [2]:
# Pick exactly ONE benchmark definition by leaving a single BENCHMARK_FILE
# assignment uncommented. The active choice matches the outputs recorded in
# this notebook (InfoGraph loss benchmark); every later cell needs STRATEGIES,
# so leaving all options commented out breaks "Restart & Run All".

# BENCHMARK_FILE = "augmentation-benchmark-graphcl-infonce.json"
# BENCHMARK_FILE = "augmentation-benchmark-graphcl-jsd.json"
# BENCHMARK_FILE = "augmentation-benchmark-graphcl-barlow.json"
# BENCHMARK_FILE = "augmentation-benchmark-graphcl-bootstrap.json"

# BENCHMARK_FILE = "loss-benchmark-dgi.json"
BENCHMARK_FILE = "loss-benchmark-infograph.json"

# BENCHMARK_FILE = "benchmark-grace-ogb.json"
# BENCHMARK_FILE = "benchmark-grace.json"

# Load through a context manager so the file handle is closed deterministically.
with open(osp.join(BENCHMARK_FOLDER, BENCHMARK_FILE), encoding="utf-8") as benchmark_fp:
    STRATEGIES = json.load(benchmark_fp)


In [3]:
# Global run configuration taken from the selected benchmark file.
CONFIG = STRATEGIES["config"]
TRY_GPU = CONFIG["try_gpu"]

# Use the GPU only when one is available AND the benchmark asks for it.
DEVICE = torch.device("cuda" if torch.cuda.is_available() and TRY_GPU else "cpu")

# The CUDA allocator calls are only meaningful when a CUDA device exists;
# guard them so the notebook also runs on CPU-only machines.
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    print(torch.cuda.memory_summary())

|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 0            |        cudaMalloc retries: 0         |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      |      0 B   |      0 B   |      0 B   |      0 B   |
|       from large pool |      0 B   |      0 B   |      0 B   |      0 B   |
|       from small pool |      0 B   |      0 B   |      0 B   |      0 B   |
|---------------------------------------------------------------------------|
| Active memory         |      0 B   |      0 B   |      0 B   |      0 B   |
|       from large pool |      0 B   |      0 B   |      0 B   |      0 B   |
|       from small pool |      0 B   |      0 B   |      0 B   |      0 B   |
|---------------------------------------------------------------

# Run

In [25]:
def _augmentation_label(augmentation):
    """Return a printable name for one augmentation entry of a strategy.

    Args:
        augmentation: ``None``, a dict with a ``"name"`` key, or an iterable
            of such dicts.

    Returns:
        ``None`` when no augmentation is configured, the single name for a
        dict, or the names joined with ``", "`` for a collection.
    """
    if augmentation is None:
        return None
    if isinstance(augmentation, dict):
        return augmentation["name"]
    return ", ".join(a["name"] for a in augmentation)


# One result record (dict) per benchmarked strategy.
outputs = []

for strategy in STRATEGIES["strategies"]:

    # Log the description string itself (not a one-element set wrapping it).
    logger.info(strategy["_descr"])

    # Initialization
    pipeline = GCLPipeline.from_strategy(strategy, DEVICE)

    # Data
    dataset, num_features = GCLPipeline.init_dataset(
        strategy["dataset"], DATA_FOLDER, T.NormalizeFeatures(), strategy["batch_size"]
    )

    # Encoder
    # NOTE: this writes "input_dim" into the strategy's own encoder_params dict.
    encoder_params = strategy["encoder_params"]
    encoder_params["input_dim"] = num_features
    encoder_model = pipeline.init_encoder(encoder_params, DEVICE)

    # Training
    train_params = strategy["train_params"]
    lr = train_params["learning_rate"]
    epochs = train_params["epochs"]

    optimizer = Adam(encoder_model.parameters(), lr=lr)
    losses = []  # per-epoch training loss of the current strategy

    # A single, manually advanced progress bar; wrapping the range in a second
    # tqdm would render two competing bars in the cell output.
    with tqdm(total=epochs, desc="(T)") as pbar:
        for _ in range(epochs):
            loss = pipeline.train_epoch(encoder_model, dataset, optimizer, DEVICE)
            losses.append(loss)
            pbar.set_postfix({"loss": loss})

            pbar.update()

    test_result = pipeline.test(encoder_model, dataset, DEVICE)
    print(f'(E): Best test F1Mi={test_result["micro_f1"]:.4f}, F1Ma={test_result["macro_f1"]:.4f}')

    outputs.append(
        {
            "Data": strategy["dataset"],
            "Method": strategy["method"],
            "Archi": strategy["architecture"],
            "Mode": strategy["mode"],
            "Obj": strategy["objective"],
            "Neg": strategy["negative"],
            "Aug1": _augmentation_label(strategy["augmentation1"]),
            "Aug1Strat": strategy["augmentation1_strat"],
            # Each augmentation is checked against its own entry, so a strategy
            # with only one augmentation set is reported correctly.
            "Aug2": _augmentation_label(strategy["augmentation2"]),
            "Aug2Strat": strategy["augmentation2_strat"],
            "MicroF1": test_result["micro_f1"].round(2),
            "MacroF1": test_result["macro_f1"].round(2),
        }
    )

08:30:58 INFO:{'########## INFOGRAPH - G2L - InfoNCE ##########'}
08:30:58 INFO:CALL GCLPipeline.from_strategy
08:30:58 INFO:	 Method: InfoGraph
08:30:58 INFO:	 Augmentation strategy 1: None
08:30:58 INFO:	 Augmentation strategy 2: None
08:30:58 INFO:Augmentations: [None, None]
08:30:58 INFO:CALL GCLPipeline.init_dataset
08:30:58 DEBUG:open file: c:/Users/alexa/Documents/Visual Studio Code/graph-contrastive-learning/data/PTC_MR/processed/data.pt
08:30:58 INFO:	 Number of features: 18
08:30:58 INFO:CALL GCLPipeline.init_encoder
08:30:58 INFO:	 Input dimension: 18
08:30:58 INFO:	 Hidden dimension: 32
08:30:58 INFO:	 Number of layers: 2
08:30:58 INFO:	 Projection dimension: None
08:30:58 INFO:	 Activation: ReLU
(T):   0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 100/100 [00:06<00:00, 15.40it/s]t/s, loss=14.9]
(T): 100%|██████████| 100/100 [00:06<00:00, 15.40it/s, loss=14.9]
08:31:05 INFO:{'########## INFOGRAPH - G2L - JSD ##########'}
08:31:05 INFO:CALL GCLPipeline.from_strategy
08:31:05 INFO:	 Method: InfoGraph
08:31:05 INFO:	 Augmentation strategy 1: None
08:31:05 INFO:	 Augmentation strategy 2: None
08:31:05 INFO:Augmentations: [None, None]
08:31:05 INFO:CALL GCLPipeline.init_dataset
08:31:05 DEBUG:open file: c:/Users/alexa/Documents/Visual Studio Code/graph-contrastive-learning/data/PTC_MR/processed/data.pt
08:31:05 INFO:	 Number of features: 18
08:31:05 INFO:CALL GCLPipeline.init_encoder
08:31:05 INFO:	 Input dimension: 18
08:31:05 INFO:	 Hidden dimension: 32
08:31:05 INFO:	 Number of layers: 2
08:31:05 INFO:	 Projection dimension: None
08:31:05 INFO:	 Activation: ReLU


(E): Best test F1Mi=0.4000, F1Ma=0.3921


100%|██████████| 100/100 [00:06<00:00, 15.27it/s]it/s, loss=-2.03] 
(T): 100%|██████████| 100/100 [00:06<00:00, 15.27it/s, loss=-2.03]
08:31:12 INFO:{'########## INFOGRAPH - G2L - BootstrapLatent ##########'}
08:31:12 INFO:CALL GCLPipeline.from_strategy
08:31:12 INFO:	 Method: InfoGraph
08:31:12 INFO:	 Augmentation strategy 1: None
08:31:12 INFO:	 Augmentation strategy 2: None
08:31:12 INFO:Augmentations: [None, None]
08:31:12 INFO:CALL GCLPipeline.init_dataset
08:31:12 DEBUG:open file: c:/Users/alexa/Documents/Visual Studio Code/graph-contrastive-learning/data/PTC_MR/processed/data.pt
08:31:12 INFO:	 Number of features: 18
08:31:12 INFO:CALL GCLPipeline.init_encoder
08:31:12 INFO:	 Input dimension: 18
08:31:12 INFO:	 Hidden dimension: 32
08:31:12 INFO:	 Number of layers: 2
08:31:12 INFO:	 Projection dimension: None
08:31:12 INFO:	 Activation: ReLU


(E): Best test F1Mi=0.6286, F1Ma=0.6081


100%|██████████| 100/100 [00:06<00:00, 15.89it/s]t/s, loss=-42.3]
(T): 100%|██████████| 100/100 [00:06<00:00, 15.89it/s, loss=-42.3]


(E): Best test F1Mi=0.3714, F1Ma=0.2708


# Result DataFrame

In [26]:
# Collect the per-strategy result records into a table: one row per strategy,
# one column per key of the recorded dicts.
output_df = pd.DataFrame(data=outputs)

# Bare last expression so the notebook renders the table.
output_df

Unnamed: 0,Data,Method,Archi,Mode,Obj,Neg,Aug1,Aug1Strat,Aug2,Aug2Strat,MicroF1,MacroF1
0,PTC_MR,InfoGraph,SingleBranch,G2L,InfoNCE,,,,,,0.4,0.39
1,PTC_MR,InfoGraph,SingleBranch,G2L,JSD,,,,,,0.63,0.61
2,PTC_MR,InfoGraph,SingleBranch,G2L,BootstrapLatent,,,,,,0.37,0.27


# Saving

In [27]:
# Pick exactly ONE result file by leaving a single output_file assignment
# uncommented. The active choice matches the path recorded in this cell's
# output; leaving every option commented out makes the join below fail with a
# NameError on a fresh kernel.

# output_file = "output.csv"

# output_file = "augmentation_benchmark_graphcl_infonce.csv"
# output_file = "augmentation_benchmark_graphcl_jsd.csv"
# output_file = "augmentation_benchmark_graphcl_barlow.csv"
# output_file = "augmentation_benchmark_graphcl_bootstrap.csv"

# output_file = "loss_benchmark_dgi.csv"
output_file = "loss_benchmark_infograph.csv"

# Full path of the CSV file the results are appended to.
output_path = osp.join(OUTPUT_FOLDER, output_file)
output_path

'../output\\loss_benchmark_infograph.csv'

In [28]:
# Deleting the output file (disabled by default).
# WARNING: make a copy of the file BEFORE uncommenting the line below.
# os.remove(output_path)

In [29]:
# Append this run's rows to the result file.
# NOTE: re-running this cell appends the same rows again.

# Make sure the destination folder exists; to_csv does not create it.
output_dir = osp.dirname(output_path)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# Write the header when the file is new OR exists but is still empty, so the
# CSV never ends up with data rows and no column names.
write_header = not osp.exists(output_path) or osp.getsize(output_path) == 0
output_df.to_csv(output_path, mode='a', header=write_header, index=False)