In [1]:
import sys

sys.path.append("./src")

from pipeline import GCLPipeline

import os
import os.path as osp
import logging
import json
import pandas as pd

import torch
import torch_geometric.transforms as T

from torch.optim import Adam

from tqdm import tqdm

import warnings

warnings.filterwarnings("ignore", category=FutureWarning)

import logging

# from importlib import reload
# reload(logging)
logger = logging.getLogger(__name__)
logging.basicConfig(
    format="%(asctime)s %(levelname)s:%(message)s", level=logging.DEBUG, datefmt="%I:%M:%S"
)


DATA_PATH = "./data"
BENCHMARK_PATH = "./benchmark"

OUTPUT_FOLDER = "./output"

In [2]:
# STRATEGIES = json.load(open(osp.join(BENCHMARK_PATH, "benchmark.json")))
# STRATEGIES = json.load(open(osp.join(BENCHMARK_PATH, "benchmark-infograph.json")))
# STRATEGIES = json.load(open(osp.join(BENCHMARK_PATH, "benchmark-dgi.json")))

# STRATEGIES = json.load(open(osp.join(BENCHMARK_PATH, "augmentation-benchmark-graphcl-infonce.json")))
# STRATEGIES = json.load(open(osp.join(BENCHMARK_PATH, "augmentation-benchmark-graphcl-jsd.json")))
# STRATEGIES = json.load(open(osp.join(BENCHMARK_PATH, "augmentation-benchmark-graphcl-triplet.json"))) # NOT WORKING

In [3]:
CONFIG = STRATEGIES["config"]
TRY_GPU = CONFIG["try_gpu"]
DEVICE = torch.device("cuda" if torch.cuda.is_available() and TRY_GPU else "cpu")

In [4]:
outputs = []

for strategy in STRATEGIES["strategies"]:

    logger.info({strategy["_descr"]})

    # Initialization
    pipeline = GCLPipeline.from_strategy(strategy, DEVICE)

    # Data
    dataset, num_features = GCLPipeline.init_dataset(
        strategy["dataset"], DATA_PATH, T.NormalizeFeatures(), strategy["batch_size"]
    )

    # Encoder
    encoder_params = strategy["encoder_params"]
    encoder_params["input_dim"] = num_features
    encoder_model = pipeline.init_encoder(encoder_params, DEVICE)

    # Training
    train_params = strategy["train_params"]
    lr = train_params["learning_rate"]
    epochs = train_params["epochs"]

    optimizer = Adam(encoder_model.parameters(), lr=lr)
    losses = []

    with tqdm(total=epochs, desc="(T)") as pbar:
        for epoch in tqdm(range(1, epochs + 1)):
            loss = pipeline.train_epoch(encoder_model, dataset, optimizer, DEVICE)
            pbar.set_postfix({"loss": loss})

            pbar.update()

    test_result = pipeline.test(encoder_model, dataset, DEVICE)
    print(f'(E): Best test F1Mi={test_result["micro_f1"]:.4f}, F1Ma={test_result["macro_f1"]:.4f}')

    outputs.append(
        {
            "Data": strategy["dataset"],
            "Method": strategy["method"],
            "Archi": strategy["architecture"],
            "Mode": strategy["mode"],
            "Obj": strategy["objective"],
            "Neg": strategy["negative"],
            "Aug1": (
                strategy["augmentation1"]["name"]
                if isinstance(strategy["augmentation1"], dict)
                else ', '.join([a["name"] for a in strategy["augmentation1"]])
            ),
            "Aug1Strat": strategy["augmentation1_strat"],
            "Aug2": (
                strategy["augmentation2"]["name"]
                if isinstance(strategy["augmentation2"], dict)
                else ', '.join([a["name"] for a in strategy["augmentation2"]])
            ),
            "Aug2Strat": strategy["augmentation2_strat"],
            "MicroF1": test_result["micro_f1"].round(2),
            "MacroF1": test_result["macro_f1"].round(2),
        }
    )

05:29:22 INFO:{'########## GRAPHCL - G2G - Triplet - Identity/Identity ##########'}
05:29:22 INFO:CALL GCLPipeline.from_strategy
05:29:22 INFO:	 Method: GraphCL
05:29:22 INFO:	 Augmentation strategy 1: None
05:29:22 INFO:	 Augmentation strategy 2: None
05:29:22 INFO:CALL GCLPipeline.init_augmentations
05:29:22 INFO:	 Strategy: None
05:29:22 INFO:CALL GCLPipeline.init_augmentation
05:29:22 INFO:	 Name: Identity
05:29:22 INFO:CALL GCLPipeline.init_augmentations
05:29:22 INFO:	 Strategy: None
05:29:22 INFO:CALL GCLPipeline.init_augmentation
05:29:22 INFO:	 Name: Identity
05:29:22 INFO:Augmentations: [<GCL.augmentors.identity.Identity object at 0x000001DFE29F6510>, <GCL.augmentors.identity.Identity object at 0x000001DFE2321DD0>]
05:29:22 INFO:CALL GCLPipeline.init_dataset
05:29:22 DEBUG:open file: c:/Users/alexa/Documents/Visual Studio Code/graph-contrastive-learning/data/PTC_MR/processed/data.pt
05:29:22 INFO:	 Number of features: 18
05:29:22 INFO:CALL GCLPipeline.init_encoder
05:29:22 IN

RuntimeError: The size of tensor a (16384) must match the size of tensor b (128) at non-singleton dimension 0

# Result DataFrame

In [5]:
output_df = pd.DataFrame(outputs)
output_df

Unnamed: 0,Data,Method,Archi,Mode,Obj,Neg,Aug1,Aug1Strat,Aug2,Aug2Strat,MicroF1,MacroF1
0,PTC_MR,GraphCL,DualBranch,G2G,JSD,,Identity,,Identity,,0.63,0.62
1,PTC_MR,GraphCL,DualBranch,G2G,JSD,,Identity,,FeatureMasking,,0.51,0.47
2,PTC_MR,GraphCL,DualBranch,G2G,JSD,,Identity,,EdgeRemoving,,0.57,0.57
3,PTC_MR,GraphCL,DualBranch,G2G,JSD,,Identity,,RWSampling,,0.66,0.63
4,PTC_MR,GraphCL,DualBranch,G2G,JSD,,Identity,,NodeDropping,,0.54,0.53
5,PTC_MR,GraphCL,DualBranch,G2G,JSD,,FeatureMasking,,Identity,,0.46,0.46
6,PTC_MR,GraphCL,DualBranch,G2G,JSD,,FeatureMasking,,FeatureMasking,,0.6,0.58
7,PTC_MR,GraphCL,DualBranch,G2G,JSD,,FeatureMasking,,EdgeRemoving,,0.51,0.47
8,PTC_MR,GraphCL,DualBranch,G2G,JSD,,FeatureMasking,,RWSampling,,0.54,0.49
9,PTC_MR,GraphCL,DualBranch,G2G,JSD,,FeatureMasking,,NodeDropping,,0.49,0.4


# Saving

In [6]:
# output_file = "output.csv"
# output_file = "augmentation_benchmark_graphcl_infonce.csv"
output_file = "augmentation_benchmark_graphcl_jsd.csv"
output_path = osp.join(OUTPUT_FOLDER, output_file)
output_path

'./output\\augmentation_benchmark_graphcl_jsd.csv'

In [None]:
# Deleting output file
# DO NOT REMOVE BEFORE MAKING A COPY
# os.remove(OUTPUT_PATH)

In [7]:
# Adding rows
output_df.to_csv(output_path, mode='a', header=not osp.exists(output_path), index=False)