In [1]:
import sys

sys.path.append("./src")

from pipeline import GCLPipeline

import os
import os.path as osp
import logging
import json
import pandas as pd

import torch
import torch_geometric.transforms as T

from torch.optim import Adam

from tqdm import tqdm

import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

import logging
# from importlib import reload
# reload(logging)
# Logging configuration: "<time> <LEVEL>:<message>" records, 12-hour clock, DEBUG verbosity
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s %(levelname)s:%(message)s',
    datefmt='%I:%M:%S',
)
# Logger used by this notebook, named after the running module
logger = logging.getLogger(__name__)


# Input locations, relative to the current working directory
DATA_PATH, BENCHMARK_PATH = "./data", "./benchmark"

# Results file: OUTPUT_PATH is OUTPUT_FILE placed inside OUTPUT_FOLDER
OUTPUT_FOLDER, OUTPUT_FILE = "./output", "output.csv"
OUTPUT_PATH = osp.join(OUTPUT_FOLDER, OUTPUT_FILE)

In [2]:
# Benchmark definition to run. Other file names previously used in this cell:
#   "benchmark.json", "benchmark-infograph.json", "benchmark-dgi.json"
BENCHMARK_FILE = "benchmark-graphcl.json"

# Read through a context manager so the file handle is closed as soon as the
# JSON has been parsed (a bare open() inside json.load() leaves it open).
with open(osp.join(BENCHMARK_PATH, BENCHMARK_FILE)) as benchmark_fh:
    STRATEGIES = json.load(benchmark_fh)
# STRATEGIES["strategies"]

In [3]:
# Run-wide settings live under the "config" key of the loaded benchmark
CONFIG = STRATEGIES["config"]
TRY_GPU = CONFIG["try_gpu"]

# CUDA is picked only when it is available AND the config asks for it
if torch.cuda.is_available() and TRY_GPU:
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

In [4]:
import GCL.augmentors as A

# Quick check: display the class name of a freshly constructed Identity augmentor
# A.RandomChoice([A.Identity, A.NodeDropping])
type(A.Identity()).__name__

'Identity'

In [13]:
def _augmentation_names(augmentation):
    """Return the display string for one augmentation entry of a strategy.

    Args:
        augmentation: Either a single dict carrying a "name" key, or an
            iterable of such dicts.

    Returns:
        The single name, or all names joined with ", ".
    """
    if isinstance(augmentation, dict):
        return augmentation["name"]
    return ', '.join(a["name"] for a in augmentation)


# One result record (dict) is appended per benchmarked strategy
outputs = []

for strategy in STRATEGIES["strategies"]:

    print("\n")

    # Initialization
    pipeline = GCLPipeline.from_strategy(strategy, DEVICE)

    # Data
    dataset, num_features = GCLPipeline.init_dataset(
        strategy["dataset"], DATA_PATH, T.NormalizeFeatures(), strategy["batch_size"]
    )

    # Encoder
    # NOTE: "input_dim" is written into the strategy's own encoder_params dict
    # (same object as inside STRATEGIES), not into a copy.
    encoder_params = strategy["encoder_params"]
    encoder_params["input_dim"] = num_features
    encoder_model = pipeline.init_encoder(encoder_params, DEVICE)

    # Training
    train_params = strategy["train_params"]
    lr = train_params["learning_rate"]
    epochs = train_params["epochs"]

    optimizer = Adam(encoder_model.parameters(), lr=lr)
    losses = []  # per-epoch training loss of the current strategy

    # A single progress bar both drives the loop and shows the latest loss.
    # Wrapping the range in a second tqdm (as before) rendered a duplicate bar.
    with tqdm(range(1, epochs + 1), desc="(T)") as pbar:
        for epoch in pbar:
            loss = pipeline.train_epoch(encoder_model, dataset, optimizer, DEVICE)
            losses.append(loss)
            pbar.set_postfix({"loss": loss})

    test_result = pipeline.test(encoder_model, dataset, DEVICE)
    print(f'(E): Best test F1Mi={test_result["micro_f1"]:.4f}, F1Ma={test_result["macro_f1"]:.4f}')

    outputs.append(
        {
            "Data": strategy["dataset"],
            "Method": strategy["method"],
            "Archi": strategy["architecture"],
            "Mode": strategy["mode"],
            "Obj": strategy["objective"],
            "Neg": strategy["negative"],
            "Aug1": _augmentation_names(strategy["augmentation1"]),
            "Aug1Strat": strategy["augmentation1_strat"],
            "Aug2": _augmentation_names(strategy["augmentation2"]),
            "Aug2Strat": strategy["augmentation2_strat"],
            "MicroF1": test_result["micro_f1"].round(2),
            "MacroF1": test_result["macro_f1"].round(2),
        }
    )

07:25:40 INFO:##### GraphCL #####
07:25:40 INFO:	 Augmentation strategy 1: None
07:25:40 INFO:	 Augmentation strategy 2: Random
07:25:40 INFO:CALL GCLPipeline.init_augmentations
07:25:40 INFO:	 Strategy: None
07:25:40 INFO:CALL GCLPipeline.init_augmentation
07:25:40 INFO:	 Name: Identity
07:25:40 INFO:CALL GCLPipeline.init_augmentations
07:25:40 INFO:	 Strategy: Random
07:25:40 INFO:CALL GCLPipeline.init_augmentation
07:25:40 INFO:	 Name: RWSampling
07:25:40 INFO:CALL GCLPipeline.init_augmentation
07:25:40 INFO:	 Name: NodeDropping
07:25:40 INFO:CALL GCLPipeline.init_augmentation
07:25:40 INFO:	 Name: FeatureMasking
07:25:40 INFO:CALL GCLPipeline.init_augmentation
07:25:40 INFO:	 Name: EdgeRemoving
07:25:40 INFO:Augmentations: [<GCL.augmentors.identity.Identity object at 0x000002231F68CD50>, <GCL.augmentors.augmentor.RandomChoice object at 0x000002231F8F8990>]
07:25:40 INFO:CALL GCLPipeline.init_dataset
07:25:40 DEBUG:open file: c:/Users/alexa/Documents/Visual Studio Code/graph-contras





100%|██████████| 100/100 [00:07<00:00, 12.59it/s]t/s, loss=3.43]
(T): 100%|██████████| 100/100 [00:07<00:00, 12.59it/s, loss=3.43]


(E): Best test F1Mi=0.5143, F1Ma=0.4875


In [14]:
# Tabulate the collected records: one row per entry of `outputs`
output_df = pd.DataFrame(data=outputs)
output_df

Unnamed: 0,Data,Method,Archi,Mode,Obj,Neg,Aug1,Aug1Strat,Aug2,Aug2Strat,MicroF1,MacroF1
0,PTC_MR,GraphCL,DualBranch,G2G,InfoNCE,,Identity,,"RWSampling, NodeDropping, FeatureMasking, Edge...",Random,0.51,0.49


In [16]:
# Deleting output file
# os.remove(OUTPUT_PATH)

In [15]:
# Adding rows
# Create the output folder on first use; to_csv does not create missing directories.
os.makedirs(OUTPUT_FOLDER, exist_ok=True)

# Write the header only when starting a new file: either the file does not
# exist yet, or it exists but is empty (which would otherwise yield a
# header-less CSV).
write_header = not osp.exists(OUTPUT_PATH) or osp.getsize(OUTPUT_PATH) == 0
output_df.to_csv(OUTPUT_PATH, mode='a', header=write_header, index=False)