In [10]:
import sys

sys.path.append("./src")

from pipeline import GCLPipeline

import os
import os.path as osp
import json
import pandas as pd

import torch
import torch_geometric.transforms as T

from torch.optim import Adam

from tqdm import tqdm

import warnings

# Silence FutureWarning messages so they do not clutter the cell outputs.
warnings.filterwarnings(action="ignore", category=FutureWarning)

# Input locations: dataset root and benchmark definition files.
DATA_PATH = "./data"
BENCHMARK_PATH = "./benchmark"

# Output location: results are collected in a single CSV file.
OUTPUT_FOLDER, OUTPUT_FILE = "./output", "output.csv"
OUTPUT_PATH = osp.join(OUTPUT_FOLDER, OUTPUT_FILE)

In [18]:
# Benchmark definition to run. Alternatives found in BENCHMARK_PATH:
#   "benchmark.json", "infograph_benchmark.json"
BENCHMARK_FILE = "dgi_benchmark.json"

# Use a context manager so the file handle is closed as soon as the JSON
# has been parsed (a bare `json.load(open(...))` leaks the handle).
with open(osp.join(BENCHMARK_PATH, BENCHMARK_FILE)) as benchmark_file:
    STRATEGIES = json.load(benchmark_file)

In [19]:
# Global run configuration shared by every strategy in the benchmark file.
CONFIG = STRATEGIES["config"]
TRY_GPU = CONFIG["try_gpu"]

# Use CUDA only when it is both available and requested by the config;
# otherwise fall back to the CPU.
if torch.cuda.is_available() and TRY_GPU:
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

In [20]:
# Run every strategy of the benchmark: build the pipeline, dataset and
# encoder, train the encoder, evaluate it, and collect one result row per
# strategy in `outputs`.
outputs = []

for strategy in STRATEGIES["strategies"]:

    print("\n")

    # Initialization
    pipeline = GCLPipeline.from_strategy(strategy, DEVICE)

    # Data
    dataset, num_features = GCLPipeline.init_dataset(
        strategy["dataset"], DATA_PATH, T.NormalizeFeatures(), strategy["batch_size"]
    )

    # Encoder: the input dimension comes from the dataset, not from the
    # benchmark file, so it is filled in here before building the model.
    encoder_params = strategy["encoder_params"]
    encoder_params["input_dim"] = num_features
    encoder_model = pipeline.init_encoder(encoder_params, DEVICE)

    # Training
    train_params = strategy["train_params"]
    lr = train_params["learning_rate"]
    epochs = train_params["epochs"]

    optimizer = Adam(encoder_model.parameters(), lr=lr)

    # A single, manually updated progress bar. Wrapping the range in a second
    # tqdm() would print a duplicate bar alongside this one.
    with tqdm(total=epochs, desc='(T)') as pbar:
        for _ in range(epochs):
            loss = pipeline.train_epoch(
                encoder_model, dataset, optimizer, DEVICE
            )
            pbar.set_postfix({'loss': loss})
            pbar.update()

    # Evaluation
    test_result = pipeline.test(encoder_model, dataset, DEVICE)
    print(f'(E): Best test F1Mi={test_result["micro_f1"]:.4f}, F1Ma={test_result["macro_f1"]:.4f}')

    outputs.append({
        "Data": strategy["dataset"],
        "Method": strategy["method"],
        "Archi": strategy["architecture"],
        "Mode": strategy["mode"],
        "Obj": strategy["objective"],
        "Neg": strategy["negative"],

        "Aug1": strategy["augmentation1"],
        "Aug1Strat": strategy["augmentation1_strat"],
        # The second-augmentation columns previously repeated the "negative"
        # value. NOTE(review): the key names below mirror the augmentation1
        # keys and are assumed, not verified against the benchmark schema;
        # .get() yields None when a strategy does not define them.
        "Aug2": strategy.get("augmentation2"),
        "Aug2Strat": strategy.get("augmentation2_strat"),
        "MicroF1": test_result["micro_f1"].round(2),
        "MacroF1": test_result["macro_f1"].round(2)
    })





##### TransductiveDGI #####
Dataset initialization
	 # features: 1433
Encoder initialization
	 input dim: 1433
	 hidden dim: 512
	 # layers: 2
	 projection dim: None
	 activation: None


100%|██████████| 300/300 [00:12<00:00, 24.77it/s]it/s, loss=-1.34]  
(T): 100%|██████████| 300/300 [00:12<00:00, 24.77it/s, loss=-1.34]
(LR): 100%|██████████| 5000/5000 [00:16<00:00, best test F1Mi=0.831, F1Ma=0.775]

(E): Best test F1Mi=0.8309, F1Ma=0.7747





In [21]:
# Collect the per-strategy result rows into a table (one row per strategy)
# and show it as the cell's output.
output_df = pd.DataFrame(data=outputs)
output_df

Unnamed: 0,Data,Method,Archi,Mode,Obj,Neg,Aug1,Aug1Strat,Aug2,Aug2Strat,MicroF1,MacroF1
0,Cora,TransductiveDGI,SingleBranch,G2L,JSD,,,Compose,,,0.83,0.77


In [16]:
# Deleting output file
# os.remove(OUTPUT_PATH)

In [22]:
# Adding rows
# Create the output folder on first use; to_csv does not create missing
# parent directories and would fail otherwise.
os.makedirs(OUTPUT_FOLDER, exist_ok=True)

# Append to the existing results file; the header row is only written when
# the file does not exist yet.
output_df.to_csv(OUTPUT_PATH, mode='a', header=not osp.exists(OUTPUT_PATH), index=False)