In [1]:
# Enable IPython's autoreload extension; mode 2 re-imports all modules before
# each cell runs, so edits to the project code are picked up without a kernel
# restart.
%load_ext autoreload
%autoreload 2

In [2]:
import os

# Walk up the directory tree until the working directory path no longer
# contains the substring 'notebooks', so that the relative paths and the
# `src.` imports used below resolve.
# NOTE(review): this is a substring test on the full path, so any ancestor
# directory whose name contains 'notebooks' is also climbed out of.
while True:
    if 'notebooks' not in os.getcwd():
        break
    os.chdir('..')

from itertools import product
import pandas as pd
import torch
import torch.nn.functional as F
from torch_sparse.tensor import SparseTensor
from ogb.nodeproppred import PygNodePropPredDataset, Evaluator
from sklearn.metrics import roc_auc_score
import logging
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.offline as pyo
import numpy as np

from src.train.gcn_node_classifier import GCNNodeClassifierTrainer
from src.torch_geo_models import GCN
from src.data.node_classifier.arxiv import load_dataset_pyg,\
    data_to_sparse_symmetric_pyg, get_edge_weights_pyg

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Paths are relative to the working directory set at the top of the notebook.

# CSV of scored edges passed to get_edge_weights_pyg to build the weighted
# adjacency.
edge_weights_path = (
    'data/structural_omega_graph_sage_cossim/'
    '04-1_gamma_scored_edges.csv'
)

# File that logging.basicConfig appends this experiment's log records to.
log_file = (
    'logs/node_classifier/'
    '09-1-run_experiment_structural_omega_graphsage_cossim_norm_gcn_pyg'
)

In [4]:
# The file handler created by basicConfig does not create missing parent
# directories, so make sure the log folder exists first; otherwise a fresh
# checkout fails with FileNotFoundError.
log_dir = os.path.dirname(log_file)
if log_dir:
    os.makedirs(log_dir, exist_ok=True)

# Append timestamped records of level INFO and above to the experiment log.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s : %(message)s',
    level=logging.INFO,
    datefmt='%Y-%m-%d %H:%M:%S',
    filename=log_file,
    filemode='a',
)

In [5]:
# Sanity check: display whether PyTorch can see a CUDA device in this kernel.
torch.cuda.is_available()

True

In [6]:
# Use the first GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda', index=0)

## Data Loading

In [7]:
# Load the dataset and convert its first graph with the project helper.
# NOTE(review): judging by the helper's name, the result carries a symmetric
# sparse adjacency in `adj_t` -- confirm in src.data.node_classifier.arxiv.
dataset = load_dataset_pyg()
data = data_to_sparse_symmetric_pyg(dataset[0])

# Plain adjacency, moved to the selected device.
adj_t = data.adj_t.to(device)
split_idx = dataset.get_idx_split()

# Second adjacency built from the scored-edges CSV; the helper receives the
# node count and the target device.
weighted_adj_t = get_edge_weights_pyg(
    edge_weights_path,
    data.adj_t.size(0),
    device)

# Move tensors with `.to(device)` instead of `.cuda()`: `device` falls back to
# the CPU when CUDA is unavailable, and an unconditional `.cuda()` would crash
# on CPU-only hosts.
features = data.x.to(device)
labels = data.y.to(device)
train_mask = split_idx['train'].to(device)
val_mask = split_idx['valid'].to(device)
test_mask = split_idx['test'].to(device)
evaluator = Evaluator(name='ogbn-arxiv')

In [8]:
# NOTE(review): these assignments repeat the ones at the end of the
# data-loading cell above; this cell looks redundant and could be removed.
# `.to(device)` replaces the unconditional `.cuda()` so the cell also works
# when `device` has fallen back to the CPU.
features = data.x.to(device)
labels = data.y.to(device)
train_mask = split_idx['train'].to(device)
val_mask = split_idx['valid'].to(device)
test_mask = split_idx['test'].to(device)

## Run Experiment

In [9]:
# Experiment grid: 5 repeated runs x 3 layer counts (1-3) x 2 adjacency
# variants (plain and weighted) = 30 configurations.
N_LAYERS = [*range(1, 4)]
RUNS = [*range(1, 6)]
EDGE_WEIGHTS = [adj_t, weighted_adj_t]

# Cartesian product, with the run varying slowest and the adjacency fastest.
search_space = [
    (run, n_layers, edges)
    for run in RUNS
    for n_layers in N_LAYERS
    for edges in EDGE_WEIGHTS
]
print(len(search_space))
search_space[0]

30


(1,
 1,
 SparseTensor(row=tensor([     0,      0,      0,  ..., 169341, 169342, 169342], device='cuda:0'),
              col=tensor([   411,    640,   1162,  ..., 163274,  27824, 158981], device='cuda:0'),
              size=(169343, 169343), nnz=2315598, density=0.01%))

In [10]:
# Train one GCN node classifier per (run, depth, adjacency) configuration.
# The hidden width is fixed at twice the input feature width.
# NOTE(review): `weights_source` is the same label for both the plain and the
# weighted adjacency -- confirm that the trainer can tell the two apart.
n_input_features = features.shape[1]

for run, n_layers, edges in search_space:
    trainer = GCNNodeClassifierTrainer(
        adj_t=edges,
        device=device,
        evaluator=evaluator,
        weights_source='structural_omega_graph_sage_cossim',
        n_layers=n_layers,
        input_dim=n_input_features,
        hidden_channels=n_input_features * 2,
        output_dim=dataset.num_classes,
        run=run,
    )
    trainer.train(features, labels, train_mask, val_mask, test_mask)

KeyboardInterrupt: 