In [31]:
import torch
import numpy as np
from fairseq import checkpoint_utils, utils, options, tasks
from fairseq.logging import progress_bar
from fairseq.dataclass.utils import convert_namespace_to_omegaconf
import ogb
import sys
import os
from pathlib import Path
from sklearn.metrics import roc_auc_score, mean_squared_error, mean_absolute_percentage_error, mean_absolute_error

from os import path
# sys.path.append( path.dirname( path.dirname( path.abspath(__file__) ) ) )
import logging
from data_class import geo_Omsk, single_geo_Omsk, GraphormerPYGDataset_predict
import os.path as osp
from torch_geometric.data import Dataset
from functools import lru_cache
import torch_geometric.datasets
from ogb.graphproppred import PygGraphPropPredDataset
from ogb.lsc.pcqm4m_pyg import PygPCQM4MDataset
import pyximport
from torch_geometric.data import InMemoryDataset, download_url
import pandas as pd
from sklearn import preprocessing
pyximport.install(setup_args={'include_dirs': np.get_include()})
from torch_geometric.data import Data
import time
from torch_geometric.utils import add_self_loops, negative_sampling
import copy
from fairseq.data import (
    NestedDictionaryDataset,
    NumSamplesDataset,
)
sys.path.insert(2, '/home/jovyan/graphormer_v2/graphormer')
from pretrain import load_pretrained_model
from data.pyg_datasets.pyg_dataset import GraphormerPYGDataset
from data.dataset import (
    BatchedDataDataset,
    TargetDataset,
    GraphormerDataset)

def eval(args, use_pretrained, checkpoint_path=None, logger=None, data_name = None):
    """Run a trained model checkpoint over a dataset and return its predictions.

    NOTE: this function shadows the builtin ``eval``; the name is kept because
    callers already use it.

    Args:
        args: argparse namespace (as produced by ``options.parse_args_and_arch``);
            it is converted to an omegaconf config to set up the task and model.
        use_pretrained: Unused; kept for interface compatibility.
        checkpoint_path: Path of the checkpoint file. Its ``"model"`` entry is
            loaded into the model as the state dict. Required.
        logger: Unused; kept for interface compatibility.
        data_name: Dataset identifier. Only ``'omsk'`` is supported.

    Returns:
        A 1-D CPU tensor with one predicted value per output element, in the
        order the batches were iterated (no shuffling).

    Raises:
        ValueError: If ``data_name`` is not ``'omsk'`` or ``checkpoint_path`` is
            ``None``.
    """
    # Validate up front: previously an unsupported data_name only surfaced as a
    # NameError on ``GPYG`` and a missing checkpoint only failed inside
    # torch.load, after the dataset, task and model had already been built.
    if data_name != 'omsk':
        raise ValueError(
            f"Unsupported data_name {data_name!r}; only 'omsk' is supported."
        )
    if checkpoint_path is None:
        raise ValueError("checkpoint_path is required to load the model weights.")

    cfg = convert_namespace_to_omegaconf(args)
    np.random.seed(cfg.common.seed)
    utils.set_torch_seed(cfg.common.seed)
    # Seed handed to the dataset wrapper; deliberately left independent of
    # cfg.common.seed to keep the original behaviour.
    seed = 71

    ### data loading
    root = osp.join('dataset', data_name)
    raw_dir = osp.join(root, 'processed', 'data_omsk_1')
    data = single_geo_Omsk(root = raw_dir)
    GPYG = GraphormerPYGDataset_predict(data,seed,None,data,'omsk')
    batched_data = BatchedDataDataset(GPYG)
    # Every sample is reported to fairseq with the same nominal size (128).
    data_sizes = np.array([128] * len(batched_data))
    dataset_total = NestedDictionaryDataset(
            {
                "nsamples": NumSamplesDataset(),
                "net_input": {"batched_data": batched_data},
                "target": batched_data,
            },
        sizes=data_sizes,
        )
    ###

    ### initialize task
    task = tasks.setup_task(cfg.task)
    model = task.build_model(cfg.model)
    batch_iterator = task.get_batch_iterator(
        dataset=dataset_total
    )
    itr = batch_iterator.next_epoch_itr(shuffle=False, set_dataset_epoch=False)
    progress = progress_bar.progress_bar(itr)
    ###

    ### load checkpoint
    # Load onto the CPU first so the checkpoint does not have to be restored to
    # the device it was saved from; the model is moved to the GPU afterwards.
    model_state = torch.load(checkpoint_path, map_location="cpu")["model"]
    model.load_state_dict(model_state, strict=True, model_cfg=cfg.model)
    model.to(torch.cuda.current_device())
    del model_state
    ###

    ### prediction
    pred_chunks = []
    model.eval()
    with torch.no_grad():
        for sample in progress:
            sample = utils.move_to_cuda(sample)
            # Keep index 0 along the second dimension and flatten
            # (presumably the graph-level token -- TODO confirm).
            y = model(**sample["net_input"])[:, 0, :].reshape(-1)
            pred_chunks.append(y.detach().cpu())
            torch.cuda.empty_cache()
    ###

    # Join the per-batch results once instead of rebuilding a tensor from a
    # Python list of 0-d tensors; an empty iterator yields an empty tensor.
    if pred_chunks:
        y_pred = torch.cat(pred_chunks).to(torch.get_default_dtype())
    else:
        y_pred = torch.empty(0)
    print(y_pred)

    return y_pred

 


def main():
    """Parse a fixed set of fairseq options for the Omsk model and run inference.

    The command line is hard-coded: fairseq's training parser is extended with
    three extra string flags and then fed a literal argument list. The best
    checkpoint in the configured save directory is used for prediction.

    Returns:
        Whatever ``eval`` returns for the 'omsk' dataset.
    """
    parser = options.get_training_parser()
    # Flags that are not part of the stock training parser.
    for extra_flag in ("--split", "--dataset_name", "--metric"):
        parser.add_argument(extra_flag, type=str)

    num_atoms = str(6656)
    argv = [
        '--user-dir', '../../graphormer',
        '--num-workers', '10',
        '--ddp-backend', 'legacy_ddp',
        '--dataset_name', 'omsk',
        '--dataset-source', 'pyg',
        '--num-atoms', num_atoms,
        '--task', 'graph_prediction',
        '--criterion', 'l1_loss',
        '--arch', 'graphormer_slim',
        '--num-classes', '1',
        '--batch-size', '1',
        '--save-dir', '../../examples/georides/omsk/ckpts/',
        '--split', 'valid',
        '--metric', 'rmse',
    ]
    parsed = options.parse_args_and_arch(parser, argv)

    best_checkpoint = Path(parsed.save_dir) / 'checkpoint_best.pt'
    return eval(parsed, False, best_checkpoint, None, parsed.dataset_name)

In [32]:
# Run the whole inference pipeline (argument parsing, data loading, prediction)
# and keep the returned predictions for inspection in the following cell.
y_preds = main()

2022-03-31 12:56:58 | INFO | graphormer.models.graphormer | Namespace(_name='graphormer_slim', act_dropout=0.0, activation_fn='gelu', all_gather_list_size=16384, amp=False, amp_batch_retries=2, amp_init_scale=128, amp_scale_window=None, apply_graphormer_init=True, arch='graphormer_slim', attention_dropout=0.1, azureml_logging=False, batch_size=1, batch_size_valid=1, best_checkpoint_metric='loss', bf16=False, bpe=None, broadcast_buffers=False, bucket_cap_mb=25, checkpoint_shard_count=1, checkpoint_suffix='', clip_norm=0.0, combine_valid_subsets=None, cpu=False, cpu_offload=False, criterion='l1_loss', curriculum=0, data_buffer_size=10, dataset_impl=None, dataset_name='omsk', dataset_source='pyg', ddp_backend='legacy_ddp', ddp_comm_hook='none', device_id=0, disable_validation=False, distributed_backend='nccl', distributed_init_method=None, distributed_no_spawn=False, distributed_num_procs=1, distributed_port=-1, distributed_rank=0, distributed_world_size=1, dropout=0.1, edge_type='multi_h

idx 0
start find path dijkstra
dijkstra end with time 0.016715288162231445
start gen_edge_input dijkstra
gen_edge_input dijkstra end with time 0.010783910751342773
idx 1
start find path dijkstra
dijkstra end with time 0.03815126419067383
start gen_edge_input dijkstra
gen_edge_input dijkstra end with time 0.021805524826049805
idx 2
start find path dijkstra
dijkstra end with time 0.07062625885009766
start gen_edge_input dijkstra
gen_edge_input dijkstra end with time 0.057898759841918945
idx 3
start find path dijkstra
dijkstra end with time 0.009055376052856445
start gen_edge_input dijkstra
gen_edge_input dijkstra end with time 0.007422208786010742
idx 4
start find path dijkstra
dijkstra end with time 0.02142167091369629
start gen_edge_input dijkstra
gen_edge_input dijkstra end with time 0.01580667495727539
idx 5
start find path dijkstra
dijkstra end with time 0.16559171676635742
start gen_edge_input dijkstra
gen_edge_input dijkstra end with time 0.07932734489440918
idx 6
start find path 

In [33]:
# Display the prediction tensor returned by main().
y_preds

tensor([ 298.6819,  492.9816,  682.6969,  330.0759,  338.0249,  795.2327,
         347.4072,  320.3160, 1042.5276,  891.8836])

In [25]:
lolo = {'lev': 1}
# Scratch check: dict entries are not exposed as attributes, so this lookup
# raises AttributeError; use lolo['lev'] to read the value.
lolo.lev

AttributeError: 'dict' object has no attribute 'lev'

In [26]:
from fairseq import options

In [29]:
# Option names (written without a leading '--') mapped to their values.
parser = {
    'user-dir': '../../graphormer',
    'num-workers': 10,
    'ddp-backend': 'legacy_ddp',
    'dataset_name': 'omsk',
    'dataset-source': 'pyg',
    'num-atoms': 6656,
    'task': 'graph_prediction',
    'criterion': 'l1_loss',
    'arch': 'graphormer_slim',
    'num-classes': 1,
    'batch-size': 1,
    'save-dir': '../../examples/georides/omsk/ckpts/',
    'split': 'valid',
    'metric': 'rmse',
}
# fairseq's stock training argument parser.
parser_2 = options.get_training_parser()

In [30]:
# NOTE: an ArgumentParser object is not callable, so this line raises TypeError.
# Options have to be supplied as a list of strings, e.g. through
# options.parse_args_and_arch(parser, [...]), rather than as a dict.
parser_2(parser)

TypeError: 'ArgumentParser' object is not callable

In [5]:
from fairseq import checkpoint_utils, utils, options, tasks

# Start from fairseq's training parser and register the extra string flags
# that the stock parser does not know about.
train_parser = options.get_training_parser()
for _extra_flag in ("--split", "--dataset_name", "--metric"):
    train_parser.add_argument(_extra_flag, type=str)

# Hard-coded command line for the Omsk model, parsed into a namespace.
_omsk_argv = [
    '--user-dir', '../../graphormer',
    '--num-workers', '10',
    '--ddp-backend', 'legacy_ddp',
    '--dataset_name', 'omsk',
    '--dataset-source', 'pyg',
    '--num-atoms', '6656',
    '--task', 'graph_prediction',
    '--criterion', 'l1_loss',
    '--arch', 'graphormer_slim',
    '--num-classes', '1',
    '--batch-size', '1',
    '--save-dir', '../../examples/georides/omsk/ckpts/',
    '--split', 'valid',
    '--metric', 'rmse',
]
train_args = options.parse_args_and_arch(train_parser, _omsk_argv)


In [6]:
# Display the parsed argument namespace to check the resolved option values.
train_args

Namespace(act_dropout=0.0, activation_fn='gelu', all_gather_list_size=16384, amp=False, amp_batch_retries=2, amp_init_scale=128, amp_scale_window=None, apply_graphormer_init=True, arch='graphormer_slim', attention_dropout=0.1, azureml_logging=False, batch_size=1, batch_size_valid=1, best_checkpoint_metric='loss', bf16=False, bpe=None, broadcast_buffers=False, bucket_cap_mb=25, checkpoint_shard_count=1, checkpoint_suffix='', clip_norm=0.0, combine_valid_subsets=None, cpu=False, cpu_offload=False, criterion='l1_loss', curriculum=0, data_buffer_size=10, dataset_impl=None, dataset_name='omsk', dataset_source='pyg', ddp_backend='legacy_ddp', ddp_comm_hook='none', device_id=0, disable_validation=False, distributed_backend='nccl', distributed_init_method=None, distributed_no_spawn=False, distributed_num_procs=1, distributed_port=-1, distributed_rank=0, distributed_world_size=1, dropout=0.1, edge_type='multi_hop', ema_decay=0.9999, ema_fp32=False, ema_seed_model=None, ema_start_update=0, ema_u

In [35]:
# Load the Omsk dataset on its own for inspection.
# The path resolves to dataset/omsk/processed/data_omsk_1; note that it is
# passed to single_geo_Omsk as its `root` argument despite the name `raw_dir`.
root = osp.join('dataset', 'omsk')
raw_dir = osp.join(root, 'processed', 'data_omsk_1')
data = single_geo_Omsk(root = raw_dir)

In [37]:
# Inspect the first item of the loaded dataset (its repr lists the tensor shapes).
data[0]

Data(edge_attr=[53, 1], edge_index=[2, 53], x=[53, 13], y=[1])