In [1]:
from torch.nn.functional import normalize
from GraphAwareNestedCVEvaluation import GraphAwareNestedCVEvaluation
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
from torch_geometric.utils import add_self_loops
from hyperopt import hp
import numpy as np
from tqdm.notebook import tqdm
from sklearn.linear_model import LogisticRegression

  _torch_pytree._register_pytree_node(


In [2]:
dataset_name = 'Cora'
split = "public"
dataset = Planetoid(root='data/', name=dataset_name, split=split)
dataset.transform = T.NormalizeFeatures()
dataset[0].edge_index = add_self_loops(dataset[0].edge_index)[0]

In [3]:
def norm_user_function(kwargs):
    return  normalize(kwargs["original_features"] + kwargs["summed_neighbors"], p=2.0, dim = 1)
    
def user_function(kwargs):
    return  kwargs["original_features"] + kwargs["summed_neighbors"]
    
class ModelSpace():
    def __init__(self):
        self.space = None
        self.initialize_space()

    def initialize_space(self):
        framework_choices = {
            'hops': [[0, 3, 8], [0, 1, 2]],
            'attention_config': [None,{'inter_layer_normalize': False,
                     'use_pseudo_attention':True,
                     'cosine_eps':.01,
                     'dropout_attn': None}, 
                     {'inter_layer_normalize': True,
                     'use_pseudo_attention':True,
                     'cosine_eps':.01,
                     'dropout_attn': None},
                     {'inter_layer_normalize': True,
                     'use_pseudo_attention':True,
                     'cosine_eps':.001,
                     'dropout_attn': None}],
            'user_function': [norm_user_function, user_function],
        }
         
        self.space = {
            **{key: hp.choice(key, value) for key, value in framework_choices.items()}
        }
        
    def add_choice(self, key, items):
        self.space[key] = hp.choice(key, items)
        
    def add_uniform(self, key, limits: tuple):
        self.space[key] = hp.uniform(key, limits[0], limits[1])
        
    def add_loguniform(self, key, limits: tuple):
        self.space[key] = hp.loguniform(key, np.log(limits[0]), np.log(limits[1]))
        
    def add_qloguniform(self, key, limits, q):
        self.space[key] = hp.qloguniform(key, low=np.log(limits[0]), high=np.log(limits[1]), q=q)

class LogitsticRegressionSpace(ModelSpace):
    def __init__(self):
        super().__init__()

    def get_space(self):
        self.add_loguniform('tol', [6e-3, 4e-2])
        self.add_uniform('C', [0, 10])
        return self.space    

In [4]:
data = dataset[0]

In [5]:
lr_space = LogitsticRegressionSpace()

In [7]:
graph_aware_nestedCV_evaluation = GraphAwareNestedCVEvaluation(2, LogisticRegression, data, max_evals= len(lr_space.get_space().keys())*20)
graph_aware_nestedCV_evaluation.nested_cross_validate(3, 3, lr_space.get_space())

0it [00:00, ?it/s]

Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
24/07/08 10:52:46 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
  _torch_pytree._register_pytree_node(
  summed_exp_score = torch.zeros_like(exp_score).scatter(0, target,exp_score, reduce="add")
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _torch_pytree._register_pytree_node(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.ht


        Using a 3 x 3 nested StratifiedKFold Cross-Validation, we obtain:
        0.8826 +- 0.0070.

        self.outer_scores: [0.87264673 0.88815061 0.88691796]

        self.best_params_per_fold: [{'C': 6.486700942674761, 'attention_config': {'cosine_eps': 0.01, 'dropout_attn': None, 'inter_layer_normalize': False, 'use_pseudo_attention': True}, 'hops': (0, 3, 8), 'tol': 0.009164585326269714, 'user_function': <function norm_user_function at 0x7f0cd5c45630>}, {'C': 8.261013574098602, 'attention_config': {'cosine_eps': 0.01, 'dropout_attn': None, 'inter_layer_normalize': False, 'use_pseudo_attention': True}, 'hops': (0, 3, 8), 'tol': 0.019315059119996952, 'user_function': <function user_function at 0x7f0cd5e15480>}, {'C': 7.62433418734091, 'attention_config': None, 'hops': (0, 3, 8), 'tol': 0.015430834996030181, 'user_function': <function norm_user_function at 0x7f0cd5c45630>}]

        self.best_models: [<EnsembleFramework.Framework object at 0x7f0cd5e22890>, <EnsembleFramework.Fram

In [8]:
graph_aware_nestedCV_evaluation.nested_transd_cv.outer_scores.mean()

0.8825717674271276

In [9]:
graph_aware_nestedCV_evaluation.nested_transd_cv.inner_scores

array([[0.87541528, 0.87209302, 0.85524126],
       [0.86710963, 0.85548173, 0.85191348],
       [0.87873754, 0.87873754, 0.89036545]])

In [10]:
graph_aware_nestedCV_evaluation.nested_transd_cv.best_params_per_fold

[{'C': 6.486700942674761,
  'attention_config': {'cosine_eps': 0.01,
   'dropout_attn': None,
   'inter_layer_normalize': False,
   'use_pseudo_attention': True},
  'hops': (0, 3, 8),
  'tol': 0.009164585326269714,
  'user_function': <function __main__.norm_user_function(kwargs)>},
 {'C': 8.261013574098602,
  'attention_config': {'cosine_eps': 0.01,
   'dropout_attn': None,
   'inter_layer_normalize': False,
   'use_pseudo_attention': True},
  'hops': (0, 3, 8),
  'tol': 0.019315059119996952,
  'user_function': <function __main__.user_function(kwargs)>},
 {'C': 7.62433418734091,
  'attention_config': None,
  'hops': (0, 3, 8),
  'tol': 0.015430834996030181,
  'user_function': <function __main__.norm_user_function(kwargs)>}]

In [14]:
print(graph_aware_nestedCV_evaluation.nested_transd_cv)


        Using a 3 x 3 nested StratifiedKFold Cross-Validation, we obtain:
        0.8826 +- 0.0070.

        self.outer_scores: [0.87264673 0.88815061 0.88691796]

        self.best_params_per_fold: [{'C': 6.486700942674761, 'attention_config': {'cosine_eps': 0.01, 'dropout_attn': None, 'inter_layer_normalize': False, 'use_pseudo_attention': True}, 'hops': (0, 3, 8), 'tol': 0.009164585326269714, 'user_function': <function norm_user_function at 0x7f0cd5c45630>}, {'C': 8.261013574098602, 'attention_config': {'cosine_eps': 0.01, 'dropout_attn': None, 'inter_layer_normalize': False, 'use_pseudo_attention': True}, 'hops': (0, 3, 8), 'tol': 0.019315059119996952, 'user_function': <function user_function at 0x7f0cd5e15480>}, {'C': 7.62433418734091, 'attention_config': None, 'hops': (0, 3, 8), 'tol': 0.015430834996030181, 'user_function': <function norm_user_function at 0x7f0cd5c45630>}]

        self.best_models: [<EnsembleFramework.Framework object at 0x7f0cd5e22890>, <EnsembleFramework.Fram

In [12]:
graph_aware_nestedCV_evaluation.nested_transd_cv.best_models

[<EnsembleFramework.Framework at 0x7f0cd5e22890>,
 <EnsembleFramework.Framework at 0x7f0cd5e18910>,
 <EnsembleFramework.Framework at 0x7f0d8d104460>]