In [26]:
from torch.nn.functional import normalize
from GraphAwareNestedCVEvaluation import GraphAwareNestedCVEvaluation
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
from torch_geometric.utils import add_self_loops
from hyperopt import hp
import numpy as np
from tqdm.notebook import tqdm
from sklearn.linear_model import LogisticRegression
import shap
import torch
from NestedCV import index_to_mask
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier

In [27]:
cora_dataset = Planetoid(root='data/', name='Cora', split="public")
cora_dataset.transform = T.NormalizeFeatures()
cora_dataset[0].edge_index = add_self_loops(cora_dataset[0].edge_index)[0]

In [32]:
def user_function(kwargs):
    return  kwargs["original_features"] + kwargs["summed_neighbors"]
    
class ModelSpace():
    def __init__(self, hop):
        self.space = None
        self.hop = hop
        self.initialize_space()

    def initialize_space(self):
        framework_choices = {
            'hops': [[self.hop]],
            'attention_config': [None],
            'user_function': [user_function],
            'n_jobs': [-1],
        }
         
        self.space = {
            **{key: hp.choice(key, value) for key, value in framework_choices.items()}
        }
        
    def add_choice(self, key, items):
        self.space[key] = hp.choice(key, items)
        
    def add_uniform(self, key, limits: tuple):
        self.space[key] = hp.uniform(key, limits[0], limits[1])
        
    def add_loguniform(self, key, limits: tuple):
        self.space[key] = hp.loguniform(key, np.log(limits[0]), np.log(limits[1]))
        
    def add_qloguniform(self, key, limits, q):
        self.space[key] = hp.qloguniform(key, low=np.log(limits[0]), high=np.log(limits[1]), q=q)

class LogitsticRegressionSpace(ModelSpace):
    def __init__(self, hop):
        super().__init__(hop)

    def get_space(self):
        self.add_loguniform('tol', [6e-3, 4e-2])
        self.add_uniform('C', [0, 10])
        self.add_choice('penalty', ["l2"])
        self.add_choice('max_iter', [1_000])
        self.add_choice('´n_jobs', [-1])
        return self.space  
        
class SVCSpace(ModelSpace):
    def __init__(self, hop):
        super().__init__(hop)

    def get_space(self):
        self.add_uniform('C', [0, 150])
        self.add_choice('gamma', ["scale", "auto"])
        self.add_choice('probability', [True])
        return self.space  

class DecisionTreeSpace(ModelSpace):
    def __init__(self, hop):
        super().__init__(hop)

    def get_space(self):
        self.add_choice("criterion", ["gini"])
        self.add_choice("max_depth", [None, *[i**2 for i in range(5, 10)]])
        self.add_uniform('max_samples', (0.0, 1.0))
        self.add_uniform('min_samples_leaf', (0.0, 1.0))
        self.add_uniform('min_samples_split', (0.0, 1.0))
        self.add_uniform('max_features', (0.0, 1.0))
        return self.space  

class XGBoostSpace(ModelSpace):
    def __init__(self, hop):
        super().__init__(hop)

    def get_space(self):
        self.add_choice('booster', ["gbtree"])
        self.add_choice('n_estimators', [1_400, 1_600, 1_800, 2_000])
        self.add_choice('max_depth', [None,2, 3,4])
        self.add_choice('max_delta_step', [1,2,3])
        self.add_choice('min_child_weight', [None, *list(range(1,5,1))])
        self.add_choice('tree_method', ["hist"])
        self.add_loguniform("eta", (0.05, 0.7))
        self.add_uniform("subsample", (0.6, 1))
        self.add_choice('n_jobs', [-1])
        
        self.add_loguniform("reg_lambda", (0.01, 100))
        self.add_loguniform("reg_alpha", (0.01, 100))
        self.add_uniform("gamma", (0, 0.8))
        self.add_uniform("colsample_bytree", (0.6, 1))
        return self.space  

class RandomForestSpace(ModelSpace):
    def __init__(self, hop):
        super().__init__(hop)

    def get_space(self):
        self.add_choice('n_estimators', [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000])
        self.add_choice('max_depth', [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, None])
        self.add_choice('min_samples_split', [2, 5, 10])
        self.add_choice('criterion', ["gini", "entropy", "log_loss"])
        self.add_choice('n_jobs', [-1])

        self.add_uniform('max_samples', (0.0, 1.0))
        self.add_uniform('min_samples_leaf', (0.0, 1.0))
        self.add_uniform('min_samples_split', (0.0, 1.0))
        self.add_uniform('max_features', (0.0, 1.0))
        return self.space  

In [37]:
LogisticRegression

'LogisticRegression'

In [33]:
classifiers = [LogisticRegression, DecisionTreeClassifier, SVC, RandomForestClassifier, XGBClassifier]
spaces = [LogitsticRegressionSpace, DecisionTreeSpace, SVCSpace, RandomForestSpace, XGBoostSpace]

In [34]:
classifier_store = dict({})

In [22]:
for classifier_it, classifier in enumerate(classifiers):
    data = cora_dataset[0]
    hop_store = dict({})
    for hop in range(4):
        space = spaces[classifier_it](hop).get_space()
        graph_aware_nestedCV_evaluation = GraphAwareNestedCVEvaluation(2, classifier, data, max_evals= len(space.keys())*20)
        graph_aware_nestedCV_evaluation.nested_cross_validate(3, 3, space)
        hop_store[hop] = graph_aware_nestedCV_evaluation.nested_transd_cv
    classifier_store[classifier().__class__.__name__] = hop_store

0it [00:00, ?it/s]

  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _

0it [00:00, ?it/s]

  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or 

In [23]:
store

{0: 
         Using a 3 x 3 nested StratifiedKFold Cross-Validation, we obtain:
         0.3028 +- 0.0013.
 
         self.outer_scores: [0.30232558 0.30454042 0.30155211]
 
         self.best_params_per_fold: [{'C': 0.02807828170690385, 'attention_config': None, 'hops': (0,), 'n_jobs': -1, 'tol': 0.03811053576985138, 'user_function': <function user_function at 0x7fc815d1c670>}, {'C': 0.1078321834052487, 'attention_config': None, 'hops': (0,), 'n_jobs': -1, 'tol': 0.03718391336894381, 'user_function': <function user_function at 0x7fc815d1c670>}, {'C': 0.04514900656588816, 'attention_config': None, 'hops': (0,), 'n_jobs': -1, 'tol': 0.023154799476494976, 'user_function': <function user_function at 0x7fc815d1c670>}]
 
         self.best_models: []
 
         ,
 1: 
         Using a 3 x 3 nested StratifiedKFold Cross-Validation, we obtain:
         0.3024 +- 0.0008.
 
         self.outer_scores: [0.30232558 0.303433   0.30155211]
 
         self.best_params_per_fold: [{'C': 0.005596095732

