In [1]:
from torch.nn.functional import normalize
from GraphAwareNestedCVEvaluation import GraphAwareNestedCVEvaluation
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
from torch_geometric.utils import add_self_loops
from hyperopt import hp
import numpy as np
from tqdm.notebook import tqdm
from sklearn.linear_model import LogisticRegression
import shap
import torch
from NestedCV import index_to_mask
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier
from IPython.display import clear_output

  _torch_pytree._register_pytree_node(


In [2]:
cora_dataset = Planetoid(root='data/', name='Cora', split="public")
cora_dataset.transform = T.NormalizeFeatures()
cora_dataset[0].edge_index = add_self_loops(cora_dataset[0].edge_index)[0]

In [11]:
def user_function(kwargs):
    return  kwargs["original_features"] + kwargs["summed_neighbors"]
    
class ModelSpace():
    def __init__(self, hop):
        self.space = None
        self.hop = hop
        self.initialize_space()

    def initialize_space(self):
        framework_choices = {
            'hops': [[0, elf.hop]],
            'attention_config': [None],
            'user_function': [user_function],
            # 'n_jobs': [-1],
        }
         
        self.space = {
            **{key: hp.choice(key, value) for key, value in framework_choices.items()}
        }
        
    def add_choice(self, key, items):
        self.space[key] = hp.choice(key, items)
        
    def add_uniform(self, key, limits: tuple):
        self.space[key] = hp.uniform(key, limits[0], limits[1])
        
    def add_loguniform(self, key, limits: tuple):
        self.space[key] = hp.loguniform(key, np.log(limits[0]), np.log(limits[1]))
        
    def add_qloguniform(self, key, limits, q):
        self.space[key] = hp.qloguniform(key, low=np.log(limits[0]), high=np.log(limits[1]), q=q)

class LogitsticRegressionSpace(ModelSpace):
    def __init__(self, hop):
        super().__init__(hop)

    def get_space(self):
        self.add_loguniform('tol', [6e-3, 4e-2])
        self.add_uniform('C', [0, 10])
        self.add_choice('penalty', ["l2"])
        self.add_choice('max_iter', [1_000])
        self.add_choice('n_jobs', [-1])
        return self.space  
        
class SVCSpace(ModelSpace):
    def __init__(self, hop):
        super().__init__(hop)

    def get_space(self):
        self.add_uniform('C', [0, 150])
        self.add_choice('gamma', ["scale", "auto"])
        self.add_choice('probability', [True])
        return self.space  

class DecisionTreeSpace(ModelSpace):
    def __init__(self, hop):
        super().__init__(hop)

    def get_space(self):
        self.add_choice("criterion", ["gini"])
        self.add_choice("max_depth", [None, *[i**2 for i in range(5, 10)]])
        self.add_uniform('min_samples_leaf', (0.0, 1.0))
        self.add_uniform('min_samples_split', (0.0, 1.0))
        self.add_uniform('max_features', (0.0, 1.0))
        return self.space  

class XGBoostSpace(ModelSpace):
    def __init__(self, hop):
        super().__init__(hop)

    def get_space(self):
        self.add_choice('booster', ["gbtree"])
        self.add_choice('n_estimators', [1_400, 1_600, 1_800, 2_000])
        self.add_choice('max_depth', [None,2, 3,4])
        self.add_choice('max_delta_step', [1,2,3])
        self.add_choice('min_child_weight', [None, *list(range(1,5,1))])
        self.add_choice('tree_method', ["hist"])
        self.add_loguniform("eta", (0.05, 0.7))
        self.add_uniform("subsample", (0.6, 1))
        self.add_choice('n_jobs', [-1])
        
        self.add_loguniform("reg_lambda", (0.01, 100))
        self.add_loguniform("reg_alpha", (0.01, 100))
        self.add_uniform("gamma", (0, 0.8))
        self.add_uniform("colsample_bytree", (0.6, 1))
        return self.space  

class RandomForestSpace(ModelSpace):
    def __init__(self, hop):
        super().__init__(hop)

    def get_space(self):
        self.add_choice('n_estimators', [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000])
        self.add_choice('max_depth', [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, None])
        self.add_choice('min_samples_split', [2, 5, 10])
        self.add_choice('criterion', ["gini", "entropy", "log_loss"])
        self.add_choice('n_jobs', [-1])

        self.add_uniform('max_samples', (0.0, 1.0))
        self.add_uniform('min_samples_leaf', (0.0, 1.0))
        self.add_uniform('min_samples_split', (0.0, 1.0))
        self.add_uniform('max_features', (0.0, 1.0))
        return self.space  

In [12]:
classifiers = [LogisticRegression, DecisionTreeClassifier, SVC, RandomForestClassifier, XGBClassifier]
spaces = [LogitsticRegressionSpace, DecisionTreeSpace, SVCSpace, RandomForestSpace, XGBoostSpace]

In [5]:
# classifier_store = dict({})

In [13]:
for classifier_it, classifier in tqdm(enumerate(classifiers)):
    if classifier_it == 0: continue
    data = cora_dataset[0]
    hop_store = dict({})
    for hop in tqdm(range(4)):
        space = spaces[classifier_it](hop).get_space()
        graph_aware_nestedCV_evaluation = GraphAwareNestedCVEvaluation(0, classifier, data, max_evals= len(space.keys())*20)
        graph_aware_nestedCV_evaluation.nested_cross_validate(3, 3, space)
        hop_store[hop] = graph_aware_nestedCV_evaluation.nested_transd_cv
        clear_output(wait=True)
        print(hop_store)
    print(classifier_it)
    classifier_store[classifier().__class__.__name__] = hop_store
    print(classifier_store)

{0: 
        Using a 3 x 3 nested StratifiedKFold Cross-Validation, we obtain:
        0.3021 +- 0.0004.

        self.outer_scores: [0.30232558 0.30232558 0.30155211]

        self.best_params_per_fold: [{'attention_config': None, 'criterion': 'entropy', 'hops': (0,), 'max_depth': 70, 'max_features': 0.4519470053357374, 'max_samples': 0.7471072864218784, 'min_samples_leaf': 0.6765427775627237, 'min_samples_split': 0.3959874048462251, 'n_estimators': 1400, 'n_jobs': -1, 'user_function': <function user_function at 0x7f484a738b80>}, {'attention_config': None, 'criterion': 'log_loss', 'hops': (0,), 'max_depth': 30, 'max_features': 0.5088390663474572, 'max_samples': 0.9202183024190197, 'min_samples_leaf': 0.7909067595511559, 'min_samples_split': 0.40540424546593223, 'n_estimators': 800, 'n_jobs': -1, 'user_function': <function user_function at 0x7f484a738b80>}, {'attention_config': None, 'criterion': 'gini', 'hops': (0,), 'max_depth': 50, 'max_features': 0.34957883670879675, 'max_samples':

0it [00:00, ?it/s]

  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register

In [19]:
for key in classifier_store:
    print(key)
    for hop in classifier_store[key]:
        print(f"{key} with {hop} hops: {classifier_store[key][hop].outer_scores.mean():.4f}+-{classifier_store[key][hop].outer_scores.std():.4f} ({np.array(classifier_store[key][hop].train_times).mean():.4f})")

LogisticRegression
LogisticRegression with 0 hops: 0.3024+-0.0008 (0.5357)
LogisticRegression with 1 hops: 0.3080+-0.0071 (0.5519)
LogisticRegression with 2 hops: 0.4165+-0.0442 (0.7433)
LogisticRegression with 3 hops: 0.7116+-0.0651 (1.5062)
DecisionTreeClassifier
DecisionTreeClassifier with 0 hops: 0.3021+-0.0004 (0.0161)
DecisionTreeClassifier with 1 hops: 0.3113+-0.0080 (0.0319)
DecisionTreeClassifier with 2 hops: 0.3209+-0.0218 (0.0583)
DecisionTreeClassifier with 3 hops: 0.3021+-0.0004 (0.0817)
SVC
SVC with 0 hops: 0.1200+-0.0234 (10.4316)
SVC with 1 hops: 0.4484+-0.1319 (10.3249)
SVC with 2 hops: 0.6695+-0.0233 (10.1393)
SVC with 3 hops: 0.6418+-0.0233 (10.2694)


In [16]:
classifier_store["LogisticRegression"]

{0: 
         Using a 3 x 3 nested StratifiedKFold Cross-Validation, we obtain:
         0.3024 +- 0.0008.
 
         self.outer_scores: [0.30232558 0.303433   0.30155211]
 
         self.best_params_per_fold: [{'C': 0.030835896394627707, 'attention_config': None, 'hops': (0,), 'max_iter': 1000, 'n_jobs': -1, 'penalty': 'l2', 'tol': 0.020174708960991657, 'user_function': <function user_function at 0x7f46cefd6290>}, {'C': 0.07096243793877666, 'attention_config': None, 'hops': (0,), 'max_iter': 1000, 'n_jobs': -1, 'penalty': 'l2', 'tol': 0.021475448823134374, 'user_function': <function user_function at 0x7f46cefd6290>}, {'C': 0.035759525562881145, 'attention_config': None, 'hops': (0,), 'max_iter': 1000, 'n_jobs': -1, 'penalty': 'l2', 'tol': 0.01764981478570845, 'user_function': <function user_function at 0x7f46cefd6290>}]
 
         self.best_models: []
 
         ,
 1: 
         Using a 3 x 3 nested StratifiedKFold Cross-Validation, we obtain:
         0.3080 +- 0.0071.
 
         self