In [1]:
from torch.nn.functional import normalize
from GraphAwareNestedCVEvaluationInductive import GraphAwareNestedCVEvaluationInductive
from torch_geometric.datasets import PPI
import torch_geometric.transforms as T
from torch_geometric.utils import add_self_loops
from hyperopt import hp
import numpy as np
from tqdm.notebook import tqdm
from sklearn.linear_model import LogisticRegression
import shap
import torch
from NestedCV import index_to_mask
from xgboost import XGBClassifier

  _torch_pytree._register_pytree_node(


In [2]:
# Load PPI from the local cache directory; NormalizeFeatures is attached as the
# dataset transform through the constructor keyword.
# Self-loops are not added to the graphs (an earlier add_self_loops experiment
# in this cell was disabled).
# NOTE(review): no `split` argument is passed, so the library default split is
# used — confirm this is the intended subset.
dataset = PPI(root='data/PPI', transform=T.NormalizeFeatures())

In [3]:
def user_function(kwargs):
    """Return ``original_features + summed_neighbors`` taken from ``kwargs``.

    Args:
        kwargs: Mapping that provides the keys ``"original_features"`` and
            ``"summed_neighbors"``; the two values must support ``+``.

    Returns:
        The result of adding the two looked-up values.

    Raises:
        KeyError: If either key is missing from ``kwargs``.
    """
    own_features = kwargs["original_features"]
    neighbor_sum = kwargs["summed_neighbors"]
    return own_features + neighbor_sum
    
class ModelSpace:
    """Base holder for a hyperopt search space stored in ``self.space``.

    On construction the space is seeded with three framework-level entries
    (``hops``, ``attention_config``, ``user_function``), each declared as an
    ``hp.choice`` over a single candidate. The ``add_*`` helpers register
    further dimensions under a given key.
    """

    def __init__(self):
        self.space = None
        self.initialize_space()

    def initialize_space(self):
        """(Re)build ``self.space`` so it holds only the framework entries."""
        attention_settings = {
            'inter_layer_normalize': False,
            'use_pseudo_attention': True,
            'cosine_eps': 0.01,
            'dropout_attn': None,
        }
        # Every candidate list has exactly one element, so each entry is a
        # single-option choice.
        fixed_options = (
            ('hops', [[3]]),
            ('attention_config', [attention_settings]),
            ('user_function', [user_function]),
        )

        seeded = {}
        for label, candidates in fixed_options:
            seeded[label] = hp.choice(label, candidates)
        self.space = seeded

    def add_choice(self, key, items):
        """Register ``key`` as an ``hp.choice`` over ``items``."""
        choice_node = hp.choice(key, items)
        self.space[key] = choice_node

    def add_uniform(self, key, limits: tuple):
        """Register ``key`` as ``hp.uniform`` between ``limits[0]`` and ``limits[1]``."""
        uniform_node = hp.uniform(key, limits[0], limits[1])
        self.space[key] = uniform_node

    def add_loguniform(self, key, limits: tuple):
        """Register ``key`` as ``hp.loguniform``.

        ``limits`` is given on the natural scale; the natural log of each
        bound is what gets passed to hyperopt.
        """
        log_low = np.log(limits[0])
        log_high = np.log(limits[1])
        self.space[key] = hp.loguniform(key, log_low, log_high)

    def add_qloguniform(self, key, limits, q):
        """Register ``key`` as ``hp.qloguniform`` with step ``q``.

        As with ``add_loguniform``, the bounds in ``limits`` are log-transformed
        before being handed to hyperopt.
        """
        log_bounds = {"low": np.log(limits[0]), "high": np.log(limits[1])}
        self.space[key] = hp.qloguniform(key, q=q, **log_bounds)

class XGBSpace(ModelSpace):
    """Search space with XGBoost-style hyperparameters on top of ``ModelSpace``.

    Presumably consumed as keyword arguments for ``XGBClassifier`` by the
    evaluation harness — confirm against the caller.
    """

    def __init__(self):
        super().__init__()

    def get_space(self):
        """Add the XGBoost entries to ``self.space`` and return it.

        Every call re-registers the same keys, overwriting the previous
        entries, and returns the same ``self.space`` dictionary object.
        """
        # Hyperparameters pinned to one value (single-candidate choices).
        pinned_values = (
            ("booster", "gbtree"),
            ("n_estimators", 1_400),
            ("max_depth", None),
            ("max_delta_step", 1),
            ("min_child_weight", None),
            ("device", "cpu"),
            ("tree_method", "hist"),
            ("scale_pos_weight", 2),
            ("early_stopping_rounds", 10),
            ("eval_metric", "error"),
        )
        for name, value in pinned_values:
            self.add_choice(name, [value])

        # Narrow log-uniform windows: [low, low + 0.01].
        window = 0.01
        for name, low in (("eta", 0.24), ("reg_lambda", 0.023), ("reg_alpha", 0.066)):
            self.add_loguniform(name, (low, low + window))

        # Narrow uniform windows.
        uniform_ranges = (
            ("subsample", (0.99, 1)),
            ("gamma", (0.05, 0.05 + window)),
            ("colsample_bytree", (0.99, 1)),
        )
        for name, bounds in uniform_ranges:
            self.add_uniform(name, bounds)

        return self.space

In [4]:
# Build the search-space object; the XGBoost-specific entries are only added
# when get_space() is called on it.
xgb_space = XGBSpace()

In [15]:
# Empty dictionary; it is not referenced by any other cell visible in this
# notebook. A plain literal avoids building a throwaway dict just to copy it.
store = {}

In [16]:
# Set up the evaluation with XGBClassifier on the loaded dataset, using a
# hyperopt budget of max_evals=1.
# NOTE(review): the meaning of the leading positional argument 0 is not visible
# in this notebook — confirm against GraphAwareNestedCVEvaluationInductive.
graph_aware_nestedCV_evaluation = GraphAwareNestedCVEvaluationInductive(0, XGBClassifier, dataset, max_evals= 1) # alternative budget: len(lr_space.get_space().keys())*20 — NOTE(review): `lr_space` is not defined in the visible cells
# Run nested cross-validation over the XGBoost search space. The two 5s are
# presumably the outer/inner fold counts — TODO confirm the argument order.
# The call is the cell's last expression, so its return value is displayed.
graph_aware_nestedCV_evaluation.nested_cross_validate(5, 5, xgb_space.get_space())

0it [00:00, ?it/s]

  _torch_pytree._register_pytree_node(
  summed_exp_score = torch.zeros_like(exp_score).scatter(0, target,exp_score, reduce="add")
Total Trials: 1: 1 succeeded, 0 failed, 0 cancelled.                            
  _torch_pytree._register_pytree_node(
  summed_exp_score = torch.zeros_like(exp_score).scatter(0, target,exp_score, reduce="add")
Total Trials: 1: 1 succeeded, 0 failed, 0 cancelled.                            
  _torch_pytree._register_pytree_node(
  summed_exp_score = torch.zeros_like(exp_score).scatter(0, target,exp_score, reduce="add")
Total Trials: 1: 1 succeeded, 0 failed, 0 cancelled.                            
  _torch_pytree._register_pytree_node(
  summed_exp_score = torch.zeros_like(exp_score).scatter(0, target,exp_score, reduce="add")
Total Trials: 1: 1 succeeded, 0 failed, 0 cancelled.                            
  _torch_pytree._register_pytree_node(
  summed_exp_score = torch.zeros_like(exp_score).scatter(0, target,exp_score, reduce="add")
Total Trials: 1: 1 su

<NestedCV.NestedInductiveCV at 0x7fd74bdc6410>

In [17]:
# Mean and standard deviation of `outer_scores` from the nested CV object,
# displayed as a (mean, std) tuple.
graph_aware_nestedCV_evaluation.nested_inductive_cv.outer_scores.mean(), graph_aware_nestedCV_evaluation.nested_inductive_cv.outer_scores.std()

(0.9461399972963511, 0.017575904067168583)

In [11]:
# Mean of `outer_scores`.
# NOTE(review): this cell's execution count (11) predates the evaluation run in
# In [16], so its saved output may be stale; the In [17] cell reports the same
# statistic. Re-run with Restart & Run All or remove this duplicate.
graph_aware_nestedCV_evaluation.nested_inductive_cv.outer_scores.mean()

0.9502419058860636

In [12]:
# Standard deviation of `outer_scores`.
# NOTE(review): this cell's execution count (12) predates the evaluation run in
# In [16], so its saved output may be stale; the In [17] cell reports the same
# statistic. Re-run with Restart & Run All or remove this duplicate.
graph_aware_nestedCV_evaluation.nested_inductive_cv.outer_scores.std()

0.0459691783473089

In [14]:
# Mean and standard deviation of `inner_scores` from the nested CV object,
# displayed as a (mean, std) tuple.
# NOTE(review): this cell's execution count (14) predates the evaluation run in
# In [16], so its saved output may not correspond to that run — re-run to refresh.
graph_aware_nestedCV_evaluation.nested_inductive_cv.inner_scores.mean(), graph_aware_nestedCV_evaluation.nested_inductive_cv.inner_scores.std()

(0.947062217740643, 0.04270757444487416)

In [10]:
# When a model is trained with early stopping, the native Python interface and
# the sklearn/R interfaces behave inconsistently. By default, the R and sklearn
# interfaces automatically use best_iteration, so predictions come from the best
# model. With the native Python interface, however, xgboost.Booster.predict() and
# xgboost.Booster.inplace_predict() use the full model. Users can combine the
# best_iteration attribute with the iteration_range parameter to achieve the same
# behavior. The save_best parameter of xgboost.callback.EarlyStopping might also
# be useful.