In [1]:
import sys
 
# Put the parent directory first on the module search path so that the
# import below is resolved from there before any other location.
sys.path.insert(0, "../")

# NOTE(review): presumably EnsembleFramework is a project-local module that
# lives in the parent directory — confirm.
from EnsembleFramework import Framework

In [2]:
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
from torch_geometric.utils import add_self_loops

# Load the Planetoid dataset named below using its "public" split.
dataset_name = 'Cora'
split = "public"
dataset = Planetoid(root='/tmp/Cora', name=dataset_name, split=split)
# The transform is attached after construction, so it is applied when items
# are read from the dataset rather than at load time.
dataset.transform = T.NormalizeFeatures()

# Read the graph once and reuse it: every `dataset[0]` access builds a new
# data object and re-runs the transform, so indexing repeatedly is wasted work.
graph = dataset[0]

features = graph.x
y = graph.y

# Boolean node masks used later to index features and labels.
test = graph.test_mask
train = graph.train_mask
val = graph.val_mask

# add_self_loops returns a tuple; only its first element (the edge index) is kept.
edge_index = add_self_loops(graph.edge_index)[0]

In [3]:
from torch.nn.functional import normalize
def user_function(kwargs):
    """Combine a node's own features with its aggregated neighbors.

    Args:
        kwargs: Mapping that provides the tensors ``"original_features"``
            and ``"summed_neighbors"``; the two are added element-wise.

    Returns:
        The element-wise sum, rescaled so that each slice along ``dim=1``
        has unit L2 norm.
    """
    combined = kwargs["original_features"] + kwargs["summed_neighbors"]
    return normalize(combined, p=2.0, dim=1)
    
# Hop counts to evaluate: every value from 0 to 10, then a sparser tail.
hops_list = [*range(11), 15, 20, 30, 40, 50]

# The same aggregation function and no attention config are supplied for
# every entry of hops_list; no classifiers are attached at this stage.
framework = Framework(
    [user_function] * len(hops_list),
    hops_list=hops_list,  # original note: "to obtain best for local neighborhood"
    clfs=[],
    gpu_idx=0,
    handle_nan=0.0,
    attention_configs=[None] * len(hops_list),
)

# NOTE(review): presumably returns one feature tensor per entry of hops_list
# (later code indexes the result by hop position) — confirm.
new_features_list = framework.get_features(features, edge_index, None)

In [None]:
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
import numpy as np
from sklearn.model_selection import ParameterGrid
from tqdm.notebook import tqdm
from hyperopt import fmin, tpe, hp,STATUS_OK

# Candidate values for the discrete hyper-parameters.
n_estimators = [50, 100, 150, 200, 400, 500, 800, 1_000, 1_200]
max_depth = [None, 2, 3, 4, 5, 8, 10]
# max_delta_step = [None, 1, 2, 3, 4]

# Discrete parameters are kept in their own mapping because the search
# reports them by position and the candidate lists are needed to decode them.
lr_choices = {
    'n_estimators': n_estimators,
    'max_depth': max_depth,
    # 'max_delta_step': max_delta_step,
}

# Search space: the categorical choices first, then the continuous ranges.
space = {name: hp.choice(name, options) for name, options in lr_choices.items()}
space.update({
    'min_child_weight': hp.uniform('min_child_weight', 0, 5),
    'eta': hp.loguniform('eta', -3, -.3),
    'subsample': hp.uniform('subsample', 0.6, 1),
    'reg_lambda': hp.uniform('reg_lambda', 0, 1),
    'alpha': hp.uniform('alpha', 0, 1),
})

# Estimator class (not an instance) plus per-hop result stores.
clf = XGBClassifier
hops_to_params = {}
hops_to_score = {}

# Tune one classifier per hop count and record the best parameters and the
# validation accuracy they reach.
# tqdm wraps the list itself (not the enumerate iterator) so the progress bar
# knows the total number of hop settings.
for i, hops in enumerate(tqdm(hops_list, desc="hops")):
    new_features = new_features_list[i].cpu()

    def objective(params):
        """Fit a classifier with ``params`` and return its hyperopt result.

        The loss is the negated validation accuracy. If fitting or
        prediction raises, the error is printed together with the offending
        parameters and the trial is given a loss of 100 so the search moves on.
        """
        score = 0
        try:
            model = clf(**params, random_state=42, tree_method="hist", device="cuda:0")
            model.fit(new_features[train], y[train])
            y_pred = model.predict(new_features[val])
            score = accuracy_score(y[val], y_pred)
        except Exception as exc:
            # Report the actual error instance; printing the Exception class
            # itself would hide what went wrong.
            print(f"{type(exc).__name__}: {exc}")
            print(params)
            score = -100
        return {'loss': -score, 'status': STATUS_OK}

    # NOTE(review): no seed is passed to fmin, so repeated runs may explore
    # different trials — consider fixing one for reproducibility.
    best_params = fmin(objective, space, algo=tpe.suggest, max_evals=1_000, verbose=1)

    # Values for the hp.choice parameters come back as positions in their
    # candidate lists; translate them back to the actual values.
    for key, value in lr_choices.items():
        best_params[key] = value[best_params[key]]

    # Refit with the decoded best parameters to obtain the reported score.
    best_model = clf(**best_params, random_state=42, tree_method="hist", device="cuda:0")
    best_model.fit(new_features[train], y[train])
    y_pred = best_model.predict(new_features[val])

    hops_to_params[hops] = best_params
    hops_to_score[hops] = accuracy_score(y[val], y_pred)

0it [00:00, ?it/s]


  0%|                                                                          | 0/1000 [00:00<?, ?trial/s, best loss=?][A
  0%|                                                            | 1/1000 [00:01<28:32,  1.71s/trial, best loss: -0.376][A
  0%|                                                             | 2/1000 [00:02<16:11,  1.03trial/s, best loss: -0.52][A
  0%|▏                                                           | 3/1000 [00:03<19:30,  1.17s/trial, best loss: -0.546][A
  0%|▏                                                           | 4/1000 [00:03<13:45,  1.21trial/s, best loss: -0.546][A
  0%|▎                                                           | 5/1000 [00:06<22:39,  1.37s/trial, best loss: -0.546][A
  1%|▎                                                           | 6/1000 [00:07<21:10,  1.28s/trial, best loss: -0.546][A
  1%|▍                                                           | 7/1000 [00:08<19:17,  1.17s/trial, best loss: -0.546][A
  1%|▍ 

In [None]:
import pandas as pd

# One row per hop count, holding the tuned parameter dict for that hop.
df = pd.DataFrame(
    {"best_params": list(hops_to_params.values())},
    index=list(hops_to_params.keys()),
)
# Persist the table, then leave it as the cell's displayed value.
df.to_csv("normalized_origin_features_plus_summed_neighbors_wo_attn.csv")
df

In [5]:
hops_to_params

{0: {'alpha': 0.057112352931998595,
  'eta': 0.224759127835811,
  'max_depth': 4,
  'min_child_weight': 0.005455188525435611,
  'n_estimators': 500,
  'reg_lambda': 0.039123850816122165,
  'subsample': 0.6794312906906859},
 2: {'alpha': 0.0009390531976701372,
  'eta': 0.4935669432837852,
  'max_depth': 5,
  'min_child_weight': 0.0006811039486869647,
  'n_estimators': 500,
  'reg_lambda': 0.8817904171932709,
  'subsample': 0.8206963061299576},
 5: {'alpha': 0.03379312127278378,
  'eta': 0.1478835693028956,
  'max_depth': 5,
  'min_child_weight': 0.17793682726294463,
  'n_estimators': 1200,
  'reg_lambda': 0.062085093776013,
  'subsample': 0.639411536456597},
 10: {'alpha': 0.015029180611270726,
  'eta': 0.197321912545624,
  'max_depth': 10,
  'min_child_weight': 0.13865256338574472,
  'n_estimators': 1000,
  'reg_lambda': 0.15468170829057004,
  'subsample': 0.6080697474708397},
 15: {'alpha': 0.12674650663427506,
  'eta': 0.14367520667786737,
  'max_depth': 8,
  'min_child_weight': 0.61

In [6]:
hops_to_score

{0: 0.566, 2: 0.752, 5: 0.798, 10: 0.796, 15: 0.8, 20: 0.806}

In [None]:
# NOTE(review): presumably a deliberate stop so that "Run All" halts here and
# the cells below are only executed by hand — confirm.
raise Exception

In [None]:
hops_to_score

In [None]:
hops_to_params

In [None]:
##TODO Unnormalize!!!