In [1]:
import sys
 
sys.path.insert(0, "../")

from EnsembleFramework import Framework

In [2]:
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
from torch_geometric.utils import add_self_loops

dataset_name = 'Cora'
split = "public"
dataset = Planetoid(root='/tmp/Cora', name=dataset_name, split=split)
dataset.transform = T.NormalizeFeatures()

features =  dataset[0].x
y =  dataset[0].y

test =  dataset[0].test_mask
train = dataset[0].train_mask
val =  dataset[0].val_mask

edge_index = dataset[0].edge_index 
edge_index = add_self_loops(edge_index)[0]

In [3]:
from torch.nn.functional import normalize
def user_function(kwargs):
    return  normalize(kwargs["original_features"] + kwargs["summed_neighbors"], p=2.0, dim = 1)
hops_list = [0,1,2,3,4,5,6,7,8,9,10,15,20,30,40,50]
framework = Framework([user_function for i in hops_list], 
                     hops_list=hops_list, ## to obtain best for local neighborhood
                     clfs=[],
                     gpu_idx=0,
                     handle_nan=0.0,
                    attention_configs=[None for i in hops_list])
new_features_list = framework.get_features(features, edge_index, None)

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
import numpy as np
from sklearn.model_selection import ParameterGrid
from tqdm.notebook import tqdm
from hyperopt import fmin, tpe, hp,STATUS_OK

criterion = ["gini"]
max_depth = [None, *[i**2 for i in range(5, 10)]]
max_features = [None, "sqrt", "auto"]


lr_choices = {
    'criterion': criterion,
    'max_depth': max_depth,
    'max_features': max_features
}

space = {
    **{key: hp.choice(key, value) for key, value in lr_choices.items()},
    'min_samples_split': hp.uniform('min_samples_split', 0.0, 1.0),
    'min_samples_leaf': hp.uniform('min_samples_leaf', 0.0, 1.0),
    'min_weight_fraction_leaf': hp.uniform('min_weight_fraction_leaf', 0.0, 1.0),
}

clf = DecisionTreeClassifier
hops_to_params=dict({})
hops_to_score=dict({})

for i, hops in tqdm(enumerate(hops_list)):
    new_features = new_features_list[i].cpu()
    
    def objective(params):
        score = 0
        try:
            model = clf(**params, random_state = 42)
            model.fit(new_features[train], y[train])
            y_pred = model.predict(new_features[val])
            score = accuracy_score(y[val], y_pred)
        except Exception:
            print(Exception)
            print(params)
            score = -100
        return {'loss': -score, 'status': STATUS_OK}
    
    best_params = fmin(objective, space, algo=tpe.suggest, max_evals=1_000, verbose=1)

    for key,value in lr_choices.items():
        best_params[key] = value[best_params[key]]

    best_model = clf(**best_params, random_state = 42)
    best_model.fit(new_features[train], y[train])
    y_pred = best_model.predict(new_features[val])
    
    hops_to_params[hops] = best_params
    hops_to_score[hops] = accuracy_score(y[val], y_pred)

0it [00:00, ?it/s]


  0%|                                                                          | 0/1000 [00:00<?, ?trial/s, best loss=?][A
[A                                                                                                                     <class 'Exception'>

  0%|                                                                          | 0/1000 [00:00<?, ?trial/s, best loss=?][A
[A                                                                                                                     {'criterion': 'gini', 'max_depth': 64, 'max_features': None, 'min_samples_leaf': 0.4801680288353287, 'min_samples_split': 0.9078399469592726, 'min_weight_fraction_leaf': 0.5705270867252872}

  0%|                                                                          | 0/1000 [00:00<?, ?trial/s, best loss=?][A
[A                                                                                                                     <class 'Exception'>

  0%|                             

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



[A
 97%|██████████████████████████████████████████████████████████  | 967/1000 [01:14<00:02, 15.47trial/s, best loss: -0.7][A
 97%|██████████████████████████████████████████████████████████▏ | 969/1000 [01:14<00:02, 10.73trial/s, best loss: -0.7][A
 97%|██████████████████████████████████████████████████████████▎ | 971/1000 [01:15<00:02, 10.17trial/s, best loss: -0.7][A
 97%|██████████████████████████████████████████████████████████▍ | 973/1000 [01:15<00:02, 10.42trial/s, best loss: -0.7][A
[A                                                                                                                     <class 'Exception'>

 97%|██████████████████████████████████████████████████████████▍ | 973/1000 [01:15<00:02, 10.42trial/s, best loss: -0.7][A
[A                                                                                                                     {'criterion': 'gini', 'max_depth': 49, 'max_features': 'auto', 'min_samples_leaf': 0.22596995564398434, 'min_sample

In [None]:
import pandas as pd
df = pd.DataFrame(columns=["best_params"], index=hops_to_params.keys())
df["best_params"] = hops_to_params.values()
df.to_csv("normalized_origin_features_plus_summed_neighbors_wo_attn.csv")
df

In [None]:
hops_to_score

In [None]:
hops_to_params