# Evaluating graph awareness across different algorithms

## Read data

In [35]:
from torch_geometric.datasets import Planetoid
from EnsembleFramework import Framework
import torch_geometric.transforms as T
from torch_geometric.utils import add_self_loops

# Dataset selection: the Planetoid dataset named below, using its "public" split.
dataset_name = 'Cora'
split = "public"
dataset = Planetoid(root='/tmp/Cora', name=dataset_name, split=split)
dataset.transform = T.NormalizeFeatures()

# The dataset is indexed once and every field is read from that single graph.
graph = dataset[0]

# Node features and labels.
features, y = graph.x, graph.y

# Masks selecting the train / validation / test nodes.
train, val, test = graph.train_mask, graph.val_mask, graph.test_mask

# add_self_loops returns a tuple; only its first element is kept as the edge index.
edge_index = add_self_loops(graph.edge_index)[0]

## Define Hyperparameter spaces

### Logistic regression hyperparameter space

In [80]:
from hyperopt import hp

# Discrete options for LogisticRegression; every list becomes one hp.choice node.
lr_choices = dict(
    penalty=["l2"],
    max_iter=[2**exponent for exponent in range(6, 15)],  # 64, 128, ..., 16384
)

# Begin with the categorical dimensions, then append the continuous ones.
lr_space = {name: hp.choice(name, options) for name, options in lr_choices.items()}
lr_space['tol'] = hp.loguniform('tol', -11, -3)  # bounds are given in log space
lr_space['C'] = hp.uniform('C', 0.0, 10)

### Support Vector classifier hyperparameter space

In [81]:
from hyperopt import hp

# Discrete options for SVC; "probability" has a single option, so it is always True.
# NOTE(review): presumably required so the fitted model can output class
# probabilities -- confirm against the AutoTune code.
svc_choices = dict(
    gamma=["scale", "auto"],
    probability=[True],
)

# Categorical dimensions first, then the continuous regularisation strength.
svc_space = {label: hp.choice(label, candidates) for label, candidates in svc_choices.items()}
svc_space['C'] = hp.uniform('C', 0.0, 150)

### Decision tree hyperparameter space

In [82]:
from hyperopt import hp

# Discrete options for DecisionTreeClassifier.
dt_choices = dict(
    criterion=["gini"],
    # Unlimited depth, or the squares of 5..9.
    max_depth=[None, 25, 36, 49, 64, 81],
)

# Categorical dimensions first; the remaining ones are sampled uniformly as
# fractions (of samples, of total weight, or of features).
dt_space = {label: hp.choice(label, candidates) for label, candidates in dt_choices.items()}
dt_space.update(
    min_samples_split=hp.uniform('min_samples_split', 0.0, 1.0),
    min_samples_leaf=hp.uniform('min_samples_leaf', 0.0, .5),
    min_weight_fraction_leaf=hp.uniform('min_weight_fraction_leaf', 0.0, 0.5),
    max_features=hp.uniform('max_features', 0.0, 1.0),
)

### XGBoost hyperparameter space

In [83]:
from hyperopt import hp

# Candidate values for the categorical XGBoost dimensions.
booster_self = ["gbtree"]
n_estimators_self = list(range(1_400, 2_001, 200))  # 1400, 1600, 1800, 2000
max_depth_self = [None, 2, 3, 4]
max_delta_step_self = [1, 2, 3]
min_child_weight_self = [None, 1, 2, 3, 4]

# Parameter name -> list of options; each entry becomes one hp.choice node.
xb_choices = dict(
    booster=booster_self,
    n_estimators=n_estimators_self,
    max_depth=max_depth_self,
    max_delta_step=max_delta_step_self,
    min_child_weight=min_child_weight_self,
    # 'device': ["cuda:2"],
    tree_method=["hist"],
)

# Categorical dimensions first, continuous ones afterwards.
xb_space = {label: hp.choice(label, candidates) for label, candidates in xb_choices.items()}
xb_space.update(
    # loguniform bounds are given in log space.
    eta=hp.loguniform('eta', -3, -.4),
    subsample=hp.uniform('subsample', 0.6, 1),
    reg_lambda=hp.loguniform('reg_lambda', -5, 5),
    reg_alpha=hp.loguniform('reg_alpha', -3, 1),
    gamma=hp.uniform('gamma', 0, .8),
    colsample_bytree=hp.uniform('colsample_bytree', 0.6, 1),
)

### Random forest hyperparameter space

In [84]:
from hyperopt import hp

# Categorical dimensions of the random-forest search space.
# 'min_samples_split' is intentionally not listed here: it is searched as a
# continuous fraction in rf_space below, and a dict key defined twice would be
# silently overridden by the later definition.
rf_choices = {
    'n_estimators': [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000],
    'max_depth': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, None],
    "criterion": ["gini", "entropy", "log_loss"],
}

# Full search space: one hp.choice node per categorical dimension plus the
# continuous dimensions, all sampled uniformly as fractions.
rf_space = {
    **{key: hp.choice(key, value) for key, value in rf_choices.items()},
    'max_samples': hp.uniform('max_samples', 0.0, 1),
    # NOTE(review): the decision-tree space caps min_samples_leaf at 0.5;
    # confirm that an upper bound of 1.0 is intended here.
    'min_samples_leaf': hp.uniform('min_samples_leaf', 0.0, 1.0),
    'min_samples_split': hp.uniform('min_samples_split', 0.0, 1.0),
    'max_features': hp.uniform('max_features', 0.0, 1.0),
}

In [108]:
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# Search space for each classifier, keyed by the classifier's class name.
clfs_space = {
    "RandomForestClassifier": rf_space,
    "LogisticRegression": lr_space,
    "DecisionTreeClassifier": dt_space,
    "XGBClassifier": xb_space,
    "SVC": svc_space,
}

# All classifiers that have a search space defined above ...
clfs = [
    RandomForestClassifier,
    LogisticRegression,
    DecisionTreeClassifier,
    XGBClassifier,
    SVC,
]
# ... immediately replaced by the subset to run; this second assignment is
# the one that takes effect.
clfs = [RandomForestClassifier]

## Convert data into the format expected by AutoTune

In [109]:
# Bundle features, labels, split masks and graph connectivity into the single
# dictionary that is handed to AutoSearch.
cora_set = {
    "X": features,
    "y": y,
    "test": test,
    "train": train,
    "val": val,
    "edge_index": edge_index,
}

## Start AutoTune search

In [110]:
from AutoTune2 import AutoSearch
from sklearn.metrics import accuracy_score
from torch.nn.functional import normalize

def user_function(kwargs):
    """Combine a node's own features with its summed neighbor features.

    Args:
        kwargs: Mapping that provides the tensors ``"original_features"``
            and ``"summed_neighbors"``.

    Returns:
        The element-wise sum of both tensors, L2-normalized along dim 1.
    """
    combined = kwargs["original_features"] + kwargs["summed_neighbors"]
    return normalize(combined, p=2.0, dim=1)

# Set up the search over the prepared data, scored with accuracy.
# NOTE(review): max_evals presumably is the number of trials per search and
# parallelism the number of concurrent trials -- confirm against AutoTune2.
searcher = AutoSearch(
    cora_set,
    max_evals=500,
    pred_metric=accuracy_score,
    parallelism=50,
)

# Hop counts to evaluate.
hops = list(range(4))

# Run the search for every classifier / hop combination, with a single
# aggregation function and no attention configuration.
store = searcher.search(
    clfs,
    clfs_space,
    hops=hops,
    user_functions=[user_function],
    attention_configs=[None],
)

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Total Trials: 500: 500 succeeded, 0 failed, 0 cancelled.                        


  0%|          | 0/1 [00:00<?, ?it/s]

Total Trials: 500: 500 succeeded, 0 failed, 0 cancelled.                        


  0%|          | 0/1 [00:00<?, ?it/s]

Total Trials: 500: 500 succeeded, 0 failed, 0 cancelled.                        


  0%|          | 0/1 [00:00<?, ?it/s]

Total Trials: 500: 500 succeeded, 0 failed, 0 cancelled.                        


## Print results

In [113]:
# One header line per classifier, then "<hop>\t<test accuracy>" for each hop.
for clf in store:
    print(clf)
    results_by_hop = store[clf]
    for hop in results_by_hop:
        print(hop, results_by_hop[hop]["test_acc"], sep="\t")

RandomForestClassifier
0	0.603
1	0.784
2	0.79
3	0.79
LogisticRegression
0	0.594
1	0.777
2	0.812
3	0.827
DecisionTreeClassifier
0	0.472
1	0.595
2	0.664
3	0.677
XGBClassifier
0	0.561
1	0.749
2	0.779
3	0.801
SVC
0	0.573
1	0.778
2	0.81
3	0.823


In [125]:
# Collect the parameters of every stored model as
# {classifier name: {hop: parameter dict}} and print them along the way.
clf_hop_to_params = {}
for clf in store:
    print(50*"#")
    clf_hop_to_params[clf] = {}
    for hop in store[clf]:
        print(f"Classifier {clf} with {hop} hops:")
        # Query the model once so the printed and the stored parameters are
        # guaranteed to be the same dictionary.
        params = store[clf][hop]["model"].get_params()
        print(params)
        clf_hop_to_params[clf][hop] = params

##################################################
Classifier RandomForestClassifier with 0 hops:
{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 70, 'max_features': 0.017454264780728222, 'max_leaf_nodes': None, 'max_samples': 0.8144623806543172, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 0.001206120249848465, 'min_samples_split': 0.30634714958041454, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 1200, 'n_jobs': None, 'oob_score': False, 'random_state': None, 'verbose': 0, 'warm_start': False}
Classifier RandomForestClassifier with 1 hops:
{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 70, 'max_features': 0.02213797323722393, 'max_leaf_nodes': None, 'max_samples': 0.8904645788222673, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 0.01604692824272263, 'min_samples_split': 0.32146517860140683, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 1400, 'n_jobs': None, 'oob_score': False, 

In [127]:
import pandas as pd
# Write the collected hyperparameters to disk: with from_dict's default
# orientation the classifiers become columns and the hops become rows.
params_table = pd.DataFrame.from_dict(clf_hop_to_params)
params_table.to_csv("Hyperparameters_Cora_WithoutAttention.csv")