In [16]:
!pip install torch_geometric
!pip install hyperopt
!pip install tqdm
!pip install -U ipywidgets
!pip install -U jupyter

Collecting jupyter
  Downloading jupyter-1.0.0-py2.py3-none-any.whl (2.7 kB)
Collecting qtconsole (from jupyter)
  Downloading qtconsole-5.5.0-py3-none-any.whl.metadata (5.1 kB)
Collecting jupyter-console (from jupyter)
  Downloading jupyter_console-6.6.3-py3-none-any.whl (24 kB)
Collecting qtpy>=2.4.0 (from qtconsole->jupyter)
  Downloading QtPy-2.4.1-py3-none-any.whl.metadata (12 kB)
Downloading qtconsole-5.5.0-py3-none-any.whl (123 kB)
   ---------------------------------------- 123.4/123.4 kB 1.8 MB/s eta 0:00:00
Downloading QtPy-2.4.1-py3-none-any.whl (93 kB)
   ---------------------------------------- 93.5/93.5 kB 5.6 MB/s eta 0:00:00
Installing collected packages: qtpy, qtconsole, jupyter-console, jupyter
Successfully installed jupyter-1.0.0 jupyter-console-6.6.3 qtconsole-5.5.0 qtpy-2.4.1


In [1]:
import sys
 
sys.path.insert(0, "../")

from EnsembleFramework import Framework

In [2]:
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
from torch_geometric.utils import add_self_loops

dataset_name = 'Cora'
split = "public"
dataset = Planetoid(root='/tmp/Cora', name=dataset_name, split=split)
dataset.transform = T.NormalizeFeatures()

features =  dataset[0].x
y =  dataset[0].y

test =  dataset[0].test_mask
train = dataset[0].train_mask
val =  dataset[0].val_mask

edge_index = dataset[0].edge_index 
edge_index = add_self_loops(edge_index)[0]

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!


In [3]:
from torch.nn.functional import normalize
def user_function(kwargs):
    return  normalize(kwargs["original_features"] + kwargs["summed_neighbors"], p=2.0, dim = 1)
    
hops_list = [2,3] #[0,1,2,3,4,5,6,7,8,9,10,15,20,30,40,50]
framework = Framework([user_function for i in hops_list], 
                     hops_list=hops_list, ## to obtain best for local neighborhood
                     clfs=[],
                     gpu_idx=None,
                     handle_nan=0.0,
                    attention_configs=[None for i in hops_list])
new_features_list = framework.get_features(features, edge_index, None)

In [4]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import numpy as np
from sklearn.model_selection import ParameterGrid
from hyperopt import fmin, tpe, hp,STATUS_OK

n_estimators = [50, 100,150, 200, 400, 500, 800, 1_000, 1_200]
max_depth = [None, 2,3,4,5,8,10]
max_leaf_nodes = [None, 50, 100, 200]
# bootrtrap = [True, False]
# oob_score = [True, False]

lr_choices = {
    'n_estimators': n_estimators,
    'max_depth': max_depth,
    'max_leaf_nodes': max_leaf_nodes,
    # 'bootrtrap': bootrtrap,
    # 'oob_score': oob_score,
}

space = {
    **{key: hp.choice(key, value) for key, value in lr_choices.items()},
    'max_samples': hp.uniform('max_samples', 0.6, 0.8),
    'min_samples_leaf': hp.uniform('min_samples_leaf', 0.0, 0.8),
}

clf = RandomForestClassifier
hops_to_params=dict({})
hops_to_score=dict({})

for i, hops in enumerate(hops_list):
    print(f"Process: {i*100/len(hops_list)} %")
    new_features = new_features_list[i].cpu()
    
    def objective(params):
        score = 0
        try:
            model = clf(**params, random_state = 42,  n_jobs=48)
            model.fit(new_features[train], y[train])
            y_pred = model.predict(new_features[val])
            score = accuracy_score(y[val], y_pred)
        except Exception as e:
            print(e)
            print(params)
            score = -100
        return {'loss': -score, 'status': STATUS_OK}
    
    best_params = fmin(objective, space, algo=tpe.suggest, max_evals=1_000, verbose=False)

    for key,value in lr_choices.items():
        best_params[key] = value[best_params[key]]

    best_model = clf(**best_params, random_state = 42,  n_jobs=-1)
    best_model.fit(new_features[train], y[train])
    y_pred = best_model.predict(new_features[val])
    
    hops_to_params[hops] = best_params
    hops_to_score[hops] = accuracy_score(y[val], y_pred)

Process: 0.0 %
Process: 50.0 %


In [None]:
import pandas as pd
df = pd.DataFrame(columns=["best_params"], index=hops_to_params.keys())
df["best_params"] = hops_to_params.values()
df.to_csv("normalized_origin_features_plus_summed_neighbors_wo_attn.csv")
df

In [5]:
hops_to_params

{2: {'max_depth': None,
  'max_leaf_nodes': 50,
  'max_samples': 0.7060426014940925,
  'min_samples_leaf': 0.009056388688139162,
  'n_estimators': 800},
 3: {'max_depth': None,
  'max_leaf_nodes': 50,
  'max_samples': 0.7635587578926725,
  'min_samples_leaf': 0.012526205528464372,
  'n_estimators': 800}}

In [6]:
hops_to_score

{2: 0.792, 3: 0.758}

In [None]:
n_estimators = [50, 100,150, 200, 400, 500, 800, 1_000, 1_200]
max_depth = [None, 2,3,4,5,8,10]
max_leaf_nodes = [None, 50, 100, 200]

choices = {
    'n_estimators': n_estimators,
    'max_depth': max_depth,
    'max_leaf_nodes': max_leaf_nodes,
    # 'bootrtrap': bootrtrap,
    # 'oob_score': oob_score,
}

space = {
    **{key: hp.choice(key, value) for key, value in choices.items()},
    'max_samples': hp.uniform('max_samples', 0.6, 0.8),
    'min_samples_leaf': hp.uniform('min_samples_leaf', 0.0, 0.8),
}

clfs_space = dict({})
clfs_space["RandomForestClassifier"] = space

clfs = [RandomForestClassifier]

In [None]:
complete_data = {"X_train": train_set.x,
                "X_test": test_set.x,
                "X_val": val_set.x,
                "y_train": train_set.y,
                "y_test": test_set.y,
                "y_val": val_set.y,
                "edge_index_train": train_set.edge_index,
                "edge_index_test": test_set.edge_index,
                "edge_index_val": val_set.edge_index}

In [None]:
from AutoTune2 import AutoSearch
from sklearn.metrics import f1_score

searcher = AutoSearch(complete_data, multi_target_class=True, max_evals=45, pred_metric = f1_score, pred_metric_kwargs={"average":"micro"},
                      is_transductive = True, parallelism = 8)
store = searcher.search(clfs, clfs_space, hops=[3])