In [1]:
import sys
 
# Prepend the parent directory to the module search path, presumably so that
# the EnsembleFramework module imported below can be found there. Note that
# "../" is resolved against the kernel's current working directory.
sys.path.insert(0, "../")

from EnsembleFramework import Framework

In [2]:
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
from torch_geometric.utils import add_self_loops

# Load the Planetoid dataset with its public train/val/test split.
dataset_name = 'Cora'
split = "public"
dataset = Planetoid(root='/tmp/Cora', name=dataset_name, split=split)
# The transform is attached after construction; it is expected to be applied
# whenever an item is fetched from the dataset (see PyG Dataset docs).
dataset.transform = T.NormalizeFeatures()

# Fetch the single graph once and unpack everything the later cells need.
graph = dataset[0]
features, y = graph.x, graph.y
train, val, test = graph.train_mask, graph.val_mask, graph.test_mask

# add_self_loops returns a tuple; element 0 is the augmented edge index.
edge_index = add_self_loops(graph.edge_index)[0]

In [3]:
from torch.nn.functional import normalize
def user_function(kwargs):
    """Add the original features to the summed neighbor features and L2-normalize.

    Args:
        kwargs: Mapping providing the tensors ``"original_features"`` and
            ``"summed_neighbors"``; the two must be addable.

    Returns:
        The element-wise sum of both tensors, with every slice along ``dim=1``
        (each row of a 2-D input) rescaled to unit L2 norm.
    """
    combined = kwargs["original_features"] + kwargs["summed_neighbors"]
    return normalize(combined, p=2.0, dim=1)
# Hop counts to evaluate; the per-hop lists below are aligned with this list.
hops_list = [0,1,2,3,4,5,6,7,8,9,10,15,20,30,40,50]
framework = Framework(
    [user_function] * len(hops_list),  # same aggregation function for every hop count
    hops_list=hops_list,  # to obtain best for local neighborhood
    clfs=[],  # no classifiers are passed; only get_features is called below
    gpu_idx=0,
    handle_nan=0.0,
    attention_configs=[None] * len(hops_list),  # one None entry per hop count
)
# Result is indexed positionally, in the same order as hops_list (see its use
# in the search loop).
new_features_list = framework.get_features(features, edge_index, None)

In [4]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import numpy as np
from sklearn.model_selection import ParameterGrid
from tqdm.notebook import tqdm
from hyperopt import fmin, tpe, hp,STATUS_OK

# Categorical SVC hyperparameters. The lists are kept by name because the
# search loop maps the indices returned by fmin back to these values.
kernel = ["linear", "poly", "rbf", "sigmoid"]
degree = list(range(2, 5))
gamma = ["scale", "auto"]

lr_choices = dict(kernel=kernel, degree=degree, gamma=gamma)

# hyperopt search space: one hp.choice per categorical parameter, followed by
# the two continuous parameters (insertion order: kernel, degree, gamma, tol, C).
space = {name: hp.choice(name, options) for name, options in lr_choices.items()}
space.update(
    tol=hp.loguniform('tol', -11, -3),  # bounds are given in log space
    C=hp.uniform('C', 0.0, 150),
)

# Classifier under tuning, plus per-hop results: the best parameters found and
# the validation accuracy of a model refit with them.
clf = SVC
hops_to_params = {}
hops_to_score = {}

# Wrap the list itself (not the enumerate object) so tqdm can read its length
# and render a real progress bar instead of "0it [00:00, ?it/s]".
for i, hops in enumerate(tqdm(hops_list)):
    new_features = new_features_list[i].cpu()

    def objective(params):
        """Fit an SVC with ``params`` on the training nodes and score it on validation.

        Args:
            params: Keyword arguments for the classifier, sampled by hyperopt.

        Returns:
            A hyperopt result dict whose ``loss`` is the negated validation
            accuracy. A trial that raises is reported and given loss 100 so
            that the search continues but never selects it.
        """
        try:
            model = clf(**params, random_state=42, probability=True, cache_size=800)
            model.fit(new_features[train], y[train])
            y_pred = model.predict(new_features[val])
            score = accuracy_score(y[val], y_pred)
        except Exception as exc:
            # Print the caught error itself; printing the Exception class
            # gives no information about what went wrong.
            print(repr(exc))
            print(params)
            score = -100
        return {'loss': -score, 'status': STATUS_OK}

    # NOTE(review): no `rstate` is passed to fmin, so the search is presumably
    # not reproducible across runs - consider seeding it.
    best_params = fmin(objective, space, algo=tpe.suggest, max_evals=100, verbose=1)

    # fmin reports hp.choice parameters as indices into their option lists;
    # translate them back to the actual values.
    for key, value in lr_choices.items():
        best_params[key] = value[best_params[key]]

    # Refit with the winning configuration to record its validation accuracy.
    best_model = clf(**best_params, random_state=42, probability=True, cache_size=800)
    best_model.fit(new_features[train], y[train])
    y_pred = best_model.predict(new_features[val])

    hops_to_params[hops] = best_params
    hops_to_score[hops] = accuracy_score(y[val], y_pred)

0it [00:00, ?it/s]


  0%|                                                                           | 0/100 [00:00<?, ?trial/s, best loss=?][A
  1%|▌                                                            | 1/100 [00:00<00:10,  9.32trial/s, best loss: -0.316][A
  2%|█▏                                                           | 2/100 [00:00<00:10,  9.60trial/s, best loss: -0.316][A
  3%|█▊                                                           | 3/100 [00:00<00:10,  9.56trial/s, best loss: -0.568][A
  4%|██▍                                                          | 4/100 [00:00<00:10,  9.35trial/s, best loss: -0.568][A
  5%|███                                                           | 5/100 [00:00<00:10,  9.33trial/s, best loss: -0.57][A
  6%|███▋                                                          | 6/100 [00:00<00:10,  9.21trial/s, best loss: -0.57][A
  7%|████▎                                                         | 7/100 [00:00<00:10,  9.10trial/s, best loss: -0.57][A
  8%|██

In [5]:
import pandas as pd
# One row per hop count, holding the best parameter dict found for it.
df = pd.DataFrame(
    {"best_params": list(hops_to_params.values())},
    index=list(hops_to_params.keys()),
)
# Persist the table, then leave it as the cell's displayed value.
df.to_csv("normalized_origin_features_plus_summed_neighbors_wo_attn.csv")
df

Unnamed: 0,best_params
0,"{'C': 147.40101960911528, 'degree': 4, 'gamma'..."
1,"{'C': 90.60017807898211, 'degree': 4, 'gamma':..."
2,"{'C': 117.92124212871212, 'degree': 3, 'gamma'..."
3,"{'C': 2.7748532252182674, 'degree': 2, 'gamma'..."
4,"{'C': 5.669251187871323, 'degree': 4, 'gamma':..."
5,"{'C': 6.588290370053819, 'degree': 2, 'gamma':..."
6,"{'C': 3.4290311790983417, 'degree': 4, 'gamma'..."
7,"{'C': 5.815909573106612, 'degree': 4, 'gamma':..."
8,"{'C': 5.909413674525595, 'degree': 2, 'gamma':..."
9,"{'C': 4.989748336002604, 'degree': 3, 'gamma':..."


In [5]:
# Display the validation accuracy recorded for each hop count.
hops_to_score

{0: 0.606, 2: 0.814, 5: 0.806, 10: 0.806, 15: 0.814, 20: 0.814, 30: 0.808}

In [6]:
# Display the best hyperparameters found for each hop count.
hops_to_params

{0: {'C': 16.446210093451946,
  'degree': 3,
  'gamma': 'scale',
  'kernel': 'linear',
  'tol': 0.000135351820310177},
 2: {'C': 2.2454653587741507,
  'degree': 3,
  'gamma': 'scale',
  'kernel': 'rbf',
  'tol': 0.00019691417556260917},
 5: {'C': 2.592997015572764,
  'degree': 3,
  'gamma': 'scale',
  'kernel': 'rbf',
  'tol': 0.04866716101055986},
 10: {'C': 3.3515654989687267,
  'degree': 2,
  'gamma': 'auto',
  'kernel': 'linear',
  'tol': 0.0029514082294456625},
 15: {'C': 5.04509676482287,
  'degree': 4,
  'gamma': 'auto',
  'kernel': 'linear',
  'tol': 0.008655874875022741},
 20: {'C': 6.086161177881024,
  'degree': 2,
  'gamma': 'auto',
  'kernel': 'linear',
  'tol': 0.001121164624982405},
 30: {'C': 28.162688966389688,
  'degree': 3,
  'gamma': 'scale',
  'kernel': 'linear',
  'tol': 0.01697468319698367}}

In [7]:
# NOTE(review): bare literal with no visible purpose - looks like a leftover
# scratch cell; confirm whether it can be removed.
82

82