In [1]:
!pip install torchmetrics
!pip install comet_ml torch torchvision tqdm

Defaulting to user installation because normal site-packages is not writeable

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m23.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49m/usr/bin/python -m pip install --upgrade pip[0m
Defaulting to user installation because normal site-packages is not writeable

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m23.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49m/usr/bin/python -m pip install --upgrade pip[0m


In [1]:
import sys
 
sys.path.insert(0, "../")

from EnsembleFramework import Framework

In [2]:
import torch
from torch import nn
class NeuralNet(nn.Module):
    def __init__(self, input_dim, hidden_dim, out_dim, dropout=.0):
        super().__init__()
        self.lin1 = nn.Linear(input_dim , hidden_dim)
        self.lin2 = nn.Linear(hidden_dim, out_dim)

        torch.nn.init.xavier_uniform_(self.lin1.weight) 
        torch.nn.init.xavier_uniform_(self.lin2.weight) 
        self.lin1.bias.data.fill_(0.0)
        self.lin2.bias.data.fill_(0.0)
        self.dropout = nn.Dropout(p=dropout)
    
    def forward(self, x):
        torch.manual_seed(1)
        torch.cuda.manual_seed(1)
        x = self.dropout(x)
        x = self.lin1(x)
        x = nn.functional.elu(x)
        torch.manual_seed(1)
        torch.cuda.manual_seed(1)
        x = self.dropout(x)
        x = self.lin2(x)
        x = nn.functional.softmax(x, dim = 1)
        return x

In [3]:
import copy
from torchmetrics import Accuracy
import matplotlib.pyplot as plt

class Classifier():
    device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
    metrics = dict({})
    metrics["train"] = []
    metrics["val"] = []
    metrics["test"] = []
    
    losses = dict({})
    losses["train"] = []
    losses["val"] = []
    losses["test"] = []
    
    models = []
    state_dicts = []
    
    best_state_dict = None
    best_metric = dict({})
    best_loss = dict({})
    best_model = None
    
    
    def __init__(self, input_dim, num_classes,lr=3e-2,weight_decay=1e-4, epochs=100_000, patience = 100, dropout=.6):
        assert num_classes == 7
        self.model = NeuralNet(input_dim, 16, num_classes, dropout).to(Classifier.device)

        self.input_dim = input_dim
        self.num_classes = num_classes
        self.lr = lr
        self.weight_decay = weight_decay
        self.epochs = epochs
        self.patience = patience
        self.dropout = dropout
        self.optim = torch.optim.Adam(self.model.parameters(), lr=lr, weight_decay= weight_decay)
        self.metric_fn = Accuracy(task="multiclass", num_classes=num_classes).to(Classifier.device)
        self.loss_fn = nn.CrossEntropyLoss(reduction='mean')
        self.enough_training_for_today = False
        self.empty()

    def empty(self):
        for key in self.metrics:
            Classifier.metrics[key] = []
        for key in self.losses:
            Classifier.losses[key] = []
        Classifier.models = []
        Classifier.state_dicts = []
        
        Classifier.best_state_dict = None
        Classifier.best_metric = dict({})
        Classifier.best_loss = dict({})
        Classifier.best_model = None

    def fit(self, X_train, y_train, X=None, y=None):
        X = Classifier.dict_to_device(X)
        y = Classifier.dict_to_device(y)
        if not torch.is_tensor(X_train):
            X_train = torch.from_numpy(X_train)
        if not torch.is_tensor(y_train):
            y_train = torch.from_numpy(y_train)
        X_train = X_train.to(self.device)
        y_train = y_train.to(self.device)
        for epoch in range(self.epochs):
            self.model.train()
            logits = self.model(X_train)
            self.optim.zero_grad()
            loss = self.loss_fn(logits, y_train)
            loss.backward()
            self.optim.step()
            
            self.eval_all(X, y)
    
            if self.enough_training_for_today:
                self.store_best(self.patience)
                break
        self.store_best(0)

    def eval_all(self, X, y):
        self.evaluate(X["train"], y["train"], "train")
        self.evaluate(X["val"], y["val"], "val")
        self.evaluate(X["test"], y["test"], "test")

    def evaluate(self, x, y, set_name):
        with torch.inference_mode():
            self.model.eval()
            logits = self.model(x)
            loss = self.loss_fn(logits, y)
            metric = self.metric_fn(y, logits.argmax(1))
            if set_name == "val" and len(Classifier.losses[set_name]) >= self.patience:
                last_metrics = Classifier.metrics[set_name][-self.patience:]
                last_losses = Classifier.losses[set_name][-self.patience:]
                
                if all([(m >= metric.item()) for m in last_metrics]) or all([(l <= loss.item()) for l in last_losses]):
                    self.enough_training_for_today = True
    
            Classifier.metrics[set_name].append(metric.item())
            Classifier.losses[set_name].append(loss.item())
            Classifier.state_dicts.append(self.model.state_dict().copy())
            Classifier.models.append(copy.deepcopy(self.model))

    def store_best(self, offset):
        for key, value in Classifier.metrics.items():
                    Classifier.best_metric[key] = value[-1-offset]
        for key, value in Classifier.losses.items():
            Classifier.best_loss[key] = value[-1-offset]
        Classifier.best_state_dict = Classifier.state_dicts[-1-offset]
        Classifier.best_model = Classifier.models[-1-offset]

    @staticmethod
    def dict_to_device(store_dict):
        for key in store_dict:
            store_dict[key] = store_dict[key].to(Classifier.device)
        return store_dict

    @staticmethod
    def plot(store_dict, title):
        for key, value in store_dict.items():
            plt.plot(value)
        plt.legend(store_dict.keys())
        plt.title(title)
        plt.show()

    def get_params(self, **kwargs):
        return {'lr': self.lr, 'weight_decay': self.weight_decay, 'patience': self.patience, 'dropout': self.dropout, 'input_dim': self.input_dim, 'num_classes':self.num_classes}

    def set_params(self, **kwargs):
        self.lr = kwargs["lr"]
        self.weight_decay = kwargs["weight_decay"]
        self.patience = kwargs["patience"]
        self.dropout = kwargs["dropout"]
        # self.input_dim = kwargs["input_dim"]
        # self.num_classes = kwargs["num_classes"]

    def predict_proba(self, X):
        if not torch.is_tensor(X):
            X = torch.from_numpy(X)
        X = X.to(self.device)
        with torch.inference_mode():
            self.model.eval()
            logits = self.model(X)
            return logits.cpu().numpy()

    def predict(self, X):
        return self.predict_proba(X).argmax(1)

In [4]:
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
from torch_geometric.utils import add_self_loops

dataset_name = 'Cora'
split = "public"
dataset = Planetoid(root='/tmp/Cora', name=dataset_name, split=split)
dataset.transform = T.NormalizeFeatures()

features =  dataset[0].x
labels =  dataset[0].y

test =  dataset[0].test_mask
train = dataset[0].train_mask
val =  dataset[0].val_mask

edge_index = dataset[0].edge_index 
edge_index = add_self_loops(edge_index)[0]

X = dict({})
X["train"] = features[train]
X["val"] = features[val]
X["test"] = features[test]

y = dict({})
y["train"] = labels[train]
y["val"] = labels[val]
y["test"] = labels[test]

In [5]:
from torch.nn.functional import normalize
def user_function(kwargs):
    return  normalize(kwargs["original_features"] + kwargs["summed_neighbors"], p=2.0, dim = 1)
hops_list = [0,1,2,3,4,5,6,7,8,9,10,15,20,30,40,50]
framework = Framework([user_function for i in hops_list], 
                     hops_list=hops_list, ## to obtain best for local neighborhood
                     clfs=[],
                     gpu_idx=0,
                     handle_nan=0.0,
                    attention_configs=[None for i in hops_list])
new_features_list = framework.get_features(features, edge_index, None)


In [6]:
hops_list

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 30, 40, 50]

In [None]:
from sklearn.model_selection import GridSearchCV
import numpy as np
from sklearn.model_selection import ParameterGrid
from tqdm.notebook import tqdm
import time 

space = {
    'lr': np.linspace(1e-5, 1e-2, 30, endpoint=True),
    'weight_decay': np.linspace(1e-5, 1e-2, 30, endpoint=True),
    'patience': [20, 100],#100
    'dropout':[0, 0.1, 0.2,.3,.4,.5,.6]#
}

def transform_kwargs_fit(framework, kwargs, i):
    new_kwargs = dict({})
    new_kwargs["X"]= dict({})
    new_kwargs["y"]= dict({})
    
    new_kwargs["X"]["train"] = framework.get_features(kwargs["X"], kwargs["edge_index"], kwargs["train"])[i]
    new_kwargs["X"]["test"] = framework.get_features(kwargs["X"], kwargs["edge_index"], kwargs["test"])[i]
    new_kwargs["X"]["val"] = framework.get_features(kwargs["X"], kwargs["edge_index"], kwargs["val"])[i]
    new_kwargs["y"]["train"] = kwargs["y"][kwargs["train"]]
    new_kwargs["y"]["test"] = kwargs["y"][kwargs["test"]]
    new_kwargs["y"]["val"] = kwargs["y"][kwargs["val"]]
    return new_kwargs

hops_to_score=dict({})
hops_to_params=dict({})
for i, hops in enumerate(hops_list):
    new_features = new_features_list[i]
    num_classes = torch.unique(labels).shape[0]

    param_grid = ParameterGrid(space)
    best_score = 0
    best_params = None
    for params in tqdm(param_grid.__iter__()):
        framework = Framework([user_function], 
                         hops_list=[hops], ## to obtain best for local neighborhood
                         clfs=[Classifier(features.shape[1], num_classes,epochs=100_000, **params)],
                         gpu_idx=0,
                         handle_nan=0.0,
                        attention_configs=[None])
        models = framework.fit(X_train = features,
                edge_index = edge_index,
                y_train = labels,
                train_mask = train,
                kwargs_fit_list = [dict({
                    "X": features,
                    "edge_index": edge_index,
                    "y": labels,
                    "train": train,
                    "test": test,
                    "val": val,
                })],
                transform_kwargs_fit = transform_kwargs_fit)  
        score = models[0].best_metric["val"]
        if score > best_score:
            best_score = score
            best_params = params
            print(best_score)
            
    hops_to_score[hops] = best_score
    hops_to_params[hops] = best_params
    start = time.time()
   
    X = dict({})
    X["train"] = new_features[train]
    X["val"] = new_features[val]
    X["test"] = new_features[test]
    
    y = dict({})
    y["train"] = labels[train]
    y["val"] = labels[val]
    y["test"] = labels[test]
    

0it [00:00, ?it/s]

0.13199999928474426
0.14399999380111694
0.1599999964237213
0.2540000081062317
0.25600001215934753
0.4819999933242798
0.4860000014305115
0.5339999794960022
0.5540000200271606
0.5740000009536743
0.578000009059906
0.5799999833106995
0.5839999914169312
0.5860000252723694


0it [00:00, ?it/s]

0.24400000274181366
0.6039999723434448
0.6859999895095825
0.7059999704360962
0.7279999852180481
0.7400000095367432
0.75
0.7639999985694885
0.7680000066757202
0.7699999809265137
0.7720000147819519
0.7739999890327454
0.777999997138977
0.7799999713897705
0.7820000052452087
0.7839999794960022


In [None]:
import pandas as pd
df = pd.DataFrame(columns=["best_params"], index=hops_to_params.keys())
df["best_params"] = hops_to_params.values()
df.to_csv("normalized_origin_features_plus_summed_neighbors_wo_attn.csv")
df

In [None]:
hops_to_params

In [None]:
hops_to_score

In [None]:
raise Exception

In [34]:
hops_to_params

{0: {'dropout': 0,
  'lr': 0.021631578947368422,
  'patience': 100,
  'weight_decay': 7.631578947368422e-05},
 1: {'dropout': 0.1,
  'lr': 0.0006989655172413794,
  'patience': 100,
  'weight_decay': 0.001043448275862069},
 2: {'dropout': 0,
  'lr': 0.004488275862068966,
  'patience': 100,
  'weight_decay': 0.004143793103448276},
 3: {'dropout': 0,
  'lr': 0.0006989655172413794,
  'patience': 100,
  'weight_decay': 0.002076896551724138},
 5: {'dropout': 0,
  'lr': 0.001043448275862069,
  'patience': 100,
  'weight_decay': 0.003110344827586207},
 10: {'dropout': 0,
  'lr': 0.003110344827586207,
  'patience': 100,
  'weight_decay': 0.0013879310344827587}}

In [9]:
hops_to_score

{1: 0.7760000228881836,
 2: 0.8019999861717224,
 3: 0.8119999766349792,
 5: 0.8180000185966492,
 10: 0.8220000267028809}

In [42]:
hops_to_params = {
    0:{'dropout': 0,
 'lr': 0.021631578947368422,
 'patience': 100,
 'weight_decay': 7.631578947368422e-05},
    1: {'dropout': 0.1,
  'lr': 0.0006989655172413794,
  'patience': 100,
  'weight_decay': 0.001043448275862069},
 2: {'dropout': 0,
  'lr': 0.004488275862068966,
  'patience': 100,
  'weight_decay': 0.004143793103448276},
 3: {'dropout': 0,
  'lr': 0.0006989655172413794,
  'patience': 100,
  'weight_decay': 0.002076896551724138},
 5: {'dropout': 0,
  'lr': 0.001043448275862069,
  'patience': 100,
  'weight_decay': 0.003110344827586207},
 10: {'dropout': 0,
  'lr': 0.003110344827586207,
  'patience': 100,
  'weight_decay': 0.0013879310344827587}}

In [89]:
from sklearn.metrics import accuracy_score
import time
def transform_kwargs_fit(framework, kwargs, i):
    new_kwargs = dict({})
    new_kwargs["X"]= dict({})
    new_kwargs["y"]= dict({})
    
    new_kwargs["X"]["train"] = framework.get_features(kwargs["X"], kwargs["edge_index"], kwargs["train"])[i]
    new_kwargs["X"]["test"] = framework.get_features(kwargs["X"], kwargs["edge_index"], kwargs["test"])[i]
    new_kwargs["X"]["val"] = framework.get_features(kwargs["X"], kwargs["edge_index"], kwargs["val"])[i]
    new_kwargs["y"]["train"] = kwargs["y"][kwargs["train"]]
    new_kwargs["y"]["test"] = kwargs["y"][kwargs["test"]]
    new_kwargs["y"]["val"] = kwargs["y"][kwargs["val"]]
    return new_kwargs

hops_lists =[[0,0], [0,1], [0,2], [0,3], [0,5], [0,10], [0,2,10]]
for hops_list in hops_lists:
    num_classes = torch.unique(labels).shape[0]
    
    framework = Framework([user_function for i in hops_list], 
                         hops_list=hops_list, ## to obtain best for local neighborhood
                         clfs=[Classifier(features.shape[1], num_classes,epochs=100_000, **hops_to_params[hops]) for hops in hops_list],
                         gpu_idx=0,
                         handle_nan=0.0,
                        attention_configs=[None for i in hops_list])
    start = time.time()
    models = framework.fit(X_train = features,
                edge_index = edge_index,
                y_train = labels,
                train_mask = train,
                kwargs_fit_list = [dict({
                    "X": features,
                    "edge_index": edge_index,
                    "y": labels,
                    "train": train,
                    "test": test,
                    "val": val,
                }) for i in hops_list],
                transform_kwargs_fit = transform_kwargs_fit)    
    print(f"Required training time: {str(time.time() - start)}")
    y_pred = framework.predict(features, edge_index, test)
    y_pred_val = framework.predict(features, edge_index, val)
    print(f"Hops-list {str(hops_list)}; Test-acc {str(accuracy_score(labels[test], y_pred))}; Val acc {str(accuracy_score(labels[val], y_pred_val))}")

Required training time: 3.185173749923706
Hops-list [0, 0]; Test-acc 0.595; Val acc 0.574
Required training time: 4.078294038772583
Hops-list [0, 1]; Test-acc 0.719; Val acc 0.722
Required training time: 3.6962010860443115
Hops-list [0, 2]; Test-acc 0.771; Val acc 0.752
Required training time: 3.051548957824707
Hops-list [0, 3]; Test-acc 0.764; Val acc 0.748
Required training time: 4.860649824142456
Hops-list [0, 5]; Test-acc 0.79; Val acc 0.766
Required training time: 4.759060859680176
Hops-list [0, 10]; Test-acc 0.798; Val acc 0.784
Required training time: 7.882010221481323
Hops-list [0, 2, 10]; Test-acc 0.82; Val acc 0.794
