# Experiments

### imports

In [2]:
import torch
from torch_geometric.data import Data
from torch_geometric.transforms import BaseTransform
from torch_geometric.datasets import TUDataset, ZINC
from ogb.graphproppred import PygGraphPropPredDataset
import torch_geometric.utils as uts
from torch_geometric.utils import remove_self_loops, to_undirected

import numpy as np
import matplotlib.pyplot as plt
import itertools
from tqdm import tqdm

In [2]:
# Graph utilities
import networkx as nx
import graph_encoding.encoding as encoding 


In [3]:
from importlib import reload 

encoding = reload(encoding)


In [4]:
# sklearn imports
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA

## Helper functions

In [5]:
# set up embedding 

def add_testgraphs(encoded_data, limit_vertex = None,
            n_trees = 4, limit_trees = 10000,
            n_cycles = 4,limit_cycles = 10000, 
            n_cliques = 4, limit_cliques = 100):
  
    encoded_data.clear_all_testgraphs()
    encoded_data.add_single_vertex(limit = limit_vertex)
    encoded_data.add_trees(stop = n_trees, limit = limit_trees)
    encoded_data.add_cycles(stop = n_cycles, limit = limit_cycles)
    encoded_data.add_cliques(stop = n_cliques, limit = limit_cliques)
  

  

helper functions for model fitting:

In [24]:
# calculate fit and plot scores

def calculate_single_split_score(clf, X, y, cv_num, random_state,
                                 scoring='accuracy', test_size=0.25):
    X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                      test_size=test_size, random_state=random_state)
    clf.fit(X_train, y_train)

    train_score = clf.score(X_train, y_train)
    test_score = clf.score(X_test, y_test)

    return {'train_score' : train_score,'test_score': test_score}
    

def calculate_cv_scores(clf, X, y, cv_num, scoring='accuracy'):
  cv = cv_num
  scores = cross_val_score(clf, X, y, cv=cv_num, scoring = scoring)

  return scores

def plot_cv_scores(scores, clf_name, cv_num):
  width = 0.35
  labels = [f'G{n}' for n in range(1,cv_num+1)]
  fig = plt.figure()
  ax = fig.add_subplot()
  ax.bar(labels, scores, width)
  ax.set_ylabel('Scores')
  ax.set_title('Cross validation scores for '+clf_name)
  plt.axhline(y = scores.mean(), c = 'black', linewidth = 0.7, 
            label = f'Err = {scores.mean():.2f}' + u"\u00B1" + f'{scores.std():.2f}')
  ax.legend()

  plt.show()
  print(f'Validation error = {scores.mean():.2f}' + u"\u00B1" + f'{scores.std():.2f}')

## Experiments: graph classification tasks

### First experiment: MUTAG:

In [None]:
# load the data:

dataset = TUDataset(root='data/TUDataset', name='MUTAG')

print()
print(f'Dataset: {dataset}:')
print('====================')
print(f'Number of graphs: {len(dataset)}')
print(f'Number of features: {dataset.num_features}')
print(f'Number of classes: {dataset.num_classes}')

data = dataset[0]  # Get the first graph object.

print()
print(data)
print('=============================================================')


Dataset: MUTAG(188):
Number of graphs: 188
Number of features: 7
Number of classes: 2

Data(edge_index=[2, 38], x=[17, 7], edge_attr=[38, 4], y=[1])


In [None]:
# pre-processing MUTAG
Encoded_Dataset = [encode(data) for data in tqdm(dataset)]

100%|██████████| 188/188 [00:00<00:00, 2890.32it/s]


In [None]:
# get representation MUTAG
# labels
y = np.array([data.pyg_graph().y.detach().numpy() for data in Encoded_Dataset])
# vectors
X = np.array([data.ghc_encoder(format = 'numpy')  for data in tqdm(Encoded_Dataset)])
#%timeit Encoded_Dataset[0].ghc_encoder(format = 'numpy')

100%|██████████| 188/188 [00:01<00:00, 97.29it/s]


In [None]:
y = y.reshape(188)

### Experiment: "ogbg-molhiv" 

In [None]:
# setup the provided node encoder
from ogb.graphproppred.mol_encoder import AtomEncoder, BondEncoder
emb_dim = 10
atom_encoder = AtomEncoder(emb_dim)
bond_encoder = BondEncoder(emb_dim)

class atom_transform(BaseTransform):
  def __call__(self, data):
    newdata = data.clone()
    newdata.x = atom_encoder(data.x)
    return newdata

transform = atom_transform()

In [None]:
# load the data 
dataset = PygGraphPropPredDataset(name = "ogbg-molhiv", root = 'dataset/', transform = transform)

print()
print(f'Dataset: {dataset}:')
print('====================')
print(f'Number of graphs: {len(dataset)}')
print(f'Number of features: {dataset.num_features}')
print(f'Number of classes: {dataset.num_classes}')

data = dataset[7]  # Get the first graph object.

print()
print(data)
print('=============================================================')


Dataset: PygGraphPropPredDataset(41127):
Number of graphs: 41127
Number of features: 10
Number of classes: 2

Data(edge_index=[2, 38], edge_attr=[38, 3], x=[18, 10], y=[1, 1], num_nodes=18)


In [None]:
encoded_dataset = [encoding.grandEmbedding(data) for data in tqdm(dataset)]

In [None]:
def single_graph_data(i, encoder, file_name,
                      n_cliques, n_cycles, n_trees):
  add_graphs = lambda x: add_testgraphs(encoded_data = x,                                           n_trees= n_trees, limit_trees= 10000,                                              n_cycles=n_cycles, limit_cycles = 10000,
                                      n_cliques= n_cliques, limit_cliques=100)
  add_to_Dataset = [add_graphs(data) for data in tqdm(encoded_dataset)]
  # labels
  y = np.array([data.pyg_graph().y[0,0].detach().numpy() for data in tqdm(encoded_dataset)])
  #vectors
  X = np.array([encoder(data)  for data in tqdm(encoded_dataset)])
  nums = np.array([n_cliques, n_cycles, n_trees])
  np.save(file_name +f'{i}_X.npy', X )
  np.save(file_name + f'{i}_y.npy', y )
  np.save(file_name +f'{i}_nums.npy', nums)


def gather_graph_data(encoder, file_name, cliques_limit = 5, 
                      cycles_limit = 6, trees_limit = 6):
  i = 0
  for n_cliques in range(4,cliques_limit):
    for n_cycles in range(3,cycles_limit):
        for n_trees in range(2, trees_limit):

          add_graphs = lambda x: add_testgraphs(encoded_data = x, 
                                                n_trees= n_trees, limit_trees= 10000,
                                                n_cycles=n_cycles, limit_cycles = 10000,
                                                n_cliques= n_cliques, limit_cliques=100)
          add_to_Dataset = [add_graphs(data) for data in tqdm(encoded_dataset)]
          # labels
          y = np.array([data.pyg_graph().y[0,0].detach().numpy() for data in tqdm(encoded_dataset)])
          #vectors
          X = np.array([encoder(data)  for data in tqdm(encoded_dataset)])
          nums = np.array([n_cliques, n_cycles, n_trees])
          np.save(file_name +f'{i}_X.npy', X )
          np.save(file_name + f'{i}_y.npy', y )
          np.save(file_name +f'{i}_nums.npy', nums)
          i+=1   


In [None]:
# GHC:
file_name = 'Experiments/ogbg-molhiv/GHC_encoded_data/experiment_'
encdoer = lambda x: x.ghc_encoder(format = 'numpy')
gather_graph_data(encdoer, file_name)

In [None]:
# GHC with augmentation:
file_name = 'Experiments/ogbg-molhiv/ghc_aug/experiment_'
ghc = lambda x: x.ghc_encoder(format = 'numpy')
num_enc = lambda x: x.num_encoder(format = 'numpy')
encoder = lambda x: np.concatenate((ghc(x),num_enc(x)), axis = 0)
gather_graph_data(encdoer, file_name)

In [None]:
# Lagrangian with augmentation:
file_name = 'Experiments/ogbg-molhiv/lagrangian_aug/experiment_'
raw_encoder = lambda x: x.lagrangian_encoder(format = 'numpy')
num_enc = lambda x: x.num_encoder(format = 'numpy')
encoder = lambda x: np.concatenate((raw_encoder(x),num_enc(x)), axis = 0)
single_graph_data(800, encoder, file_name,
                  n_cliques = 4, n_cycles = 5, n_trees = 2)
#gather_graph_data(encdoer, file_name)

100%|██████████| 41127/41127 [00:02<00:00, 16755.95it/s]
100%|██████████| 41127/41127 [00:00<00:00, 46463.91it/s]
100%|██████████| 41127/41127 [06:54<00:00, 99.15it/s]


#### Evaluation

In [None]:
def set_up_experiment(X_train, y_train, X_valid, y_valid):
    C_array = [10**3, 10**2, 10** 1, 10**0, 10**-1, 10**-2, 10**-3]

    max_score = 0
    C_max = 0
    for C in C_array:
        clf = make_pipeline(StandardScaler(), SVC(kernel='rbf', C = 1, probability = True, random_state=42))
        # fit model
        clf.fit(X_train, y_train)
        # Calculate predictions 
        y_valid_pred = clf.predict_proba(X_valid)

        y_pred_valid = y_valid_pred[:,1].reshape(4113,1)
        y_true_valid = y_valid.reshape(4113,1)

        input_valid_dict = {'y_true': y_true_valid, 'y_pred': y_pred_valid}
        valid_score = evaluator.eval(input_valid_dict)['rocauc']

        if valid_score > max_score:
            max_score = valid_score
            C_max = C
        return {'max_score': max_score, 'C_max' : C_max}

In [None]:
from ogb.graphproppred import Evaluator

evaluator = Evaluator(name = 'ogbg-molhiv')
print(evaluator.expected_input_format) 
print(evaluator.expected_output_format)  

==== Expected input format of Evaluator for ogbg-molhiv
{'y_true': y_true, 'y_pred': y_pred}
- y_true: numpy ndarray or torch tensor of shape (num_graph, num_task)
- y_pred: numpy ndarray or torch tensor of shape (num_graph, num_task)
where y_pred stores score values (for computing AUC score),
num_task is 1, and each row corresponds to one graph.
nan values in y_true are ignored during evaluation.

==== Expected output format of Evaluator for ogbg-molhiv
{'rocauc': rocauc}
- rocauc (float): ROC-AUC score averaged across 1 task(s)



In [None]:
#usual GHC
X_list = [np.load(f'Experiments/ogbg-molhiv/GHC_encoded_data/experiment_{i}_X.npy') for i in range(12)]
y_list = [np.load(f'Experiments/ogbg-molhiv/GHC_encoded_data/experiment_{i}_y.npy') for i in range(12)]
nums_list = [np.load(f'Experiments/ogbg-molhiv/GHC_encoded_data/experiment_{i}_nums.npy') for i in range(12)]


In [None]:
# GHC augmented
X_list = [np.load(f'Experiments/ogbg-molhiv/ghc_aug/experiment_{i}_X.npy') for i in range(12)]
y_list = [np.load(f'Experiments/ogbg-molhiv/ghc_aug/experiment_{i}_y.npy') for i in range(12)]
nums_list = [np.load(f'Experiments/ogbg-molhiv/ghc_aug/experiment_{i}_nums.npy') for i in range(12)]


In [None]:
X_list[0].shape, len(y_list), len(nums_list)

((41127, 11), 12, 12)

In [None]:
# Using the given train-test split

split_idx = dataset.get_idx_split()

train_idx = split_idx["train"]
valid_idx = split_idx["valid"]
test_idx  = split_idx["test"]

In [None]:
list(map(lambda x: x.shape, X_list))

[(41127, 11),
 (41127, 22),
 (41127, 33),
 (41127, 55),
 (41127, 22),
 (41127, 33),
 (41127, 44),
 (41127, 66),
 (41127, 33),
 (41127, 44),
 (41127, 55),
 (41127, 77)]

In [None]:
nums_list[11]

array([4, 5, 5])

In [None]:
def calculate_score_SVM(i):
  X = X_list[i]
  y = y_list[i]

  X_train , y_train = X[train_idx], y[train_idx]
  X_valid , y_valid = X[valid_idx], y[valid_idx]
  X_test , y_test = X[test_idx], y[test_idx]

  result_dict = set_up_experiment(X_train, y_train, X_valid, y_valid)

  best_val_score = result_dict['max_score']
  C_max = result_dict['C_max']

  clf = make_pipeline(StandardScaler(), SVC(kernel='rbf', C = C_max, probability = True, random_state=42))
  # fit model
  clf.fit(X_train, y_train)

  # calculate test auroc
  y_test_pred = clf.predict_proba(X_test)
  y_pred_test = y_test_pred[:,1].reshape(4113,1)
  y_true_test = y_test.reshape(4113,1)

  input_test_dict = {'y_true': y_true_test, 'y_pred': y_pred_test}
  test_score = evaluator.eval(input_test_dict)['rocauc']

  # calculate train auroc
  y_train_pred = clf.predict_proba(X_train)
  y_pred_train = y_train_pred[:,1].reshape(32901,1)
  y_true_train = y_train.reshape(32901,1)
  
  input_train_dict = {'y_true': y_true_train, 'y_pred': y_pred_train}
  train_score = evaluator.eval(input_train_dict)['rocauc']

  return [train_score, test_score, best_val_score, C_max]


In [None]:
for i in tqdm(range(12)): 
  svm_data = calculate_score_SVM(i)
  svm_data_array = np.array(svm_data)
  np.save(f'Experiments/ogbg-molhiv/svm_data_{i}.npy', svm_data_array)

100%|██████████| 12/12 [3:50:44<00:00, 1153.69s/it]


In [None]:
# load single experiment Lag
X = np.load(f'Experiments/ogbg-molhiv/lagrangian_aug/experiment_800_X.npy')
y = np.load(f'Experiments/ogbg-molhiv/lagrangian_aug/experiment_800_y.npy')
nums_list = np.load(f'Experiments/ogbg-molhiv/lagrangian_aug/experiment_800_nums.npy')

In [None]:
X.shape, y.shape, nums_list

((41127, 33), (41127,), array([4, 5, 2]))

#### Random forest eval:

In [None]:
# try PCA before evaluation
from sklearn.decomposition import PCA, IncrementalPCA

One random forest measuremnt:

> Indented block



In [None]:
def molhiv_calculate_score_single_forest(X, y):
  X_train , y_train = X[train_idx], y[train_idx]
  X_valid , y_valid = X[valid_idx], y[valid_idx]
  X_test , y_test = X[test_idx], y[test_idx]

  # preprocess pca
  if i > 1:
    pca = PCA(n_components = 20)
  else:
    pca = PCA()
  pca.fit(X_train, y_train)

  X_train_new = pca.transform(X_train)
  X_valid_new = pca.transform(X_valid)
  X_test_new = pca.transform(X_test)

  clf = make_pipeline(StandardScaler(), RandomForestClassifier(random_state=42))
  # fit model
  clf.fit(X_train_new, y_train)
  
  # calculate test auroc
  y_test_pred = clf.predict_proba(X_test_new)
  y_pred_test = y_test_pred[:,1].reshape(4113,1)
  y_true_test = y_test.reshape(4113,1)

  input_test_dict = {'y_true': y_true_test, 'y_pred': y_pred_test}
  test_score = evaluator.eval(input_test_dict)['rocauc']

  # calculate valid auroc
  y_valid_pred = clf.predict_proba(X_valid_new)
  y_pred_valid = y_valid_pred[:,1].reshape(4113,1)
  y_true_valid = y_valid.reshape(4113,1)

  input_valid_dict = {'y_true': y_true_valid, 'y_pred': y_pred_valid}
  valid_score = evaluator.eval(input_valid_dict)['rocauc']

  # calculate train auroc
  y_train_pred = clf.predict_proba(X_train_new)
  y_pred_train = y_train_pred[:,1].reshape(32901,1)
  y_true_train = y_train.reshape(32901,1)
  
  input_train_dict = {'y_true': y_true_train, 'y_pred': y_pred_train}
  train_score = evaluator.eval(input_train_dict)['rocauc']

  return [train_score, valid_score, test_score]

In [None]:
def molhiv_calculate_score_random_forest(i):
  X = X_list[i]
  y = y_list[i]

  return molhiv_calculate_score_single_forest(X,y)

In [None]:
# run random forest
for i in tqdm(range(12)): 
  random_forest_data = molhiv_calculate_score_random_forest(i)
  random_forest_data_array = np.array(random_forest_data)
  np.save(f'Experiments/ogbg-molhiv/random_forest_scores_{i}.npy', random_forest_data_array)

100%|██████████| 12/12 [04:23<00:00, 21.93s/it]


In [82]:
# load scores:
[np.load(f'Experiments/ogbg-molhiv/ghc_aug/random_forest_scores_{i}.npy') for i in range(12)]

[array([0.99999981, 0.74252882]),
 array([1.        , 0.74369629]),
 array([1.        , 0.73329632]),
 array([1.        , 0.73594604]),
 array([0.99999956, 0.73366326]),
 array([1.        , 0.74253752]),
 array([1.        , 0.71936886]),
 array([1.        , 0.71889279]),
 array([0.99999924, 0.75866761]),
 array([0.99999999, 0.71003013]),
 array([1.        , 0.72188049]),
 array([1.        , 0.69132757])]

In [83]:
[np.load(f'Experiments/ogbg-molhiv/ghc_aug/experiment_{i}_nums.npy') for i in range(12)]

[array([4, 3, 2]),
 array([4, 3, 3]),
 array([4, 3, 4]),
 array([4, 3, 5]),
 array([4, 4, 2]),
 array([4, 4, 3]),
 array([4, 4, 4]),
 array([4, 4, 5]),
 array([4, 5, 2]),
 array([4, 5, 3]),
 array([4, 5, 4]),
 array([4, 5, 5])]

### Experiment: NCI-1

In [84]:
class nci1_transform(BaseTransform):
   def __call__(self, data):
     new_data = data.clone()
     w = torch.rand(37)
     new_data.x = torch.unsqueeze(torch.tensordot(data.x, w,  dims=([1], [0])),1)
     return new_data
     
transform = nci1_transform()


In [85]:
# load the data:

dataset = TUDataset(root='data_local/TUDataset', name='NCI1', transform=transform)

print()
print(f'Dataset: {dataset}:')
print('====================')
print(f'Number of graphs: {len(dataset)}')
print(f'Number of features: {dataset.num_features}')
print(f'Number of classes: {dataset.num_classes}')

data = dataset[0]  # Get the first graph object.

print()
print(data)
print('=============================================================')


Dataset: NCI1(4110):
Number of graphs: 4110
Number of features: 1
Number of classes: 2

Data(edge_index=[2, 42], x=[21, 1], y=[1])


In [86]:
encoded_dataset = [encoding.grandEmbedding(data) for data in tqdm(dataset)]

100%|██████████| 4110/4110 [00:02<00:00, 1923.32it/s]


In [12]:
encoded_dataset[0].pyg_graph().x[0]

tensor([0.1136])

In [87]:
def TU_graph_data(i, encoder, file_name,
                      n_cliques, n_cycles, n_trees):
  add_graphs = lambda x: add_testgraphs(encoded_data = x,                                           n_trees= n_trees, limit_trees= 10000,                                              n_cycles=n_cycles, limit_cycles = 10000,
                                      n_cliques= n_cliques, limit_cliques=100)
  add_to_Dataset = [add_graphs(data) for data in tqdm(encoded_dataset)]
  # labels
  y = np.array([data.pyg_graph().y.detach().numpy() for data in tqdm(encoded_dataset)])
  #vectors
  X = np.array([encoder(data)  for data in tqdm(encoded_dataset)])
  nums = np.array([n_cliques, n_cycles, n_trees])
  np.save(file_name +f'{i}_X.npy', X )
  np.save(file_name + f'{i}_y.npy', y )
  np.save(file_name +f'{i}_nums.npy', nums)

In [None]:
# ghc:
file_name = 'Experiments/NCI1/ghc/experiment_'
ghc_encoder = lambda x: x.ghc_encoder(format = 'numpy')
#num_enc = lambda x: x.num_encoder(format = 'numpy')
#encoder = lambda x: np.concatenate((raw_encoder(x),num_enc(x)), axis = 0)
TU_graph_data(800, ghc_encoder, file_name,
                  n_cliques = 6, n_cycles = 6, n_trees = 8)
#gather_graph_data(encdoer, file_name)

100%|██████████| 4110/4110 [00:03<00:00, 1306.56it/s]
100%|██████████| 4110/4110 [00:00<00:00, 108540.31it/s]
100%|██████████| 4110/4110 [13:21<00:00,  5.13it/s]


In [None]:
# Lagrangian gather data:
file_name = 'Experiments/NCI1/lagrangian_aug/experiment_'
pure_encoder = lambda x: x.lagrangian_encoder(format = 'numpy')
num_enc = lambda x: x.num_encoder(format = 'numpy')
encoder = lambda x: np.concatenate((pure_encoder(x),num_enc(x)), axis = 0)

i = 0
for n_cliques in range(4,6):
  for n_cycles in range(3,10):
      for n_trees in range(2, 12):
        TU_graph_data(i, encoder, file_name,
                 n_cliques = n_cliques, n_cycles = n_cycles, n_trees = n_trees)
        i+=1

In [15]:
# load single experiment Lag
def load_data_nci1(i, file_name):
  X = np.load(file_name+f'{i}_X.npy')
  y = np.load(file_name+f'{i}_y.npy').reshape(4110,)
  nums = np.load(file_name+f'{i}_nums.npy')
  return {'X': X, 'y': y, 'nums': nums}

In [97]:
# Lagrangian load data:
num_of_exps = 8
file_name = 'Experiments/NCI1/lagrangian_aug/experiment_'
X_list_lag = [load_data_nci1(i, file_name)['X'] for i in range(num_of_exps)]
y_list_lag = [load_data_nci1(i, file_name)['y'] for i in range(num_of_exps)]
nums_list_lag = [load_data_nci1(i, file_name)['nums'] for i in range(num_of_exps)]

In [17]:
X_list_lag[1].shape, y_list_lag[0].shape, nums_list_lag[0].shape

((4110, 76), (4110,), (3,))

In [88]:
# ghc load data:
num_of_exps = 37
file_name = 'Experiments/NCI1/ghc/experiment_'
X_list_ghc = [load_data_nci1(i, file_name)['X'] for i in range(num_of_exps)]
y_list_ghc = [load_data_nci1(i, file_name)['y'] for i in range(num_of_exps)]
nums_list_ghc = [load_data_nci1(i, file_name)['nums'] for i in range(num_of_exps)]

In [64]:
# lagrangian:
X_list = X_list_lag
y_list = y_list_lag
nums_list = nums_list_lag

In [89]:
# ghc:
X_list = X_list_ghc
y_list = y_list_ghc
nums_list = nums_list_ghc

In [65]:
nums_list

[array([4, 3, 2]),
 array([4, 3, 3]),
 array([4, 3, 4]),
 array([4, 3, 5]),
 array([4, 3, 6]),
 array([4, 3, 7]),
 array([4, 3, 8]),
 array([4, 3, 9])]

In [98]:
# 
components = [100 if i>1 else 30 for i in range(num_of_exps)]
base_clf = RandomForestClassifier(random_state=42)
#clf_list = [make_pipeline(StandardScaler(), PCA(n_components = n),base_clf) for n in components]
clf_list = [make_pipeline(StandardScaler(), base_clf) for n in components]

cv_scores = [calculate_cv_scores(clf_list[i], X_list[i], y_list[i], 10)
              for i in range(num_of_exps)]

In [30]:
np.zeros(5)

array([0., 0., 0., 0., 0.])

In [95]:
experiment_name = 'lagrangian_aug'
model_type = '/cv_scores_RF.npy'
scors_arr = np.zeros((num_of_exps, 2))
for i, scores in enumerate(cv_scores):
  mean = scores.mean()
  std = scores.std()
  scors_arr[i] = np.array([mean, std])
  #print(experiment_name + f'{i}, Error = {mean:.2f}' + u"\u00B1" + f'{std:.2f} \n')

np.save('Experiments/NCI1/'+experiment_name+model_type, scors_arr)

In [96]:
z = np.load('Experiments/NCI1/'+experiment_name+model_type)
z.max()

0.6209245742092457

In [75]:
expr = 6
clf = clf_list[expr]
X = X_list[expr]
y = y_list[expr]
calculate_single_split_score(clf, X, y, cv_num = 10, random_state = 25)

{'train_score': 0.9990266060999351, 'test_score': 0.806420233463035}

In [None]:
#pca = PCA(n_components = 100)
#pca.fit(X_train, y_train)

#X_train_new = pca.transform(X_train)
#X_test_new = pca.transform(X_test)

#clf = make_pipeline(StandardScaler(), SVC(random_state=42, C = 100))
#clf = make_pipeline(StandardScaler(), RandomForestClassifier(random_state=42))
# fit model
#clf.fit(X_train_new, y_train)

#train_score = clf.score(X_train_new, y_train)
#test_score = clf.score(X_test_new, y_test)

In [55]:
dtry = np.array({'1':torch.zeros(3), '2':2}, dtype = object)
np.save('trying.npy', dtry)

In [56]:
import numpy as np
numsarr = np.load('pattern_graphs/PROTEINS/lagrangian_aug/.npy', allow_pickle=True)
s

array({'1': tensor([0., 0., 0.]), '2': 2}, dtype=object)

In [8]:
import networkx as nx
sm_graph = uts.from_networkx(nx.complete_graph(3))
sm_graph.edge_index.t()

tensor([[0, 1],
        [0, 2],
        [1, 0],
        [1, 2],
        [2, 0],
        [2, 1]])

In [9]:
sm_graph.edge_attr = torch.tensor([[1], [2], [1], [3], [2], [3]])

In [11]:
sm_graph.edge_attr

tensor([[1],
        [2],
        [1],
        [3],
        [2],
        [3]])