# Experiments

## Colab setup

In [1]:
# Expose the installed torch version to the shell as $TORCH so the pip
# commands below can build the matching data.pyg.org wheel-index URL.
import os
import torch
os.environ['TORCH'] = torch.__version__
print(torch.__version__)

# torch-scatter / torch-sparse are fetched from the wheel index for this torch version.
!pip install -q torch-scatter -f https://data.pyg.org/whl/torch-${TORCH}.html
!pip install -q torch-sparse -f https://data.pyg.org/whl/torch-${TORCH}.html
# PyTorch Geometric itself is installed straight from the GitHub repository.
# NOTE(review): no tag or commit is pinned, so the installed version can differ
# between runs — consider pinning for reproducibility.
!pip install -q git+https://github.com/pyg-team/pytorch_geometric.git

1.11.0+cu113
[K     |████████████████████████████████| 7.9 MB 2.6 MB/s 
[K     |████████████████████████████████| 3.5 MB 2.7 MB/s 
[?25h  Building wheel for torch-geometric (setup.py) ... [?25l[?25hdone


In [2]:
# Extra dependencies. `ogb` backs the `ogb.graphproppred` imports used in this notebook.
# NOTE(review): `grandiso` is not imported in the cells visible here — presumably
# the local `experiments` module needs it; confirm. Neither package is version-pinned.
!pip install ogb
!pip install grandiso

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting ogb
  Downloading ogb-1.3.3-py3-none-any.whl (78 kB)
[K     |████████████████████████████████| 78 kB 3.1 MB/s 
Collecting outdated>=0.2.0
  Downloading outdated-0.2.1-py3-none-any.whl (7.5 kB)
Collecting littleutils
  Downloading littleutils-0.2.2.tar.gz (6.6 kB)
Building wheels for collected packages: littleutils
  Building wheel for littleutils (setup.py) ... [?25l[?25hdone
  Created wheel for littleutils: filename=littleutils-0.2.2-py3-none-any.whl size=7048 sha256=056ee6b6ac9516eed981966dd40a05091ba635452fe6dde31460f2ccae105f9e
  Stored in directory: /root/.cache/pip/wheels/d6/64/cd/32819b511a488e4993f2fab909a95330289c3f4e0f6ef4676d
Successfully built littleutils
Installing collected packages: littleutils, outdated, ogb
Successfully installed littleutils-0.2.2 ogb-1.3.3 outdated-0.2.1
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/pub

In [4]:
# Mount Google Drive inside the Colab runtime.
from google.colab import drive
drive.mount('/content/drive')
# Switch the working directory to the project folder on Drive — presumably so
# the local `experiments` module and the relative data paths resolve from there.
# NOTE(review): hard-coded, user-specific path (it also contains a stray `//`).
%cd /content/drive/My\ Drive//CS159_project/Graph_homomorphism/graph_homomorphism

Mounted at /content/drive
/content/drive/My Drive/CS159_project/Graph_homomorphism/graph_homomorphism


### Imports

In [5]:
import numpy as np
import torch
from torch_geometric.data import Data
from torch_geometric.transforms import BaseTransform
from torch_geometric.datasets import TUDataset, ZINC
from ogb.graphproppred import PygGraphPropPredDataset

from typing import Set
import itertools
from functools import partial

from tqdm import tqdm

# sklearn imports
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA

# ogb
from ogb.graphproppred import Evaluator

In [6]:
# Graph utilities
import networkx as nx
import experiments

In [7]:
# Re-import the already-loaded local `experiments` module and rebind the name
# to the reloaded module object (useful after editing its source without
# restarting the kernel).
from importlib import reload 

experiments = reload(experiments)

## Try running all experiments

In [8]:
# ---------- PROTEINS ----------
# Load PROTEINS (use_node_attr enabled), then build and run the pattern
# experiment with the 'lagrangian_aug' encoder.
dataset = TUDataset(
    name='PROTEINS',
    root='data/TUDataset',
    use_node_attr=True,
)
experiment = experiments.patternExperiment(
    dataset,
    folder_name='pattern_graphs/PROTEINS',
    encoder_name='lagrangian_aug',
    n_trees=7,
    n_cycles=7,
    n_cliques=6,
)
experiment.run()

In [None]:
########## ogbg-molhiv
# This cell needs `transform`, which is otherwise only created in the
# "Experiment: ogbg-molhiv" section further down the notebook. On a fresh
# kernel (Restart & Run All) that made this cell fail with NameError, so the
# transform is built here to keep the cell self-contained. It uses the same
# encoder construction as that section (AtomEncoder with emb_dim = 10).
from ogb.graphproppred.mol_encoder import AtomEncoder

emb_dim = 10
atom_encoder = AtomEncoder(emb_dim)


class atom_transform(BaseTransform):
  """Transform that replaces `data.x` with `atom_encoder(data.x)` on a clone."""
  def __call__(self, data):
    newdata = data.clone()
    # The encoder is only applied as a fixed feature map, so do not record
    # an autograd graph for every sample.
    with torch.no_grad():
      newdata.x = atom_encoder(data.x)
    return newdata


transform = atom_transform()

dataset = PygGraphPropPredDataset(name="ogbg-molhiv", root='dataset/', transform=transform)

# Run both encoders on the same dataset, 'ghc_aug' first and then
# 'lagrangian_aug', with identical pattern-count arguments.
# Set up experiment: ogbg-molhiv ghc_aug
experiment = experiments.patternExperiment(
    dataset,
    folder_name='pattern_graphs/ogbg-molhiv',
    encoder_name='ghc_aug',
    n_trees=7,
    n_cycles=7,
    n_cliques=6,
)
experiment.run()
# Set up experiment: ogbg-molhiv lagrangian_aug
experiment = experiments.patternExperiment(
    dataset,
    folder_name='pattern_graphs/ogbg-molhiv',
    encoder_name='lagrangian_aug',
    n_trees=7,
    n_cycles=7,
    n_cliques=6,
)
experiment.run()

## Experiments: choice of pattern graphs for graph classification tasks

### Experiment: MUTAG

In [None]:
# Load the MUTAG dataset through TUDataset.
dataset = TUDataset(name='MUTAG', root='data/TUDataset')

# Dataset-level summary: a blank line, then the header and basic counts.
print(
    '',
    f'Dataset: {dataset}:',
    '====================',
    f'Number of graphs: {len(dataset)}',
    f'Number of features: {dataset.num_features}',
    f'Number of classes: {dataset.num_classes}',
    sep='\n',
)

# Take the first graph as a sample and show its repr.
data = dataset[0]
print(
    '',
    data,
    '=============================================================',
    sep='\n',
)


Dataset: MUTAG(188):
Number of graphs: 188
Number of features: 7
Number of classes: 2

Data(edge_index=[2, 38], x=[17, 7], edge_attr=[38, 4], y=[1])


In [None]:
# MUTAG with the 'ghc_aug' encoder: build the pattern experiment, then run it.
experiment = experiments.patternExperiment(
    dataset,
    folder_name='pattern_graphs/MUTAG',
    encoder_name='ghc_aug',
    n_trees=7,
    n_cycles=7,
    n_cliques=6,
)
experiment.run()


In [None]:
# MUTAG with the 'lagrangian_aug' encoder: build the pattern experiment, then run it.
experiment = experiments.patternExperiment(
    dataset,
    folder_name='pattern_graphs/MUTAG',
    encoder_name='lagrangian_aug',
    n_trees=7,
    n_cycles=7,
    n_cliques=6,
)
experiment.run()

### Experiment: PROTEINS

In [None]:
# Load the PROTEINS dataset through TUDataset with use_node_attr enabled.
dataset = TUDataset(
    name='PROTEINS',
    root='data/TUDataset',
    use_node_attr=True,
)

# Dataset-level summary: a blank line, then the header and basic counts.
print(
    '',
    f'Dataset: {dataset}:',
    '====================',
    f'Number of graphs: {len(dataset)}',
    f'Number of features: {dataset.num_features}',
    f'Number of classes: {dataset.num_classes}',
    sep='\n',
)

# Take the first graph as a sample and show its repr.
data = dataset[0]
print(
    '',
    data,
    '=============================================================',
    sep='\n',
)


Dataset: PROTEINS(1113):
Number of graphs: 1113
Number of features: 4
Number of classes: 2

Data(edge_index=[2, 162], x=[42, 4], y=[1])


In [None]:
# PROTEINS with the 'ghc_aug' encoder: build the pattern experiment, then run it.
experiment = experiments.patternExperiment(
    dataset,
    folder_name='pattern_graphs/PROTEINS',
    encoder_name='ghc_aug',
    n_trees=7,
    n_cycles=7,
    n_cliques=6,
)
experiment.run()

In [None]:
# PROTEINS with the 'lagrangian_aug' encoder: build the pattern experiment, then run it.
experiment = experiments.patternExperiment(
    dataset,
    folder_name='pattern_graphs/PROTEINS',
    encoder_name='lagrangian_aug',
    n_trees=7,
    n_cycles=7,
    n_cliques=6,
)
experiment.run()

### Experiment: NCI1

In [None]:
# Load the NCI1 dataset through TUDataset.
dataset = TUDataset(name='NCI1', root='data/TUDataset')

# Dataset-level summary: a blank line, then the header and basic counts.
print(
    '',
    f'Dataset: {dataset}:',
    '====================',
    f'Number of graphs: {len(dataset)}',
    f'Number of features: {dataset.num_features}',
    f'Number of classes: {dataset.num_classes}',
    sep='\n',
)

# Take the first graph as a sample and show its repr.
data = dataset[0]
print(
    '',
    data,
    '=============================================================',
    sep='\n',
)

In [None]:
# NCI1 with the 'ghc_aug' encoder: build the pattern experiment, then run it.
experiment = experiments.patternExperiment(
    dataset,
    folder_name='pattern_graphs/NCI1',
    encoder_name='ghc_aug',
    n_trees=7,
    n_cycles=7,
    n_cliques=6,
)
experiment.run()

In [None]:
# NCI1 with the 'lagrangian_aug' encoder: build the pattern experiment, then run it.
experiment = experiments.patternExperiment(
    dataset,
    folder_name='pattern_graphs/NCI1',
    encoder_name='lagrangian_aug',
    n_trees=7,
    n_cycles=7,
    n_cliques=6,
)
experiment.run()

### Experiment: ZINC

In [None]:
# Load ZINC with subset=True.
dataset = ZINC(root='data/ZINC', subset=True)

# Dataset-level summary: a blank line, then the header and basic counts.
# NOTE(review): ZINC is commonly used as a regression benchmark, so the
# "Number of classes" figure may not be meaningful here — confirm.
print(
    '',
    f'Dataset: {dataset}:',
    '====================',
    f'Number of graphs: {len(dataset)}',
    f'Number of features: {dataset.num_features}',
    f'Number of classes: {dataset.num_classes}',
    sep='\n',
)

# Take the first graph as a sample and show its repr.
data = dataset[0]
print(
    '',
    data,
    '=============================================================',
    sep='\n',
)

In [None]:
# ZINC with the 'ghc_aug' encoder: build the pattern experiment, then run it.
experiment = experiments.patternExperiment(
    dataset,
    folder_name='pattern_graphs/ZINC',
    encoder_name='ghc_aug',
    n_trees=7,
    n_cycles=7,
    n_cliques=6,
)
experiment.run()

In [None]:
# ZINC with the 'lagrangian_aug' encoder: build the pattern experiment, then run it.
experiment = experiments.patternExperiment(
    dataset,
    folder_name='pattern_graphs/ZINC',
    encoder_name='lagrangian_aug',
    n_trees=7,
    n_cycles=7,
    n_cliques=6,
)
experiment.run()

### Experiment: ogbg-molhiv

In [8]:
from ogb.graphproppred.mol_encoder import AtomEncoder, BondEncoder

# Both encoders are built with the same embedding width.
# NOTE(review): the encoders are never trained in the cells visible here, so
# the embeddings come from whatever initialisation AtomEncoder uses —
# presumably random; consider seeding torch for reproducible features.
# NOTE(review): `bond_encoder` is not used in the cells visible here.
emb_dim = 10
atom_encoder = AtomEncoder(emb_dim)
bond_encoder = BondEncoder(emb_dim)

class atom_transform(BaseTransform):
  """Transform that swaps a graph's `x` for the output of `atom_encoder`.

  Relies on the notebook-level `atom_encoder` defined above.
  """
  def __call__(self, data):
    """Return a clone of `data` whose `x` is `atom_encoder(data.x)`.

    The incoming `data` object is not modified; only the clone's `x`
    attribute is reassigned.
    """
    newdata = data.clone()
    # The encoder is applied as a fixed feature map, so run it without
    # autograd. Otherwise every sample carries a non-leaf tensor with
    # requires_grad=True (and its graph), which wastes memory and cannot be
    # passed between DataLoader worker processes.
    with torch.no_grad():
      newdata.x = atom_encoder(data.x)
    return newdata

# Single shared instance, passed as `transform=` when building the dataset.
transform = atom_transform()

In [9]:
# Load ogbg-molhiv, applying `transform` to the graphs it serves.
dataset = PygGraphPropPredDataset(
    name="ogbg-molhiv",
    root='dataset/',
    transform=transform,
)

# Dataset-level summary: a blank line, then the header and basic counts.
print(
    '',
    f'Dataset: {dataset}:',
    '====================',
    f'Number of graphs: {len(dataset)}',
    f'Number of features: {dataset.num_features}',
    f'Number of classes: {dataset.num_classes}',
    sep='\n',
)

# Take the first graph as a sample and show its repr.
data = dataset[0]
print(
    '',
    data,
    '=============================================================',
    sep='\n',
)


Dataset: PygGraphPropPredDataset(41127):
Number of graphs: 41127
Number of features: 10
Number of classes: 2

Data(edge_index=[2, 40], edge_attr=[40, 3], x=[19, 10], y=[1, 1], num_nodes=19)


In [None]:
# ogbg-molhiv with the 'ghc_aug' encoder: build the pattern experiment, then run it.
experiment = experiments.patternExperiment(
    dataset,
    folder_name='pattern_graphs/ogbg-molhiv',
    encoder_name='ghc_aug',
    n_trees=7,
    n_cycles=7,
    n_cliques=6,
)
experiment.run()

In [None]:
# ogbg-molhiv with the 'lagrangian_aug' encoder: build the pattern experiment, then run it.
experiment = experiments.patternExperiment(
    dataset,
    folder_name='pattern_graphs/ogbg-molhiv',
    encoder_name='lagrangian_aug',
    n_trees=7,
    n_cycles=7,
    n_cliques=6,
)
experiment.run()