In [None]:
# Install required packages.
# The installed torch version is exported as the TORCH environment variable so
# that the shell commands below can interpolate it into the wheel-index URL.
import os
import torch
os.environ['TORCH'] = torch.__version__
print(torch.__version__)

# `-f` gives pip an extra page to look for packages on; the URL is built from
# the torch version exported above (`${TORCH}` is expanded by the shell).
!pip install -q torch-scatter -f https://data.pyg.org/whl/torch-${TORCH}.html
!pip install -q torch-sparse -f https://data.pyg.org/whl/torch-${TORCH}.html
# Installs pytorch_geometric straight from its GitHub repository; no version,
# tag or commit is pinned here.
!pip install -q git+https://github.com/pyg-team/pytorch_geometric.git

In [None]:
import torch
from torch_geometric.data import Data
from torch_geometric.datasets import TUDataset
import torch_geometric.utils as uts

import numpy as np
import matplotlib.pyplot as plt
import itertools
from tqdm import tqdm

import networkx as nx 
from networkx.algorithms.isomorphism import ISMAGS, GraphMatcher

Copy the simple encoder 

In [None]:
def Simple_Encoder(G, F_set, symmetry=True, line=True, format='Torch'):
    '''
    Encode G as the vector in R^|F_set| of subgraph isomorphism counts (subgraphIso(F,G)).

    Inputs:
        G: Graph (networkx) to encode
        F_set (N, ): Iterable of test subgraphs (networkx graphs); it is traversed once
        symmetry (default = True) : forwarded to ISMAGS.find_isomorphisms; intended to
            control whether embeddings F->G are counted up to automorphisms of F or not
        line (default = True) : if True, G and every F are replaced by their line graphs
            (nx.line_graph) before matching
        format (default = Torch): desired output type, either 'Torch' or 'numpy'
    Output:
        Enc(G): (N, ) integer vector; entry i is the number of isomorphisms that ISMAGS
            yields for the i-th element of F_set inside G
    Raises:
        ValueError: if format is neither 'Torch' nor 'numpy'
    '''
    # Reject an unknown format before doing any (potentially expensive) matching,
    # instead of silently returning None afterwards.
    if format not in ('Torch', 'numpy'):
        raise ValueError(
            f"Unsupported format {format!r}; expected 'Torch' or 'numpy'."
        )

    # The host graph is the same for every pattern, so build its line graph once.
    host = nx.line_graph(G) if line else G

    def count_matches(F):
        pattern = nx.line_graph(F) if line else F
        matcher = ISMAGS(host, pattern)
        # Count lazily: the mappings themselves are not needed, only how many there are.
        return sum(1 for _ in matcher.find_isomorphisms(symmetry))

    counts = [count_matches(F) for F in F_set]

    # Explicit integer dtypes keep an empty F_set from producing a float vector.
    if format == 'Torch':
        return torch.tensor(counts, dtype=torch.long)
    return np.array(counts, dtype=np.int64)

Define the set of pattern graphs (trees, cycles, cliques) whose occurrences are counted

In [None]:
# Pattern graphs used as features. All three families are one-shot iterators,
# so they are drained into F_list, which is what the encoder calls below use.
trees = itertools.chain.from_iterable(
    nx.nonisomorphic_trees(order) for order in range(2, 10)
)
cycles = map(nx.cycle_graph, range(4, 10))
cliques = map(nx.complete_graph, range(3, 8))

# Column order of the resulting feature vectors: trees, then cycles, then cliques.
F_iter = itertools.chain(trees, cycles, cliques)

F_list = [*F_iter]

## Start SVM experiment

set up environment 

In [None]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

from yellowbrick.model_selection import CVScores
from sklearn.model_selection import StratifiedKFold

## First experiment: MUTAG:

Load the data

In [None]:
# Load MUTAG through PyG's TUDataset wrapper (stored under data/TUDataset).
dataset = TUDataset(root='data/TUDataset', name='MUTAG')

# Dataset-level summary, one item per output line.
print(
    '',
    f'Dataset: {dataset}:',
    '====================',
    f'Number of graphs: {len(dataset)}',
    f'Number of features: {dataset.num_features}',
    f'Number of classes: {dataset.num_classes}',
    sep='\n',
)

data = dataset[0]  # Get the first graph object.

# Show the first graph's summary representation.
print(
    '',
    data,
    '=============================================================',
    sep='\n',
)

Inspect:

In [None]:
def to_kx(x):
    """Convert a PyG graph object into an undirected networkx graph."""
    return uts.to_networkx(x, to_undirected=True)


# Draw the first graph of the dataset as a quick visual check.
G = to_kx(dataset[0])
nx.draw(G)

Process the data

In [None]:
# For MUTAG

# labels: gather every graph's class tensor into one flat array
y = np.concatenate([graph.y.detach().numpy() for graph in dataset])

# vectors: one pattern-count feature vector per graph (plain graphs, not line graphs)
encode = lambda x: Simple_Encoder(to_kx(x), F_list, line = False, format ='numpy')

# X must be computed here: the classifier cell below calls visualizer.fit(X, y),
# and without this line a fresh "Restart & Run All" fails with NameError (or,
# when cells are run out of order, silently reuses features of another dataset).
# This runs one ISMAGS search per (graph, pattern) pair, so it can take a while.
X = [encode(graph) for graph in tqdm(dataset)]

In [None]:
# Run classifier - MUTAG

# NOTE(review): make_pipeline / StandardScaler are imported above but not used;
# the raw count features are passed to the RBF SVM unscaled.
clf = SVC(kernel='rbf')

# The seed belongs on the splitter: with shuffle=True, StratifiedKFold draws a
# different partition on every run unless it is given a random_state. It was
# previously passed to CVScores, where it had no effect on the folds.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
visualizer = CVScores(clf, cv=cv, scoring='f1_weighted')
visualizer.fit(X, y)
visualizer.show()

## Experiment: IMDB 

In [None]:
# Load the data

In [None]:
# Load IMDB-BINARY through PyG's TUDataset wrapper (stored under data/IMDB-BINARY).
dataset = TUDataset(root='data/IMDB-BINARY', name='IMDB-BINARY')

# Dataset-level summary, one item per output line.
print(
    '',
    f'Dataset: {dataset}:',
    '====================',
    f'Number of graphs: {len(dataset)}',
    f'Number of features: {dataset.num_features}',
    f'Number of classes: {dataset.num_classes}',
    sep='\n',
)

data = dataset[0]  # Get the first graph object.

# Show the first graph's summary representation.
print(
    '',
    data,
    '=============================================================',
    sep='\n',
)

Inspect:

In [None]:
def to_kx(x):
    """Convert a PyG graph object into an undirected networkx graph."""
    return uts.to_networkx(x, to_undirected=True)


# Draw the third graph (index 2) of the dataset as a quick visual check.
G = to_kx(dataset[2])
nx.draw(G)

In [None]:
# For IMDB-BIN

# labels: gather every graph's class tensor into one flat array
y = np.concatenate([graph.y.detach().numpy() for graph in dataset])


# vectors: one pattern-count feature vector per graph (plain graphs, not line graphs)
def encode(x):
    """Encode a single PyG graph as its numpy pattern-count vector over F_list."""
    return Simple_Encoder(to_kx(x), F_list, line=False, format='numpy')


X = list(map(encode, tqdm(dataset)))

In [None]:
# Run classifier - IMDB-BIN

# NOTE(review): make_pipeline / StandardScaler are imported above but not used;
# the raw count features are passed to the RBF SVM unscaled.
clf = SVC(kernel='rbf')

# The seed belongs on the splitter: with shuffle=True, StratifiedKFold draws a
# different partition on every run unless it is given a random_state. It was
# previously passed to CVScores, where it had no effect on the folds.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
visualizer = CVScores(clf, cv=cv, scoring='f1_weighted')
visualizer.fit(X, y)
visualizer.show()