In [2]:
import networkx as nx
import numpy as np

from sklearn.model_selection import train_test_split
from grakel.kernels import ShortestPath, PyramidMatch, RandomWalk, VertexHistogram, WeisfeilerLehman
from grakel import graph_from_networkx
from grakel.datasets import fetch_dataset
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

In [33]:
############## Question 1
# Generate simple dataset

Gs = [nx.cycle_graph(i) for i in range(3,103)] + [nx.path_graph(i) for i in range(3,103)]
y  = np.concatenate((np.zeros(100), np.ones(100)))

In [37]:
############## Question 2
# Classify the synthetic graphs using graph kernels
# Split dataset into a training and a test set with the train_test_split function of scikit-learn

G_train, G_test, y_train, y_test = train_test_split(Gs, y, test_size=0.1)

# Transform NetworkX graphs to objects that can be processed by GraKeL
G_train = list(graph_from_networkx(G_train))
G_test = list(graph_from_networkx(G_test))

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37]
[(0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7), (7, 8), (8, 9), (9, 10), (10, 11), (11, 12), (12, 13), (13, 14), (14, 15), (15, 16), (16, 17), (17, 18), (18, 19), (19, 20), (20, 21), (21, 22), (22, 23), (23, 24), (24, 25), (25, 26), (26, 27), (27, 28), (28, 29), (29, 30), (30, 31), (31, 32), (32, 33), (33, 34), (34, 35), (35, 36), (36, 37)]
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71]
[(0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7), (7, 8), (8, 9), (9, 10), (10, 11), (11, 12), (12, 13), (13, 14), (14, 15), (15, 16), (16, 17), (17, 18), (18, 19), (19, 20), (20, 21), (21, 22), (22, 23), (23, 24), (24,

In [46]:
# Use the shortest path kernel to generate the two kernel matrices ("K_train" and "K_test")
# The graphs do not contain node labels -> Set the with_labels argument of the the shortest path kernel to False

gk = ShortestPath(with_labels=False)
K_train = gk.fit_transform(G_train)
K_test  = gk.transform(G_test)

clf = SVC(kernel='precomputed', C=1) # Initialize SVM
clf.fit(K_train, y_train) # Train SVM
y_pred = clf.predict(K_test) # Predict

# Compute the classification accuracy, using the accuracy_score function of scikit-learn
print("Shortest Path Accuracy :", accuracy_score(y_test, y_pred))

Shortest Path Accuracy : 1.0


In [47]:
# Use the random walk kernel and the pyramid match graph kernel to perform classification

gk = PyramidMatch(with_labels=False)
K_train = gk.fit_transform(G_train)
K_test  = gk.transform(G_test)

clf = SVC(kernel='precomputed', C=1) # Initialize SVM
clf.fit(K_train, y_train) # Train SVM
y_pred = clf.predict(K_test) # Predict

# Compute the classification accuracy, using the accuracy_score function of scikit-learn
print("Pyramid Match Accuracy :", accuracy_score(y_test, y_pred))

Pyramid Match Accuracy : 1.0


In [49]:
gk = RandomWalk()
K_train = gk.fit_transform(G_train)
K_test  = gk.transform(G_test)

clf = SVC(kernel='precomputed', C=1) # Initialize SVM
clf.fit(K_train, y_train) # Train SVM
y_pred = clf.predict(K_test) # Predict

print("Random Walk Accuracy :", accuracy_score(y_test, y_pred))

Random Walk Accuracy : 1.0


In [65]:
############## Question 3
# Classify the graphs of a real-world dataset using graph kernels

# Load the MUTAG dataset, using the fetch_dataset function of GraKeL
mutag = fetch_dataset("MUTAG", verbose=False)
G, y = mutag.data, mutag.target

# Split dataset into a training and a test set, using the train_test_split function of scikit-learn
G_train, G_test, y_train, y_test = train_test_split(G, y, test_size=0.1)

# Perform graph classification using different kernels and evaluate performance
def run(cname):
    
    if cname == "VertexHistogram":
        gk = VertexHistogram()
    elif cname == "ShortestPath":
        gk = ShortestPath(with_labels=False)
    elif cname == "PyramidMatch":
        gk = PyramidMatch(with_labels=False)
    elif cname == "WeisfeilerLehman":
        gk = WeisfeilerLehman(base_kernel=VertexHistogram)

    K_train = gk.fit_transform(G_train)
    K_test  = gk.transform(G_test)

    clf = SVC(kernel='precomputed', C=1) # Initialize SVM
    clf.fit(K_train, y_train) # Train SVM
    y_pred = clf.predict(K_test) # Predict

    return accuracy_score(y_test, y_pred)

In [66]:
names = ["VertexHistogram", "ShortestPath", "PyramidMatch", "WeisfeilerLehman"]

results = [run(cname) for cname in names]

In [75]:
print(sorted(zip(results,names)))

[(0.8421052631578947, 'ShortestPath'), (0.8947368421052632, 'PyramidMatch'), (0.8947368421052632, 'VertexHistogram'), (0.8947368421052632, 'WeisfeilerLehman')]
