In [1]:
import networkx as nx
import pandas as pd
import numpy as np

# Independent Variables: X = G = (V, E)

In [2]:
# Adjacency
# Build the graph from the comma-separated edge list; node ids are parsed as
# integers so they can be matched against integer-keyed attribute dicts.
path_to_files = "../datasets/MUTAG/"
G = nx.read_edgelist(
    path_to_files + "MUTAG_A.txt",
    delimiter=",",
    nodetype=int,
    encoding="utf-8",
)

In [3]:
# Component/Graph Indicators
# The indicator file is read as a single unnamed column (header=None), one value
# per row. Presumably row i holds the graph/component id of node i -- confirm
# against the dataset description.
components = pd.read_csv(path_to_files + "MUTAG_graph_indicator.txt", header=None)

# Shift the default 0-based row index to 1-based so the keys line up with the
# integer node ids stored in G.
components.index = components.index + 1

# {row number: value} taken straight from the only column.
components = components[0].to_dict()

nx.set_node_attributes(G=G, values=components, name='component')

In [4]:
# Node Labels
# Integer codes found in the label file are translated to short symbol strings
# (they look like chemical element symbols -- confirm against the dataset README).
mutag_atom_symbols = {
    0: "C",
    1: "N",
    2: "O",
    3: "F",
    4: "I",
    5: "Cl",
    6: "Br",
}

node_labels = pd.read_csv(path_to_files + "MUTAG_node_labels.txt", header=None)

# Re-key rows from 0-based to 1-based so they match the integer node ids in G.
node_labels.index = node_labels.index + 1

# {row number: symbol}; codes missing from the mapping become NaN.
node_labels = node_labels[0].map(mutag_atom_symbols).to_dict()

nx.set_node_attributes(G=G, values=node_labels, name='label')

In [5]:
# Edge Labels
# The edge list is read a second time as a table so each (src, dst) row can be
# paired with the label on the same row of the edge-label file.
edges = pd.read_csv(path_to_files + "MUTAG_A.txt", header=None)
edges = edges.rename(columns={0: "src", 1: "dst"})
edge_labels = pd.read_csv(path_to_files + "MUTAG_edge_labels.txt", header=None)

# Give both tables the same 1-based row index; the column assignment below
# aligns on this index.
edges.index = edges.index + 1
edge_labels.index = edge_labels.index + 1

edges['label'] = edge_labels[0]

# Key every label by its (src, dst) pair and render the numeric code as the
# string "e<code>".
edges = edges.set_index(['src', 'dst'])
edges = edges['label'].apply(lambda code: "e" + str(int(code)))

# `edges` is now a Series indexed by (src, dst); it is handed over as the
# edge -> value mapping.
nx.set_edge_attributes(G=G, values=edges, name='label')

In [6]:
# Give every edge of G a 'weight' attribute of 1.0. A scalar (non dict-like)
# `values` argument is applied uniformly to all edges.
nx.set_edge_attributes(G=G, values=1.0, name='weight')

# Dependent Variable - *y*

In [7]:
# Graph Labels (y)
# One value per row of the label file, read as a single unnamed column.
graphs = pd.read_csv(path_to_files + "MUTAG_graph_labels.txt", header=None)

# Rows are re-keyed from 0-based to 1-based (presumably so the keys match the
# graph/component ids stored on the nodes -- confirm against the indicator file).
graphs.index = graphs.index + 1

# {row number: label value}
graphs = graphs[0].to_dict()

# Using WalkAsString

In [8]:
from module import get_structural_signatures, walk_as_string

In [14]:
# Run the project helper on the attributed graph G. It returns three objects:
# a graph (`newGraph`) plus two further values bound to `pca` and `km`.
# NOTE(review): the names suggest a fitted PCA and a k-means model, and that
# `newGraph` carries a per-node 'structure' attribute -- confirm in `module`.
newGraph, pca, km = get_structural_signatures(networkXGraph=G)

In [15]:
# Generate string-encoded walks over `newGraph`.
# - graphComponentLabels: the {graph id: label} dict built from the graph-label file.
# - featuresToUse: which node / edge attributes are requested. 'label' is set on
#   nodes and edges by the earlier cells; 'structure' is presumably added by
#   get_structural_signatures -- TODO confirm.
walks = walk_as_string(
    networkXGraph=newGraph,
    graphComponentLabels=graphs,
    featuresToUse={
        "nodes": ['structure', 'label'],
        "edges": ['label'],
    },
)

Walk iteration:
('1', '/', '20')
('2', '/', '20')
('3', '/', '20')
('4', '/', '20')
('5', '/', '20')
('6', '/', '20')
('7', '/', '20')
('8', '/', '20')
('9', '/', '20')
('10', '/', '20')
('11', '/', '20')
('12', '/', '20')
('13', '/', '20')
('14', '/', '20')
('15', '/', '20')
('16', '/', '20')
('17', '/', '20')
('18', '/', '20')
('19', '/', '20')
('20', '/', '20')


In [23]:
# Quick sanity check: display the last rows of the generated walks table.
walks.tail()

Unnamed: 0,walk,label,start_node,component
67415,0 O e1 1 N 1 N e1 2 C 2 C e2 1 C 1 C e2 2 C 2 ...,-1,3371,188
67416,0 O e1 1 N 1 N e1 2 C 2 C e1 3 C 3 C e1 2 C 2 ...,-1,3371,188
67417,0 O e1 1 N 1 N e1 2 C 2 C e2 1 C 1 C e2 2 C 2 ...,-1,3371,188
67418,0 O e1 1 N 1 N e1 0 O 0 O e1 1 N 1 N e1 2 C 2 ...,-1,3371,188
67419,0 O e1 1 N 1 N e1 0 O 0 O e1 1 N 1 N e1 2 C 2 ...,-1,3371,188
