# Example 6: Loading data in different ways

This notebook gives several examples of loading data into hcga, to show the flexibility available to the user.

We focus on loading data into the `GraphCollection` object, which is the object used by hcga.


In [1]:
import numpy as np
import networkx as nx
import pandas as pd
import scipy as sc

from hcga.graph import Graph, GraphCollection


## Loading networkx graphs

In [2]:
# Build the three example networkx graphs directly as a list.
graphs = [
    nx.karate_club_graph(),
    nx.davis_southern_women_graph(),
    nx.florentine_families_graph(),
]

# Keep the individual graphs available under their own names as well.
graph_1, graph_2, graph_3 = graphs

In [3]:
# Create a new collection and add every graph in the list with a single call.
gc = GraphCollection()
gc.add_graph_list(graphs)

In [4]:
# Summarise the collection: number of graphs, their ids, and the number of
# features attached to each node.
n_graphs = len(gc.graphs)
print('There are {} graphs'.format(n_graphs))

graph_ids = gc.get_graph_ids()
print('There graph ids are: {}'.format(graph_ids))

n_node_features = gc.get_n_node_features()
print('There are {} features per node'.format(n_node_features))

There are 3 graphs
There graph ids are: [0, 1, 2]
There are 0 features per node


In [5]:
# Let's add another graph: a single networkx graph is added to the existing
# collection with add_graph (rather than add_graph_list).
graph_4 = nx.les_miserables_graph()
gc.add_graph(graph_4)

In [6]:
# Summarise the collection after adding the extra graph.
n_graphs = len(gc.graphs)
print('There are {} graphs'.format(n_graphs))

n_node_features = gc.get_n_node_features()
print('There are {} features per node'.format(n_node_features))

There are 4 graphs
There are 0 features per node


## Loading networkx graphs with node features

In [7]:
# Build the three example networkx graphs as a list.
graphs = [
    nx.karate_club_graph(),
    nx.davis_southern_women_graph(),
    nx.florentine_families_graph(),
]

# node_features - one array per graph, in the same order as `graphs`.
# Each array has one row per node and two columns: an all-zeros feature and
# an all-ones feature. Built in a single pass instead of one copy-pasted line
# per graph, so adding a graph to `graphs` automatically gets its features.
node_features = [
    np.array([np.zeros(len(graph)), np.ones(len(graph))]).T
    for graph in graphs
]


In [8]:
# Create a new collection; the node features are passed as a second list,
# built above with one array per graph in the same order as `graphs`.
gc = GraphCollection()
gc.add_graph_list(graphs, node_features)

In [9]:
# Summarise the collection: graph count and per-node feature count.
n_graphs = len(gc.graphs)
print('There are {} graphs'.format(n_graphs))

n_node_features = gc.get_n_node_features()
print('There are {} features per node'.format(n_node_features))

There are 3 graphs
There are 2 features per node


In [10]:
# Add one more graph, together with its own node feature array.
graph_4 = nx.les_miserables_graph()

# Same two features as for the other graphs: all zeros and all ones.
zeros_feature = np.zeros(len(graph_4))
ones_feature = np.ones(len(graph_4))
node_features_graph_4 = np.array([zeros_feature, ones_feature]).T

gc.add_graph(graph_4, node_features_graph_4)

In [11]:
# Summarise the collection after adding the extra graph and its features.
n_graphs = len(gc.graphs)
print('There are {} graphs'.format(n_graphs))

n_node_features = gc.get_n_node_features()
print('There are {} features per node'.format(n_node_features))

There are 4 graphs
There are 2 features per node


## Loading graphs with labels

In [12]:
# Build the three example networkx graphs directly as a list.
graphs = [
    nx.karate_club_graph(),
    nx.davis_southern_women_graph(),
    nx.florentine_families_graph(),
]

# Keep the individual graphs available under their own names as well.
graph_1, graph_2, graph_3 = graphs

# One label per graph, in the same order as `graphs`.
labels = [0, 1, 1]


In [13]:
# Create a new collection; the labels are passed by keyword, and no node
# features are given here.
gc = GraphCollection()
gc.add_graph_list(graphs, graph_labels=labels)

In [14]:
# Summarise the collection: graph count and per-node feature count.
n_graphs = len(gc.graphs)
print('There are {} graphs'.format(n_graphs))

n_node_features = gc.get_n_node_features()
print('There are {} features per node'.format(n_node_features))

There are 3 graphs
There are 0 features per node


## Load numpy arrays 

In [15]:
# Create three random 10x10 binary numpy arrays to use as adjacency matrices.
# A seeded generator is used so that restarting the kernel and re-running the
# notebook reproduces exactly the same graphs.
rng = np.random.default_rng(0)
n_nodes = 10
graphs = [rng.integers(2, size=(n_nodes, n_nodes)) for _ in range(3)]

# NOTE(review): these random matrices are generally not symmetric; how hcga
# interprets them for the default graph type is not shown here - confirm.

# node_features - one array per graph, in the same order as `graphs`.
# Each array has one row per node and two columns: an all-zeros feature and
# an all-ones feature.
node_features = [
    np.array([np.zeros(len(graph)), np.ones(len(graph))]).T
    for graph in graphs
]

# define graph labels, one per graph
labels = [0, 1, 1]

In [16]:
# Create a new collection from the numpy arrays, passing node features
# positionally and graph labels by keyword.
gc = GraphCollection()
gc.add_graph_list(graphs, node_features, graph_labels=labels)

In [17]:
# Summarise the collection: graph count and per-node feature count.
n_graphs = len(gc.graphs)
print('There are {} graphs'.format(n_graphs))

n_node_features = gc.get_n_node_features()
print('There are {} features per node'.format(n_node_features))

There are 3 graphs
There are 2 features per node


## Load directed graphs 

In [18]:
# Create three random 10x10 binary numpy arrays to use as adjacency matrices.
# A seeded generator is used so that restarting the kernel and re-running the
# notebook reproduces exactly the same graphs.
rng = np.random.default_rng(0)
n_nodes = 10
graphs = [rng.integers(2, size=(n_nodes, n_nodes)) for _ in range(3)]

# node_features - one array per graph, in the same order as `graphs`.
# Each array has one row per node and two columns: an all-zeros feature and
# an all-ones feature.
node_features = [
    np.array([np.zeros(len(graph)), np.ones(len(graph))]).T
    for graph in graphs
]

# define graph labels, one per graph
labels = [0, 1, 1]

# Create a new collection and request directed graphs via graph_type.
gc = GraphCollection()
gc.add_graph_list(graphs, node_features, graph_labels=labels, graph_type='directed')

In [19]:
# Summarise the collection: graph count and per-node feature count.
n_graphs = len(gc.graphs)
print('There are {} graphs'.format(n_graphs))

n_node_features = gc.get_n_node_features()
print('There are {} features per node'.format(n_node_features))

There are 3 graphs
There are 2 features per node


In [20]:
# Check the type recorded on the first graph of the collection.
first_graph_type = gc.graphs[0].graph_type
print(' The graph type is: {}'.format(first_graph_type))

 The graph type is: directed


## Saving loaded dataset 

In [21]:
# We can save the collection to disk if we want to, and then run everything
# from the command line. The next section of this notebook loads it back from
# './datasets/custom_dataset/custom_dataset.pkl'.
from hcga.io import save_dataset
save_dataset(gc, 'custom_dataset', folder='./datasets/custom_dataset')


## Load saved dataset

In [22]:
# Load the collection back from the file written in the saving section.
# NOTE(review): the file has a .pkl extension, so it is presumably a pickle -
# only load dataset files that come from a source you trust.
from hcga.io import load_dataset
gc = load_dataset(filename='./datasets/custom_dataset/custom_dataset.pkl')
