Notebook to create a large graph by merging multiple subgraphs based on shared nodes and new connections. 

In [3]:
from pathlib import Path
import numpy as np
import json
import networkx as nx


def find_node_with_attribute(graph, lat, lon):
    """Return the nodes of *graph* located at the given coordinates.

    Args:
        graph: Object whose ``nodes(data=True)`` yields ``(node_id, attributes)``
            pairs, e.g. a networkx graph.
        lat: Value compared for equality against each node's ``'latitude'``.
        lon: Value compared for equality against each node's ``'longitude'``.

    Returns:
        A list of the ``(node_id, attributes)`` entries whose latitude and
        longitude both equal the requested values, in iteration order.
        The list is empty when nothing matches.

    Raises:
        KeyError: If a visited node has no ``'latitude'`` or ``'longitude'``.
    """
    matches = []
    for entry in graph.nodes(data=True):
        attributes = entry[1]
        # Read both coordinates before comparing so that a node missing
        # either attribute is always reported, matched or not.
        node_lat = attributes['latitude']
        node_lon = attributes['longitude']
        if node_lat == lat and node_lon == lon:
            matches.append(entry)
    return matches

def _free_node_id(graph, node_id):
    """Return an id under which a new node can be added to *graph* without a clash.

    ``node_id`` is returned unchanged when no node of that id exists yet.
    Otherwise it is shifted by the current maximum id; if that shifted id is
    still taken, ``max + 1`` is used instead.
    """
    if not graph.has_node(node_id):
        return node_id
    shifted_id = max(graph.nodes) + node_id
    if graph.has_node(shifted_id):
        # Only reachable for node_id <= 0, where the shift does not move the
        # id past the current maximum.
        shifted_id = max(graph.nodes) + 1
    return shifted_id


# G1 accumulates the merged network. Nodes are identified across subgraphs by
# their (latitude, longitude) pair, not by id: ids can repeat between files.
G1 = nx.DiGraph()
processed = 0
# Sorted so that the merge order, and with it the resulting node ids, does not
# depend on the order in which the filesystem lists the directory.
for file in sorted(Path("./networks/subgraphs/").glob('*')):
    if not file.is_file():
        # Sub-directories cannot be opened as JSON documents.
        continue
    processed += 1
    print("Processing network {}".format(processed))

    # Context manager so the file handle is closed as soon as it is parsed.
    with open(file, encoding="utf-8") as subgraph_file:
        data = json.load(subgraph_file)

    edges = data.get("edges")
    nodes = data.get("nodes")

    # G2 is the subgraph stored in the current file.
    G2 = nx.DiGraph()
    for edge in edges:
        G2.add_edge(
            int(edge.get("source")),
            int(edge.get("target")),
            time=float(edge.get("time")),
            distance=float(edge.get("length")),
        )

    nodes_coordinates = {
        int(node.get('id')): {
            'latitude': node.get('latitude'),
            'longitude': node.get('longitude'),
        }
        for node in nodes
    }
    nx.set_node_attributes(G2, nodes_coordinates)

    for source2, source_attrs in G2.nodes(data=True):
        source_matches = find_node_with_attribute(
            G1, source_attrs['latitude'], source_attrs['longitude']
        )
        source_existed = len(source_matches) > 0

        if source_existed:
            # A node with these coordinates is already in G1: reuse its id.
            source1 = source_matches[0][0]
        else:
            # New location. Its id may still be taken by a different node in
            # G1, in which case the node is stored under a fresh id.
            source1 = _free_node_id(G1, source2)
            G1.add_node(source1, **source_attrs)

        # Copy the out-edges of the source, adding missing target nodes.
        for _, target2, edge_attrs in G2.edges(nbunch=source2, data=True):
            target_attrs = G2.nodes[target2]
            latitude = target_attrs['latitude']
            longitude = target_attrs['longitude']
            target_matches = find_node_with_attribute(G1, latitude, longitude)

            if len(target_matches) > 0:
                target1 = target_matches[0][0]
                # When both endpoints were already in G1 the edge may exist
                # too; in that case the attributes already stored are kept.
                if source_existed and G1.has_edge(source1, target1):
                    continue
            else:
                # Target location is new; avoid clashing with an existing id.
                target1 = _free_node_id(G1, target2)
                G1.add_node(target1, latitude=latitude, longitude=longitude)

            G1.add_edge(
                source1,
                target1,
                time=edge_attrs['time'],
                distance=edge_attrs['distance'],
            )
    

# Renumber the merged nodes consecutively from 1, following G1's node order.
node_list = G1.nodes
reindex_dict = {old_id: position for position, old_id in enumerate(node_list, start=1)}
new_ids = list(reindex_dict.values())

G1 = nx.relabel_nodes(G1, reindex_dict)

# Tag every edge source as "city" and every edge target as "recarea".
cities = {
    source: {"bipartite": "city"}
    for source in {u for u, _ in G1.edges}
}
recreation_areas = {
    target: {"bipartite": "recarea"}
    for target in {v for _, v in G1.edges}
}
# The right-hand mapping wins on duplicate keys, so a node that is both a
# source and a target ends up tagged "recarea".
bipartite = {**cities, **recreation_areas}

nx.set_node_attributes(G1, bipartite)

# Serialise the merged graph in Cytoscape JSON form, echo it, and save it.
data = nx.cytoscape_data(G1)
print(data)

output_path = Path('./networks/outdoor_recreation_network_USA.json')
with output_path.open('w', encoding='utf-8') as out_file:
    json.dump(data, out_file, ensure_ascii=False, indent=4)


  

Processing network 1
Processing network 2
Processing network 3
Processing network 4
Processing network 5
Processing network 6
Processing network 7
Processing network 8
Processing network 9
Processing network 10
Processing network 11
Processing network 12
Processing network 13
Processing network 14
Processing network 15
Processing network 16
Processing network 17
Processing network 18
Processing network 19
{'data': [], 'directed': True, 'multigraph': False, 'elements': {'nodes': [{'data': {'latitude': 39.0229714, 'longitude': -94.7145415, 'bipartite': 'city', 'id': '1', 'value': 1, 'name': '1'}}, {'data': {'latitude': 38.312105, 'longitude': -95.962768, 'bipartite': 'recarea', 'id': '2', 'value': 2, 'name': '2'}}, {'data': {'latitude': 38.92333, 'longitude': -95.33, 'bipartite': 'recarea', 'id': '3', 'value': 3, 'name': '3'}}, {'data': {'latitude': 38.654905, 'longitude': -94.900047, 'bipartite': 'recarea', 'id': '4', 'value': 4, 'name': '4'}}, {'data': {'latitude': 38.514529, 'longitud

In [79]:
# Scratch check of dict(zip(...)): pair each number 1..10 with its successor.
l = list(range(1, 11))
m = [value + 1 for value in l]
n = dict(zip(l, m))
print(n)

{1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10, 10: 11}
