In [4]:
import networkx as nx
from collections import defaultdict
import heapq as hp
import folium
from folium import plugins
import matplotlib.pyplot as plt 
import pandas as pd

In [37]:
# Undirected graph shared by the following cells: nodes and edges are added to it
# while the data files are read, and the query functions below look edges up in it.
G = nx.Graph()

In [38]:
# Read the coordinate file: every record that starts with 'v' describes one node
# as "<id> <first coordinate> <second coordinate>", stored as integers scaled by 1e6.
# NOTE(review): DIMACS .co records are presumably "v id x y" with x = longitude, so the
# attribute names below may be swapped; fun2_visual passes [longitude, latitude] to
# folium, which compensates. Confirm against the data before renaming either side.
with open(r'USA-road-d.CAL.co', 'r') as coord_file:
    for record in coord_file:
        if not record.startswith('v'):
            continue
        node_id, first_coord, second_coord = (int(token) for token in record[2:].split())
        G.add_node(node_id,
                   latitude=first_coord / 1000000,
                   longitude=second_coord / 1000000)

In [39]:
# Adjacency lookup built while reading the distance file: adj[a] is the set of nodes
# that share an edge with a, and the relation is stored in both directions
# (b in adj[a] implies a in adj[b]).
# Every 'a' record also becomes an edge of G carrying the value read from the file as
# `distance` and a constant `weight` of 1.

adj = defaultdict(set)
with open(r'USA-road-d.CAL.gr', 'r') as distance_file:
    for record in distance_file:
        if not record.startswith('a'):
            continue
        src, dst, dist = (int(token) for token in record[2:].split())
        G.add_edge(src, dst, distance=dist, weight=1)
        adj[src].add(dst)
        adj[dst].add(src)

In [40]:
# Read the travel-time file and store the value of every 'a' record as the `time`
# attribute of the corresponding edge (add_edge updates an edge that already exists
# and creates it otherwise). `adj` is not touched by this cell.
with open(r'USA-road-t.CAL.gr', 'r') as time_file:
    for record in time_file:
        if not record.startswith('a'):
            continue
        src, dst, travel_time = (int(token) for token in record[2:].split())
        G.add_edge(src, dst, time=travel_time)

## Functionality 2

In [442]:
def _find_root(parent, node):
    """Return the representative of the tree that currently contains ``node``.

    ``parent`` maps every known node to its parent; a root maps to itself.
    The path that was walked is compressed so that later look-ups are shorter.
    """
    root = node
    while parent[root] != root:
        root = parent[root]
    while parent[node] != root:
        parent[node], node = root, parent[node]
    return root


# n_trees is the value the tree counter starts from (1 for a normal call); the total
# that is printed is n_trees plus the number of additional trees that were built.
def function2(nodes, d, n_trees=1, graph=None, adjacency=None):
    """Build a minimum spanning forest over the edges that join the given nodes.

    Only edges whose two endpoints both belong to ``nodes`` are considered. Because
    the given nodes may fall into several groups that are not connected to each
    other, the result is a forest: one minimum spanning tree per connected group.
    Nodes that have no edge to any other given node are left out and reported.

    The forest is built with Kruskal's algorithm: the candidate edges are sorted by
    cost and an edge is kept whenever it joins two different trees. (Taking elements
    from the front of a ``heapq`` list, as an earlier version did, does not yield the
    edges in ascending order, because only index 0 of a heap is guaranteed minimal.)

    Parameters
    ----------
    nodes : iterable of hashable
        The nodes to connect.
    d : str
        Name of the edge attribute used as cost; the loading cells of this notebook
        define 'distance', 'time' and 'weight'.
    n_trees : int, optional
        Starting value of the tree counter; the printed total is
        ``n_trees + number_of_trees - 1``.
    graph : mapping, optional
        Object for which ``graph[a][b][d]`` is the cost of edge (a, b).
        Defaults to the notebook-level graph ``G``.
    adjacency : mapping, optional
        Maps a node to the set of its neighbours. Defaults to the notebook-level
        ``adj``. It is only read, never modified.

    Returns
    -------
    set of tuple or str
        The set of edges ``(a, b)`` of the forest, or the string
        ``"Nodes are not connected"`` when no edge joins two of the given nodes.

    Raises
    ------
    KeyError
        If a candidate edge does not carry the attribute ``d``.
    """
    if graph is None:
        graph = G
    if adjacency is None:
        adjacency = adj
    nodes = set(nodes)

    # Collect every edge between two given nodes exactly once: after node i has been
    # handled it is dropped from `remaining`, so the pair (i, j) is never seen again
    # as (j, i). Using .get() avoids inserting empty entries into a defaultdict.
    candidate_edges = []
    remaining = set(nodes)
    for i in nodes:
        remaining.discard(i)
        for j in remaining.intersection(adjacency.get(i, ())):
            candidate_edges.append((graph[i][j][d], (i, j)))

    if not candidate_edges:
        return "Nodes are not connected"

    # Sort on the cost only, so that node labels never have to be comparable.
    candidate_edges.sort(key=lambda item: item[0])

    parent = {}
    out = set()
    for _, (i, j) in candidate_edges:
        parent.setdefault(i, i)
        parent.setdefault(j, j)
        root_i = _find_root(parent, i)
        root_j = _find_root(parent, j)
        if root_i != root_j:
            # The edge joins two different trees, so it cannot close a cycle.
            parent[root_j] = root_i
            out.add((i, j))

    # Every node that appears in `parent` is an endpoint of at least one candidate
    # edge; all the others have no neighbour among the given nodes.
    connected = set(parent)
    loners = nodes - connected
    # A forest with V nodes and E edges consists of V - E trees.
    total_trees = n_trees + (len(connected) - len(out)) - 1

    if loners:
        print('In the set of nodes given there were {} unconnected nodes!\n'.format(len(loners)))
    print('Total number of trees built was {}!'.format(total_trees))
    return out


In [449]:
# Example query on a small hand-picked set of node ids. 'weight' is set to 1 on every
# edge when the distance file is loaded, so here every edge costs the same.
function2({1,2,4, 5, 6,3929,67,45}, 'weight')

In the set of nodes given there were 6 unconnected nodes!

Total number of trees built was 1!


{(5, 6)}

In [446]:
# If no cost attribute is given in the query, 'weight' is used, which is 1 on every edge.
def fun2_visual(subset, w='weight', max_nodes=1000, max_markers=500):
    """Draw on a folium map the forest that ``function2`` builds for ``subset``.

    Parameters
    ----------
    subset : set
        Node ids to connect; passed unchanged to ``function2``.
    w : str, optional
        Edge attribute used as cost. Lines are drawn in red for 'distance', green
        for 'time' and purple for 'weight'; for any other value no line is drawn.
    max_nodes : int, optional
        Largest input size that is visualized at all (default 1000).
    max_markers : int, optional
        Largest input size for which every input node, connected or not, gets a
        cyan marker (default 500). Above it only the nodes that belong to an edge
        of the forest are marked, and a notice is printed.

    Returns
    -------
    folium.Map or str
        The map, or an explanatory message when the input is too large or when
        no edge could be built between the given nodes.
    """
    if len(subset) > max_nodes:
        return 'Too many nodes given in input, visualization not possible!'

    try:
        edges = function2(subset, w)
        if isinstance(edges, str):
            return 'No connection found between points given in input'
        # Any node of any edge will do: it is only used to centre the map on one of the trees.
        map_loc = next(iter(edges))[0]
    except Exception:
        # Best effort, as before, but KeyboardInterrupt / SystemExit are no longer swallowed.
        return 'No connection found between points given in input'

    # NOTE(review): folium expects [latitude, longitude]. The node attributes are read
    # here in the order [longitude, latitude], presumably because the loading cell
    # stores the two coordinates under swapped names. Confirm before changing either.
    coor = G.nodes[map_loc]
    ourmap = folium.Map(location=[coor['longitude'], coor['latitude']], zoom_start=8)

    def add_marker(node, border_color):
        """Put a circle-dot marker for ``node`` on the map and return its position."""
        node_coor = G.nodes[node]
        position = (node_coor['longitude'], node_coor['latitude'])
        folium.Marker(location=list(position), popup=str(node),
                      icon=plugins.BeautifyIcon(icon_shape='circle-dot',
                                                border_color=border_color)).add_to(ourmap)
        return position

    # Unconnected points are shown too, but only for small inputs: folium does not cope
    # well with a large number of markers, and with many unconnected nodes the picture
    # would become confusing.
    if len(subset) <= max_markers:
        for node in subset:
            add_marker(node, 'cyan')
    else:
        print('Visualizing only connected nodes, given high number of unconnected nodes:')

    line_colour = {'distance': 'Red', 'time': 'Green', 'weight': 'Purple'}.get(w)

    # A node usually belongs to several edges; remember its position after the first
    # marker so that it is not marked again for every further edge.
    positions = {}
    for edge in edges:
        points = []
        for node in edge:
            if node not in positions:
                positions[node] = add_marker(node, 'Blue')
            points.append(positions[node])
        if line_colour is not None:
            folium.vector_layers.PolyLine(points, color=line_colour).add_to(ourmap)
    return ourmap


In [448]:
# Query 1000 consecutive node ids and draw the forest built on the `time` attribute.
subset = set(range(100000, 101000))
fun2_visual(subset, 'time')

Total number of trees built was 184!
In the set of nodes given there were 262 unconnected nodes!

Visualizing only connected nodes, given high number of unconnected nodes:


In [436]:
# Query 100 consecutive node ids and draw the forest built on the `distance` attribute.
subset = set(range(200000, 200100))
fun2_visual(subset, 'distance')

Total number of trees built was 16!
In the set of nodes given there were 36 unconnected nodes!

